BRL-CAD
lex.c
Go to the documentation of this file.
1 /* L E X . C
2  * BRL-CAD
3  *
4  * Copyright (c) 2004-2014 United States Government as represented by
5  * the U.S. Army Research Laboratory.
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public License
9  * version 2.1 as published by the Free Software Foundation.
10  *
11  * This library is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this file; see the file named COPYING for more
18  * information.
19  */
20 
21 #include "common.h"
22 
23 #include <stdio.h>
24 #include <ctype.h>
25 #include <string.h>
26 #include "bu/log.h"
27 #include "bu/malloc.h"
28 #include "bu/str.h"
29 
/*
 * Nonzero while lex_getone() is inside a C-style comment that has been
 * opened but not yet closed.  Being file-scope state, the flag survives
 * between calls, so a comment may continue into the next string scanned.
 */
30 static int bu_lex_reading_comment = 0;
31 
32 
33 HIDDEN char *
34 lex_getone(int *used, struct bu_vls *rtstr)
35 {
36  register char *cp;
37  register char *sp;
38  register char tc;
39  register char *unit;
40  int number;
41 
42  number = 1;
43  *used = 0;
44 
45  BU_CK_VLS(rtstr);
46  cp = bu_vls_addr(rtstr);
47  while (1) {
48  if (bu_lex_reading_comment) {
49  do {
50  /* looking at two consecutive chars */
51  tc = *cp; cp++;
52  if (!tc) {
53  return 0;
54  }
55  } while (!(tc == '*' && *cp == '/'));
56  cp++; /* skip the '/' */
57  bu_lex_reading_comment = 0;
58  }
59  /* skip leading blanks */
60  while (*cp && isspace((int)(*cp))) cp++;
61  /* is this a comment? '#' to end of line is */
62  if (!*cp || *cp == '#') {
63  return 0;
64 
65  }
66  /* is this a 'C' multi-line comment? */
67  if (*cp == '/' && *(cp+1) == '*') {
68  cp += 2;
69  bu_lex_reading_comment = 1;
70  } else /* we are done reading comments */ {
71  break;
72  }
73  }
74  /* cp points to the first non-blank character */
75  sp = cp; /* start pointer */
76  while (*cp) {
77 
78  tc = *cp; cp++;
79  /*
80  * Numbers come in the following forms
81  * [0-9]*
82  * [0-9]*.[0-9][0-9]*
83  * [0-9]*.[0-9][0-9]*{e|E}{+|-}[0-9][0-9]*
84  */
85  if (number) {
86  /*
87  * We have not seen anything to make this NOT
88  * a number.
89  */
90  if (isdigit((int)tc)) {
91  if (number == 5 || number == 6) number = 7;
92  if (number == 3) number = 4;
93  if (number == 1) number = 2;
94  continue;
95  }
96  if (number == 2 && tc == '.') {
97  /*
98  * [0-9][0-9]*.
99  */
100  number = 3;
101  continue;
102  }
103  if (number == 4 && (tc == 'e' || tc == 'E')) {
104  /*
105  * [0-9][0-9]*.[0-9][0-9]*{e|E}
106  */
107  number = 5;
108  continue;
109  }
110  if (number == 5 && (tc == '+' || tc == '-')) {
111  /*
112  * [0-9][0-9]*.[0-9][0-9]*{e|E}{+|-}
113  */
114  number = 6;
115  continue;
116  }
117  if (number == 3) break;
118  number = 0;
119  }
120  if (!isalnum((int)tc) && tc != '.' && tc != '_') break;
121  }
122  if (number == 6) --cp; /* subtract off the + or - */
123  if (number == 3) --cp; /* subtract off the . */
124  /*
125  * All spaces have been skipped. (sp)
126  * if we had NUMBER. or NUMBERe{+|-} that has be replaced (cp)
127  */
128  *used = cp - sp -1;
129  if (*used == 0)
130  *used = 1;
131 
132  unit = (char *)bu_malloc((size_t)(*used+1), "unit token");
133  bu_strlcpy(unit, sp, (size_t)(*used+1));
134  *used = sp-bu_vls_addr(rtstr) + *used;
135 
136  if (*used == 0)
137  *used = 1;
138 
139  return unit;
140 }
141 
142 
143 int
145  union bu_lex_token *token,
146  struct bu_vls *rtstr,
147  struct bu_lex_key *keywords,
148  struct bu_lex_key *symbols)
149 {
150  char *unit;
151  char *cp;
152  int used;
153 
154  /*
155  * get a unit of information from rtstr.
156  */
157  used = 0;
158  unit = lex_getone(&used, rtstr);
159 
160  /*
161  * Was line empty or commented out.
162  */
163  if (!unit) {
164  if (used) bu_bomb("bu_lex: Null unit, and something used.\n");
165  return BU_LEX_NEED_MORE;
166  }
167 
168  /*
169  * Decide if this unit is a symbol, number or identifier.
170  */
171  if (isdigit((int)(*unit))) {
172  /*
173  * Humm, this could be a number.
174  * octal -- 0[0-7]*
175  * hex -- 0x[0-9a-f]*
176  * dec -- [0-9][0-9]*
177  * dbl -- [0-9][0-9]*.[0-9]*{{E|e}{+|-}[0-9][0-9]*}
178  */
179  if (*unit == '0') {
180  /* any of the above */
181  /*
182  * octal -- 0[0-7]*
183  */
184  for (cp=unit; *cp && *cp>='0' && *cp <='7'; cp++);
185  if (!*cp) {
186  /* We have an octal value */
187  token->type = BU_LEX_INT;
188  sscanf(unit, "%o", (unsigned int *)&token->t_int.value);
189  bu_free(unit, "unit token");
190  return used;
191  }
192  /*
193  * if it is not an octal number, maybe it is
194  * a hex number?"
195  * hex -- 0x[0-9a-f]*
196  */
197  cp=unit+1;
198  if (*cp == 'x' || *cp == 'X') {
199  for (;*cp && isxdigit((int)(*cp));cp++);
200  if (!*cp) {
201  token->type = BU_LEX_INT;
202  sscanf(unit, "%x", (unsigned int *)&token->t_int.value);
203  bu_free(unit, "unit token");
204  return used;
205  }
206  }
207  }
208  /*
209  * This could be a decimal number, a double or an identifier.
210  * dec -- [0-9][0-9]*
211  */
212  for (cp=unit; *cp && isdigit((int)(*cp)); cp++);
213  if (!*cp) {
214  token->type = BU_LEX_INT;
215  sscanf(unit, "%d", &token->t_int.value);
216  bu_free(unit, "unit token");
217  return used;
218  }
219  /*
220  * if we are here, then this is either a double or
221  * an identifier.
222  * dbl -- [0-9][0-9]*.[0-9]*{{E|e}{+|-}[0-9][0-9]*}
223  *
224  * *cp should be a '.'
225  */
226  if (*cp == '.') {
227  for (cp++;*cp &&isdigit((int)(*cp));cp++);
228  if (*cp == 'e' || *cp == 'E') cp++;
229  if (*cp == '+' || *cp == '-') cp++;
230  for (;*cp &&isdigit((int)(*cp));cp++);
231  if (!*cp) {
232  token->type = BU_LEX_DOUBLE;
233  sscanf(unit, "%lg", &token->t_dbl.value);
234  bu_free(unit, "unit token");
235  return used;
236  }
237  }
238  /*
239  * Oh well, I guess it was not a number. That means it
240  * must be something else.
241  */
242  }
243  /*
244  * We either have an identifier, keyword, or symbol.
245  */
246  if (symbols) {
247  if (!*(unit+1)) {
248  /* single character, good choice for a symbol. */
249  register struct bu_lex_key *sp;
250  for (sp=symbols;sp->tok_val;sp++) {
251  if (*sp->string == *unit) {
252  token->type = BU_LEX_SYMBOL;
253  token->t_key.value = sp->tok_val;
254  bu_free(unit, "unit token");
255  return used;
256  }
257  }
258  }
259  }
260  if (keywords) {
261  register struct bu_lex_key *kp;
262  for (kp=keywords;kp->tok_val; kp++) {
263  if (BU_STR_EQUAL(kp->string, unit)) {
264  token->type = BU_LEX_KEYWORD;
265  token->t_key.value = kp->tok_val;
266  bu_free(unit, "unit token");
267  return used;
268  }
269  }
270  }
271  token->type = BU_LEX_IDENT;
272  token->t_id.value = unit;
273  return used;
274 }
275 
276 /*
277  * Local Variables:
278  * mode: C
279  * tab-width: 8
280  * indent-tabs-mode: t
281  * c-file-style: "stroustrup"
282  * End:
283  * ex: shiftwidth=4 tabstop=8
284  */
#define BU_LEX_DOUBLE
Definition: log.h:505
int tok_val
Definition: log.h:526
struct bu_lex_t_id t_id
Definition: log.h:523
struct bu_lex_t_int t_int
Definition: log.h:520
char * value
Definition: log.h:514
Header file for the BRL-CAD common definitions.
int value
Definition: log.h:508
#define HIDDEN
Definition: common.h:86
token
Definition: exists.c:65
void * bu_malloc(size_t siz, const char *str)
Definition: malloc.c:314
#define BU_CK_VLS(_vp)
Definition: vls.h:69
#define bu_strlcpy(dst, src, size)
Definition: str.h:60
#define BU_LEX_IDENT
Definition: log.h:516
char * bu_vls_addr(const struct bu_vls *vp)
Definition: vls.c:111
#define BU_LEX_KEYWORD
Definition: log.h:511
double value
Definition: log.h:503
#define BU_LEX_NEED_MORE
Definition: log.h:529
HIDDEN char * lex_getone(int *used, struct bu_vls *rtstr)
Definition: lex.c:34
int value
Definition: log.h:498
struct bu_lex_t_dbl t_dbl
Definition: log.h:521
void bu_free(void *ptr, const char *str)
Definition: malloc.c:328
int type
Definition: log.h:519
struct bu_lex_t_key t_key
Definition: log.h:522
Definition: vls.h:56
void bu_bomb(const char *str) _BU_ATTR_NORETURN
Definition: bomb.c:91
int bu_lex(union bu_lex_token *token, struct bu_vls *rtstr, struct bu_lex_key *keywords, struct bu_lex_key *symbols)
Definition: lex.c:144
#define BU_LEX_INT
Definition: log.h:500
#define BU_LEX_SYMBOL
Definition: log.h:510
char * string
Definition: log.h:527
#define BU_STR_EQUAL(s1, s2)
Definition: str.h:126