lex.c

Go to the documentation of this file.
00001 /*                           L E X . C
00002  * BRL-CAD
00003  *
00004  * Copyright (c) 2004-2006 United States Government as represented by
00005  * the U.S. Army Research Laboratory.
00006  *
00007  * This library is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public License
00009  * as published by the Free Software Foundation; either version 2 of
00010  * the License, or (at your option) any later version.
00011  *
00012  * This library is distributed in the hope that it will be useful, but
00013  * WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Library General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with this file; see the file named COPYING for more
00019  * information.
00020  */
00021 
00022 /** \addtogroup bu_log */
00023 /*@{*/
00024 /** @file lex.c
00025  *  @author
00026  *      Christopher T. Johnson
00027  *
00028  *  @par Source
00029  *      Geometric Solutions, Inc.
00030  */
00031 /*@}*/
00032 
00033 #ifndef lint
00034 static const char RCSid[] = "@(#)$Header: /cvsroot/brlcad/brlcad/src/libbu/lex.c,v 14.10 2006/08/31 23:16:38 lbutler Exp $ (ARL)";
00035 #endif
00036 
00037 #include "common.h"
00038 
00039 
00040 
00041 #include <stdio.h>
00042 #include <ctype.h>
00043 #include <string.h>
00044 #include "machine.h"
00045 #include "bu.h"
00046 
/*
 * Non-zero while the scanner is inside a C-style block comment whose
 * closing delimiter has not been seen yet.  The flag is file-scope so
 * that a comment opened in one buffer can be closed in a later one; as
 * a consequence the scanner keeps state between calls.
 */
static int bu_lex_reading_comment = 0;

/**
 *			B U _ L E X _ G E T O N E
 *
 * Extract the next lexical unit from the start of rtstr.
 *
 * Leading white space and C-style block comments are skipped.  A '#'
 * or the end of the buffer ends the scan with no unit produced.
 * Otherwise the unit is either
 *	- a run of alphanumerics, '.' and '_' (identifier like),
 *	- a number: digits, optional ".digits", optional exponent, or
 *	- a single delimiter character (anything else).
 *
 * @param used	set to the number of characters of rtstr covered by
 *		the skipped prefix plus the returned unit; 0 when no
 *		unit is returned.
 * @param rtstr	buffer to scan; it is not modified here.
 *
 * @return a freshly bu_malloc()ed, NUL terminated copy of the unit
 *	   (the caller must release it), or 0 when the buffer holds
 *	   nothing but white space / comments.
 */
static char *
bu_lex_getone(int *used, struct bu_vls *rtstr)
{
	register char *cp;
	register char *sp;
	register char *unit;
	int	number;		/* state of the number recognizer, 0 == not a number */
	int	hit_delim;	/* scan consumed one character past the unit */
	int	len;

	number = 1;
	hit_delim = 0;
	*used = 0;

	BU_CK_VLS(rtstr);
	cp = bu_vls_addr(rtstr);
top:
	if (bu_lex_reading_comment) {
		/* Discard everything up to and including the comment close. */
		for(;;) {
			register char tc;
			tc = *cp; cp++;
			if (!tc) {
				/* Still inside the comment; flag stays set. */
				return 0;
			}
			if (tc != '*') continue;
			if (*cp != '/') continue;
			cp++;	/* Skip the '/' */
			break;
		}
		bu_lex_reading_comment = 0;
	}

	/*
	 * Skip leading blanks.  ctype arguments must be representable
	 * as unsigned char, hence the casts.
	 */
	for (; *cp && isspace((unsigned char)*cp); cp++);
	/*
	 * Nothing left, or a '#' comment that runs to the end of the line.
	 */
	if (!*cp || *cp == '#') {
		return 0;
	}
	/*
	 * Start of a C-style block comment?
	 */
	if (*cp == '/' && *(cp+1)=='*') {
		cp += 2;
		bu_lex_reading_comment = 1;
		goto top;
	}
	/*
	 * cp points to the first non-blank character.
	 */
	sp = cp;		/* start pointer */
	while (*cp) {
		register char tc;

		tc = *cp; cp++;
		/*
		 * Numbers come in the following forms
		 *	[0-9]*
		 *	[0-9]*.[0-9][0-9]*
		 *	[0-9]*.[0-9][0-9]*{e|E}{+|-}[0-9][0-9]*
		 *
		 * States: 1 start, 2 integer digits, 3 after '.',
		 * 4 fraction digits, 5 after e/E, 6 after the exponent
		 * sign, 7 exponent digits.
		 */
		if (number) {
			/*
			 * We have not seen anything to make this NOT
			 * a number.
			 */
			if (isdigit((unsigned char)tc)) {
				if (number == 5 || number == 6) number = 7;
				if (number == 3) number = 4;
				if (number == 1) number = 2;
				continue;
			}
			if (number == 2 && tc == '.') {
				number = 3;
				continue;
			}
			if (number == 4 && (tc == 'e' || tc == 'E')) {
				number = 5;
				continue;
			}
			if (number == 5 && (tc == '+' || tc == '-')) {
				number = 6;
				continue;
			}
			if (number == 3) {
				/* "digits." followed by a non-digit: stop here. */
				hit_delim = 1;
				break;
			}
			number = 0;
		}
		if (!isalnum((unsigned char)tc) && tc != '.' && tc != '_') {
			hit_delim = 1;
			break;
		}
	}

	/*
	 * Work out how many characters belong to the unit.  When the
	 * scan stopped on a delimiter, cp is one past that delimiter and
	 * it must not be counted.  When the scan simply ran into the end
	 * of the buffer nothing extra was consumed, so nothing may be
	 * subtracted (doing so would drop the unit's last character).
	 */
	len = (int)(cp - sp);
	if (hit_delim) len--;
	/* A dangling '.' or exponent sign is not part of the number. */
	if (number == 3 || number == 6) len--;
	/* The very first character was itself a delimiter: return it alone. */
	if (len == 0) len = 1;

	unit = (char *)bu_malloc(len+1, "unit token");
	memcpy(unit, sp, len);
	unit[len] = '\0';

	/* Skipped prefix plus the unit itself; always at least 1 here. */
	*used = (int)(sp - bu_vls_addr(rtstr)) + len;
	return unit;
}
00165 
00166 /**
00167  *                      B U _ L E X
00168  */
00169 int
00170 bu_lex(
00171         union bu_lex_token *token,
00172         struct bu_vls *rtstr,
00173         struct bu_lex_key *keywords,
00174         struct bu_lex_key *symbols)
00175 {
00176         char *unit;
00177         char *cp;
00178         int used;
00179 
00180         /*
00181          * get a unit of information from rtstr.
00182          */
00183         used = 0;
00184         unit = bu_lex_getone(&used, rtstr);
00185 
00186         /*
00187          * Was line empty or commented out.
00188          */
00189         if (!unit) {
00190                 if (used) bu_bomb("bu_lex: Null unit, and something used.\n");
00191                 return BU_LEX_NEED_MORE;
00192         }
00193 
00194         /*
00195          * Decide if this unit is a symbol, number or identifier.
00196          */
00197         if (isdigit(*unit)) {
00198                 /*
00199                  * Humm, this could be a number.
00200                  *      octal -- 0[0-7]*
00201                  *      hex   -- 0x[0-9a-f]*
00202                  *      dec   -- [0-9][0-9]*
00203                  *      dbl   -- [0-9][0-9]*.[0-9]*{{E|e}{+|-}[0-9][0-9]*}
00204                  */
00205                 if (*unit == '0') {     /* any of the above */
00206                         /*
00207                          *      octal -- 0[0-7]*
00208                          */
00209                         for (cp=unit; *cp && *cp>='0' && *cp <='7'; cp++);
00210                         if (!*cp) {     /* We have an octal value */
00211                                 token->type = BU_LEX_INT;
00212                                 sscanf(unit,"%o", (unsigned int *)&token->t_int.value);
00213                                 bu_free(unit,"unit token");
00214                                 return used;
00215                         }
00216                         /*
00217                          * if it is not an octal number, maybe it is
00218                          * a hex number?"
00219                          *      hex   -- 0x[0-9a-f]*
00220                          */
00221                         cp=unit+1;
00222                         if (*cp == 'x' || *cp == 'X') {
00223                                 for(;*cp && isxdigit(*cp);cp++);
00224                                 if (!*cp) {
00225                                         token->type = BU_LEX_INT;
00226                                         sscanf(unit,"%x",(unsigned int *)&token->t_int.value);
00227                                         bu_free(unit, "unit token");
00228                                         return used;
00229                                 }
00230                         }
00231                 }
00232                 /*
00233                  * This could be a decimal number, a double or an identifier.
00234                  *      dec   -- [0-9][0-9]*
00235                  */
00236                 for (cp=unit; *cp && isdigit(*cp); cp++);
00237                 if (!*cp) {
00238                         token->type = BU_LEX_INT;
00239                         sscanf(unit,"%d", &token->t_int.value);
00240                         bu_free(unit, "unit token");
00241                         return used;
00242                 }
00243                 /*
00244                  * if we are here, then this is either a double or
00245                  * an identifier.
00246                  *      dbl   -- [0-9][0-9]*.[0-9]*{{E|e}{+|-}[0-9][0-9]*}
00247                  *
00248                  * *cp should be a '.'
00249                  */
00250                 if (*cp == '.') {
00251                         for(cp++;*cp &&isdigit(*cp);cp++);
00252                         if (*cp == 'e' || *cp == 'E') cp++;
00253                         if (*cp == '+' || *cp == '-') cp++;
00254                         for(;*cp &&isdigit(*cp);cp++);
00255                         if (!*cp) {
00256                                 token->type = BU_LEX_DOUBLE;
00257                                 sscanf(unit, "%lg", &token->t_dbl.value);
00258                                 bu_free(unit, "unit token");
00259                                 return used;
00260                         }
00261                 }
00262                 /*
00263                  * Oh well, I guess it was not a number.  That means it
00264                  * must be something else.
00265                  */
00266         }
00267         /*
00268          * We either have an identifier, keyword, or symbol.
00269          */
00270         if (symbols) {
00271                 if (!*(unit+1) ) {      /* single character, good choice for a symbol. */
00272                         register struct bu_lex_key *sp;
00273                         for (sp=symbols;sp->tok_val;sp++) {
00274                                 if (*sp->string == *unit) {
00275                                         token->type = BU_LEX_SYMBOL;
00276                                         token->t_key.value = sp->tok_val;
00277                                         bu_free(unit, "unit token");
00278                                         return used;
00279                                 }
00280                         }
00281                 }
00282         }
00283         if (keywords) {
00284                 register struct bu_lex_key *kp;
00285                 for (kp=keywords;kp->tok_val; kp++) {
00286                         if (strcmp(kp->string, unit) == 0) {
00287                                 token->type = BU_LEX_KEYWORD;
00288                                 token->t_key.value = kp->tok_val;
00289                                 bu_free(unit, "unit token");
00290                                 return used;
00291                         }
00292                 }
00293         }
00294         token->type = BU_LEX_IDENT;
00295         token->t_id.value = unit;
00296         return used;
00297 }
00298 
00299 /*@}*/
00300 
00301 /*
00302  * Local Variables:
00303  * mode: C
00304  * tab-width: 8
00305  * c-basic-offset: 4
00306  * indent-tabs-mode: t
00307  * End:
00308  * ex: shiftwidth=4 tabstop=8
00309  */

Generated on Mon Sep 18 01:24:48 2006 for BRL-CAD by  doxygen 1.4.6