Main Page | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals

wslexer.c

Go to the documentation of this file.
00001 /* ==================================================================== 
00002  * The Kannel Software License, Version 1.0 
00003  * 
00004  * Copyright (c) 2001-2008 Kannel Group  
00005  * Copyright (c) 1998-2001 WapIT Ltd.   
00006  * All rights reserved. 
00007  * 
00008  * Redistribution and use in source and binary forms, with or without 
00009  * modification, are permitted provided that the following conditions 
00010  * are met: 
00011  * 
00012  * 1. Redistributions of source code must retain the above copyright 
00013  *    notice, this list of conditions and the following disclaimer. 
00014  * 
00015  * 2. Redistributions in binary form must reproduce the above copyright 
00016  *    notice, this list of conditions and the following disclaimer in 
00017  *    the documentation and/or other materials provided with the 
00018  *    distribution. 
00019  * 
00020  * 3. The end-user documentation included with the redistribution, 
00021  *    if any, must include the following acknowledgment: 
00022  *       "This product includes software developed by the 
00023  *        Kannel Group (http://www.kannel.org/)." 
00024  *    Alternately, this acknowledgment may appear in the software itself, 
00025  *    if and wherever such third-party acknowledgments normally appear. 
00026  * 
00027  * 4. The names "Kannel" and "Kannel Group" must not be used to 
00028  *    endorse or promote products derived from this software without 
00029  *    prior written permission. For written permission, please  
00030  *    contact org@kannel.org. 
00031  * 
00032  * 5. Products derived from this software may not be called "Kannel", 
00033  *    nor may "Kannel" appear in their name, without prior written 
00034  *    permission of the Kannel Group. 
00035  * 
00036  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 
00037  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
00038  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
00039  * DISCLAIMED.  IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS 
00040  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,  
00041  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  
00042  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR  
00043  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,  
00044  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE  
00045  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,  
00046  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00047  * ==================================================================== 
00048  * 
00049  * This software consists of voluntary contributions made by many 
00050  * individuals on behalf of the Kannel Group.  For more information on  
00051  * the Kannel Group, please see <http://www.kannel.org/>. 
00052  * 
00053  * Portions of this software are based upon software originally written at  
00054  * WapIT Ltd., Helsinki, Finland for the Kannel project.  
00055  */ 
00056 
00057 /*
00058  *
00059  * wslexer.c
00060  *
00061  * Author: Markku Rossi <mtr@iki.fi>
00062  *
00063  * Copyright (c) 1999-2000 WAPIT OY LTD.
00064  *       All rights reserved.
00065  *
00066  * Lexical analyzer.
00067  *
00068  */
00069 
00070 #include "wsint.h"
00071 #include "wsstree.h"
00072 #include "wsgram.h"
00073 
00074 /********************* Types and definitions ****************************/
00075 
00076 /* A predicate to check whether the character `ch' is a decimal
00077    digit ('0'..'9').  The argument `ch' is evaluated twice. */
00078 #define WS_IS_DECIMAL_DIGIT(ch) ('0' <= (ch) && (ch) <= '9')
00079 
00080 /* Convert the decimal digit `ch' to an integer number; `ch' is not validated. */
00081 #define WS_DECIMAL_TO_INT(ch) ((ch) - '0')
00082 
00083 /* A predicate to check whether the character `ch' is a non-zero
00084    decimal digit ('1'..'9').  The argument `ch' is evaluated twice. */
00085 #define WS_IS_NON_ZERO_DIGIT(ch) ('1' <= (ch) && (ch) <= '9')
00086 
00087 /* A predicate to check whether the character `ch' is an octal digit ('0'..'7'). */
00088 #define WS_IS_OCTAL_DIGIT(ch) ('0' <= (ch) && (ch) <= '7')
00089 
00090 /* Convert the octal digit `ch' to an integer number; `ch' is not validated. */
00091 #define WS_OCTAL_TO_INT(ch) ((ch) - '0')
00092 
00093 /* A predicate to check whether `ch' is a hex digit ('0'..'9', 'a'..'f', 'A'..'F'). */
00094 #define WS_IS_HEX_DIGIT(ch) (('0' <= (ch) && (ch) <= '9')   \
00095                              || ('a' <= (ch) && (ch) <= 'f')    \
00096                              || ('A' <= (ch) && (ch) <= 'F'))
00097 
00098 /* Convert the hex digit `ch' to an integer number; `ch' is not validated. */
00099 #define WS_HEX_TO_INT(ch)       \
00100     ('0' <= (ch) && (ch) <= '9'     \
00101      ? ((ch) - '0')         \
00102      : ('a' <= (ch) && (ch) <= 'f'  \
00103        ? ((ch) - 'a' + 10)      \
00104        : (ch) - 'A' + 10))
00105 
00106 /* A predicate to check whether the character `ch' is an identifier
00107    starter letter: 'a'..'z', 'A'..'Z' or '_'. */
00108 #define WS_IS_IDENTIFIER_LETTER(ch) \
00109     (('a' <= (ch) && (ch) <= 'z')       \
00110      || ('A' <= (ch) && (ch) <= 'Z')    \
00111      || (ch) == '_')
00112 
00113 /********************* Prototypes for static functions ******************/
00114 
00115 /* Check whether the identifier `id', `len' is a keyword.  If the
00116    identifier is a keyword, the function returns WS_TRUE and sets the
00117    keywords token ID to `token_return'.  Otherwise the function
00118    returns WS_FALSE. */
00119 static WsBool lookup_keyword(char *id, size_t len, int *token_return);
00120 
00121 /* Convert literal integer number, stored to the buffer `buffer', into
00122    a 32 bit integer number.  The function will report possible integer
00123    overflows to the compiler `compiler'.  The function modifies the
00124    contents of the buffer `buffer' but it does not free it. */
00125 static WsUInt32 buffer_to_int(WsCompilerPtr compiler, WsBuffer *buffer);
00126 
00127 /* Read a floating point number from the decimal point to the buffer
00128    `buffer'.  The buffer `buffer' might already contain some leading
00129    digits of the number and it always contains the decimal point.  If
00130    the operation is successful, the function returns WS_TRUE and it
00131    returns the resulting floating point number in `result'.  Otherwise
00132    the function returns WS_FALSE.  The buffer `buffer' must be
00133    initialized before this function is called and it must be
00134    uninitialized by the caller. */
00135 static WsBool read_float_from_point(WsCompiler *compiler, WsBuffer *buffer,
00136                                     WsFloat *result);
00137 
00138 /* Read a floating point number from the exponent part to the buffer
00139    `buffer'.  The buffer might already contain some leading digits and
00140    fields of the floating point number.  Otherwise, the function works
00141    like read_float_from_point(). */
00142 static WsBool read_float_from_exp(WsCompiler *compiler, WsBuffer *buffer,
00143                                   WsFloat *result);
00144 
00145 /********************* Static variables *********************************/
00146 
00147 /* A helper macro which expands to a string and its length excluding
00148    the trailing '\0' character (two comma-separated initializers). */
00149 #define N(n) n, sizeof(n) - 1
00150 
00151 /* The keywords of the WMLScript language.  This array must be sorted
00152    by the keyword names; presumably lookup_keyword() relies on the order. */
00153 static struct
00154 {
00155     char *name;         /* keyword text, given as a string literal */
00156     size_t name_len;    /* length of `name' without the trailing '\0' */
00157     int token;          /* token ID of the keyword (a tXXX constant) */
00158 } keywords[] = {
00159         {N("access"), tACCESS},
00160         {N("agent"), tAGENT},
00161         {N("break"), tBREAK},
00162         {N("case"), tCASE},
00163         {N("catch"), tCATCH},
00164         {N("class"), tCLASS},
00165         {N("const"), tCONST},
00166         {N("continue"), tCONTINUE},
00167         {N("debugger"), tDEBUGGER},
00168         {N("default"), tDEFAULT},
00169         {N("delete"), tDELETE},
00170         {N("div"), tIDIV},
00171         {N("do"), tDO},
00172         {N("domain"), tDOMAIN},
00173         {N("else"), tELSE},
00174         {N("enum"), tENUM},
00175         {N("equiv"), tEQUIV},
00176         {N("export"), tEXPORT},
00177         {N("extends"), tEXTENDS},
00178         {N("extern"), tEXTERN},
00179         {N("false"), tFALSE},
00180         {N("finally"), tFINALLY},
00181         {N("for"), tFOR},
00182         {N("function"), tFUNCTION},
00183         {N("header"), tHEADER},
00184         {N("http"), tHTTP},
00185         {N("if"), tIF},
00186         {N("import"), tIMPORT},
00187         {N("in"), tIN},
00188         {N("invalid"), tINVALID},
00189         {N("isvalid"), tISVALID},
00190         {N("lib"), tLIB},
00191         {N("meta"), tMETA},
00192         {N("name"), tNAME},
00193         {N("new"), tNEW},
00194         {N("null"), tNULL},
00195         {N("path"), tPATH},
00196         {N("private"), tPRIVATE},
00197         {N("public"), tPUBLIC},
00198         {N("return"), tRETURN},
00199         {N("sizeof"), tSIZEOF},
00200         {N("struct"), tSTRUCT},
00201         {N("super"), tSUPER},
00202         {N("switch"), tSWITCH},
00203         {N("this"), tTHIS},
00204         {N("throw"), tTHROW},
00205         {N("true"), tTRUE},
00206         {N("try"), tTRY},
00207         {N("typeof"), tTYPEOF},
00208         {N("url"), tURL},
00209         {N("use"), tUSE},
00210         {N("user"), tUSER},
00211         {N("var"), tVAR},
00212         {N("void"), tVOID},
00213         {N("while"), tWHILE},
00214         {N("with"), tWITH},
00215 };
00216 /* The number of entries in the `keywords' array. */
00217 static int num_keywords = sizeof(keywords) / sizeof(keywords[0]);
00218 
00219 /********************* Global functions *********************************/
00220 
00221 int ws_yy_lex(YYSTYPE *yylval, YYLTYPE *yylloc, void *context)
00222 {
00223     WsCompiler *compiler = (WsCompiler *) context;
00224     WsUInt32 ch, ch2;
00225     WsBuffer buffer;
00226     unsigned char *p;
00227     WsBool success;
00228 
00229     /* Just check that we get the correct amount of arguments. */
00230     gw_assert(compiler->magic == COMPILER_MAGIC);
00231 
00232     while (ws_stream_getc(compiler->input, &ch)) {
00233         /* Save the token's line number. */
00234         yylloc->first_line = compiler->linenum;
00235 
00236         switch (ch) {
00237         case '\t':      /* Whitespace characters. */
00238         case '\v':
00239         case '\f':
00240         case ' ':
00241             continue;
00242 
00243         case '\n':      /* Line terminators. */
00244         case '\r':
00245             if (ch == '\r' && ws_stream_getc(compiler->input, &ch2)) {
00246                 if (ch2 != '\n')
00247                     ws_stream_ungetc(compiler->input, ch2);
00248             }
00249             compiler->linenum++;
00250             continue;
00251 
00252         case '!':       /* !, != */
00253             if (ws_stream_getc(compiler->input, &ch2)) {
00254                 if (ch2 == '=')
00255                     return tNE;
00256 
00257                 ws_stream_ungetc(compiler->input, ch2);
00258             }
00259             return '!';
00260 
00261         case '%':       /* %, %= */
00262             if (ws_stream_getc(compiler->input, &ch2)) {
00263                 if (ch2 == '=')
00264                     return tREMA;
00265 
00266                 ws_stream_ungetc(compiler->input, ch2);
00267             }
00268             return '%';
00269 
00270         case '&':       /* &, &&, &= */
00271             if (ws_stream_getc(compiler->input, &ch2)) {
00272                 if (ch2 == '&')
00273                     return tAND;
00274                 if (ch2 == '=')
00275                     return tANDA;
00276 
00277                 ws_stream_ungetc(compiler->input, ch2);
00278             }
00279             return '&';
00280 
00281         case '*':       /* *, *= */
00282             if (ws_stream_getc(compiler->input, &ch2)) {
00283                 if (ch2 == '=')
00284                     return tMULA;
00285 
00286                 ws_stream_ungetc(compiler->input, ch2);
00287             }
00288             return '*';
00289 
00290         case '+':       /* +, ++, += */
00291             if (ws_stream_getc(compiler->input, &ch2)) {
00292                 if (ch2 == '+')
00293                     return tPLUSPLUS;
00294                 if (ch2 == '=')
00295                     return tADDA;
00296 
00297                 ws_stream_ungetc(compiler->input, ch2);
00298             }
00299             return '+';
00300 
00301         case '-':       /* -, --, -= */
00302             if (ws_stream_getc(compiler->input, &ch2)) {
00303                 if (ch2 == '-')
00304                     return tMINUSMINUS;
00305                 if (ch2 == '=')
00306                     return tSUBA;
00307 
00308                 ws_stream_ungetc(compiler->input, ch2);
00309             }
00310             return '-';
00311 
00312         case '.':
00313             if (ws_stream_getc(compiler->input, &ch2)) {
00314                 if (WS_IS_DECIMAL_DIGIT(ch2)) {
00315                     /* DecimalFloatLiteral. */
00316                     ws_buffer_init(&buffer);
00317 
00318                     if (!ws_buffer_append_space(&buffer, &p, 2)) {
00319                         ws_error_memory(compiler);
00320                         ws_buffer_uninit(&buffer);
00321                         return EOF;
00322                     }
00323 
00324                     p[0] = '.';
00325                     p[1] = (unsigned char) ch2;
00326 
00327                     success = read_float_from_point(compiler, &buffer,
00328                                                     &yylval->vfloat);
00329                     ws_buffer_uninit(&buffer);
00330 
00331                     if (!success)
00332                         return EOF;
00333 
00334                     return tFLOAT;
00335                 }
00336 
00337                 ws_stream_ungetc(compiler->input, ch2);
00338             }
00339             return '.';
00340 
00341         case '/':       /* /, /=, block or a single line comment */
00342             if (ws_stream_getc(compiler->input, &ch2)) {
00343                 if (ch2 == '*') {
00344                     /* Block comment. */
00345                     while (1) {
00346                         if (!ws_stream_getc(compiler->input, &ch)) {
00347                             ws_src_error(compiler, 0, "EOF in comment");
00348                             return EOF;
00349                         }
00350 
00351                         if (ch == '\n' || ch == '\r') {
00352                             /* Line terminators. */
00353                             if (ch == '\r' && ws_stream_getc(compiler->input,
00354                                                              &ch2)) {
00355                                 if (ch2 != '\n')
00356                                     ws_stream_ungetc(compiler->input, ch2);
00357                             }
00358                             compiler->linenum++;
00359 
00360                             /* Continue reading the block comment. */
00361                             continue;
00362                         }
00363 
00364                         if (ch == '*' && ws_stream_getc(compiler->input, &ch2)) {
00365                             if (ch2 == '/')
00366                                 /* The end of the comment found. */
00367                                 break;
00368                             ws_stream_ungetc(compiler->input, ch2);
00369                         }
00370                     }
00371                     /* Continue after the comment. */
00372                     continue;
00373                 }
00374                 if (ch2 == '/') {
00375                     /* Single line comment. */
00376                     while (1) {
00377                         if (!ws_stream_getc(compiler->input, &ch))
00378                             /* The end of input stream reached.  We accept
00379                                this as a valid comment terminator. */
00380                             break;
00381 
00382                         if (ch == '\n' || ch == '\r') {
00383                             /* Line terminators. */
00384                             if (ch == '\r' && ws_stream_getc(compiler->input,
00385                                                              &ch2)) {
00386                                 if (ch2 != '\n')
00387                                     ws_stream_ungetc(compiler->input, ch2);
00388                             }
00389                             /* The end of the line (and the comment)
00390                                                     reached. */
00391                             compiler->linenum++;
00392                             break;
00393                         }
00394                     }
00395                     /* Continue after the comment. */
00396                     continue;
00397                 }
00398                 if (ch2 == '=')
00399                     return tDIVA;
00400 
00401                 ws_stream_ungetc(compiler->input, ch2);
00402             }
00403             return '/';
00404 
00405         case '<':       /* <, <<, <<=, <= */
00406             if (ws_stream_getc(compiler->input, &ch2)) {
00407                 if (ch2 == '<') {
00408                     if (ws_stream_getc(compiler->input, &ch2)) {
00409                         if (ch2 == '=')
00410                             return tLSHIFTA;
00411 
00412                         ws_stream_ungetc(compiler->input, ch2);
00413                     }
00414                     return tLSHIFT;
00415                 }
00416                 if (ch2 == '=')
00417                     return tLE;
00418 
00419                 ws_stream_ungetc(compiler->input, ch2);
00420             }
00421             return '<';
00422 
00423         case '=':       /* =, == */
00424             if (ws_stream_getc(compiler->input, &ch2)) {
00425                 if (ch2 == '=')
00426                     return tEQ;
00427 
00428                 ws_stream_ungetc(compiler->input, ch2);
00429             }
00430             return '=';
00431 
00432         case '>':       /* >, >=, >>, >>=, >>>, >>>= */
00433             if (ws_stream_getc(compiler->input, &ch2)) {
00434                 if (ch2 == '>') {
00435                     if (ws_stream_getc(compiler->input, &ch2)) {
00436                         if (ch2 == '>') {
00437                             if (ws_stream_getc(compiler->input, &ch2)) {
00438                                 if (ch2 == '=')
00439                                     return tRSZSHIFTA;
00440 
00441                                 ws_stream_ungetc(compiler->input, ch2);
00442                             }
00443                             return tRSZSHIFT;
00444                         }
00445                         if (ch2 == '=')
00446                             return tRSSHIFTA;
00447 
00448                         ws_stream_ungetc(compiler->input, ch2);
00449                     }
00450                     return tRSSHIFT;
00451                 }
00452                 if (ch2 == '=')
00453                     return tGE;
00454 
00455                 ws_stream_ungetc(compiler->input, ch2);
00456             }
00457             return '>';
00458 
00459         case '^':       /* ^, ^= */
00460             if (ws_stream_getc(compiler->input, &ch2)) {
00461                 if (ch2 == '=')
00462                     return tXORA;
00463 
00464                 ws_stream_ungetc(compiler->input, ch2);
00465             }
00466             return '^';
00467 
00468         case '|':       /* |, |=, || */
00469             if (ws_stream_getc(compiler->input, &ch2)) {
00470                 if (ch2 == '=')
00471                     return tORA;
00472                 if (ch2 == '|')
00473                     return tOR;
00474 
00475                 ws_stream_ungetc(compiler->input, ch2);
00476             }
00477             return '|';
00478 
00479         case '#':       /* The simple cases. */
00480         case '(':
00481         case ')':
00482         case ',':
00483         case ':':
00484         case ';':
00485         case '?':
00486         case '{':
00487         case '}':
00488         case '~':
00489             return (int) ch;
00490 
00491         case '\'':      /* String literals. */
00492         case '"':
00493             {
00494                 WsUInt32 string_end_ch = ch;
00495                 WsUtf8String *str = ws_utf8_alloc();
00496 
00497                 if (str == NULL) {
00498                     ws_error_memory(compiler);
00499                     return EOF;
00500                 }
00501 
00502                 while (1) {
00503                     if (!ws_stream_getc(compiler->input, &ch)) {
00504 eof_in_string_literal:
00505                         ws_src_error(compiler, 0, "EOF in string literal");
00506                         ws_utf8_free(str);
00507                         return EOF;
00508                     }
00509                     if (ch == string_end_ch)
00510                         /* The end of string reached. */
00511                         break;
00512 
00513                     if (ch == '\\') {
00514                         /* An escape sequence. */
00515                         if (!ws_stream_getc(compiler->input, &ch))
00516                             goto eof_in_string_literal;
00517 
00518                         switch (ch) {
00519                         case '\'':
00520                         case '"':
00521                         case '\\':
00522                         case '/':
00523                             /* The character as-is. */
00524                             break;
00525 
00526                         case 'b':
00527                             ch = '\b';
00528                             break;
00529 
00530                         case 'f':
00531                             ch = '\f';
00532                             break;
00533 
00534                         case 'n':
00535                             ch = '\n';
00536                             break;
00537 
00538                         case 'r':
00539                             ch = '\r';
00540                             break;
00541 
00542                         case 't':
00543                             ch = '\t';
00544                             break;
00545 
00546                         case 'x':
00547                         case 'u':
00548                             {
00549                                 int i, len;
00550                                 int type = ch;
00551 
00552                                 if (ch == 'x')
00553                                     len = 2;
00554                                 else
00555                                     len = 4;
00556 
00557                                 ch = 0;
00558                                 for (i = 0; i < len; i++) {
00559                                     if (!ws_stream_getc(compiler->input, &ch2))
00560                                         goto eof_in_string_literal;
00561                                     if (!WS_IS_HEX_DIGIT(ch2)) {
00562                                         ws_src_error(compiler, 0,
00563                                                      "malformed `\\%c' escape in "
00564                                                      "string literal", (char) type);
00565                                         ch = 0;
00566                                         break;
00567                                     }
00568                                     ch *= 16;
00569                                     ch += WS_HEX_TO_INT(ch2);
00570                                 }
00571                             }
00572                             break;
00573 
00574                         default:
00575                             if (WS_IS_OCTAL_DIGIT(ch)) {
00576                                 int i;
00577                                 int limit = 3;
00578 
00579                                 ch = WS_OCTAL_TO_INT(ch);
00580                                 if (ch > 3)
00581                                     limit = 2;
00582 
00583                                 for (i = 1; i < limit; i++) {
00584                                     if (!ws_stream_getc(compiler->input, &ch2))
00585                                         goto eof_in_string_literal;
00586                                     if (!WS_IS_OCTAL_DIGIT(ch2)) {
00587                                         ws_stream_ungetc(compiler->input, ch2);
00588                                         break;
00589                                     }
00590 
00591                                     ch *= 8;
00592                                     ch += WS_OCTAL_TO_INT(ch2);
00593                                 }
00594                             } else {
00595                                 ws_src_error(compiler, 0,
00596                                              "unknown escape sequence `\\%c' in "
00597                                              "string literal", (char) ch);
00598                                 ch = 0;
00599                             }
00600                             break;
00601                         }
00602                         /* FALLTHROUGH */
00603                     }
00604 
00605                     if (!ws_utf8_append_char(str, ch)) {
00606                         ws_error_memory(compiler);
00607                         ws_utf8_free(str);
00608                         return EOF;
00609                     }
00610                 }
00611 
00612                 if (!ws_lexer_register_utf8(compiler, str)) {
00613                     ws_error_memory(compiler);
00614                     ws_utf8_free(str);
00615                     return EOF;
00616                 }
00617 
00618                 gw_assert(str != NULL);
00619                 yylval->string = str;
00620 
00621                 return tSTRING;
00622             }
00623             break;
00624 
00625         default:
00626             /* Identifiers, keywords and number constants. */
00627 
00628             if (WS_IS_IDENTIFIER_LETTER(ch)) {
00629                 WsBool got;
00630                 int token;
00631                 unsigned char *p;
00632                 unsigned char *np;
00633                 size_t len = 0;
00634 
00635                 /* An identifier or a keyword.  We start with a 256
00636                  * bytes long buffer but it is expanded dynamically if
00637                  * needed.  However, 256 should be enough for most
00638                  * cases since the byte-code format limits the function
00639                  * names to 255 characters. */
00640                 p = ws_malloc(256);
00641                 if (p == NULL) {
00642                     ws_error_memory(compiler);
00643                     return EOF;
00644                 }
00645 
00646                 do {
00647                     /* Add one extra for the possible terminator
00648                        character. */
00649                     np = ws_realloc(p, len + 2);
00650                     if (np == NULL) {
00651                         ws_error_memory(compiler);
00652                         ws_free(p);
00653                         return EOF;
00654                     }
00655 
00656                     p = np;
00657 
00658                     /* This is ok since the only valid identifier names
00659                      * can be written in 7 bit ASCII. */
00660                     p[len++] = (unsigned char) ch;
00661                 } while ((got = ws_stream_getc(compiler->input, &ch))
00662                          && (WS_IS_IDENTIFIER_LETTER(ch)
00663                              || WS_IS_DECIMAL_DIGIT(ch)));
00664 
00665                 if (got)
00666                     /* Put back the terminator character. */
00667                     ws_stream_ungetc(compiler->input, ch);
00668 
00669                 /* Is it a keyword? */
00670                 if (lookup_keyword((char *) p, len, &token)) {
00671                     /* Yes it is... */
00672                     ws_free(p);
00673 
00674                     /* ...except one case: `div='. */
00675                     if (token == tIDIV) {
00676                         if (ws_stream_getc(compiler->input, &ch)) {
00677                             if (ch == '=')
00678                                 return tIDIVA;
00679 
00680                             ws_stream_ungetc(compiler->input, ch);
00681                         }
00682                     }
00683 
00684                     /* Return the token value. */
00685                     return token;
00686                 }
00687 
00688                 /* It is a normal identifier.  Let's pad the name with a
00689                           null-character.  We have already allocated space for
00690                           it. */
00691                 p[len] = '\0';
00692 
00693                 if (!ws_lexer_register_block(compiler, p)) {
00694                     ws_error_memory(compiler);
00695                     ws_free(p);
00696                     return EOF;
00697                 }
00698 
00699                 gw_assert(p != NULL);
00700                 yylval->identifier = (char *) p;
00701 
00702                 return tIDENTIFIER;
00703             }
00704 
00705             if (WS_IS_NON_ZERO_DIGIT(ch)) {
00706                 /* A decimal integer literal or a decimal float
00707                           literal. */
00708 
00709                 ws_buffer_init(&buffer);
00710                 if (!ws_buffer_append_space(&buffer, &p, 1)) {
00711 number_error_memory:
00712                     ws_error_memory(compiler);
00713                     ws_buffer_uninit(&buffer);
00714                     return EOF;
00715                 }
00716                 p[0] = ch;
00717 
00718                 while (ws_stream_getc(compiler->input, &ch)) {
00719                     if (WS_IS_DECIMAL_DIGIT(ch)) {
00720                         if (!ws_buffer_append_space(&buffer, &p, 1))
00721                             goto number_error_memory;
00722                         p[0] = ch;
00723                     } else if (ch == '.' || ch == 'e' || ch == 'E') {
00724                         /* DecimalFloatLiteral. */
00725                         if (ch == '.') {
00726                             if (!ws_buffer_append_space(&buffer, &p, 1))
00727                                 goto number_error_memory;
00728                             p[0] = '.';
00729 
00730                             success = read_float_from_point(compiler, &buffer,
00731                                                             &yylval->vfloat);
00732                         } else {
00733                             ws_stream_ungetc(compiler->input, ch);
00734 
00735                             success = read_float_from_exp(compiler, &buffer,
00736                                                           &yylval->vfloat);
00737                         }
00738                         ws_buffer_uninit(&buffer);
00739 
00740                         if (!success)
00741                             return EOF;
00742 
00743                         return tFLOAT;
00744                     } else {
00745                         ws_stream_ungetc(compiler->input, ch);
00746                         break;
00747                     }
00748                 }
00749 
00750                 /* Now the buffer contains an integer number as a
00751                           string.  Let's convert it to an integer number. */
00752                 yylval->integer = buffer_to_int(compiler, &buffer);
00753                 ws_buffer_uninit(&buffer);
00754 
00755                 /* Read a DecimalIntegerLiteral. */
00756                 return tINTEGER;
00757             }
00758 
00759             if (ch == '0') {
00760                 /* The integer constant 0, an octal number or a
00761                    HexIntegerLiteral. */
00762                 if (ws_stream_getc(compiler->input, &ch2)) {
00763                     if (ch2 == 'x' || ch2 == 'X') {
00764                         /* HexIntegerLiteral. */
00765 
00766                         ws_buffer_init(&buffer);
00767                         if (!ws_buffer_append_space(&buffer, &p, 2))
00768                             goto number_error_memory;
00769 
00770                         p[0] = '0';
00771                         p[1] = 'x';
00772 
00773                         while (ws_stream_getc(compiler->input, &ch)) {
00774                             if (WS_IS_HEX_DIGIT(ch)) {
00775                                 if (!ws_buffer_append_space(&buffer, &p, 1))
00776                                     goto number_error_memory;
00777                                 p[0] = ch;
00778                             } else {
00779                                 ws_stream_ungetc(compiler->input, ch);
00780                                 break;
00781                             }
00782                         }
00783 
00784                         if (ws_buffer_len(&buffer) == 2) {
00785                             ws_buffer_uninit(&buffer);
00786                             ws_src_error(compiler, 0,
00787                                          "numeric constant with no digits");
00788                             yylval->integer = 0;
00789                             return tINTEGER;
00790                         }
00791 
00792                         /* Now the buffer contains an integer number as
00793                          * a string.  Let's convert it to an integer
00794                          * number. */
00795                         yylval->integer = buffer_to_int(compiler, &buffer);
00796                         ws_buffer_uninit(&buffer);
00797 
00798                         /* Read a HexIntegerLiteral. */
00799                         return tINTEGER;
00800                     }
00801                     if (WS_IS_OCTAL_DIGIT(ch2)) {
00802                         /* OctalIntegerLiteral. */
00803 
00804                         ws_buffer_init(&buffer);
00805                         if (!ws_buffer_append_space(&buffer, &p, 2))
00806                             goto number_error_memory;
00807 
00808                         p[0] = '0';
00809                         p[1] = ch2;
00810 
00811                         while (ws_stream_getc(compiler->input, &ch)) {
00812                             if (WS_IS_OCTAL_DIGIT(ch)) {
00813                                 if (!ws_buffer_append_space(&buffer, &p, 1))
00814                                     goto number_error_memory;
00815                                 p[0] = ch;
00816                             } else {
00817                                 ws_stream_ungetc(compiler->input, ch);
00818                                 break;
00819                             }
00820                         }
00821 
00822                         /* Convert the buffer into an intger number. */
00823                         yylval->integer = buffer_to_int(compiler, &buffer);
00824                         ws_buffer_uninit(&buffer);
00825 
00826                         /* Read an OctalIntegerLiteral. */
00827                         return tINTEGER;
00828                     }
00829                     if (ch2 == '.' || ch2 == 'e' || ch2 == 'E') {
00830                         /* DecimalFloatLiteral. */
00831                         ws_buffer_init(&buffer);
00832 
00833                         if (ch2 == '.') {
00834                             if (!ws_buffer_append_space(&buffer, &p, 1))
00835                                 goto number_error_memory;
00836                             p[0] = '.';
00837 
00838                             success = read_float_from_point(compiler, &buffer,
00839                                                             &yylval->vfloat);
00840                         } else {
00841                             ws_stream_ungetc(compiler->input, ch);
00842 
00843                             success = read_float_from_exp(compiler, &buffer,
00844                                                           &yylval->vfloat);
00845                         }
00846                         ws_buffer_uninit(&buffer);
00847 
00848                         if (!success)
00849                             return EOF;
00850 
00851                         return tFLOAT;
00852                     }
00853 
00854                     ws_stream_ungetc(compiler->input, ch2);
00855                 }
00856 
00857                 /* Integer literal 0. */
00858                 yylval->integer = 0;
00859                 return tINTEGER;
00860             }
00861 
00862             /* Garbage found from the input stream. */
00863             ws_src_error(compiler, 0,
00864                          "garbage found from the input stream: character=0x%x",
00865                          ch);
00866             return EOF;
00867             break;
00868         }
00869     }
00870 
00871     return EOF;
00872 }
00873 
00874 /********************* Static functions *********************************/
00875 
00876 static WsBool lookup_keyword(char *id, size_t len, int *token_return)
00877 {
00878     int left = 0, center, right = num_keywords;
00879 
00880     while (left < right) {
00881         size_t l;
00882         int result;
00883 
00884         center = left + (right - left) / 2;
00885 
00886         l = keywords[center].name_len;
00887         if (len < l)
00888             l = len;
00889 
00890         result = memcmp(id, keywords[center].name, l);
00891         if (result < 0 || (result == 0 && len < keywords[center].name_len))
00892             /* The possible match is smaller. */
00893             right = center;
00894         else if (result > 0 || (result == 0 && len > keywords[center].name_len))
00895             /* The possible match is bigger. */
00896             left = center + 1;
00897         else {
00898             /* Found a match. */
00899             *token_return = keywords[center].token;
00900             return WS_TRUE;
00901         }
00902     }
00903 
00904     /* No match. */
00905     return WS_FALSE;
00906 }
00907 
00908 
00909 static WsUInt32 buffer_to_int(WsCompilerPtr compiler, WsBuffer *buffer)
00910 {
00911     unsigned char *p;
00912     unsigned long value;
00913 
00914     /* Terminate the string. */
00915     if (!ws_buffer_append_space(buffer, &p, 1)) {
00916         ws_error_memory(compiler);
00917         return 0;
00918     }
00919     p[0] = '\0';
00920 
00921     /* Convert the buffer into an integer number.  The base is taken
00922        from the bufer. */
00923     errno = 0;
00924     value = strtoul((char *) ws_buffer_ptr(buffer), NULL, 0);
00925 
00926     /* Check for overflow.  We accept WS_INT32_MAX + 1 because we might
00927      * be parsing the numeric part of '-2147483648'. */
00928     if (errno == ERANGE || value > (WsUInt32) WS_INT32_MAX + 1)
00929         ws_src_error(compiler, 0, "integer literal too large");
00930 
00931     /* All done. */
00932     return (WsUInt32) value;
00933 }
00934 
00935 
00936 static WsBool read_float_from_point(WsCompiler *compiler, WsBuffer *buffer,
00937                                     WsFloat *result)
00938 {
00939     WsUInt32 ch;
00940     unsigned char *p;
00941 
00942     while (ws_stream_getc(compiler->input, &ch)) {
00943         if (WS_IS_DECIMAL_DIGIT(ch)) {
00944             if (!ws_buffer_append_space(buffer, &p, 1)) {
00945                 ws_error_memory(compiler);
00946                 return WS_FALSE;
00947             }
00948             p[0] = (unsigned char) ch;
00949         } else {
00950             ws_stream_ungetc(compiler->input, ch);
00951             break;
00952         }
00953     }
00954 
00955     return read_float_from_exp(compiler, buffer, result);
00956 }
00957 
00958 
00959 static WsBool read_float_from_exp(WsCompiler *compiler, WsBuffer *buffer,
00960                                   WsFloat *result)
00961 {
00962     WsUInt32 ch;
00963     unsigned char *p;
00964     int sign = '+';
00965     unsigned char buf[4];
00966 
00967     /* Do we have an exponent part. */
00968     if (!ws_stream_getc(compiler->input, &ch))
00969         goto done;
00970     if (ch != 'e' && ch != 'E') {
00971         /* No exponent part. */
00972         ws_stream_ungetc(compiler->input, ch);
00973         goto done;
00974     }
00975 
00976     /* Sign. */
00977     if (!ws_stream_getc(compiler->input, &ch)) {
00978         /* This is an error. */
00979         ws_src_error(compiler, 0, "truncated float literal");
00980         return WS_FALSE;
00981     }
00982     if (ch == '-')
00983         sign = '-';
00984     else if (ch == '+')
00985         sign = '+';
00986     else
00987         ws_stream_ungetc(compiler->input, ch);
00988 
00989     /* DecimalDigits. */
00990     if (!ws_stream_getc(compiler->input, &ch)) {
00991         ws_src_error(compiler, 0, "truncated float literal");
00992         return WS_FALSE;
00993     }
00994     if (!WS_IS_DECIMAL_DIGIT(ch)) {
00995         ws_src_error(compiler, 0, "no decimal digits in exponent part");
00996         return WS_FALSE;
00997     }
00998 
00999     /* Append exponent part read so far. */
01000     if (!ws_buffer_append_space(buffer, &p, 2)) {
01001         ws_error_memory(compiler);
01002         return WS_FALSE;
01003     }
01004     p[0] = 'e';
01005     p[1] = sign;
01006 
01007     /* Read decimal digits. */
01008     while (WS_IS_DECIMAL_DIGIT(ch)) {
01009         if (!ws_buffer_append_space(buffer, &p, 1)) {
01010             ws_error_memory(compiler);
01011             return WS_FALSE;
01012         }
01013         p[0] = (unsigned char) ch;
01014 
01015         if (!ws_stream_getc(compiler->input, &ch))
01016             /* EOF.  This is ok. */
01017             goto done;
01018     }
01019     /* Unget the extra character. */
01020     ws_stream_ungetc(compiler->input, ch);
01021 
01022     /* FALLTHROUGH */
01023 
01024 done:
01025 
01026     if (!ws_buffer_append_space(buffer, &p, 1)) {
01027         ws_error_memory(compiler);
01028         return WS_FALSE;
01029     }
01030     p[0] = 0;
01031 
01032     /* Now the buffer contains a valid floating point number. */
01033     *result = (WsFloat) strtod((char *) ws_buffer_ptr(buffer), NULL);
01034 
01035     /* Check that the generated floating point number fits to
01036        `float32'. */
01037     if (*result == HUGE_VAL || *result == -HUGE_VAL
01038         || ws_ieee754_encode_single(*result, buf) != WS_IEEE754_OK)
01039         ws_src_error(compiler, 0, "floating point literal too large");
01040 
01041     return WS_TRUE;
01042 }
See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.