kjs Library API Documentation

lexer.cpp

00001 // -*- c-basic-offset: 2 -*- 00002 /* 00003 * This file is part of the KDE libraries 00004 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) 00005 * 00006 * This library is free software; you can redistribute it and/or 00007 * modify it under the terms of the GNU Library General Public 00008 * License as published by the Free Software Foundation; either 00009 * version 2 of the License, or (at your option) any later version. 00010 * 00011 * This library is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 * Library General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU Library General Public License 00017 * along with this library; see the file COPYING.LIB. If not, write to 00018 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 00019 * Boston, MA 02111-1307, USA. 00020 * 00021 */ 00022 00023 #ifdef HAVE_CONFIG_H 00024 #include <config.h> 00025 #endif 00026 00027 #include <ctype.h> 00028 #include <stdlib.h> 00029 #include <stdio.h> 00030 #include <string.h> 00031 #include <assert.h> 00032 00033 #include "value.h" 00034 #include "object.h" 00035 #include "types.h" 00036 #include "interpreter.h" 00037 #include "nodes.h" 00038 #include "lexer.h" 00039 #include "identifier.h" 00040 #include "lookup.h" 00041 #include "internal.h" 00042 00043 // we can't specify the namespace in yacc's C output, so do it here 00044 using namespace KJS; 00045 00046 static Lexer *currLexer = 0; 00047 00048 #ifndef KDE_USE_FINAL 00049 #include "grammar.h" 00050 #endif 00051 00052 #include "lexer.lut.h" 00053 00054 extern YYLTYPE yylloc; // global bison variable holding token info 00055 00056 // a bridge for yacc from the C world to C++ 00057 int kjsyylex() 00058 { 00059 return Lexer::curr()->lex(); 00060 } 00061 00062 Lexer::Lexer() 00063 : yylineno(1), 00064 size8(128), size16(128), restrKeyword(false), 00065 eatNextIdentifier(false), stackToken(-1), lastToken(-1), pos(0), 00066 code(0), length(0), 00067 #ifndef KJS_PURE_ECMA 00068 bol(true), 00069 #endif 00070 current(0), next1(0), next2(0), next3(0), 00071 strings(0), numStrings(0), stringsCapacity(0), 00072 identifiers(0), numIdentifiers(0), identifiersCapacity(0) 00073 { 00074 // allocate space for read buffers 00075 buffer8 = new char[size8]; 00076 buffer16 = new UChar[size16]; 00077 currLexer = this; 00078 } 00079 00080 Lexer::~Lexer() 00081 { 00082 delete [] buffer8; 00083 delete [] buffer16; 00084 } 00085 00086 Lexer *Lexer::curr() 00087 { 00088 if (!currLexer) { 00089 // create singleton instance 00090 currLexer = new Lexer(); 00091 } 00092 return currLexer; 00093 } 00094 00095 #ifdef KJS_DEBUG_MEM 00096 void Lexer::globalClear() 00097 { 00098 delete currLexer; 00099 currLexer = 0L; 00100 } 00101 #endif 00102 00103 void Lexer::setCode(const UChar *c, unsigned int len) 00104 { 00105 yylineno = 1; 00106 restrKeyword = false; 00107 delimited = false; 00108 eatNextIdentifier = false; 00109 stackToken = -1; 00110 lastToken = -1; 00111 foundBad = false; 00112 pos = 0; 00113 code = c; 00114 length = len; 00115 skipLF = false; 00116 skipCR = false; 00117 #ifndef KJS_PURE_ECMA 00118 bol = true; 00119 #endif 00120 00121 // read first characters 00122 current = (length > 0) ? code[0].uc : 0; 00123 next1 = (length > 1) ? code[1].uc : 0; 00124 next2 = (length > 2) ? code[2].uc : 0; 00125 next3 = (length > 3) ? code[3].uc : 0; 00126 } 00127 00128 void Lexer::shift(unsigned int p) 00129 { 00130 while (p--) { 00131 pos++; 00132 current = next1; 00133 next1 = next2; 00134 next2 = next3; 00135 next3 = (pos + 3 < length) ? code[pos+3].uc : 0; 00136 } 00137 } 00138 00139 // called on each new line 00140 void Lexer::nextLine() 00141 { 00142 yylineno++; 00143 #ifndef KJS_PURE_ECMA 00144 bol = true; 00145 #endif 00146 } 00147 00148 void Lexer::setDone(State s) 00149 { 00150 state = s; 00151 done = true; 00152 } 00153 00154 int Lexer::lex() 00155 { 00156 int token = 0; 00157 state = Start; 00158 unsigned short stringType = 0; // either single or double quotes 00159 pos8 = pos16 = 0; 00160 done = false; 00161 terminator = false; 00162 skipLF = false; 00163 skipCR = false; 00164 00165 // did we push a token on the stack previously ? 00166 // (after an automatic semicolon insertion) 00167 if (stackToken >= 0) { 00168 setDone(Other); 00169 token = stackToken; 00170 stackToken = 0; 00171 } 00172 00173 while (!done) { 00174 if (skipLF && current != '\n') // found \r but not \n afterwards 00175 skipLF = false; 00176 if (skipCR && current != '\r') // found \n but not \r afterwards 00177 skipCR = false; 00178 if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one 00179 { 00180 skipLF = false; 00181 skipCR = false; 00182 shift(1); 00183 } 00184 00185 bool cr = (current == '\r'); 00186 bool lf = (current == '\n'); 00187 if (cr) 00188 skipLF = true; 00189 else if (lf) 00190 skipCR = true; 00191 bool isLineTerminator = cr || lf; 00192 00193 switch (state) { 00194 case Start: 00195 if (isWhiteSpace(current)) { 00196 // do nothing 00197 } else if (current == '/' && next1 == '/') { 00198 shift(1); 00199 state = InSingleLineComment; 00200 } else if (current == '/' && next1 == '*') { 00201 shift(1); 00202 state = InMultiLineComment; 00203 } else if (current == 0) { 00204 if (!terminator && !delimited) { 00205 // automatic semicolon insertion if program incomplete 00206 token = ';'; 00207 stackToken = 0; 00208 setDone(Other); 00209 } else 00210 setDone(Eof); 00211 } else if (isLineTerminator) { 00212 nextLine(); 00213 terminator = true; 00214 if (restrKeyword) { 00215 token = ';'; 00216 setDone(Other); 00217 } 00218 } else if (current == '"' || current == '\'') { 00219 state = InString; 00220 stringType = current; 00221 } else if (isIdentLetter(current)) { 00222 record16(current); 00223 state = InIdentifier; 00224 } else if (current == '0') { 00225 record8(current); 00226 state = InNum0; 00227 } else if (isDecimalDigit(current)) { 00228 record8(current); 00229 state = InNum; 00230 } else if (current == '.' && isDecimalDigit(next1)) { 00231 record8(current); 00232 state = InDecimal; 00233 #ifndef KJS_PURE_ECMA 00234 // <!-- marks the beginning of a line comment (for www usage) 00235 } else if (current == '<' && next1 == '!' && 00236 next2 == '-' && next3 == '-') { 00237 shift(3); 00238 state = InSingleLineComment; 00239 // same for --> 00240 } else if (bol && current == '-' && next1 == '-' && next2 == '>') { 00241 shift(2); 00242 state = InSingleLineComment; 00243 #endif 00244 } else { 00245 token = matchPunctuator(current, next1, next2, next3); 00246 if (token != -1) { 00247 setDone(Other); 00248 } else { 00249 // cerr << "encountered unknown character" << endl; 00250 setDone(Bad); 00251 } 00252 } 00253 break; 00254 case InString: 00255 if (current == stringType) { 00256 shift(1); 00257 setDone(String); 00258 } else if (current == 0 || isLineTerminator) { 00259 setDone(Bad); 00260 } else if (current == '\\') { 00261 state = InEscapeSequence; 00262 } else { 00263 record16(current); 00264 } 00265 break; 00266 // Escape Sequences inside of strings 00267 case InEscapeSequence: 00268 if (isOctalDigit(current)) { 00269 if (current >= '0' && current <= '3' && 00270 isOctalDigit(next1) && isOctalDigit(next2)) { 00271 record16(convertOctal(current, next1, next2)); 00272 shift(2); 00273 state = InString; 00274 } else if (isOctalDigit(current) && isOctalDigit(next1)) { 00275 record16(convertOctal('0', current, next1)); 00276 shift(1); 00277 state = InString; 00278 } else if (isOctalDigit(current)) { 00279 record16(convertOctal('0', '0', current)); 00280 state = InString; 00281 } else { 00282 setDone(Bad); 00283 } 00284 } else if (current == 'x') 00285 state = InHexEscape; 00286 else if (current == 'u') 00287 state = InUnicodeEscape; 00288 else { 00289 if (isLineTerminator) 00290 nextLine(); 00291 record16(singleEscape(current)); 00292 state = InString; 00293 } 00294 break; 00295 case InHexEscape: 00296 if (isHexDigit(current) && isHexDigit(next1)) { 00297 state = InString; 00298 record16(convertHex(current, next1)); 00299 shift(1); 00300 } else if (current == stringType) { 00301 record16('x'); 00302 shift(1); 00303 setDone(String); 00304 } else { 00305 record16('x'); 00306 record16(current); 00307 state = InString; 00308 } 00309 break; 00310 case InUnicodeEscape: 00311 if (isHexDigit(current) && isHexDigit(next1) && 00312 isHexDigit(next2) && isHexDigit(next3)) { 00313 record16(convertUnicode(current, next1, next2, next3)); 00314 shift(3); 00315 state = InString; 00316 } else if (current == stringType) { 00317 record16('u'); 00318 shift(1); 00319 setDone(String); 00320 } else { 00321 setDone(Bad); 00322 } 00323 break; 00324 case InSingleLineComment: 00325 if (isLineTerminator) { 00326 nextLine(); 00327 terminator = true; 00328 if (restrKeyword) { 00329 token = ';'; 00330 setDone(Other); 00331 } else 00332 state = Start; 00333 } else if (current == 0) { 00334 setDone(Eof); 00335 } 00336 break; 00337 case InMultiLineComment: 00338 if (current == 0) { 00339 setDone(Bad); 00340 } else if (isLineTerminator) { 00341 nextLine(); 00342 } else if (current == '*' && next1 == '/') { 00343 state = Start; 00344 shift(1); 00345 } 00346 break; 00347 case InIdentifier: 00348 if (isIdentLetter(current) || isDecimalDigit(current)) { 00349 record16(current); 00350 break; 00351 } 00352 setDone(Identifier); 00353 break; 00354 case InNum0: 00355 if (current == 'x' || current == 'X') { 00356 record8(current); 00357 state = InHex; 00358 } else if (current == '.') { 00359 record8(current); 00360 state = InDecimal; 00361 } else if (current == 'e' || current == 'E') { 00362 record8(current); 00363 state = InExponentIndicator; 00364 } else if (isOctalDigit(current)) { 00365 record8(current); 00366 state = InOctal; 00367 } else if (isDecimalDigit(current)) { 00368 record8(current); 00369 state = InDecimal; 00370 } else { 00371 setDone(Number); 00372 } 00373 break; 00374 case InHex: 00375 if (isHexDigit(current)) { 00376 record8(current); 00377 } else { 00378 setDone(Hex); 00379 } 00380 break; 00381 case InOctal: 00382 if (isOctalDigit(current)) { 00383 record8(current); 00384 } 00385 else if (isDecimalDigit(current)) { 00386 record8(current); 00387 state = InDecimal; 00388 } else 00389 setDone(Octal); 00390 break; 00391 case InNum: 00392 if (isDecimalDigit(current)) { 00393 record8(current); 00394 } else if (current == '.') { 00395 record8(current); 00396 state = InDecimal; 00397 } else if (current == 'e' || current == 'E') { 00398 record8(current); 00399 state = InExponentIndicator; 00400 } else 00401 setDone(Number); 00402 break; 00403 case InDecimal: 00404 if (isDecimalDigit(current)) { 00405 record8(current); 00406 } else if (current == 'e' || current == 'E') { 00407 record8(current); 00408 state = InExponentIndicator; 00409 } else 00410 setDone(Number); 00411 break; 00412 case InExponentIndicator: 00413 if (current == '+' || current == '-') { 00414 record8(current); 00415 } else if (isDecimalDigit(current)) { 00416 record8(current); 00417 state = InExponent; 00418 } else 00419 setDone(Bad); 00420 break; 00421 case InExponent: 00422 if (isDecimalDigit(current)) { 00423 record8(current); 00424 } else 00425 setDone(Number); 00426 break; 00427 default: 00428 assert(!"Unhandled state in switch statement"); 00429 } 00430 00431 // move on to the next character 00432 if (!done) 00433 shift(1); 00434 #ifndef KJS_PURE_ECMA 00435 if (state != Start && state != InSingleLineComment) 00436 bol = false; 00437 #endif 00438 } 00439 00440 // no identifiers allowed directly after numeric literal, e.g. "3in" is bad 00441 if ((state == Number || state == Octal || state == Hex) 00442 && isIdentLetter(current)) 00443 state = Bad; 00444 00445 // terminate string 00446 buffer8[pos8] = '\0'; 00447 00448 #ifdef KJS_DEBUG_LEX 00449 fprintf(stderr, "line: %d ", lineNo()); 00450 fprintf(stderr, "yytext (%x): ", buffer8[0]); 00451 fprintf(stderr, "%s ", buffer8); 00452 #endif 00453 00454 long double dval = 0; 00455 if (state == Number) { 00456 dval = strtod(buffer8, 0L); 00457 } else if (state == Hex) { // scan hex numbers 00458 dval = 0; 00459 if (buffer8[0] == '0' && (buffer8[1] == 'x' || buffer8[1] == 'X')) { 00460 for (const char *p = buffer8+2; *p; p++) { 00461 if (!isHexDigit(*p)) { 00462 dval = 0; 00463 break; 00464 } 00465 dval = dval * 16 + convertHex(*p); 00466 } 00467 } 00468 state = Number; 00469 } else if (state == Octal) { // scan octal number 00470 dval = 0; 00471 if (buffer8[0] == '0') { 00472 for (const char *p = buffer8+1; *p; p++) { 00473 if (*p < '0' || *p > '7') { 00474 dval = 0; 00475 break; 00476 } 00477 dval = dval * 8 + *p - '0'; 00478 } 00479 } 00480 state = Number; 00481 } 00482 00483 #ifdef KJS_DEBUG_LEX 00484 switch (state) { 00485 case Eof: 00486 printf("(EOF)\n"); 00487 break; 00488 case Other: 00489 printf("(Other)\n"); 00490 break; 00491 case Identifier: 00492 printf("(Identifier)/(Keyword)\n"); 00493 break; 00494 case String: 00495 printf("(String)\n"); 00496 break; 00497 case Number: 00498 printf("(Number)\n"); 00499 break; 00500 default: 00501 printf("(unknown)"); 00502 } 00503 #endif 00504 00505 if (state != Identifier && eatNextIdentifier) 00506 eatNextIdentifier = false; 00507 00508 restrKeyword = false; 00509 delimited = false; 00510 yylloc.first_line = yylineno; // ??? 00511 yylloc.last_line = yylineno; 00512 00513 switch (state) { 00514 case Eof: 00515 token = 0; 00516 break; 00517 case Other: 00518 if(token == '}' || token == ';') { 00519 delimited = true; 00520 } 00521 break; 00522 case Identifier: 00523 if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) { 00524 // Lookup for keyword failed, means this is an identifier 00525 // Apply anonymous-function hack below (eat the identifier) 00526 if (eatNextIdentifier) { 00527 eatNextIdentifier = false; 00528 #ifdef KJS_VERBOSE 00529 UString debugstr(buffer16, pos16); fprintf(stderr,"Anonymous function hack: eating identifier %s\n",debugstr.ascii()); 00530 #endif 00531 token = lex(); 00532 break; 00533 } 00534 /* TODO: close leak on parse error. same holds true for String */ 00535 kjsyylval.ident = makeIdentifier(buffer16, pos16); 00536 token = IDENT; 00537 break; 00538 } 00539 00540 eatNextIdentifier = false; 00541 // Hack for "f = function somename() { ... }", too hard to get into the grammar 00542 // Same for building an array with function pointers ( 'name', func1, 'name2', func2 ) 00543 // There are lots of other uses, we really have to get this into the grammar 00544 if ( token == FUNCTION && 00545 ( lastToken == '=' || lastToken == ',' || lastToken == '(' ) ) 00546 eatNextIdentifier = true; 00547 00548 if (token == CONTINUE || token == BREAK || 00549 token == RETURN || token == THROW) 00550 restrKeyword = true; 00551 break; 00552 case String: 00553 kjsyylval.ustr = makeUString(buffer16, pos16); 00554 token = STRING; 00555 break; 00556 case Number: 00557 kjsyylval.dval = dval; 00558 token = NUMBER; 00559 break; 00560 case Bad: 00561 foundBad = true; 00562 return -1; 00563 default: 00564 assert(!"unhandled numeration value in switch"); 00565 return -1; 00566 } 00567 lastToken = token; 00568 return token; 00569 } 00570 00571 bool Lexer::isWhiteSpace(unsigned short c) 00572 { 00573 return (c == ' ' || c == '\t' || 00574 c == 0x0b || c == 0x0c || c == 0xa0); 00575 } 00576 00577 bool Lexer::isIdentLetter(unsigned short c) 00578 { 00579 /* TODO: allow other legitimate unicode chars */ 00580 return (c >= 'a' && c <= 'z' || 00581 c >= 'A' && c <= 'Z' || 00582 c == '$' || c == '_'); 00583 } 00584 00585 bool Lexer::isDecimalDigit(unsigned short c) 00586 { 00587 return (c >= '0' && c <= '9'); 00588 } 00589 00590 bool Lexer::isHexDigit(unsigned short c) 00591 { 00592 return (c >= '0' && c <= '9' || 00593 c >= 'a' && c <= 'f' || 00594 c >= 'A' && c <= 'F'); 00595 } 00596 00597 bool Lexer::isOctalDigit(unsigned short c) 00598 { 00599 return (c >= '0' && c <= '7'); 00600 } 00601 00602 int Lexer::matchPunctuator(unsigned short c1, unsigned short c2, 00603 unsigned short c3, unsigned short c4) 00604 { 00605 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') { 00606 shift(4); 00607 return URSHIFTEQUAL; 00608 } else if (c1 == '=' && c2 == '=' && c3 == '=') { 00609 shift(3); 00610 return STREQ; 00611 } else if (c1 == '!' && c2 == '=' && c3 == '=') { 00612 shift(3); 00613 return STRNEQ; 00614 } else if (c1 == '>' && c2 == '>' && c3 == '>') { 00615 shift(3); 00616 return URSHIFT; 00617 } else if (c1 == '<' && c2 == '<' && c3 == '=') { 00618 shift(3); 00619 return LSHIFTEQUAL; 00620 } else if (c1 == '>' && c2 == '>' && c3 == '=') { 00621 shift(3); 00622 return RSHIFTEQUAL; 00623 } else if (c1 == '<' && c2 == '=') { 00624 shift(2); 00625 return LE; 00626 } else if (c1 == '>' && c2 == '=') { 00627 shift(2); 00628 return GE; 00629 } else if (c1 == '!' && c2 == '=') { 00630 shift(2); 00631 return NE; 00632 } else if (c1 == '+' && c2 == '+') { 00633 shift(2); 00634 if (terminator) 00635 return AUTOPLUSPLUS; 00636 else 00637 return PLUSPLUS; 00638 } else if (c1 == '-' && c2 == '-') { 00639 shift(2); 00640 if (terminator) 00641 return AUTOMINUSMINUS; 00642 else 00643 return MINUSMINUS; 00644 } else if (c1 == '=' && c2 == '=') { 00645 shift(2); 00646 return EQEQ; 00647 } else if (c1 == '+' && c2 == '=') { 00648 shift(2); 00649 return PLUSEQUAL; 00650 } else if (c1 == '-' && c2 == '=') { 00651 shift(2); 00652 return MINUSEQUAL; 00653 } else if (c1 == '*' && c2 == '=') { 00654 shift(2); 00655 return MULTEQUAL; 00656 } else if (c1 == '/' && c2 == '=') { 00657 shift(2); 00658 return DIVEQUAL; 00659 } else if (c1 == '&' && c2 == '=') { 00660 shift(2); 00661 return ANDEQUAL; 00662 } else if (c1 == '^' && c2 == '=') { 00663 shift(2); 00664 return XOREQUAL; 00665 } else if (c1 == '%' && c2 == '=') { 00666 shift(2); 00667 return MODEQUAL; 00668 } else if (c1 == '|' && c2 == '=') { 00669 shift(2); 00670 return OREQUAL; 00671 } else if (c1 == '<' && c2 == '<') { 00672 shift(2); 00673 return LSHIFT; 00674 } else if (c1 == '>' && c2 == '>') { 00675 shift(2); 00676 return RSHIFT; 00677 } else if (c1 == '&' && c2 == '&') { 00678 shift(2); 00679 return AND; 00680 } else if (c1 == '|' && c2 == '|') { 00681 shift(2); 00682 return OR; 00683 } 00684 00685 switch(c1) { 00686 case '=': 00687 case '>': 00688 case '<': 00689 case ',': 00690 case '!': 00691 case '~': 00692 case '?': 00693 case ':': 00694 case '.': 00695 case '+': 00696 case '-': 00697 case '*': 00698 case '/': 00699 case '&': 00700 case '|': 00701 case '^': 00702 case '%': 00703 case '(': 00704 case ')': 00705 case '{': 00706 case '}': 00707 case '[': 00708 case ']': 00709 case ';': 00710 shift(1); 00711 return static_cast<int>(c1); 00712 default: 00713 return -1; 00714 } 00715 } 00716 00717 unsigned short Lexer::singleEscape(unsigned short c) const 00718 { 00719 switch(c) { 00720 case 'b': 00721 return 0x08; 00722 case 't': 00723 return 0x09; 00724 case 'n': 00725 return 0x0A; 00726 case 'v': 00727 return 0x0B; 00728 case 'f': 00729 return 0x0C; 00730 case 'r': 00731 return 0x0D; 00732 case '"': 00733 return 0x22; 00734 case '\'': 00735 return 0x27; 00736 case '\\': 00737 return 0x5C; 00738 default: 00739 return c; 00740 } 00741 } 00742 00743 unsigned short Lexer::convertOctal(unsigned short c1, unsigned short c2, 00744 unsigned short c3) const 00745 { 00746 return ((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0'); 00747 } 00748 00749 unsigned char Lexer::convertHex(unsigned short c) 00750 { 00751 if (c >= '0' && c <= '9') 00752 return (c - '0'); 00753 else if (c >= 'a' && c <= 'f') 00754 return (c - 'a' + 10); 00755 else 00756 return (c - 'A' + 10); 00757 } 00758 00759 unsigned char Lexer::convertHex(unsigned short c1, unsigned short c2) 00760 { 00761 return ((convertHex(c1) << 4) + convertHex(c2)); 00762 } 00763 00764 UChar Lexer::convertUnicode(unsigned short c1, unsigned short c2, 00765 unsigned short c3, unsigned short c4) 00766 { 00767 return UChar((convertHex(c1) << 4) + convertHex(c2), 00768 (convertHex(c3) << 4) + convertHex(c4)); 00769 } 00770 00771 void Lexer::record8(unsigned short c) 00772 { 00773 assert(c <= 0xff); 00774 00775 // enlarge buffer if full 00776 if (pos8 >= size8 - 1) { 00777 char *tmp = new char[2 * size8]; 00778 memcpy(tmp, buffer8, size8 * sizeof(char)); 00779 delete [] buffer8; 00780 buffer8 = tmp; 00781 size8 *= 2; 00782 } 00783 00784 buffer8[pos8++] = (char) c; 00785 } 00786 00787 void Lexer::record16(UChar c) 00788 { 00789 // enlarge buffer if full 00790 if (pos16 >= size16 - 1) { 00791 UChar *tmp = new UChar[2 * size16]; 00792 memcpy(tmp, buffer16, size16 * sizeof(UChar)); 00793 delete [] buffer16; 00794 buffer16 = tmp; 00795 size16 *= 2; 00796 } 00797 00798 buffer16[pos16++] = c; 00799 } 00800 00801 bool Lexer::scanRegExp() 00802 { 00803 pos16 = 0; 00804 bool lastWasEscape = false; 00805 bool inBrackets = false; 00806 00807 while (1) { 00808 if (current == '\r' || current == '\n' || current == 0) 00809 return false; 00810 else if (current != '/' || lastWasEscape == true || inBrackets == true) 00811 { 00812 // keep track of '[' and ']' 00813 if ( !lastWasEscape ) { 00814 if ( current == '[' && !inBrackets ) 00815 inBrackets = true; 00816 if ( current == ']' && inBrackets ) 00817 inBrackets = false; 00818 } 00819 record16(current); 00820 lastWasEscape = 00821 !lastWasEscape && (current == '\\'); 00822 } 00823 else { // end of regexp 00824 pattern = UString(buffer16, pos16); 00825 pos16 = 0; 00826 shift(1); 00827 break; 00828 } 00829 shift(1); 00830 } 00831 00832 while (isIdentLetter(current)) { 00833 record16(current); 00834 shift(1); 00835 } 00836 flags = UString(buffer16, pos16); 00837 00838 return true; 00839 } 00840 00841 00842 void Lexer::doneParsing() 00843 { 00844 for (unsigned i = 0; i < numIdentifiers; i++) { 00845 delete identifiers[i]; 00846 } 00847 free(identifiers); 00848 identifiers = 0; 00849 numIdentifiers = 0; 00850 identifiersCapacity = 0; 00851 00852 for (unsigned i = 0; i < numStrings; i++) { 00853 delete strings[i]; 00854 } 00855 free(strings); 00856 strings = 0; 00857 numStrings = 0; 00858 stringsCapacity = 0; 00859 } 00860 00861 const int initialCapacity = 64; 00862 const int growthFactor = 2; 00863 00864 Identifier *Lexer::makeIdentifier(UChar *buffer, unsigned int pos) 00865 { 00866 if (numIdentifiers == identifiersCapacity) { 00867 identifiersCapacity = (identifiersCapacity == 0) ? initialCapacity : identifiersCapacity *growthFactor; 00868 identifiers = (KJS::Identifier **)realloc(identifiers, sizeof(KJS::Identifier *) * identifiersCapacity); 00869 } 00870 00871 KJS::Identifier *identifier = new KJS::Identifier(buffer, pos); 00872 identifiers[numIdentifiers++] = identifier; 00873 return identifier; 00874 } 00875 00876 UString *Lexer::makeUString(UChar *buffer, unsigned int pos) 00877 { 00878 if (numStrings == stringsCapacity) { 00879 stringsCapacity = (stringsCapacity == 0) ? initialCapacity : stringsCapacity *growthFactor; 00880 strings = (UString **)realloc(strings, sizeof(UString *) * stringsCapacity); 00881 } 00882 00883 UString *string = new UString(buffer, pos); 00884 strings[numStrings++] = string; 00885 return string; 00886 }
KDE Logo
This file is part of the documentation for kjs Library Version 3.4.0.
Documentation copyright © 1996-2004 the KDE developers.
Generated on Tue Apr 12 23:06:59 2005 by doxygen 1.3.7 written by Dimitri van Heesch, © 1997-2003