00001 #ifndef INC_CharScanner_hpp__
00002 #define INC_CharScanner_hpp__
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <antlr/config.hpp>
00012
00013 #include <map>
00014
00015 #ifdef HAS_NOT_CCTYPE_H
00016 #include <ctype.h>
00017 #else
00018 #include <cctype>
00019 #endif
00020
00021 #if ( _MSC_VER == 1200 )
00022
00023
00024 # include <stdio.h>
00025 #endif
00026
00027 #include <antlr/TokenStream.hpp>
00028 #include <antlr/RecognitionException.hpp>
00029 #include <antlr/SemanticException.hpp>
00030 #include <antlr/MismatchedCharException.hpp>
00031 #include <antlr/InputBuffer.hpp>
00032 #include <antlr/BitSet.hpp>
00033 #include <antlr/LexerSharedInputState.hpp>
00034
00035 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
00036 namespace antlr {
00037 #endif
00038
00039 class ANTLR_API CharScanner;
00040
00041 ANTLR_C_USING(tolower)
00042
00043 #ifdef ANTLR_REALLY_NO_STRCASECMP
00044
00045
/** Fallback case-insensitive comparison of two NUL-terminated strings,
 * compiled only when ANTLR_REALLY_NO_STRCASECMP is defined.
 *
 * Both strings are folded with tolower() one character at a time and the
 * folded values are compared as unsigned character codes.
 *
 * @param s1 first NUL-terminated string
 * @param s2 second NUL-terminated string
 * @return -1 if s1 orders before s2, 1 if it orders after, 0 if they are
 *         equal ignoring case
 */
inline int strcasecmp(const char *s1, const char *s2)
{
	while (true)
	{
		// Convert through unsigned char: handing a negative char to
		// tolower() is undefined, and comparing plain (possibly signed)
		// chars would order bytes >= 0x80 before ASCII.
		const int c1 = tolower(static_cast<unsigned char>(*s1++));
		const int c2 = tolower(static_cast<unsigned char>(*s2++));
		if (c1 < c2) return -1;
		if (c1 > c2) return 1;
		// Equal so far; equal and NUL means both strings ended together.
		if (c1 == 0) return 0;
	}
}
00057 #else
00058 #ifdef NO_STRCASECMP
00059 ANTLR_C_USING(stricmp)
00060 #else
00061 ANTLR_C_USING(strcasecmp)
00062 #endif
00063 #endif
00064
00067 class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
00068 private:
00069 const CharScanner* scanner;
00070 public:
00071 #ifdef NO_TEMPLATE_PARTS
00072 CharScannerLiteralsLess() {}
00073 #endif
00074 CharScannerLiteralsLess(const CharScanner* theScanner)
00075 : scanner(theScanner)
00076 {
00077 }
00078 bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
00079
00080
00081
00082 };
00083
00086 class ANTLR_API CharScanner : public TokenStream {
00087 protected:
00088 typedef RefToken (*factory_type)();
00089 public:
00090 CharScanner(InputBuffer& cb, bool case_sensitive );
00091 CharScanner(InputBuffer* cb, bool case_sensitive );
00092 CharScanner(const LexerSharedInputState& state, bool case_sensitive );
00093
00094 virtual ~CharScanner()
00095 {
00096 }
00097
00098 virtual int LA(unsigned int i);
00099
00100 virtual void append(char c)
00101 {
00102 if (saveConsumedInput)
00103 {
00104 size_t l = text.length();
00105
00106 if ((l%256) == 0)
00107 text.reserve(l+256);
00108
00109 text.replace(l,0,&c,1);
00110 }
00111 }
00112
00113 virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
00114 {
00115 if( saveConsumedInput )
00116 text += s;
00117 }
00118
00119 virtual void commit()
00120 {
00121 inputState->getInput().commit();
00122 }
00123
00127 virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
00128 {
00129 consume();
00130 consumeUntil(tokenSet);
00131 }
00132
00133 virtual void consume()
00134 {
00135 if (inputState->guessing == 0)
00136 {
00137 int c = LA(1);
00138 if (caseSensitive)
00139 {
00140 append(c);
00141 }
00142 else
00143 {
00144
00145
00146 append(inputState->getInput().LA(1));
00147 }
00148
00149
00150 if (c == '\t')
00151 tab();
00152 else
00153 inputState->column++;
00154 }
00155 inputState->getInput().consume();
00156 }
00157
00159 virtual void consumeUntil(int c)
00160 {
00161 for(;;)
00162 {
00163 int la_1 = LA(1);
00164 if( la_1 == EOF_CHAR || la_1 == c )
00165 break;
00166 consume();
00167 }
00168 }
00169
00171 virtual void consumeUntil(const BitSet& set)
00172 {
00173 for(;;)
00174 {
00175 int la_1 = LA(1);
00176 if( la_1 == EOF_CHAR || set.member(la_1) )
00177 break;
00178 consume();
00179 }
00180 }
00181
00183 virtual unsigned int mark()
00184 {
00185 return inputState->getInput().mark();
00186 }
00188 virtual void rewind(unsigned int pos)
00189 {
00190 inputState->getInput().rewind(pos);
00191 }
00192
00194 virtual void match(int c)
00195 {
00196 int la_1 = LA(1);
00197 if ( la_1 != c )
00198 throw MismatchedCharException(la_1, c, false, this);
00199 consume();
00200 }
00201
00205 virtual void match(const BitSet& b)
00206 {
00207 int la_1 = LA(1);
00208
00209 if ( !b.member(la_1) )
00210 throw MismatchedCharException( la_1, b, false, this );
00211 consume();
00212 }
00213
00217 virtual void match( const char* s )
00218 {
00219 while( *s != '\0' )
00220 {
00221
00222 int la_1 = LA(1), c = (*s++ & 0xFF);
00223
00224 if ( la_1 != c )
00225 throw MismatchedCharException(la_1, c, false, this);
00226
00227 consume();
00228 }
00229 }
00233 virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
00234 {
00235 size_t len = s.length();
00236
00237 for (size_t i = 0; i < len; i++)
00238 {
00239
00240 int la_1 = LA(1), c = (s[i] & 0xFF);
00241
00242 if ( la_1 != c )
00243 throw MismatchedCharException(la_1, c, false, this);
00244
00245 consume();
00246 }
00247 }
00251 virtual void matchNot(int c)
00252 {
00253 int la_1 = LA(1);
00254
00255 if ( la_1 == c )
00256 throw MismatchedCharException(la_1, c, true, this);
00257
00258 consume();
00259 }
00263 virtual void matchRange(int c1, int c2)
00264 {
00265 int la_1 = LA(1);
00266
00267 if ( la_1 < c1 || la_1 > c2 )
00268 throw MismatchedCharException(la_1, c1, c2, false, this);
00269
00270 consume();
00271 }
00272
00273 virtual bool getCaseSensitive() const
00274 {
00275 return caseSensitive;
00276 }
00277
00278 virtual void setCaseSensitive(bool t)
00279 {
00280 caseSensitive = t;
00281 }
00282
00283 virtual bool getCaseSensitiveLiterals() const=0;
00284
00286 virtual int getLine() const
00287 {
00288 return inputState->line;
00289 }
00290
00292 virtual void setLine(int l)
00293 {
00294 inputState->line = l;
00295 }
00296
00298 virtual int getColumn() const
00299 {
00300 return inputState->column;
00301 }
00303 virtual void setColumn(int c)
00304 {
00305 inputState->column = c;
00306 }
00307
00309 virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
00310 {
00311 return inputState->filename;
00312 }
00314 virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
00315 {
00316 inputState->filename = f;
00317 }
00318
00319 virtual bool getCommitToPath() const
00320 {
00321 return commitToPath;
00322 }
00323
00324 virtual void setCommitToPath(bool commit)
00325 {
00326 commitToPath = commit;
00327 }
00328
00330 virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
00331 {
00332 return text;
00333 }
00334
00335 virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
00336 {
00337 text = s;
00338 }
00339
00340 virtual void resetText()
00341 {
00342 text = "";
00343 inputState->tokenStartColumn = inputState->column;
00344 inputState->tokenStartLine = inputState->line;
00345 }
00346
00347 virtual RefToken getTokenObject() const
00348 {
00349 return _returnToken;
00350 }
00351
00355 virtual void newline()
00356 {
00357 ++inputState->line;
00358 inputState->column = 1;
00359 }
00360
00365 virtual void tab()
00366 {
00367 int c = getColumn();
00368 int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1;
00369 setColumn( nc );
00370 }
00372 int setTabsize( int size )
00373 {
00374 int oldsize = tabsize;
00375 tabsize = size;
00376 return oldsize;
00377 }
00379 int getTabSize() const
00380 {
00381 return tabsize;
00382 }
00383
00385 virtual void reportError(const RecognitionException& e);
00386
00388 virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
00389
00391 virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
00392
00393 virtual InputBuffer& getInputBuffer()
00394 {
00395 return inputState->getInput();
00396 }
00397
00398 virtual LexerSharedInputState getInputState()
00399 {
00400 return inputState;
00401 }
00402
00405 virtual void setInputState(LexerSharedInputState state)
00406 {
00407 inputState = state;
00408 }
00409
00411 virtual void setTokenObjectFactory(factory_type factory)
00412 {
00413 tokenFactory = factory;
00414 }
00415
00419 virtual int testLiteralsTable(int ttype) const
00420 {
00421 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
00422 if (i != literals.end())
00423 ttype = (*i).second;
00424 return ttype;
00425 }
00426
00432 virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
00433 {
00434 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
00435 if (i != literals.end())
00436 ttype = (*i).second;
00437 return ttype;
00438 }
00439
00441 virtual int toLower(int c) const
00442 {
00443
00444
00445
00446 return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
00447 }
00448
00464 virtual void uponEOF()
00465 {
00466 }
00467
00469 virtual void traceIndent();
00470 virtual void traceIn(const char* rname);
00471 virtual void traceOut(const char* rname);
00472
00473 #ifndef NO_STATIC_CONSTS
00474 static const int EOF_CHAR = EOF;
00475 #else
00476 enum {
00477 EOF_CHAR = EOF
00478 };
00479 #endif
00480 protected:
00481 ANTLR_USE_NAMESPACE(std)string text;
00482
00483 bool saveConsumedInput;
00484 factory_type tokenFactory;
00485 bool caseSensitive;
00486 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals;
00487
00488 RefToken _returnToken;
00489
00491 LexerSharedInputState inputState;
00492
00497 bool commitToPath;
00498
00499 int tabsize;
00500
00502 virtual RefToken makeToken(int t)
00503 {
00504 RefToken tok = tokenFactory();
00505 tok->setType(t);
00506 tok->setColumn(inputState->tokenStartColumn);
00507 tok->setLine(inputState->tokenStartLine);
00508 return tok;
00509 }
00510
00513 class Tracer {
00514 private:
00515 CharScanner* parser;
00516 const char* text;
00517
00518 Tracer(const Tracer& other);
00519 Tracer& operator=(const Tracer& other);
00520 public:
00521 Tracer( CharScanner* p,const char* t )
00522 : parser(p), text(t)
00523 {
00524 parser->traceIn(text);
00525 }
00526 ~Tracer()
00527 {
00528 parser->traceOut(text);
00529 }
00530 };
00531
00532 int traceDepth;
00533 private:
00534 CharScanner( const CharScanner& other );
00535 CharScanner& operator=( const CharScanner& other );
00536
00537 #ifndef NO_STATIC_CONSTS
00538 static const int NO_CHAR = 0;
00539 #else
00540 enum {
00541 NO_CHAR = 0
00542 };
00543 #endif
00544 };
00545
00546 inline int CharScanner::LA(unsigned int i)
00547 {
00548 int c = inputState->getInput().LA(i);
00549
00550 if ( caseSensitive )
00551 return c;
00552 else
00553 return toLower(c);
00554 }
00555
00556 inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
00557 {
00558 if (scanner->getCaseSensitiveLiterals())
00559 return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
00560 else
00561 {
00562 #ifdef NO_STRCASECMP
00563 return (stricmp(x.c_str(),y.c_str())<0);
00564 #else
00565 return (strcasecmp(x.c_str(),y.c_str())<0);
00566 #endif
00567 }
00568 }
00569
00570 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
00571 }
00572 #endif
00573
00574 #endif //INC_CharScanner_hpp__