neo/idlib/Lexer.h

   1 /*
   2 ===========================================================================
   3
   4 Doom 3 GPL Source Code
   5 Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.
   6
   7 This file is part of the Doom 3 GPL Source Code ("Doom 3 Source Code").
   8
   9 Doom 3 Source Code is free software: you can redistribute it and/or modify
  10 it under the terms of the GNU General Public License as published by
  11 the Free Software Foundation, either version 3 of the License, or
  12 (at your option) any later version.
  13
  14 Doom 3 Source Code is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with Doom 3 Source Code.  If not, see <http://www.gnu.org/licenses/>.
  21
  22 In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code.  If not, please request a copy in writing from id Software at the address below.
  23
  24 If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
  25
  26 ===========================================================================
  27 */
  28
  29 #ifndef __LEXER_H__
  30 #define __LEXER_H__
  31
  32 /*
  33 ===============================================================================
  34
  35         Lexicographical parser
  36
  37         Does not use memory allocation during parsing. The lexer uses no
  38         memory allocation if a source is loaded with LoadMemory().
  39         However, idToken may still allocate memory for large strings.
  40
  41         A number directly following the escape character '\' in a string is
  42         assumed to be in decimal format instead of octal. Binary numbers of
  43         the form 0b.. or 0B.. can also be used.
  44
  45 ===============================================================================
  46 */
  47
  48 // lexer flags
  49 typedef enum {                                                  // lexer option bits; idLexer takes them as an int 'flags' (see SetFlags/GetFlags)
  50         LEXFL_NOERRORS                                          = BIT(0),       // don't print any errors
  51         LEXFL_NOWARNINGS                                        = BIT(1),       // don't print any warnings
  52         LEXFL_NOFATALERRORS                                     = BIT(2),       // errors aren't fatal
  53         LEXFL_NOSTRINGCONCAT                            = BIT(3),       // multiple strings separated by whitespace are not concatenated
  54         LEXFL_NOSTRINGESCAPECHARS                       = BIT(4),       // no escape characters inside strings
  55         LEXFL_NODOLLARPRECOMPILE                        = BIT(5),       // don't use the $ sign for precompilation
  56         LEXFL_NOBASEINCLUDES                            = BIT(6),       // don't include files embraced with < >
  57         LEXFL_ALLOWPATHNAMES                            = BIT(7),       // allow path separators in names
  58         LEXFL_ALLOWNUMBERNAMES                          = BIT(8),       // allow names to start with a number
  59         LEXFL_ALLOWIPADDRESSES                          = BIT(9),       // allow ip addresses to be parsed as numbers
  60         LEXFL_ALLOWFLOATEXCEPTIONS                      = BIT(10),      // allow float exceptions like 1.#INF or 1.#IND to be parsed
  61         LEXFL_ALLOWMULTICHARLITERALS            = BIT(11),      // allow multi character literals
  62         LEXFL_ALLOWBACKSLASHSTRINGCONCAT        = BIT(12),      // allow multiple strings separated by '\' to be concatenated
  63         LEXFL_ONLYSTRINGS                                       = BIT(13)       // parse as whitespace delimited strings (quoted strings keep quotes)
  64 } lexerFlags_t;
  65
  66 // punctuation ids: integer ids for punctuation_t::n; the matching strings live in the punctuation table, not in this header
  67 #define P_RSHIFT_ASSIGN                         1
  68 #define P_LSHIFT_ASSIGN                         2
  69 #define P_PARMS                                         3
  70 #define P_PRECOMPMERGE                          4
  71 // logical and comparison operators (going by the P_LOGIC_ prefix)
  72 #define P_LOGIC_AND                                     5
  73 #define P_LOGIC_OR                                      6
  74 #define P_LOGIC_GEQ                                     7
  75 #define P_LOGIC_LEQ                                     8
  76 #define P_LOGIC_EQ                                      9
  77 #define P_LOGIC_UNEQ                            10
  78 // arithmetic compound assignments, increment and decrement
  79 #define P_MUL_ASSIGN                            11
  80 #define P_DIV_ASSIGN                            12
  81 #define P_MOD_ASSIGN                            13
  82 #define P_ADD_ASSIGN                            14
  83 #define P_SUB_ASSIGN                            15
  84 #define P_INC                                           16
  85 #define P_DEC                                           17
  86 // bitwise compound assignments and shifts
  87 #define P_BIN_AND_ASSIGN                        18
  88 #define P_BIN_OR_ASSIGN                         19
  89 #define P_BIN_XOR_ASSIGN                        20
  90 #define P_RSHIFT                                        21
  91 #define P_LSHIFT                                        22
  92 // NOTE(review): the exact strings behind P_POINTERREF, P_CPP1 and P_CPP2 are not visible here - confirm in the punctuation table
  93 #define P_POINTERREF                            23
  94 #define P_CPP1                                          24
  95 #define P_CPP2                                          25
  96 #define P_MUL                                           26
  97 #define P_DIV                                           27
  98 #define P_MOD                                           28
  99 #define P_ADD                                           29
 100 #define P_SUB                                           30
 101 #define P_ASSIGN                                        31
 102 // single bitwise operators
 103 #define P_BIN_AND                                       32
 104 #define P_BIN_OR                                        33
 105 #define P_BIN_XOR                                       34
 106 #define P_BIN_NOT                                       35
 107 // logical not and relational operators
 108 #define P_LOGIC_NOT                                     36
 109 #define P_LOGIC_GREATER                         37
 110 #define P_LOGIC_LESS                            38
 111 // separators; NOTE(review): P_REF is presumably the member reference '.', confirm in the punctuation table
 112 #define P_REF                                           39
 113 #define P_COMMA                                         40
 114 #define P_SEMICOLON                                     41
 115 #define P_COLON                                         42
 116 #define P_QUESTIONMARK                          43
 117 // brackets and backslash
 118 #define P_PARENTHESESOPEN                       44
 119 #define P_PARENTHESESCLOSE                      45
 120 #define P_BRACEOPEN                                     46
 121 #define P_BRACECLOSE                            47
 122 #define P_SQBRACKETOPEN                         48
 123 #define P_SQBRACKETCLOSE                        49
 124 #define P_BACKSLASH                                     50
 125 // precompiler related ids (see LEXFL_NODOLLARPRECOMPILE for the $ sign)
 126 #define P_PRECOMP                                       51
 127 #define P_DOLLAR                                        52
 128
 129 // punctuation
 130 typedef struct punctuation_s
 131 {
 132         char *p;                                                // punctuation character(s)
 133         int n;                                                  // punctuation id
 134 } punctuation_t;
 135
 136
 137 class idLexer {
 138
 139         friend class idParser;
 140
 141 public:
 142                                         // constructor
 143                                         idLexer();
 144                                         idLexer( int flags );
 145                                         idLexer( const char *filename, int flags = 0, bool OSPath = false );
 146                                         idLexer( const char *ptr, int length, const char *name, int flags = 0 );
 147                                         // destructor
 148                                         ~idLexer();
 149                                         // load a script from the given file; NOTE(review): OSPath presumably marks filename as an OS path - confirm in LoadFile
 150         int                             LoadFile( const char *filename, bool OSPath = false );
 151                                         // load a script from the given memory with the given length and a specified line offset,
 152                                         // so source strings extracted from a file can still refer to proper line numbers in the file
 153                                         // NOTE: the ptr is expected to point at a valid C string: ptr[length] == '\0'
 154         int                             LoadMemory( const char *ptr, int length, const char *name, int startLine = 1 );
 155                                         // free the script
 156         void                    FreeSource( void );
 157                                         // returns the 'loaded' member, which is set when a script is loaded
 158         int                             IsLoaded( void ) { return idLexer::loaded; };
 159                                         // read a token
 160         int                             ReadToken( idToken *token );
 161                                         // expect a certain token, reads the token when available
 162         int                             ExpectTokenString( const char *string );
 163                                         // expect a certain token type
 164         int                             ExpectTokenType( int type, int subtype, idToken *token );
 165                                         // expect a token
 166         int                             ExpectAnyToken( idToken *token );
 167                                         // returns true when the token is available
 168         int                             CheckTokenString( const char *string );
 169                                         // returns true and reads the token when a token with the given type is available
 170         int                             CheckTokenType( int type, int subtype, idToken *token );
 171                                         // returns true if the next token equals the given string but does not remove the token from the source
 172         int                             PeekTokenString( const char *string );
 173                                         // returns true if the next token equals the given type but does not remove the token from the source
 174         int                             PeekTokenType( int type, int subtype, idToken *token );
 175                                         // skip tokens until the given token string is read
 176         int                             SkipUntilString( const char *string );
 177                                         // skip the rest of the current line
 178         int                             SkipRestOfLine( void );
 179                                         // skip the braced section
 180         int                             SkipBracedSection( bool parseFirstBrace = true );
 181                                         // unread the given token
 182         void                    UnreadToken( const idToken *token );
 183                                         // read a token only if on the same line
 184         int                             ReadTokenOnLine( idToken *token );
 185
 186                                         // returns the rest of the current line
 187         const char*             ReadRestOfLine(idStr& out);
 188
 189                                         // read a signed integer
 190         int                             ParseInt( void );
 191                                         // read a boolean
 192         bool                    ParseBool( void );
 193                                         // read a floating point number.  If errorFlag is NULL, a non-numeric token will
 194                                         // issue an Error().  If it isn't NULL, it will issue a Warning() and set *errorFlag = true
 195         float                   ParseFloat( bool *errorFlag = NULL );
 196                                         // parse matrices with floats
 197         int                             Parse1DMatrix( int x, float *m );
 198         int                             Parse2DMatrix( int y, int x, float *m );
 199         int                             Parse3DMatrix( int z, int y, int x, float *m );
 200                                         // parse a braced section into a string
 201         const char *    ParseBracedSection( idStr &out );
 202                                         // parse a braced section into a string, maintaining indents and newlines
 203         const char *    ParseBracedSectionExact ( idStr &out, int tabs = -1 );
 204                                         // parse the rest of the line
 205         const char *    ParseRestOfLine( idStr &out );
 206                                         // retrieves the white space characters before the last read token
 207         int                             GetLastWhiteSpace( idStr &whiteSpace ) const;
 208                                         // returns start index into text buffer of last white space
 209         int                             GetLastWhiteSpaceStart( void ) const;
 210                                         // returns end index into text buffer of last white space
 211         int                             GetLastWhiteSpaceEnd( void ) const;
 212                                         // set an array with punctuations, NULL restores default C/C++ set, see default_punctuations for an example
 213         void                    SetPunctuations( const punctuation_t *p );
 214                                         // returns a pointer to the punctuation with the given id
 215         const char *    GetPunctuationFromId( int id );
 216                                         // get the id for the given punctuation
 217         int                             GetPunctuationId( const char *p );
 218                                         // set lexer flags
 219         void                    SetFlags( int flags );
 220                                         // get lexer flags
 221         int                             GetFlags( void );
 222                                         // reset the lexer
 223         void                    Reset( void );
 224                                         // returns true if at the end of the file
 225         int                             EndOfFile( void );
 226                                         // returns the current filename
 227         const char *    GetFileName( void );
 228                                         // get offset in script
 229         const int               GetFileOffset( void );
 230                                         // get file time
 231         const ID_TIME_T GetFileTime( void );
 232                                         // returns the current line number
 233         const int               GetLineNum( void );
 234                                         // print an error message
 235         void                    Error( const char *str, ... ) id_attribute((format(printf,2,3)));
 236                                         // print a warning message
 237         void                    Warning( const char *str, ... ) id_attribute((format(printf,2,3)));
 238                                         // returns true if Error() was called with LEXFL_NOFATALERRORS or LEXFL_NOERRORS set
 239         bool                    HadError( void ) const;
 240
 241                                         // set the base folder to load files from
 242         static void             SetBaseFolder( const char *path );
 243
 244 private:
 245         int                             loaded;                                 // set when a script file is loaded from file or memory
 246         idStr                   filename;                               // file name of the script
 247         int                             allocated;                              // true if buffer memory was allocated
 248         const char *    buffer;                                 // buffer containing the script
 249         const char *    script_p;                               // current pointer in the script
 250         const char *    end_p;                                  // pointer to the end of the script
 251         const char *    lastScript_p;                   // script pointer before reading token
 252         const char *    whiteSpaceStart_p;              // start of last white space
 253         const char *    whiteSpaceEnd_p;                // end of last white space
 254         ID_TIME_T                       fileTime;                               // file time
 255         int                             length;                                 // length of the script in bytes
 256         int                             line;                                   // current line in script
 257         int                             lastline;                               // line before reading token
 258         int                             tokenavailable;                 // set by UnreadToken
 259         int                             flags;                                  // several script flags
 260         const punctuation_t *punctuations;              // the punctuations used in the script
 261         int *                   punctuationtable;               // ASCII table with punctuations
 262         int *                   nextpunctuation;                // next punctuation in chain
 263         idToken                 token;                                  // available token
 264         idLexer *               next;                                   // next script in a chain
 265         bool                    hadError;                               // set by idLexer::Error, even if the error is suppressed
 266
 267         static char             baseFolder[ 256 ];              // base folder to load files from
 268
 269 private:
 270         void                    CreatePunctuationTable( const punctuation_t *punctuations );
 271         int                             ReadWhiteSpace( void );
 272         int                             ReadEscapeCharacter( char *ch );
 273         int                             ReadString( idToken *token, int quote );
 274         int                             ReadName( idToken *token );
 275         int                             ReadNumber( idToken *token );
 276         int                             ReadPunctuation( idToken *token );
 277         int                             ReadPrimitive( idToken *token );
 278         int                             CheckString( const char *str ) const;
 279         int                             NumLinesCrossed( void );
 280 };
 281
 282 ID_INLINE const char *idLexer::GetFileName( void ) {
 283         return this->filename;          // idStr member, returned through its conversion to const char *
 284 }
 285
 286 ID_INLINE const int idLexer::GetFileOffset( void ) {
 287         return this->script_p - this->buffer;   // distance of the current script pointer from the start of the buffer
 288 }
 289
 290 ID_INLINE const ID_TIME_T idLexer::GetFileTime( void ) {
 291         return this->fileTime;          // stored file time of the script
 292 }
 293
 294 ID_INLINE const int idLexer::GetLineNum( void ) {
 295         return this->line;              // current line in the script
 296 }
 297
 298 ID_INLINE void idLexer::SetFlags( int flags ) {
 299         this->flags = flags;            // the parameter shadows the member, so the member is qualified explicitly
 300 }
 301
 302 ID_INLINE int idLexer::GetFlags( void ) {
 303         return this->flags;             // lexer flags as last stored in the member
 304 }
 305
 306 #endif /* !__LEXER_H__ */
 307