l.js
'use strict';

// A small hand-written lexer. Feed it a string with input() and pull
// tokens one at a time with token().
var Lexer = function() {
  this.pos = 0;       // offset of the next unread character in buf
  this.buf = null;    // the string being tokenized (set by input())
  this.buflen = 0;    // cached buf.length

  // Operator table, mapping single-character operator -> token name.
  // '/' is deliberately absent: token() handles it itself because it can
  // also start a '//' comment.
  this.optable = {
    '+':  'PLUS',
    '-':  'MINUS',
    '*':  'MULTIPLY',
    '.':  'PERIOD',
    '\\': 'BACKSLASH',
    ':':  'COLON',
    '%':  'PERCENT',
    '|':  'PIPE',
    '!':  'EXCLAMATION',
    '?':  'QUESTION',
    '#':  'POUND',
    '&':  'AMPERSAND',
    ';':  'SEMI',
    ',':  'COMMA',
    '(':  'L_PAREN',
    ')':  'R_PAREN',
    '<':  'L_ANG',
    '>':  'R_ANG',
    '{':  'L_BRACE',
    '}':  'R_BRACE',
    '[':  'L_BRACKET',
    ']':  'R_BRACKET',
    '=':  'EQUALS'
  };
};

// Publish the constructor for CommonJS consumers. The typeof guard keeps
// the file loadable where no `exports` binding exists, instead of dying
// with a ReferenceError.
if (typeof exports !== 'undefined') {
  exports.Lexer = Lexer;
}

// Initialize the Lexer's buffer. This resets the lexer's internal
// state and subsequent tokens will be returned starting with the
// beginning of the new buffer.
//  - buf: the string to tokenize.
Lexer.prototype.input = function(buf) {
  this.pos = 0;
  this.buf = buf;
  this.buflen = buf.length;
};

// Get the next token from the current buffer. A token is an object with
// the following properties:
// - name: kind of token. Either a name from this.optable, or one of
//         DIVIDE, COMMENT, IDENTIFIER, NUMBER, QUOTE.
// - value: actual string value of the token.
// - pos: offset in the current buffer where the token starts.
//
// If there are no more tokens in the buffer, returns null. Throws Error
// when the character at the current position cannot start any token.
Lexer.prototype.token = function() {
  this._skipnontokens();
  if (this.pos >= this.buflen) {
    return null;
  }

  // The char at this.pos is part of a real token. Figure out which.
  var c = this.buf.charAt(this.pos);

  // '/' is treated specially, because it starts a comment if followed by
  // another '/'. If not followed by another '/', it's the DIVIDE
  // operator.
  if (c === '/') {
    if (this.buf.charAt(this.pos + 1) === '/') {
      return this._process_comment();
    }
    return {name: 'DIVIDE', value: '/', pos: this.pos++};
  }

  // Single-character operators come straight from the table.
  var op = this.optable[c];
  if (op !== undefined) {
    return {name: op, value: c, pos: this.pos++};
  }

  // Not an operator - so it's the beginning of a longer token.
  if (Lexer._isalpha(c)) {
    return this._process_identifier();
  }
  if (Lexer._isdigit(c)) {
    return this._process_number();
  }
  if (c === '"') {
    return this._process_quote();
  }

  throw new Error('Token error at ' + this.pos);
};

// True when c is a carriage return or a line feed.
Lexer._isnewline = function(c) {
  return c === '\r' || c === '\n';
};

// True when c is an ASCII decimal digit.
Lexer._isdigit = function(c) {
  return c >= '0' && c <= '9';
};

// True when c may start an identifier: ASCII letter, '_' or '$'.
Lexer._isalpha = function(c) {
  return (c >= 'a' && c <= 'z') ||
         (c >= 'A' && c <= 'Z') ||
         c === '_' || c === '$';
};

// True when c may continue an identifier: anything _isalpha accepts,
// plus ASCII digits.
Lexer._isalphanum = function(c) {
  return Lexer._isalpha(c) || Lexer._isdigit(c);
};

// Consume a run of decimal digits starting at this.pos and return it as a
// NUMBER token. Only digits are consumed, so a '.' ends the number.
Lexer.prototype._process_number = function() {
  var endpos = this.pos + 1;
  while (endpos < this.buflen &&
         Lexer._isdigit(this.buf.charAt(endpos))) {
    endpos++;
  }

  var tok = {
    name: 'NUMBER',
    value: this.buf.substring(this.pos, endpos),
    pos: this.pos
  };
  this.pos = endpos;
  return tok;
};

// Consume a '//' comment starting at this.pos and return it as a COMMENT
// token. The token value includes the leading '//' and runs up to, but
// does not include, the line terminator (or the end of the buffer).
Lexer.prototype._process_comment = function() {
  // Skip the two slashes, then scan until the end of the line.
  var endpos = this.pos + 2;
  while (endpos < this.buflen &&
         !Lexer._isnewline(this.buf.charAt(endpos))) {
    endpos++;
  }

  var tok = {
    name: 'COMMENT',
    value: this.buf.substring(this.pos, endpos),
    pos: this.pos
  };
  // Stop on the line terminator rather than one past it, so pos never
  // exceeds buflen when the comment ends the buffer. The terminator is
  // left for the whitespace-skipping step at the start of token().
  this.pos = endpos;
  return tok;
};

// Consume an identifier starting at this.pos and return it as an
// IDENTIFIER token. The caller has already checked that the first
// character may start an identifier; the remaining characters are
// accepted as long as Lexer._isalphanum approves them.
Lexer.prototype._process_identifier = function() {
  var endpos = this.pos + 1;
  while (endpos < this.buflen &&
         Lexer._isalphanum(this.buf.charAt(endpos))) {
    endpos++;
  }

  var tok = {
    name: 'IDENTIFIER',
    value: this.buf.substring(this.pos, endpos),
    pos: this.pos
  };
  this.pos = endpos;
  return tok;
};

// Consume a double-quoted string starting at this.pos and return it as a
// QUOTE token. The token value includes both quote characters. The
// string ends at the very next '"' in the buffer; backslash escapes are
// not interpreted. Throws Error if no closing quote exists.
Lexer.prototype._process_quote = function() {
  // this.pos points at the opening quote. Find the ending quote.
  var end_index = this.buf.indexOf('"', this.pos + 1);

  if (end_index === -1) {
    throw new Error('Unterminated quote at ' + this.pos);
  }

  var tok = {
    name: 'QUOTE',
    value: this.buf.substring(this.pos, end_index + 1),
    pos: this.pos
  };
  this.pos = end_index + 1;
  return tok;
};

// Advance this.pos past any run of spaces, tabs, carriage returns and
// line feeds, stopping at the first other character or at the end of
// the buffer.
Lexer.prototype._skipnontokens = function() {
  while (this.pos < this.buflen) {
    var c = this.buf.charAt(this.pos);
    if (c === ' ' || c === '\t' || c === '\r' || c === '\n') {
      this.pos++;
    } else {
      break;
    }
  }
};