l.js
  1  'use strict';
  2  
  3  var Lexer = exports.Lexer = function() {
  4    this.pos = 0;
  5    this.buf = null;
  6    this.buflen = 0;
  7  
  8    // Operator table, mapping operator -> token name
  9    this.optable = {
 10      '+':  'PLUS',
 11      '-':  'MINUS',
 12      '*':  'MULTIPLY',
 13      '.':  'PERIOD',
 14      '\\': 'BACKSLASH',
 15      ':':  'COLON',
 16      '%':  'PERCENT',
 17      '|':  'PIPE',
 18      '!':  'EXCLAMATION',
 19      '?':  'QUESTION',
 20      '#':  'POUND',
 21      '&':  'AMPERSAND',
 22      ';':  'SEMI',
 23      ',':  'COMMA',
 24      '(':  'L_PAREN',
 25      ')':  'R_PAREN',
 26      '<':  'L_ANG',
 27      '>':  'R_ANG',
 28      '{':  'L_BRACE',
 29      '}':  'R_BRACE',
 30      '[':  'L_BRACKET',
 31      ']':  'R_BRACKET',
 32      '=':  'EQUALS'
 33    };
 34  }
 35  
 36  // Initialize the Lexer's buffer. This resets the lexer's internal
 37  // state and subsequent tokens will be returned starting with the
 38  // beginning of the new buffer.
 39  Lexer.prototype.input = function(buf) {
 40    this.pos = 0;
 41    this.buf = buf;
 42    this.buflen = buf.length;
 43  }
 44  
 45  // Get the next token from the current buffer. A token is an object with
 46  // the following properties:
 47  // - name: name of the pattern that this token matched (taken from rules).
 48  // - value: actual string value of the token.
 49  // - pos: offset in the current buffer where the token starts.
 50  //
 51  // If there are no more tokens in the buffer, returns null. In case of
 52  // an error throws Error.
 53  Lexer.prototype.token = function() {
 54    this._skipnontokens();
 55    if (this.pos >= this.buflen) {
 56      return null;
 57    }
 58  
 59    // The char at this.pos is part of a real token. Figure out which.
 60    var c = this.buf.charAt(this.pos);
 61  
 62    // '/' is treated specially, because it starts a comment if followed by
 63    // another '/'. If not followed by another '/', it's the DIVIDE
 64    // operator.
 65    if (c === '/') {
 66      var next_c = this.buf.charAt(this.pos + 1);
 67      if (next_c === '/') {
 68        return this._process_comment();
 69      } else {
 70        return {name: 'DIVIDE', value: '/', pos: this.pos++};
 71      }
 72    } else {
 73      // Look it up in the table of operators
 74      var op = this.optable[c];
 75      if (op !== undefined) {
 76        return {name: op, value: c, pos: this.pos++};
 77      } else {
 78        // Not an operator - so it's the beginning of another token.
 79        if (Lexer._isalpha(c)) {
 80          return this._process_identifier();
 81        } else if (Lexer._isdigit(c)) {
 82          return this._process_number();
 83        } else if (c === '"') {
 84          return this._process_quote();
 85        } else {
 86          throw Error('Token error at ' + this.pos);
 87        }
 88      }
 89    }
 90  }
 91  
 92  Lexer._isnewline = function(c) {
 93    return c === '\r' || c === '\n';
 94  }
 95  
 96  Lexer._isdigit = function(c) {
 97    return c >= '0' && c <= '9';
 98  }
 99  
// True when `c` may begin an identifier: an ASCII letter, '_' or '$'.
Lexer._isalpha = function(c) {
  var isLower = c >= 'a' && c <= 'z';
  var isUpper = c >= 'A' && c <= 'Z';
  return isLower || isUpper || c === '_' || c === '$';
};
105  
// True when `c` may continue an identifier: an ASCII letter, a decimal
// digit, '_' or '$'.
Lexer._isalphanum = function(c) {
  if (c >= 'a' && c <= 'z') {
    return true;
  }
  if (c >= 'A' && c <= 'Z') {
    return true;
  }
  if (c >= '0' && c <= '9') {
    return true;
  }
  return c === '_' || c === '$';
};
112  
// Consume the run of decimal digits that begins at this.pos and return it
// as a NUMBER token, leaving this.pos on the first character after the run.
// The character at this.pos itself is not re-tested (token() only calls
// this after seeing a digit there), so scanning begins one past it.
Lexer.prototype._process_number = function() {
  var start = this.pos;
  var stop = start + 1;
  for (; stop < this.buflen; stop++) {
    if (!Lexer._isdigit(this.buf.charAt(stop))) {
      break;
    }
  }

  var number = {
    name: 'NUMBER',
    value: this.buf.substring(start, stop),
    pos: start
  };
  this.pos = stop;
  return number;
};
128  
// Consume the '//' line comment that begins at this.pos and return it as a
// COMMENT token. The token's value runs from the two slashes up to, but not
// including, the terminating '\r' or '\n' (or the end of the buffer).
// Afterwards this.pos sits just past that terminator, or exactly at the end
// of the buffer when the comment is on the last line with no terminator.
Lexer.prototype._process_comment = function() {
  // Begin scanning after the two leading slashes.
  var endpos = this.pos + 2;
  while (endpos < this.buflen &&
         !Lexer._isnewline(this.buf.charAt(endpos))) {
    endpos++;
  }

  var tok = {
    name: 'COMMENT',
    value: this.buf.substring(this.pos, endpos),
    pos: this.pos
  };
  // Step over the line terminator only when one was actually found, so
  // this.pos never runs past the end of the buffer.
  this.pos = endpos < this.buflen ? endpos + 1 : endpos;
  return tok;
};
146  
// Consume the identifier that begins at this.pos and return it as an
// IDENTIFIER token, leaving this.pos on the first character after it.
// The first character is accepted as-is (token() only calls this after
// Lexer._isalpha matched it); each following character must satisfy
// Lexer._isalphanum.
Lexer.prototype._process_identifier = function() {
  var begin = this.pos;
  var cursor = begin;
  do {
    cursor++;
  } while (cursor < this.buflen &&
           Lexer._isalphanum(this.buf.charAt(cursor)));

  var ident = {
    name: 'IDENTIFIER',
    value: this.buf.substring(begin, cursor),
    pos: begin
  };
  this.pos = cursor;
  return ident;
};
162  
// Consume the double-quoted string whose opening quote is at this.pos and
// return it as a QUOTE token. The token's value includes both quote
// characters. The string ends at the very next '"' in the buffer.
// Throws Error when no closing quote exists.
Lexer.prototype._process_quote = function() {
  var opening = this.pos;
  var closing = this.buf.indexOf('"', opening + 1);
  if (closing === -1) {
    throw new Error('Unterminated quote at ' + this.pos);
  }

  var past = closing + 1;
  var quoted = {
    name: 'QUOTE',
    value: this.buf.substring(opening, past),
    pos: opening
  };
  this.pos = past;
  return quoted;
};
179  
// Advance this.pos past any run of whitespace (space, tab, carriage return,
// line feed) so that it rests on the first character of the next token, or
// at the end of the buffer when only whitespace remains.
Lexer.prototype._skipnontokens = function() {
  while (this.pos < this.buflen) {
    var c = this.buf.charAt(this.pos);
    // Strict comparisons, matching the rest of the lexer.
    if (c !== ' ' && c !== '\t' && c !== '\r' && c !== '\n') {
      break;
    }
    this.pos++;
  }
};
189  }