scan.js
  1  'use strict'
  2  
  3  var licenses = []
  4    .concat(require('spdx-license-ids'))
  5    .concat(require('spdx-license-ids/deprecated'))
  6  var exceptions = require('spdx-exceptions')
  7  
  8  module.exports = function (source) {
  9    var index = 0
 10  
 11    function hasMore () {
 12      return index < source.length
 13    }
 14  
 15    // `value` can be a regexp or a string.
 16    // If it is recognized, the matching source string is returned and
 17    // the index is incremented. Otherwise `undefined` is returned.
 18    function read (value) {
 19      if (value instanceof RegExp) {
 20        var chars = source.slice(index)
 21        var match = chars.match(value)
 22        if (match) {
 23          index += match[0].length
 24          return match[0]
 25        }
 26      } else {
 27        if (source.indexOf(value, index) === index) {
 28          index += value.length
 29          return value
 30        }
 31      }
 32    }
 33  
 34    function skipWhitespace () {
 35      read(/[ ]*/)
 36    }
 37  
 38    function operator () {
 39      var string
 40      var possibilities = ['WITH', 'AND', 'OR', '(', ')', ':', '+']
 41      for (var i = 0; i < possibilities.length; i++) {
 42        string = read(possibilities[i])
 43        if (string) {
 44          break
 45        }
 46      }
 47  
 48      if (string === '+' && index > 1 && source[index - 2] === ' ') {
 49        throw new Error('Space before `+`')
 50      }
 51  
 52      return string && {
 53        type: 'OPERATOR',
 54        string: string
 55      }
 56    }
 57  
 58    function idstring () {
 59      return read(/[A-Za-z0-9-.]+/)
 60    }
 61  
 62    function expectIdstring () {
 63      var string = idstring()
 64      if (!string) {
 65        throw new Error('Expected idstring at offset ' + index)
 66      }
 67      return string
 68    }
 69  
 70    function documentRef () {
 71      if (read('DocumentRef-')) {
 72        var string = expectIdstring()
 73        return { type: 'DOCUMENTREF', string: string }
 74      }
 75    }
 76  
 77    function licenseRef () {
 78      if (read('LicenseRef-')) {
 79        var string = expectIdstring()
 80        return { type: 'LICENSEREF', string: string }
 81      }
 82    }
 83  
 84    function identifier () {
 85      var begin = index
 86      var string = idstring()
 87  
 88      if (licenses.indexOf(string) !== -1) {
 89        return {
 90          type: 'LICENSE',
 91          string: string
 92        }
 93      } else if (exceptions.indexOf(string) !== -1) {
 94        return {
 95          type: 'EXCEPTION',
 96          string: string
 97        }
 98      }
 99  
100      index = begin
101    }
102  
103    // Tries to read the next token. Returns `undefined` if no token is
104    // recognized.
105    function parseToken () {
106      // Ordering matters
107      return (
108        operator() ||
109        documentRef() ||
110        licenseRef() ||
111        identifier()
112      )
113    }
114  
115    var tokens = []
116    while (hasMore()) {
117      skipWhitespace()
118      if (!hasMore()) {
119        break
120      }
121  
122      var token = parseToken()
123      if (!token) {
124        throw new Error('Unexpected `' + source[index] +
125                        '` at offset ' + index)
126      }
127  
128      tokens.push(token)
129    }
130    return tokens
131  }