// lexer.js (4.7 KB)
  var utils = require('./utils')
  /**
   * A lexer token.
   * @typedef {object} LexerToken
   * @property {string} match  The string that was matched.
   * @property {number} type   Lexer type enum (one of the TYPES values).
   * @property {number} length Length of the original string processed.
   */
  /**
   * Enum for token types.
   *
   * Values are sequential except for 16 and 26: those numbers belong to the
   * currently unused ARRAYCLOSE and METHODEND tokens, which are not defined
   * as keys, so the numbering skips over them.
   * @readonly
   * @enum {number}
   */
  var TYPES = {
    /** Whitespace */
    WHITESPACE: 0,
    /** Plain string */
    STRING: 1,
    /** Variable filter */
    FILTER: 2,
    /** Empty variable filter */
    FILTEREMPTY: 3,
    /** Function */
    FUNCTION: 4,
    /** Function with no arguments */
    FUNCTIONEMPTY: 5,
    /** Open parenthesis */
    PARENOPEN: 6,
    /** Close parenthesis */
    PARENCLOSE: 7,
    /** Comma */
    COMMA: 8,
    /** Variable */
    VAR: 9,
    /** Number */
    NUMBER: 10,
    /** Math operator */
    OPERATOR: 11,
    /** Open square bracket */
    BRACKETOPEN: 12,
    /** Close square bracket */
    BRACKETCLOSE: 13,
    /** Key on an object using dot-notation */
    DOTKEY: 14,
    /** Start of an array */
    ARRAYOPEN: 15,
    // End of an array: currently unused, so ARRAYCLOSE (16) is not defined.
    /** Open curly brace */
    CURLYOPEN: 17,
    /** Close curly brace */
    CURLYCLOSE: 18,
    /** Colon (:) */
    COLON: 19,
    /** JavaScript-valid comparator */
    COMPARATOR: 20,
    /** Boolean logic */
    LOGIC: 21,
    /** Boolean logic "not" */
    NOT: 22,
    /** true or false */
    BOOL: 23,
    /** Variable assignment */
    ASSIGNMENT: 24,
    /** Start of a method */
    METHODOPEN: 25,
    // End of a method: currently unused, so METHODEND (26) is not defined.
    /** Unknown type */
    UNKNOWN: 100
  }
  /**
   * Ordered list of lexing rules consumed by `reader`.
   *
   * Each rule has:
   *  - `type`    {number}   The TYPES value assigned to a matching token.
   *  - `regex`   {RegExp[]} Patterns tried in order against the start of the input.
   *  - `idx`     {number=}  Capture group used as the token text (`reader` falls
   *                         back to group 0, the whole match, when absent).
   *  - `replace` {object=}  Map from the matched text to the text stored on the
   *                         token (e.g. `and` -> `&&`).
   *
   * `reader` stops at the first pattern that matches (assuming `utils.some`
   * short-circuits like Array.prototype.some), so the ORDER of the rules is
   * significant: more specific patterns must come before patterns that would
   * also match the same prefix.
   */
  var rules = [
    {
      type: TYPES.WHITESPACE,
      regex: [/^\s+/]
    },
    {
      // Empty strings are listed explicitly because the non-empty patterns
      // require at least one character before the closing quote.
      type: TYPES.STRING,
      regex: [/^""/, /^".*?[^\\]"/, /^''/, /^'.*?[^\\]'/]
    },
    {
      // Must precede FILTEREMPTY, whose pattern is a prefix of this one.
      type: TYPES.FILTER,
      regex: [/^\|\s*(\w+)\(/],
      idx: 1
    },
    {
      type: TYPES.FILTEREMPTY,
      regex: [/^\|\s*(\w+)/],
      idx: 1
    },
    {
      // Must precede FUNCTION, which would otherwise match `name(` of `name()`.
      type: TYPES.FUNCTIONEMPTY,
      regex: [/^\s*(\w+)\(\)/],
      idx: 1
    },
    {
      // Precedes VAR so that `name(` is lexed as a function, not a variable.
      type: TYPES.FUNCTION,
      regex: [/^\s*(\w+)\(/],
      idx: 1
    },
    {
      type: TYPES.PARENOPEN,
      regex: [/^\(/]
    },
    {
      type: TYPES.PARENCLOSE,
      regex: [/^\)/]
    },
    {
      type: TYPES.COMMA,
      regex: [/^,/]
    },
    {
      // Word forms are normalized to their symbolic equivalents.
      type: TYPES.LOGIC,
      regex: [/^(&&|\|\|)\s*/, /^(and|or)\s+/],
      idx: 1,
      replace: {
        and: '&&',
        or: '||'
      }
    },
    {
      // Precedes ASSIGNMENT and NOT so `==` / `!=` are not split into `=` / `!`.
      type: TYPES.COMPARATOR,
      regex: [/^(===|==|!==|!=|<=|<|>=|>|in\s|gte\s|gt\s|lte\s|lt\s)\s*/],
      idx: 1,
      replace: {
        gte: '>=',
        gt: '>',
        lte: '<=',
        lt: '<'
      }
    },
    {
      type: TYPES.ASSIGNMENT,
      regex: [/^(=|\+=|-=|\*=|\/=)/]
    },
    {
      // No `idx`: the whole match is used, and `reader` strips its trailing
      // whitespace before looking it up in `replace`.
      type: TYPES.NOT,
      regex: [/^!\s*/, /^not\s+/],
      replace: {
        not: '!'
      }
    },
    {
      // Precedes VAR so `true` / `false` are not lexed as variable names.
      // Only matches when followed by whitespace or the end of the input.
      type: TYPES.BOOL,
      regex: [/^(true|false)\s+/, /^(true|false)$/],
      idx: 1
    },
    {
      // First pattern also consumes dot-notation paths (`a.b.$c`).
      type: TYPES.VAR,
      regex: [/^[a-zA-Z_$]\w*((\.\$?\w*)+)?/, /^[a-zA-Z_$]\w*/]
    },
    {
      type: TYPES.BRACKETOPEN,
      regex: [/^\[/]
    },
    {
      type: TYPES.BRACKETCLOSE,
      regex: [/^\]/]
    },
    {
      type: TYPES.CURLYOPEN,
      regex: [/^\{/]
    },
    {
      type: TYPES.COLON,
      regex: [/^:/]
    },
    {
      type: TYPES.CURLYCLOSE,
      regex: [/^\}/]
    },
    {
      type: TYPES.DOTKEY,
      regex: [/^\.(\w+)/],
      idx: 1
    },
    {
      // Precedes OPERATOR, so a sign directly followed by digits is lexed as
      // part of the number rather than as a separate operator.
      type: TYPES.NUMBER,
      regex: [/^[+-]?\d+(\.\d+)?/]
    },
    {
      type: TYPES.OPERATOR,
      regex: [/^(\+|-|\/|\*|%)/]
    }
  ]
  // Expose the token type enum as `types` on this module's exports.
  exports.types = TYPES
  /**
   * Return the token type object for a single chunk of a string.
   *
   * Walks `rules` in order and, within each rule, its `regex` list in order,
   * and builds a token from the first pattern that matches `str`. The token
   * text is taken from capture group `rule.idx` (group 0 when the rule has no
   * `idx`), has trailing whitespace removed, and is then swapped for
   * `rule.replace[text]` when the rule defines such a replacement. When no
   * pattern matches, the whole of `str` is returned as one UNKNOWN token.
   *
   * @param  {string} str String chunk.
   * @return {LexerToken} Defined type, potentially stripped or replaced with more suitable content.
   * @private
   */
  function reader (str) {
    var hasOwn = Object.prototype.hasOwnProperty
    var matched

    utils.some(rules, function (rule) {
      return utils.some(rule.regex, function (regex) {
        var match = str.match(regex)
        var normalized

        if (!match) {
          // Falsy result: keep trying the remaining patterns / rules.
          return
        }

        normalized = match[rule.idx || 0].replace(/\s*$/, '')

        // Own-property checks only, so text such as "constructor" can never
        // be "replaced" by something inherited from Object.prototype.
        if (hasOwn.call(rule, 'replace') && hasOwn.call(rule.replace, normalized)) {
          normalized = rule.replace[normalized]
        }

        matched = {
          match: normalized,
          type: rule.type,
          // Length of the full match (not just the capture group), i.e. how
          // many input characters this token consumed.
          length: match[0].length
        }

        // Truthy result: signal a hit so iteration can stop.
        return true
      })
    })

    if (!matched) {
      matched = {
        match: str,
        type: TYPES.UNKNOWN,
        length: str.length
      }
    }

    return matched
  }
  228. /**
  229. * Read a string and break it into separate token types.
  230. * @param {string} str
  231. * @return {Array.LexerToken} Array of defined types, potentially stripped or replaced with more suitable content.
  232. * @private
  233. */
  234. exports.read = function (str) {
  235. var offset = 0
  236. var tokens = []
  237. var substr
  238. var match
  239. while (offset < str.length) {
  240. substr = str.substring(offset)
  241. match = reader(substr)
  242. offset += match.length
  243. tokens.push(match)
  244. }
  245. return tokens
  246. }