/ src / theme / languages / python.js
python.js
  1  /*! `python` grammar compiled for Highlight.js 11.10.0 */
  2    (function(){
  3      var hljsGrammar = (function () {
  4    'use strict';
  5  
  6    /*
  7    Language: Python
  8    Description: Python is an interpreted, object-oriented, high-level programming language with dynamic semantics.
  9    Website: https://www.python.org
 10    Category: common
 11    */
 12  
 13    function python(hljs) {
 14      const regex = hljs.regex;
 15      const IDENT_RE = /[\p{XID_Start}_]\p{XID_Continue}*/u;
 16      const RESERVED_WORDS = [
 17        'and',
 18        'as',
 19        'assert',
 20        'async',
 21        'await',
 22        'break',
 23        'case',
 24        'class',
 25        'continue',
 26        'def',
 27        'del',
 28        'elif',
 29        'else',
 30        'except',
 31        'finally',
 32        'for',
 33        'from',
 34        'global',
 35        'if',
 36        'import',
 37        'in',
 38        'is',
 39        'lambda',
 40        'match',
 41        'nonlocal|10',
 42        'not',
 43        'or',
 44        'pass',
 45        'raise',
 46        'return',
 47        'try',
 48        'while',
 49        'with',
 50        'yield'
 51      ];
 52  
 53      const BUILT_INS = [
 54        '__import__',
 55        'abs',
 56        'all',
 57        'any',
 58        'ascii',
 59        'bin',
 60        'bool',
 61        'breakpoint',
 62        'bytearray',
 63        'bytes',
 64        'callable',
 65        'chr',
 66        'classmethod',
 67        'compile',
 68        'complex',
 69        'delattr',
 70        'dict',
 71        'dir',
 72        'divmod',
 73        'enumerate',
 74        'eval',
 75        'exec',
 76        'filter',
 77        'float',
 78        'format',
 79        'frozenset',
 80        'getattr',
 81        'globals',
 82        'hasattr',
 83        'hash',
 84        'help',
 85        'hex',
 86        'id',
 87        'input',
 88        'int',
 89        'isinstance',
 90        'issubclass',
 91        'iter',
 92        'len',
 93        'list',
 94        'locals',
 95        'map',
 96        'max',
 97        'memoryview',
 98        'min',
 99        'next',
100        'object',
101        'oct',
102        'open',
103        'ord',
104        'pow',
105        'print',
106        'property',
107        'range',
108        'repr',
109        'reversed',
110        'round',
111        'set',
112        'setattr',
113        'slice',
114        'sorted',
115        'staticmethod',
116        'str',
117        'sum',
118        'super',
119        'tuple',
120        'type',
121        'vars',
122        'zip'
123      ];
124  
125      const LITERALS = [
126        '__debug__',
127        'Ellipsis',
128        'False',
129        'None',
130        'NotImplemented',
131        'True'
132      ];
133  
134      // https://docs.python.org/3/library/typing.html
135      // TODO: Could these be supplemented by a CamelCase matcher in certain
136      // contexts, leaving these remaining only for relevance hinting?
137      const TYPES = [
138        "Any",
139        "Callable",
140        "Coroutine",
141        "Dict",
142        "List",
143        "Literal",
144        "Generic",
145        "Optional",
146        "Sequence",
147        "Set",
148        "Tuple",
149        "Type",
150        "Union"
151      ];
152  
153      const KEYWORDS = {
154        $pattern: /[A-Za-z]\w+|__\w+__/,
155        keyword: RESERVED_WORDS,
156        built_in: BUILT_INS,
157        literal: LITERALS,
158        type: TYPES
159      };
160  
161      const PROMPT = {
162        className: 'meta',
163        begin: /^(>>>|\.\.\.) /
164      };
165  
166      const SUBST = {
167        className: 'subst',
168        begin: /\{/,
169        end: /\}/,
170        keywords: KEYWORDS,
171        illegal: /#/
172      };
173  
174      const LITERAL_BRACKET = {
175        begin: /\{\{/,
176        relevance: 0
177      };
178  
179      const STRING = {
180        className: 'string',
181        contains: [ hljs.BACKSLASH_ESCAPE ],
182        variants: [
183          {
184            begin: /([uU]|[bB]|[rR]|[bB][rR]|[rR][bB])?'''/,
185            end: /'''/,
186            contains: [
187              hljs.BACKSLASH_ESCAPE,
188              PROMPT
189            ],
190            relevance: 10
191          },
192          {
193            begin: /([uU]|[bB]|[rR]|[bB][rR]|[rR][bB])?"""/,
194            end: /"""/,
195            contains: [
196              hljs.BACKSLASH_ESCAPE,
197              PROMPT
198            ],
199            relevance: 10
200          },
201          {
202            begin: /([fF][rR]|[rR][fF]|[fF])'''/,
203            end: /'''/,
204            contains: [
205              hljs.BACKSLASH_ESCAPE,
206              PROMPT,
207              LITERAL_BRACKET,
208              SUBST
209            ]
210          },
211          {
212            begin: /([fF][rR]|[rR][fF]|[fF])"""/,
213            end: /"""/,
214            contains: [
215              hljs.BACKSLASH_ESCAPE,
216              PROMPT,
217              LITERAL_BRACKET,
218              SUBST
219            ]
220          },
221          {
222            begin: /([uU]|[rR])'/,
223            end: /'/,
224            relevance: 10
225          },
226          {
227            begin: /([uU]|[rR])"/,
228            end: /"/,
229            relevance: 10
230          },
231          {
232            begin: /([bB]|[bB][rR]|[rR][bB])'/,
233            end: /'/
234          },
235          {
236            begin: /([bB]|[bB][rR]|[rR][bB])"/,
237            end: /"/
238          },
239          {
240            begin: /([fF][rR]|[rR][fF]|[fF])'/,
241            end: /'/,
242            contains: [
243              hljs.BACKSLASH_ESCAPE,
244              LITERAL_BRACKET,
245              SUBST
246            ]
247          },
248          {
249            begin: /([fF][rR]|[rR][fF]|[fF])"/,
250            end: /"/,
251            contains: [
252              hljs.BACKSLASH_ESCAPE,
253              LITERAL_BRACKET,
254              SUBST
255            ]
256          },
257          hljs.APOS_STRING_MODE,
258          hljs.QUOTE_STRING_MODE
259        ]
260      };
261  
262      // https://docs.python.org/3.9/reference/lexical_analysis.html#numeric-literals
263      const digitpart = '[0-9](_?[0-9])*';
264      const pointfloat = `(\\b(${digitpart}))?\\.(${digitpart})|\\b(${digitpart})\\.`;
265      // Whitespace after a number (or any lexical token) is needed only if its absence
266      // would change the tokenization
267      // https://docs.python.org/3.9/reference/lexical_analysis.html#whitespace-between-tokens
268      // We deviate slightly, requiring a word boundary or a keyword
269      // to avoid accidentally recognizing *prefixes* (e.g., `0` in `0x41` or `08` or `0__1`)
270      const lookahead = `\\b|${RESERVED_WORDS.join('|')}`;
271      const NUMBER = {
272        className: 'number',
273        relevance: 0,
274        variants: [
275          // exponentfloat, pointfloat
276          // https://docs.python.org/3.9/reference/lexical_analysis.html#floating-point-literals
277          // optionally imaginary
278          // https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
279          // Note: no leading \b because floats can start with a decimal point
280          // and we don't want to mishandle e.g. `fn(.5)`,
281          // no trailing \b for pointfloat because it can end with a decimal point
282          // and we don't want to mishandle e.g. `0..hex()`; this should be safe
283          // because both MUST contain a decimal point and so cannot be confused with
284          // the interior part of an identifier
285          {
286            begin: `(\\b(${digitpart})|(${pointfloat}))[eE][+-]?(${digitpart})[jJ]?(?=${lookahead})`
287          },
288          {
289            begin: `(${pointfloat})[jJ]?`
290          },
291  
292          // decinteger, bininteger, octinteger, hexinteger
293          // https://docs.python.org/3.9/reference/lexical_analysis.html#integer-literals
294          // optionally "long" in Python 2
295          // https://docs.python.org/2.7/reference/lexical_analysis.html#integer-and-long-integer-literals
296          // decinteger is optionally imaginary
297          // https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
298          {
299            begin: `\\b([1-9](_?[0-9])*|0+(_?0)*)[lLjJ]?(?=${lookahead})`
300          },
301          {
302            begin: `\\b0[bB](_?[01])+[lL]?(?=${lookahead})`
303          },
304          {
305            begin: `\\b0[oO](_?[0-7])+[lL]?(?=${lookahead})`
306          },
307          {
308            begin: `\\b0[xX](_?[0-9a-fA-F])+[lL]?(?=${lookahead})`
309          },
310  
311          // imagnumber (digitpart-based)
312          // https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
313          {
314            begin: `\\b(${digitpart})[jJ](?=${lookahead})`
315          }
316        ]
317      };
318      const COMMENT_TYPE = {
319        className: "comment",
320        begin: regex.lookahead(/# type:/),
321        end: /$/,
322        keywords: KEYWORDS,
323        contains: [
324          { // prevent keywords from coloring `type`
325            begin: /# type:/
326          },
327          // comment within a datatype comment includes no keywords
328          {
329            begin: /#/,
330            end: /\b\B/,
331            endsWithParent: true
332          }
333        ]
334      };
335      const PARAMS = {
336        className: 'params',
337        variants: [
338          // Exclude params in functions without params
339          {
340            className: "",
341            begin: /\(\s*\)/,
342            skip: true
343          },
344          {
345            begin: /\(/,
346            end: /\)/,
347            excludeBegin: true,
348            excludeEnd: true,
349            keywords: KEYWORDS,
350            contains: [
351              'self',
352              PROMPT,
353              NUMBER,
354              STRING,
355              hljs.HASH_COMMENT_MODE
356            ]
357          }
358        ]
359      };
360      SUBST.contains = [
361        STRING,
362        NUMBER,
363        PROMPT
364      ];
365  
366      return {
367        name: 'Python',
368        aliases: [
369          'py',
370          'gyp',
371          'ipython'
372        ],
373        unicodeRegex: true,
374        keywords: KEYWORDS,
375        illegal: /(<\/|\?)|=>/,
376        contains: [
377          PROMPT,
378          NUMBER,
379          {
380            // very common convention
381            scope: 'variable.language',
382            match: /\bself\b/
383          },
384          {
385            // eat "if" prior to string so that it won't accidentally be
386            // labeled as an f-string
387            beginKeywords: "if",
388            relevance: 0
389          },
390          { match: /\bor\b/, scope: "keyword" },
391          STRING,
392          COMMENT_TYPE,
393          hljs.HASH_COMMENT_MODE,
394          {
395            match: [
396              /\bdef/, /\s+/,
397              IDENT_RE,
398            ],
399            scope: {
400              1: "keyword",
401              3: "title.function"
402            },
403            contains: [ PARAMS ]
404          },
405          {
406            variants: [
407              {
408                match: [
409                  /\bclass/, /\s+/,
410                  IDENT_RE, /\s*/,
411                  /\(\s*/, IDENT_RE,/\s*\)/
412                ],
413              },
414              {
415                match: [
416                  /\bclass/, /\s+/,
417                  IDENT_RE
418                ],
419              }
420            ],
421            scope: {
422              1: "keyword",
423              3: "title.class",
424              6: "title.class.inherited",
425            }
426          },
427          {
428            className: 'meta',
429            begin: /^[\t ]*@/,
430            end: /(?=#)|$/,
431            contains: [
432              NUMBER,
433              PARAMS,
434              STRING
435            ]
436          }
437        ]
438      };
439    }
440  
441    return python;
442  
443  })();
444  
445      hljs.registerLanguage('python', hljsGrammar);
446    })();