/ basant / build / mistune.py
mistune.py
   1  # coding: utf-8
   2  """
   3      mistune
   4      ~~~~~~~
   5      The fastest markdown parser in pure Python with renderer feature.
   6      :copyright: (c) 2014 - 2018 by Hsiaoming Yang.
   7  """
   8  
   9  import re
  10  import inspect
  11  
# Package metadata.
__version__ = '0.8.4'
__author__ = 'Hsiaoming Yang <me@lepture.com>'
# Names exported by a star-import of this module.
__all__ = [
    'BlockGrammar', 'BlockLexer',
    'InlineGrammar', 'InlineLexer',
    'Renderer', 'Markdown',
    'markdown', 'escape',
]


# One or more whitespace characters; _keyify() collapses each run to a space.
_key_pattern = re.compile(r'\s+')
# A single non-word character.
_nonalpha_pattern = re.compile(r'\W')
# An "&" that is not already the start of an entity such as "&amp;" or
# "&#39;"; escape() rewrites only these when smart_amp is enabled.
_escape_pattern = re.compile(r'&(?!#?\w+;)')
# Windows ("\r\n") and bare "\r" line endings, normalised by preprocessing().
_newline_pattern = re.compile(r'\r\n|\r')
# The leading "> " marker of every line of a block quote.
_block_quote_leading_pattern = re.compile(r'^ *> ?', flags=re.M)
# The four-space indent at the start of every line of an indented code block.
_block_code_leading_pattern = re.compile(r'^ {4}', re.M)
# Tag names treated as inline HTML: they are excluded from _block_tag and are
# the tags whose content InlineLexer.output_inline_html may parse.
_inline_tags = [
    'a', 'em', 'strong', 'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data',
    'time', 'code', 'var', 'samp', 'kbd', 'sub', 'sup', 'i', 'b', 'u', 'mark',
    'ruby', 'rt', 'rp', 'bdi', 'bdo', 'span', 'br', 'wbr', 'ins', 'del',
    'img', 'font',
]
_pre_tags = ['pre', 'script', 'style']
# Lookahead placed after a tag name: rejects a following ":/" or a run of
# punctuation ending in "@", then requires a word boundary.
# NOTE(review): presumably this keeps URLs and e-mail autolinks from being
# read as tags -- confirm.
_valid_end = r'(?!:/|[^\w\s@]*@)\b'
# One fragment of an HTML attribute with an optional quoted/unquoted value.
# NOTE(review): the name part matches a single character; whole names only
# match because callers repeat this fragment with ``(?:...)*?`` -- confirm
# this is intended.
_valid_attr = r'''\s*[a-zA-Z\-](?:\s*\=\s*(?:"[^"]*"|'[^']*'|[^\s'">]+))?'''
# A tag name that is not one of _inline_tags.
_block_tag = r'(?!(?:%s)\b)\w+%s' % ('|'.join(_inline_tags), _valid_end)
# URL schemes that escape_link() refuses to emit.
_scheme_blacklist = ('javascript:', 'vbscript:')
  39  
  40  
  41  def _pure_pattern(regex):
  42      pattern = regex.pattern
  43      if pattern.startswith('^'):
  44          pattern = pattern[1:]
  45      return pattern
  46  
  47  
  48  def _keyify(key):
  49      key = escape(key.lower(), quote=True)
  50      return _key_pattern.sub(' ', key)
  51  
  52  
  53  def escape(text, quote=False, smart_amp=True):
  54      """Replace special characters "&", "<" and ">" to HTML-safe sequences.
  55      The original cgi.escape will always escape "&", but you can control
  56      this one for a smart escape amp.
  57      :param quote: if set to True, " and ' will be escaped.
  58      :param smart_amp: if set to False, & will always be escaped.
  59      """
  60      if smart_amp:
  61          text = _escape_pattern.sub('&amp;', text)
  62      else:
  63          text = text.replace('&', '&amp;')
  64      text = text.replace('<', '&lt;')
  65      text = text.replace('>', '&gt;')
  66      if quote:
  67          text = text.replace('"', '&quot;')
  68          text = text.replace("'", '&#39;')
  69      return text
  70  
  71  
  72  def escape_link(url):
  73      """Remove dangerous URL schemes like javascript: and escape afterwards."""
  74      lower_url = url.lower().strip('\x00\x1a \n\r\t')
  75  
  76      for scheme in _scheme_blacklist:
  77          if re.sub(r'[^A-Za-z0-9\/:]+', '', lower_url).startswith(scheme):
  78              return ''
  79      return escape(url, quote=True, smart_amp=False)
  80  
  81  
  82  def preprocessing(text, tab=4):
  83      text = _newline_pattern.sub('\n', text)
  84      text = text.expandtabs(tab)
  85      text = text.replace('\u2424', '\n')
  86      pattern = re.compile(r'^ +$', re.M)
  87      return pattern.sub('', text)
  88  
  89  
class BlockGrammar(object):
    """Grammars for block level tokens.

    Every attribute is a compiled pattern anchored at the start of the
    remaining text; BlockLexer looks them up by rule name.
    """

    # Link definition: ``[key]: <link> "title"``.
    # Groups: 1 = key, 2 = link, 3 = optional title.
    def_links = re.compile(
        r'^ *\[([^^\]]+)\]: *'  # [key]:
        r'<?([^\s>]+)>?'  # <link> or link
        r'(?: +["(]([^\n]+)[")])? *(?:\n+|$)'
    )
    # Footnote definition: ``[^key]: text`` plus any following lines that
    # start with at least one space. Groups: 1 = key, 2 = body text.
    def_footnotes = re.compile(
        r'^\[\^([^\]]+)\]: *('
        r'[^\n]*(?:\n+|$)'  # [^key]:
        r'(?: {1,}[^\n]*(?:\n+|$))*'
        r')'
    )

    # One or more newlines.
    newline = re.compile(r'^\n+')
    # Consecutive lines indented by four spaces.
    block_code = re.compile(r'^( {4}[^\n]+\n*)+')
    # Fenced code. Groups: 1 = fence marker, 2 = optional language,
    # 3 = code. The closing fence must repeat the opening marker.
    fences = re.compile(
        r'^ *(`{3,}|~{3,}) *([^`\s]+)? *\n'  # ```lang
        r'([\s\S]+?)\s*'
        r'\1 *(?:\n+|$)'  # ```
    )
    # Three or more of "-", "*" or "_", optionally separated by spaces.
    hrule = re.compile(r'^ {0,3}[-*_](?: *[-*_]){2,} *(?:\n+|$)')
    # ATX heading. Groups: 1 = the "#" run (1 to 6), 2 = heading text.
    heading = re.compile(r'^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)')
    # Setext heading. Groups: 1 = heading text, 2 = "=" or "-".
    lheading = re.compile(r'^([^\n]+)\n *(=|-)+ *(?:\n+|$)')
    # Lines starting with ">" together with their continuation lines.
    block_quote = re.compile(r'^( *>[^\n]+(\n[^\n]+)*\n*)+')
    # A whole list. Groups: 1 = indent, 2 = the bullet as written,
    # 3 = the bullet character when the list is unordered.
    # The alternatives after the lazy body are the ways a list can end.
    list_block = re.compile(
        r'^( *)(?=[*+-]|\d+\.)(([*+-])?(?:\d+\.)?) [\s\S]+?'
        r'(?:'
        r'\n+(?=\1?(?:[-*_] *){3,}(?:\n+|$))'  # hrule
        r'|\n+(?=%s)'  # def links
        r'|\n+(?=%s)'  # def footnotes
        r'|\n+(?=\1(?(3)\d+\.|[*+-]) )'   # heterogeneous bullet
        r'|\n{2,}'
        r'(?! )'
        r'(?!\1(?:[*+-]|\d+\.) )\n*'
        r'|'
        r'\s*$)' % (
            _pure_pattern(def_links),
            _pure_pattern(def_footnotes),
        )
    )
    # One list item (used with findall). Groups: 1 = the whole item,
    # 2 = the item's indent; the item continues until a line that starts
    # a sibling bullet at the same indent.
    list_item = re.compile(
        r'^(( *)(?:[*+-]|\d+\.) [^\n]*'
        r'(?:\n(?!\2(?:[*+-]|\d+\.) )[^\n]*)*)',
        flags=re.M
    )
    # The bullet (and surrounding spaces) at the start of a list item.
    list_bullet = re.compile(r'^ *(?:[*+-]|\d+\.) +')
    # Consecutive non-empty lines; the negative lookahead stops the paragraph
    # before a line that would start a fence, list, hrule, heading, setext
    # heading, block quote, link/footnote definition or block-level HTML tag.
    # NOTE(review): the ``\1`` backreferences of the embedded fences and
    # list_block patterns are rewritten because they are nested inside this
    # pattern's own group; the chosen numbers were not verified against the
    # final group layout -- confirm.
    paragraph = re.compile(
        r'^((?:[^\n]+\n?(?!'
        r'%s|%s|%s|%s|%s|%s|%s|%s|%s'
        r'))+)\n*' % (
            _pure_pattern(fences).replace(r'\1', r'\2'),
            _pure_pattern(list_block).replace(r'\1', r'\3'),
            _pure_pattern(hrule),
            _pure_pattern(heading),
            _pure_pattern(lheading),
            _pure_pattern(block_quote),
            _pure_pattern(def_links),
            _pure_pattern(def_footnotes),
            '<' + _block_tag,
        )
    )
    # Block-level HTML: a comment, a paired tag (groups: 1 = tag name,
    # 2 = attributes, 3 = inner text) or a single open/self-closing tag.
    block_html = re.compile(
        r'^ *(?:%s|%s|%s) *(?:\n{2,}|\s*$)' % (
            r'<!--[\s\S]*?-->',
            r'<(%s)((?:%s)*?)>([\s\S]*?)<\/\1>' % (_block_tag, _valid_attr),
            r'<%s(?:%s)*?\s*\/?>' % (_block_tag, _valid_attr),
        )
    )
    # Table whose rows start with "|". Groups: 1 = header row,
    # 2 = alignment row, 3 = body rows.
    table = re.compile(
        r'^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*'
    )
    # Table without leading pipes; same three groups as ``table``.
    nptable = re.compile(
        r'^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*'
    )
    # Fallback: the rest of the current line.
    text = re.compile(r'^[^\n]+')
 167  
 168  
 169  class BlockLexer(object):
 170      """Block level lexer for block grammars."""
 171      grammar_class = BlockGrammar
 172  
 173      default_rules = [
 174          'newline', 'hrule', 'block_code', 'fences', 'heading',
 175          'nptable', 'lheading', 'block_quote',
 176          'list_block', 'block_html', 'def_links',
 177          'def_footnotes', 'table', 'paragraph', 'text'
 178      ]
 179  
 180      list_rules = (
 181          'newline', 'block_code', 'fences', 'lheading', 'hrule',
 182          'block_quote', 'list_block', 'block_html', 'text',
 183      )
 184  
 185      footnote_rules = (
 186          'newline', 'block_code', 'fences', 'heading',
 187          'nptable', 'lheading', 'hrule', 'block_quote',
 188          'list_block', 'block_html', 'table', 'paragraph', 'text'
 189      )
 190  
 191      def __init__(self, rules=None, **kwargs):
 192          self.tokens = []
 193          self.def_links = {}
 194          self.def_footnotes = {}
 195  
 196          if not rules:
 197              rules = self.grammar_class()
 198  
 199          self.rules = rules
 200          self._max_recursive_depth = kwargs.get('max_recursive_depth', 6)
 201          self._list_depth = 0
 202          self._blockquote_depth = 0
 203  
 204      def __call__(self, text, rules=None):
 205          return self.parse(text, rules)
 206  
 207      def parse(self, text, rules=None):
 208          text = text.rstrip('\n')
 209  
 210          if not rules:
 211              rules = self.default_rules
 212  
 213          def manipulate(text):
 214              for key in rules:
 215                  rule = getattr(self.rules, key)
 216                  m = rule.match(text)
 217                  if not m:
 218                      continue
 219                  getattr(self, 'parse_%s' % key)(m)
 220                  return m
 221              return False  # pragma: no cover
 222  
 223          while text:
 224              m = manipulate(text)
 225              if m is not False:
 226                  text = text[len(m.group(0)):]
 227                  continue
 228              if text:  # pragma: no cover
 229                  raise RuntimeError('Infinite loop at: %s' % text)
 230          return self.tokens
 231  
 232      def parse_newline(self, m):
 233          length = len(m.group(0))
 234          if length > 1:
 235              self.tokens.append({'type': 'newline'})
 236  
 237      def parse_block_code(self, m):
 238          # clean leading whitespace
 239          code = _block_code_leading_pattern.sub('', m.group(0))
 240          self.tokens.append({
 241              'type': 'code',
 242              'lang': None,
 243              'text': code,
 244          })
 245  
 246      def parse_fences(self, m):
 247          self.tokens.append({
 248              'type': 'code',
 249              'lang': m.group(2),
 250              'text': m.group(3),
 251          })
 252  
 253      def parse_heading(self, m):
 254          self.tokens.append({
 255              'type': 'heading',
 256              'level': len(m.group(1)),
 257              'text': m.group(2),
 258          })
 259  
 260      def parse_lheading(self, m):
 261          """Parse setext heading."""
 262          self.tokens.append({
 263              'type': 'heading',
 264              'level': 1 if m.group(2) == '=' else 2,
 265              'text': m.group(1),
 266          })
 267  
 268      def parse_hrule(self, m):
 269          self.tokens.append({'type': 'hrule'})
 270  
 271      def parse_list_block(self, m):
 272          bull = m.group(2)
 273          self.tokens.append({
 274              'type': 'list_start',
 275              'ordered': '.' in bull,
 276          })
 277          self._list_depth += 1
 278          if self._list_depth > self._max_recursive_depth:
 279              self.tokens.append({'type': 'list_item_start'})
 280              self.parse_text(m)
 281              self.tokens.append({'type': 'list_item_end'})
 282          else:
 283              cap = m.group(0)
 284              self._process_list_item(cap, bull)
 285          self.tokens.append({'type': 'list_end'})
 286          self._list_depth -= 1
 287  
 288      def _process_list_item(self, cap, bull):
 289          cap = self.rules.list_item.findall(cap)
 290  
 291          _next = False
 292          length = len(cap)
 293  
 294          for i in range(length):
 295              item = cap[i][0]
 296  
 297              # remove the bullet
 298              space = len(item)
 299              item = self.rules.list_bullet.sub('', item)
 300  
 301              # outdent
 302              if '\n ' in item:
 303                  space = space - len(item)
 304                  pattern = re.compile(r'^ {1,%d}' % space, flags=re.M)
 305                  item = pattern.sub('', item)
 306  
 307              # determine whether item is loose or not
 308              loose = _next
 309              if not loose and re.search(r'\n\n(?!\s*$)', item):
 310                  loose = True
 311  
 312              rest = len(item)
 313              if i != length - 1 and rest:
 314                  _next = item[rest-1] == '\n'
 315                  if not loose:
 316                      loose = _next
 317  
 318              if loose:
 319                  t = 'loose_item_start'
 320              else:
 321                  t = 'list_item_start'
 322  
 323              self.tokens.append({'type': t})
 324              # recurse
 325              self.parse(item, self.list_rules)
 326              self.tokens.append({'type': 'list_item_end'})
 327  
 328      def parse_block_quote(self, m):
 329          self.tokens.append({'type': 'block_quote_start'})
 330          self._blockquote_depth += 1
 331          if self._blockquote_depth > self._max_recursive_depth:
 332              self.parse_text(m)
 333          else:
 334              # clean leading >
 335              cap = _block_quote_leading_pattern.sub('', m.group(0))
 336              self.parse(cap)
 337          self.tokens.append({'type': 'block_quote_end'})
 338          self._blockquote_depth -= 1
 339  
 340      def parse_def_links(self, m):
 341          key = _keyify(m.group(1))
 342          self.def_links[key] = {
 343              'link': m.group(2),
 344              'title': m.group(3),
 345          }
 346  
 347      def parse_def_footnotes(self, m):
 348          key = _keyify(m.group(1))
 349          if key in self.def_footnotes:
 350              # footnote is already defined
 351              return
 352  
 353          self.def_footnotes[key] = 0
 354  
 355          self.tokens.append({
 356              'type': 'footnote_start',
 357              'key': key,
 358          })
 359  
 360          text = m.group(2)
 361  
 362          if '\n' in text:
 363              lines = text.split('\n')
 364              whitespace = None
 365              for line in lines[1:]:
 366                  space = len(line) - len(line.lstrip())
 367                  if space and (not whitespace or space < whitespace):
 368                      whitespace = space
 369              newlines = [lines[0]]
 370              for line in lines[1:]:
 371                  newlines.append(line[whitespace:])
 372              text = '\n'.join(newlines)
 373  
 374          self.parse(text, self.footnote_rules)
 375  
 376          self.tokens.append({
 377              'type': 'footnote_end',
 378              'key': key,
 379          })
 380  
 381      def parse_table(self, m):
 382          item = self._process_table(m)
 383  
 384          cells = re.sub(r'(?: *\| *)?\n$', '', m.group(3))
 385          cells = cells.split('\n')
 386          for i, v in enumerate(cells):
 387              v = re.sub(r'^ *\| *| *\| *$', '', v)
 388              cells[i] = re.split(r' *(?<!\\)\| *', v)
 389  
 390          item['cells'] = self._process_cells(cells)
 391          self.tokens.append(item)
 392  
 393      def parse_nptable(self, m):
 394          item = self._process_table(m)
 395  
 396          cells = re.sub(r'\n$', '', m.group(3))
 397          cells = cells.split('\n')
 398          for i, v in enumerate(cells):
 399              cells[i] = re.split(r' *(?<!\\)\| *', v)
 400  
 401          item['cells'] = self._process_cells(cells)
 402          self.tokens.append(item)
 403  
 404      def _process_table(self, m):
 405          header = re.sub(r'^ *| *\| *$', '', m.group(1))
 406          header = re.split(r' *\| *', header)
 407          align = re.sub(r' *|\| *$', '', m.group(2))
 408          align = re.split(r' *\| *', align)
 409  
 410          for i, v in enumerate(align):
 411              if re.search(r'^ *-+: *$', v):
 412                  align[i] = 'right'
 413              elif re.search(r'^ *:-+: *$', v):
 414                  align[i] = 'center'
 415              elif re.search(r'^ *:-+ *$', v):
 416                  align[i] = 'left'
 417              else:
 418                  align[i] = None
 419  
 420          item = {
 421              'type': 'table',
 422              'header': header,
 423              'align': align,
 424          }
 425          return item
 426  
 427      def _process_cells(self, cells):
 428          for i, line in enumerate(cells):
 429              for c, cell in enumerate(line):
 430                  # de-escape any pipe inside the cell here
 431                  cells[i][c] = re.sub('\\\\\|', '|', cell)
 432  
 433          return cells
 434  
 435      def parse_block_html(self, m):
 436          tag = m.group(1)
 437          if not tag:
 438              text = m.group(0)
 439              self.tokens.append({
 440                  'type': 'close_html',
 441                  'text': text
 442              })
 443          else:
 444              attr = m.group(2)
 445              text = m.group(3)
 446              self.tokens.append({
 447                  'type': 'open_html',
 448                  'tag': tag,
 449                  'extra': attr,
 450                  'text': text
 451              })
 452  
 453      def parse_paragraph(self, m):
 454          text = m.group(1).rstrip('\n')
 455          self.tokens.append({'type': 'paragraph', 'text': text})
 456  
 457      def parse_text(self, m):
 458          text = m.group(0)
 459          self.tokens.append({'type': 'text', 'text': text})
 460  
 461  
class InlineGrammar(object):
    """Grammars for inline level tokens.

    Every attribute is a compiled pattern anchored at the start of the
    remaining text; InlineLexer looks them up by rule name.
    """

    # Backslash escape; group 1 is the escaped character.
    escape = re.compile(r'^\\([\\`*{}\[\]()#+\-.!_>~|])')  # \* \+ \! ....
    # Inline HTML: a comment, a paired tag (groups: 1 = tag name,
    # 2 = attributes, 3 = inner text) or a single open/self-closing tag.
    inline_html = re.compile(
        r'^(?:%s|%s|%s)' % (
            r'<!--[\s\S]*?-->',
            r'<(\w+%s)((?:%s)*?)\s*>([\s\S]*?)<\/\1>' % (
                _valid_end, _valid_attr),
            r'<\w+%s(?:%s)*?\s*\/?>' % (_valid_end, _valid_attr),
        )
    )
    # ``<target>`` autolink. Groups: 1 = target, 2 = "@" or ":".
    autolink = re.compile(r'^<([^ >]+(@|:)[^ >]+)>')
    # Inline link or image: ``[text](url "title")`` / ``![alt](...)``.
    # Groups: 1 = text, 2 = optional "<", 3 = url, 4 = optional title.
    link = re.compile(
        r'^!?\[('
        r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
        r')\]\('
        r'''\s*(<)?([\s\S]*?)(?(2)>)(?:\s+['"]([\s\S]*?)['"])?\s*'''
        r'\)'
    )
    # Reference link: ``[text][key]``. Groups: 1 = text, 2 = key.
    reflink = re.compile(
        r'^!?\[('
        r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
        r')\]\s*\[([^^\]]*)\]'
    )
    # Shortcut reference: ``[key]``; group 1 is both text and key.
    nolink = re.compile(r'^!?\[((?:\[[^\]]*\]|[^\[\]])*)\]')
    # Bare http(s) URL that does not end in punctuation.
    url = re.compile(r'''^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])''')
    # Groups: 1 = text of the ``__`` form, 2 = text of the ``**`` form.
    double_emphasis = re.compile(
        r'^_{2}([\s\S]+?)_{2}(?!_)'  # __word__
        r'|'
        r'^\*{2}([\s\S]+?)\*{2}(?!\*)'  # **word**
    )
    # Groups: 1 = text of the ``_`` form, 2 = text of the ``*`` form.
    emphasis = re.compile(
        r'^\b_((?:__|[^_])+?)_\b'  # _word_
        r'|'
        r'^\*((?:\*\*|[^\*])+?)\*(?!\*)'  # *word*
    )
    # Code span. Groups: 1 = backtick run, 2 = code text.
    code = re.compile(r'^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)')  # `code`
    # Two or more spaces before a newline, unless only whitespace follows.
    linebreak = re.compile(r'^ {2,}\n(?!\s*$)')
    strikethrough = re.compile(r'^~~(?=\S)([\s\S]*?\S)~~')  # ~~word~~
    # Footnote reference ``[^key]``; group 1 is the key.
    footnote = re.compile(r'^\[\^([^\]]+)\]')
    # Plain text up to the next character that may start another rule.
    text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| {2,}\n|$)')

    def hard_wrap(self):
        """Grammar for hard wrap linebreak. You don't need to add two
        spaces at the end of a line.

        Replaces ``linebreak`` and ``text`` on this instance so that a
        newline preceded by any number of spaces is a line break.
        """
        self.linebreak = re.compile(r'^ *\n(?!\s*$)')
        self.text = re.compile(
            r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| *\n|$)'
        )
 513  
 514  
 515  class InlineLexer(object):
 516      """Inline level lexer for inline grammars."""
 517      grammar_class = InlineGrammar
 518  
 519      default_rules = [
 520          'escape', 'inline_html', 'autolink', 'url',
 521          'footnote', 'link', 'reflink', 'nolink',
 522          'double_emphasis', 'emphasis', 'code',
 523          'linebreak', 'strikethrough', 'text',
 524      ]
 525      inline_html_rules = [
 526          'escape', 'inline_html', 'autolink', 'url', 'link', 'reflink',
 527          'nolink', 'double_emphasis', 'emphasis', 'code',
 528          'linebreak', 'strikethrough', 'text',
 529      ]
 530  
 531      def __init__(self, renderer, rules=None, **kwargs):
 532          self.renderer = renderer
 533          self.links = {}
 534          self.footnotes = {}
 535          self.footnote_index = 0
 536  
 537          if not rules:
 538              rules = self.grammar_class()
 539  
 540          kwargs.update(self.renderer.options)
 541          if kwargs.get('hard_wrap'):
 542              rules.hard_wrap()
 543  
 544          self.rules = rules
 545  
 546          self._in_link = False
 547          self._in_footnote = False
 548          self._parse_inline_html = kwargs.get('parse_inline_html')
 549  
 550      def __call__(self, text, rules=None):
 551          return self.output(text, rules)
 552  
 553      def setup(self, links, footnotes):
 554          self.footnote_index = 0
 555          self.links = links or {}
 556          self.footnotes = footnotes or {}
 557  
 558      def output(self, text, rules=None):
 559          text = text.rstrip('\n')
 560          if not rules:
 561              rules = list(self.default_rules)
 562  
 563          if self._in_footnote and 'footnote' in rules:
 564              rules.remove('footnote')
 565  
 566          output = self.renderer.placeholder()
 567  
 568          def manipulate(text):
 569              for key in rules:
 570                  pattern = getattr(self.rules, key)
 571                  m = pattern.match(text)
 572                  if not m:
 573                      continue
 574                  self.line_match = m
 575                  out = getattr(self, 'output_%s' % key)(m)
 576                  if out is not None:
 577                      return m, out
 578              return False  # pragma: no cover
 579  
 580          while text:
 581              ret = manipulate(text)
 582              if ret is not False:
 583                  m, out = ret
 584                  output += out
 585                  text = text[len(m.group(0)):]
 586                  continue
 587              if text:  # pragma: no cover
 588                  raise RuntimeError('Infinite loop at: %s' % text)
 589  
 590          return output
 591  
 592      def output_escape(self, m):
 593          text = m.group(1)
 594          return self.renderer.escape(text)
 595  
 596      def output_autolink(self, m):
 597          link = m.group(1)
 598          if m.group(2) == '@':
 599              is_email = True
 600          else:
 601              is_email = False
 602          return self.renderer.autolink(link, is_email)
 603  
 604      def output_url(self, m):
 605          link = m.group(1)
 606          if self._in_link:
 607              return self.renderer.text(link)
 608          return self.renderer.autolink(link, False)
 609  
 610      def output_inline_html(self, m):
 611          tag = m.group(1)
 612          if self._parse_inline_html and tag in _inline_tags:
 613              text = m.group(3)
 614              if tag == 'a':
 615                  self._in_link = True
 616                  text = self.output(text, rules=self.inline_html_rules)
 617                  self._in_link = False
 618              else:
 619                  text = self.output(text, rules=self.inline_html_rules)
 620              extra = m.group(2) or ''
 621              html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
 622          else:
 623              html = m.group(0)
 624          return self.renderer.inline_html(html)
 625  
 626      def output_footnote(self, m):
 627          key = _keyify(m.group(1))
 628          if key not in self.footnotes:
 629              return None
 630          if self.footnotes[key]:
 631              return None
 632          self.footnote_index += 1
 633          self.footnotes[key] = self.footnote_index
 634          return self.renderer.footnote_ref(key, self.footnote_index)
 635  
 636      def output_link(self, m):
 637          return self._process_link(m, m.group(3), m.group(4))
 638  
 639      def output_reflink(self, m):
 640          key = _keyify(m.group(2) or m.group(1))
 641          if key not in self.links:
 642              return None
 643          ret = self.links[key]
 644          return self._process_link(m, ret['link'], ret['title'])
 645  
 646      def output_nolink(self, m):
 647          key = _keyify(m.group(1))
 648          if key not in self.links:
 649              return None
 650          ret = self.links[key]
 651          return self._process_link(m, ret['link'], ret['title'])
 652  
 653      def _process_link(self, m, link, title=None):
 654          line = m.group(0)
 655          text = m.group(1)
 656          if line[0] == '!':
 657              return self.renderer.image(link, title, text)
 658  
 659          self._in_link = True
 660          text = self.output(text)
 661          self._in_link = False
 662          return self.renderer.link(link, title, text)
 663  
 664      def output_double_emphasis(self, m):
 665          text = m.group(2) or m.group(1)
 666          text = self.output(text)
 667          return self.renderer.double_emphasis(text)
 668  
 669      def output_emphasis(self, m):
 670          text = m.group(2) or m.group(1)
 671          text = self.output(text)
 672          return self.renderer.emphasis(text)
 673  
 674      def output_code(self, m):
 675          text = m.group(2)
 676          return self.renderer.codespan(text)
 677  
 678      def output_linebreak(self, m):
 679          return self.renderer.linebreak()
 680  
 681      def output_strikethrough(self, m):
 682          text = self.output(m.group(1))
 683          return self.renderer.strikethrough(text)
 684  
 685      def output_text(self, m):
 686          text = m.group(0)
 687          return self.renderer.text(text)
 688  
 689  
 690  class Renderer(object):
 691      """The default HTML renderer for rendering Markdown.
 692      """
 693  
 694      def __init__(self, **kwargs):
 695          self.options = kwargs
 696  
 697      def placeholder(self):
 698          """Returns the default, empty output value for the renderer.
 699          All renderer methods use the '+=' operator to append to this value.
 700          Default is a string so rendering HTML can build up a result string with
 701          the rendered Markdown.
 702          Can be overridden by Renderer subclasses to be types like an empty
 703          list, allowing the renderer to create a tree-like structure to
 704          represent the document (which can then be reprocessed later into a
 705          separate format like docx or pdf).
 706          """
 707          return ''
 708  
 709      def block_code(self, code, lang=None):
 710          """Rendering block level code. ``pre > code``.
 711          :param code: text content of the code block.
 712          :param lang: language of the given code.
 713          """
 714          code = code.rstrip('\n')
 715          if not lang:
 716              code = escape(code, smart_amp=False)
 717              return '<pre><code>%s\n</code></pre>\n' % code
 718          code = escape(code, quote=True, smart_amp=False)
 719          return '<pre><code class="lang-%s">%s\n</code></pre>\n' % (lang, code)
 720  
 721      def block_quote(self, text):
 722          """Rendering <blockquote> with the given text.
 723          :param text: text content of the blockquote.
 724          """
 725          return '<blockquote>%s\n</blockquote>\n' % text.rstrip('\n')
 726  
 727      def block_html(self, html):
 728          """Rendering block level pure html content.
 729          :param html: text content of the html snippet.
 730          """
 731          if self.options.get('skip_style') and \
 732             html.lower().startswith('<style'):
 733              return ''
 734          if self.options.get('escape'):
 735              return escape(html)
 736          return html
 737  
 738      def header(self, text, level, raw=None):
 739          """Rendering header/heading tags like ``<h1>`` ``<h2>``.
 740          :param text: rendered text content for the header.
 741          :param level: a number for the header level, for example: 1.
 742          :param raw: raw text content of the header.
 743          """
 744          return '<h%d>%s</h%d>\n' % (level, text, level)
 745  
 746      def hrule(self):
 747          """Rendering method for ``<hr>`` tag."""
 748          if self.options.get('use_xhtml'):
 749              return '<hr />\n'
 750          return '<hr>\n'
 751  
 752      def list(self, body, ordered=True):
 753          """Rendering list tags like ``<ul>`` and ``<ol>``.
 754          :param body: body contents of the list.
 755          :param ordered: whether this list is ordered or not.
 756          """
 757          tag = 'ul'
 758          if ordered:
 759              tag = 'ol'
 760          return '<%s>\n%s</%s>\n' % (tag, body, tag)
 761  
 762      def list_item(self, text):
 763          """Rendering list item snippet. Like ``<li>``."""
 764          return '<li>%s</li>\n' % text
 765  
 766      def paragraph(self, text):
 767          """Rendering paragraph tags. Like ``<p>``."""
 768          return '<p>%s</p>\n' % text.strip(' ')
 769  
 770      def table(self, header, body):
 771          """Rendering table element. Wrap header and body in it.
 772          :param header: header part of the table.
 773          :param body: body part of the table.
 774          """
 775          return (
 776              '<table>\n<thead>%s</thead>\n'
 777              '<tbody>\n%s</tbody>\n</table>\n'
 778          ) % (header, body)
 779  
 780      def table_row(self, content):
 781          """Rendering a table row. Like ``<tr>``.
 782          :param content: content of current table row.
 783          """
 784          return '<tr>\n%s</tr>\n' % content
 785  
 786      def table_cell(self, content, **flags):
 787          """Rendering a table cell. Like ``<th>`` ``<td>``.
 788          :param content: content of current table cell.
 789          :param header: whether this is header or not.
 790          :param align: align of current table cell.
 791          """
 792          if flags['header']:
 793              tag = 'th'
 794          else:
 795              tag = 'td'
 796          align = flags['align']
 797          if not align:
 798              return '<%s>%s</%s>\n' % (tag, content, tag)
 799          return '<%s style="text-align:%s">%s</%s>\n' % (
 800              tag, align, content, tag
 801          )
 802  
 803      def double_emphasis(self, text):
 804          """Rendering **strong** text.
 805          :param text: text content for emphasis.
 806          """
 807          return '<strong>%s</strong>' % text
 808  
 809      def emphasis(self, text):
 810          """Rendering *emphasis* text.
 811          :param text: text content for emphasis.
 812          """
 813          return '<em>%s</em>' % text
 814  
 815      def codespan(self, text):
 816          """Rendering inline `code` text.
 817          :param text: text content for inline code.
 818          """
 819          text = escape(text.rstrip(), smart_amp=False)
 820          return '<code>%s</code>' % text
 821  
 822      def linebreak(self):
 823          """Rendering line break like ``<br>``."""
 824          if self.options.get('use_xhtml'):
 825              return '<br />\n'
 826          return '<br>\n'
 827  
 828      def strikethrough(self, text):
 829          """Rendering ~~strikethrough~~ text.
 830          :param text: text content for strikethrough.
 831          """
 832          return '<del>%s</del>' % text
 833  
 834      def text(self, text):
 835          """Rendering unformatted text.
 836          :param text: text content.
 837          """
 838          if self.options.get('parse_block_html'):
 839              return text
 840          return escape(text)
 841  
 842      def escape(self, text):
 843          """Rendering escape sequence.
 844          :param text: text content.
 845          """
 846          return escape(text)
 847  
 848      def autolink(self, link, is_email=False):
 849          """Rendering a given link or email address.
 850          :param link: link content or email address.
 851          :param is_email: whether this is an email or not.
 852          """
 853          text = link = escape_link(link)
 854          if is_email:
 855              link = 'mailto:%s' % link
 856          return '<a href="%s">%s</a>' % (link, text)
 857  
 858      def link(self, link, title, text):
 859          """Rendering a given link with content and title.
 860          :param link: href link for ``<a>`` tag.
 861          :param title: title content for `title` attribute.
 862          :param text: text content for description.
 863          """
 864          link = escape_link(link)
 865          if not title:
 866              return '<a href="%s">%s</a>' % (link, text)
 867          title = escape(title, quote=True)
 868          return '<a href="%s" title="%s">%s</a>' % (link, title, text)
 869  
 870      def image(self, src, title, text):
 871          """Rendering a image with title and text.
 872          :param src: source link of the image.
 873          :param title: title text of the image.
 874          :param text: alt text of the image.
 875          """
 876          src = escape_link(src)
 877          text = escape(text, quote=True)
 878          if title:
 879              title = escape(title, quote=True)
 880              html = '<img src="%s" alt="%s" title="%s"' % (src, text, title)
 881          else:
 882              html = '<img src="%s" alt="%s"' % (src, text)
 883          if self.options.get('use_xhtml'):
 884              return '%s />' % html
 885          return '%s>' % html
 886  
 887      def inline_html(self, html):
 888          """Rendering span level pure html content.
 889          :param html: text content of the html snippet.
 890          """
 891          if self.options.get('escape'):
 892              return escape(html)
 893          return html
 894  
 895      def newline(self):
 896          """Rendering newline element."""
 897          return ''
 898  
 899      def footnote_ref(self, key, index):
 900          """Rendering the ref anchor of a footnote.
 901          :param key: identity key for the footnote.
 902          :param index: the index count of current footnote.
 903          """
 904          html = (
 905              '<sup class="footnote-ref" id="fnref-%s">'
 906              '<a href="#fn-%s">%d</a></sup>'
 907          ) % (escape(key), escape(key), index)
 908          return html
 909  
 910      def footnote_item(self, key, text):
 911          """Rendering a footnote item.
 912          :param key: identity key for the footnote.
 913          :param text: text content of the footnote.
 914          """
 915          back = (
 916              '<a href="#fnref-%s" class="footnote">&#8617;</a>'
 917          ) % escape(key)
 918          text = text.rstrip()
 919          if text.endswith('</p>'):
 920              text = re.sub(r'<\/p>$', r'%s</p>' % back, text)
 921          else:
 922              text = '%s<p>%s</p>' % (text, back)
 923          html = '<li id="fn-%s">%s</li>\n' % (escape(key), text)
 924          return html
 925  
 926      def footnotes(self, text):
 927          """Wrapper for all footnotes.
 928          :param text: contents of all footnotes.
 929          """
 930          html = '<div class="footnotes">\n%s<ol>%s</ol>\n</div>\n'
 931          return html % (self.hrule(), text)
 932  
 933  
 934  class Markdown(object):
 935      """The Markdown parser.
 936      :param renderer: An instance of ``Renderer``.
 937      :param inline: An inline lexer class or instance.
 938      :param block: A block lexer class or instance.
 939      """
 940      def __init__(self, renderer=None, inline=None, block=None, **kwargs):
 941          if not renderer:
 942              renderer = Renderer(**kwargs)
 943          else:
 944              kwargs.update(renderer.options)
 945  
 946          self.renderer = renderer
 947  
 948          if inline and inspect.isclass(inline):
 949              inline = inline(renderer, **kwargs)
 950          if block and inspect.isclass(block):
 951              block = block(**kwargs)
 952  
 953          if inline:
 954              self.inline = inline
 955          else:
 956              self.inline = InlineLexer(renderer, **kwargs)
 957  
 958          self.block = block or BlockLexer(BlockGrammar())
 959          self.footnotes = []
 960          self.tokens = []
 961  
 962          # detect if it should parse text in block html
 963          self._parse_block_html = kwargs.get('parse_block_html')
 964  
 965      def __call__(self, text):
 966          return self.parse(text)
 967  
 968      def render(self, text):
 969          """Render the Markdown text.
 970          :param text: markdown formatted text content.
 971          """
 972          return self.parse(text)
 973  
 974      def parse(self, text):
 975          out = self.output(preprocessing(text))
 976  
 977          keys = self.block.def_footnotes
 978  
 979          # reset block
 980          self.block.def_links = {}
 981          self.block.def_footnotes = {}
 982  
 983          # reset inline
 984          self.inline.links = {}
 985          self.inline.footnotes = {}
 986  
 987          if not self.footnotes:
 988              return out
 989  
 990          footnotes = filter(lambda o: keys.get(o['key']), self.footnotes)
 991          self.footnotes = sorted(
 992              footnotes, key=lambda o: keys.get(o['key']), reverse=True
 993          )
 994  
 995          body = self.renderer.placeholder()
 996          while self.footnotes:
 997              note = self.footnotes.pop()
 998              body += self.renderer.footnote_item(
 999                  note['key'], note['text']
1000              )
1001  
1002          out += self.renderer.footnotes(body)
1003          return out
1004  
1005      def pop(self):
1006          if not self.tokens:
1007              return None
1008          self.token = self.tokens.pop()
1009          return self.token
1010  
1011      def peek(self):
1012          if self.tokens:
1013              return self.tokens[-1]
1014          return None  # pragma: no cover
1015  
1016      def output(self, text, rules=None):
1017          self.tokens = self.block(text, rules)
1018          self.tokens.reverse()
1019  
1020          self.inline.setup(self.block.def_links, self.block.def_footnotes)
1021  
1022          out = self.renderer.placeholder()
1023          while self.pop():
1024              out += self.tok()
1025          return out
1026  
1027      def tok(self):
1028          t = self.token['type']
1029  
1030          # sepcial cases
1031          if t.endswith('_start'):
1032              t = t[:-6]
1033  
1034          return getattr(self, 'output_%s' % t)()
1035  
1036      def tok_text(self):
1037          text = self.token['text']
1038          while self.peek()['type'] == 'text':
1039              text += '\n' + self.pop()['text']
1040          return self.inline(text)
1041  
1042      def output_newline(self):
1043          return self.renderer.newline()
1044  
1045      def output_hrule(self):
1046          return self.renderer.hrule()
1047  
1048      def output_heading(self):
1049          return self.renderer.header(
1050              self.inline(self.token['text']),
1051              self.token['level'],
1052              self.token['text'],
1053          )
1054  
1055      def output_code(self):
1056          return self.renderer.block_code(
1057              self.token['text'], self.token['lang']
1058          )
1059  
1060      def output_table(self):
1061          aligns = self.token['align']
1062          aligns_length = len(aligns)
1063          cell = self.renderer.placeholder()
1064  
1065          # header part
1066          header = self.renderer.placeholder()
1067          for i, value in enumerate(self.token['header']):
1068              align = aligns[i] if i < aligns_length else None
1069              flags = {'header': True, 'align': align}
1070              cell += self.renderer.table_cell(self.inline(value), **flags)
1071  
1072          header += self.renderer.table_row(cell)
1073  
1074          # body part
1075          body = self.renderer.placeholder()
1076          for i, row in enumerate(self.token['cells']):
1077              cell = self.renderer.placeholder()
1078              for j, value in enumerate(row):
1079                  align = aligns[j] if j < aligns_length else None
1080                  flags = {'header': False, 'align': align}
1081                  cell += self.renderer.table_cell(self.inline(value), **flags)
1082              body += self.renderer.table_row(cell)
1083  
1084          return self.renderer.table(header, body)
1085  
1086      def output_block_quote(self):
1087          body = self.renderer.placeholder()
1088          while self.pop()['type'] != 'block_quote_end':
1089              body += self.tok()
1090          return self.renderer.block_quote(body)
1091  
1092      def output_list(self):
1093          ordered = self.token['ordered']
1094          body = self.renderer.placeholder()
1095          while self.pop()['type'] != 'list_end':
1096              body += self.tok()
1097          return self.renderer.list(body, ordered)
1098  
1099      def output_list_item(self):
1100          body = self.renderer.placeholder()
1101          while self.pop()['type'] != 'list_item_end':
1102              if self.token['type'] == 'text':
1103                  body += self.tok_text()
1104              else:
1105                  body += self.tok()
1106  
1107          return self.renderer.list_item(body)
1108  
1109      def output_loose_item(self):
1110          body = self.renderer.placeholder()
1111          while self.pop()['type'] != 'list_item_end':
1112              body += self.tok()
1113          return self.renderer.list_item(body)
1114  
1115      def output_footnote(self):
1116          self.inline._in_footnote = True
1117          body = self.renderer.placeholder()
1118          key = self.token['key']
1119          while self.pop()['type'] != 'footnote_end':
1120              body += self.tok()
1121          self.footnotes.append({'key': key, 'text': body})
1122          self.inline._in_footnote = False
1123          return self.renderer.placeholder()
1124  
1125      def output_close_html(self):
1126          text = self.token['text']
1127          return self.renderer.block_html(text)
1128  
1129      def output_open_html(self):
1130          text = self.token['text']
1131          tag = self.token['tag']
1132          if self._parse_block_html and tag not in _pre_tags:
1133              text = self.inline(text, rules=self.inline.inline_html_rules)
1134          extra = self.token.get('extra') or ''
1135          html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
1136          return self.renderer.block_html(html)
1137  
1138      def output_paragraph(self):
1139          return self.renderer.paragraph(self.inline(self.token['text']))
1140  
1141      def output_text(self):
1142          return self.renderer.paragraph(self.tok_text())
1143  
1144  
def markdown(text, escape=True, **kwargs):
    """Convert markdown formatted text to html in one call.

    Builds a ``Markdown`` parser from the given options and applies it to
    ``text``.

    :param text: markdown formatted text content.
    :param escape: if set to False, all html tags will not be escaped.
    :param use_xhtml: output with xhtml tags.
    :param hard_wrap: if set to True, it will use the GFM line breaks feature.
    :param parse_block_html: parse text only in block level html.
    :param parse_inline_html: parse text only in inline level html.
    """
    parser = Markdown(escape=escape, **kwargs)
    return parser(text)