/ galgebra / _utils / parser.py
parser.py
  1  """ A private module to parse multivector expressions """
  2  import re
  3  from typing import List
  4  
  5  
  6  # The `#` character is included because we generate tokens containing it in
  7  # _parse_paren.
  8  _operand_re = r"([A-Za-z0-9\_\#]+)"
  9  
 10  _operator_res = {
 11      '<>|': re.compile(r'(ARG(\||<|>)ARG)'.replace('ARG', _operand_re)),
 12      '^': re.compile(r'(ARG[\^]ARG([\^]ARG)*)'.replace('ARG', _operand_re)),
 13      '*': re.compile(r'(ARG[\*]ARG([\*]ARG)*)'.replace('ARG', _operand_re)),
 14  }
 15  
 16  
 17  def _contains_interval(interval1, interval2):  # interval1 inside interval2
 18      if interval1[0] > interval2[0] and interval1[1] < interval2[1]:
 19          return True
 20      else:
 21          return False
 22  
 23  
 24  # counter to generate unique tokens
 25  _parse_paren_calls = 0
 26  
 27  
 28  def _parse_paren(line):
 29      global _parse_paren_calls
 30      _parse_paren_calls += 1
 31  
 32      if ('(' not in line) or (')' not in line):
 33          return [[[line]]]
 34      level = 0
 35      max_level = 0
 36      ich = 0
 37      paren_lst = []
 38      for ch in line:
 39          if ch == '(':
 40              level += 1
 41              paren_lst.append([level, ich])
 42          if ch == ')':
 43              if level < 1:
 44                  raise ValueError('Mismathed Parenthesis in: ' + line + '\n')
 45              paren_lst.reverse()
 46              iparen = 0
 47              for elem in paren_lst:
 48                  if elem[0] == level:
 49                      paren_lst[iparen].append(ich)
 50                      break
 51                  iparen += 1
 52              paren_lst.reverse()
 53              level -= 1
 54          max_level = max(max_level, level)
 55          ich += 1
 56      if level != 0:
 57          raise ValueError('Mismatched Parenthesis in: ' + line + '\n')
 58      if max_level > 0:
 59          level_lst = []
 60          for _x in range(max_level + 1):
 61              level_lst.append([])
 62          for group in paren_lst:
 63              level_lst[group[0]].append(group[1:])
 64          ilevel = max_level
 65          while ilevel > 1:
 66              level = level_lst[ilevel]
 67              level_down = level_lst[ilevel - 1]
 68              igroup = 0
 69              for group in level:
 70                  igroup_down = 0
 71                  for group_down in level_down:
 72                      if _contains_interval(group, group_down):
 73                          level_lst[ilevel][igroup].append(igroup_down)
 74                      igroup_down += 1
 75                  igroup += 1
 76              ilevel -= 1
 77          ilevel = 1
 78          for level in level_lst[1:]:
 79              igroup = 0
 80              for group in level:
 81                  token = '#' + str(_parse_paren_calls) + '_' + str(ilevel) + '_' + str(igroup) + '#'
 82                  level_lst[ilevel][igroup].append(line[group[0]:group[1] + 1])
 83                  level_lst[ilevel][igroup].append(token)
 84                  igroup += 1
 85              ilevel += 1
 86          ilevel = 1
 87          for level in level_lst[1:]:
 88              igroup = 0
 89              for group in level:
 90                  group.append(group[-2])
 91                  level_lst[ilevel][igroup] = group
 92                  igroup += 1
 93              ilevel += 1
 94          ilevel = max_level
 95          while ilevel > 1:
 96              igroup = 0
 97              for group in level_lst[ilevel]:
 98                  group_down = level_lst[ilevel - 1][group[2]]
 99                  replace_text = group_down[-1].replace(group[-3], group[-2])
100                  level_lst[ilevel - 1][group[2]][-1] = replace_text
101                  igroup += 1
102              ilevel -= 1
103          for group in level_lst[1]:
104              line = line.replace(group[2], group[3])
105          ilevel = 1
106          level_lst[0] = [[line]]
107      return level_lst
108  
109  
def _unparse_paren(level_lst):
    """Rebuild a single string from a nested level list.

    ``level_lst[0][0][0]`` is taken as the starting text.  Then, level by
    level, every group's token (``group[-2]``) is replaced in that text by
    the group's current text (``group[-1]``).  When a group's text starts
    with ``((`` and ends with ``))`` one surrounding pair of parentheses is
    dropped before substitution.
    """
    result = level_lst[0][0][0]
    for groups in level_lst[1:]:
        for entry in groups:
            token, text = entry[-2], entry[-1]
            if text.startswith('((') and text.endswith('))'):
                text = text[1:-1]
            result = result.replace(token, text)
    return result
119  
120  
def _sub_paren(s):
    """Replacement callback for ``re.sub``: wrap the matched text in parentheses."""
    string = s.group(0)
    return '(%s)' % string


def _add_paren(line, re_exprs):
    """Parenthesise every match of ``re_exprs`` in ``line``.

    ``re_exprs`` is a pattern accepted by ``re.sub``.  If ``line`` is wrapped
    in one enclosing pair of parentheses, that pair is removed before the
    substitution and put back afterwards.  If the remaining text still
    contains parentheses it is split with ``_parse_paren``, the substitution
    is applied to each group's text separately, and the pieces are joined
    again with ``_unparse_paren``.

    Returns the rewritten string; an empty ``line`` is returned unchanged.
    """
    # Strip the outer pair only when the '(' at index 0 is closed by the
    # final character.  A string such as '(a)+(b)' starts and ends with
    # parentheses that do not match each other; stripping them would leave
    # the unbalanced text 'a)+(b'.  startswith/endswith also keep an empty
    # line from raising IndexError.
    paren_flg = False
    if line.startswith('(') and line.endswith(')'):
        depth = 0
        for ich, ch in enumerate(line):
            if ch == '(':
                depth += 1
            elif ch == ')':
                depth -= 1
                if depth == 0:
                    # First point where the opening parenthesis is closed.
                    paren_flg = ich == len(line) - 1
                    break
    if paren_flg:
        line = line[1:-1]

    if ('(' in line) or (')' in line):
        line_levels = _parse_paren(line)
        for level in line_levels:
            for group in level:
                # group[-1] is the group's working text.
                group[-1] = re.sub(re_exprs, _sub_paren, group[-1])
        line = _unparse_paren(line_levels)
    else:
        line = re.sub(re_exprs, _sub_paren, line)

    if paren_flg:
        line = '(' + line + ')'
    return line
147  
148  
def validate_op_order(op_order: List[str]) -> None:
    """Check that every entry of ``op_order`` is a key of ``_operator_res``.

    Raises:
        ValueError: if any entry is not a known operator key.
    """
    for op in op_order:
        if op not in _operator_res:
            raise ValueError("Illegal operator")
152  
153  
def parse_line(line: str, op_order: List[str]) -> str:
    """Return ``line`` with parentheses added around operator applications.

    Spaces are removed from ``line`` first.  The line is then split into
    nested parenthesised groups, and for every group the patterns in
    ``_operator_res`` selected by ``op_order`` are applied one after the
    other, in the order given, each wrapping its matches in parentheses.
    Finally the groups are joined back into one string.

    Each entry of ``op_order`` must be a key of ``_operator_res``.
    """
    levels = _parse_paren(line.replace(' ', ''))
    for groups in levels:
        for entry in groups:
            # entry[-1] holds the group's working text.
            text = entry[-1]
            for op in op_order:
                text = _add_paren(text, _operator_res[op])
            entry[-1] = text
    return _unparse_paren(levels)