parser.py
""" A private module to parse multivector expressions """
import re
from typing import List


# The `#` character is included because we generate tokens containing it in
# _parse_paren.
_operand_re = r"([A-Za-z0-9\_\#]+)"

# One compiled pattern per precedence class.  Each pattern matches a maximal
# run of operands joined by the operator(s) of that class.
_operator_res = {
    '<>|': re.compile(r'(ARG(\||<|>)ARG)'.replace('ARG', _operand_re)),
    '^': re.compile(r'(ARG[\^]ARG([\^]ARG)*)'.replace('ARG', _operand_re)),
    '*': re.compile(r'(ARG[\*]ARG([\*]ARG)*)'.replace('ARG', _operand_re)),
}


def _contains_interval(interval1, interval2):
    """Return True when interval1 lies strictly inside interval2.

    Only the first two items of each argument (start, end) are examined.
    """
    return interval1[0] > interval2[0] and interval1[1] < interval2[1]


# counter to generate unique tokens
_parse_paren_calls = 0


def _parse_paren(line):
    """Split ``line`` into nested parenthesis groups.

    Returns a list indexed by nesting level.  Level 0 is ``[[text]]`` where
    ``text`` is ``line`` with every top-level group replaced by its token.
    Every deeper level holds one list per group, laid out as
    ``[start, end, (parent index for levels > 1,) original text, token,
    working text]``; the working text is the group's text with directly
    nested groups replaced by their tokens.

    Tokens embed the module-wide call counter so that tokens produced by
    different calls never collide.

    Raises:
        ValueError: if the parentheses in ``line`` are not balanced.
    """
    global _parse_paren_calls
    _parse_paren_calls += 1

    # Nothing to do only when there are no parentheses at all; a line with
    # just one kind of parenthesis is unbalanced and is rejected below.
    if '(' not in line and ')' not in line:
        return [[[line]]]

    depth = 0
    max_level = 0
    open_groups = []  # indices into paren_lst of groups not yet closed
    paren_lst = []    # [level, start, end] in order of opening
    for ich, ch in enumerate(line):
        if ch == '(':
            depth += 1
            max_level = max(max_level, depth)
            open_groups.append(len(paren_lst))
            paren_lst.append([depth, ich])
        elif ch == ')':
            if depth < 1:
                raise ValueError('Mismatched Parenthesis in: ' + line + '\n')
            paren_lst[open_groups.pop()].append(ich)
            depth -= 1
    if depth != 0:
        raise ValueError('Mismatched Parenthesis in: ' + line + '\n')

    level_lst = [[] for _ in range(max_level + 1)]
    for group in paren_lst:
        level_lst[group[0]].append(group[1:])

    # Record, for every group deeper than level 1, the index of the group
    # one level up that encloses it.
    for ilevel in range(max_level, 1, -1):
        parents = level_lst[ilevel - 1]
        for group in level_lst[ilevel]:
            for iparent, parent in enumerate(parents):
                if _contains_interval(group, parent):
                    group.append(iparent)

    # Attach original text, unique token and working text to every group.
    for ilevel, level in enumerate(level_lst[1:], start=1):
        for igroup, group in enumerate(level):
            text = line[group[0]:group[1] + 1]
            token = '#{}_{}_{}#'.format(_parse_paren_calls, ilevel, igroup)
            group.extend([text, token, text])

    # Deepest level first: substitute each group's token for its original
    # text inside the parent's working text.
    for ilevel in range(max_level, 1, -1):
        for group in level_lst[ilevel]:
            parent = level_lst[ilevel - 1][group[2]]
            parent[-1] = parent[-1].replace(group[-3], group[-2])

    # Level-1 groups have no parent index, so text/token sit at [2]/[3].
    for group in level_lst[1]:
        line = line.replace(group[2], group[3])
    level_lst[0] = [[line]]
    return level_lst


def _unparse_paren(level_lst):
    """Rebuild an expression from the structure made by ``_parse_paren``.

    Starting from the level-0 text, each group's token (``group[-2]``) is
    replaced by its working text (``group[-1]``).  A working text wrapped in
    a doubled pair of parentheses has one pair removed first.
    """
    line = level_lst[0][0][0]
    for level in level_lst[1:]:
        for group in level:
            new_string = group[-1]
            if new_string[:2] == '((' and new_string[-2:] == '))':
                new_string = new_string[1:-1]
            line = line.replace(group[-2], new_string)
    return line


def _sub_paren(s):
    """``re.sub`` callback: wrap the matched text in parentheses."""
    return '(%s)' % s.group(0)


def _is_wrapped(line):
    """Return True if one matching pair of parentheses encloses all of ``line``.

    ``(a)*(b)`` starts with ``(`` and ends with ``)`` but is not wrapped: its
    first parenthesis closes before the end of the string.
    """
    if not (line.startswith('(') and line.endswith(')')):
        return False
    depth = 0
    for ich, ch in enumerate(line):
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
            if depth == 0:
                # The opening parenthesis at index 0 closes here.
                return ich == len(line) - 1
    return False


def _add_paren(line, re_exprs):
    """Parenthesise every match of ``re_exprs`` in ``line``.

    An enclosing pair of parentheses is set aside while substituting and put
    back afterwards.  Any remaining parenthesised sub-expressions are
    tokenised with ``_parse_paren`` so that the pattern sees each of them as
    a single operand.
    """
    paren_flg = _is_wrapped(line)
    if paren_flg:
        line = line[1:-1]
    if '(' in line or ')' in line:
        line_levels = _parse_paren(line)
        for level in line_levels:
            for group in level:
                group[-1] = re.sub(re_exprs, _sub_paren, group[-1])
        line = _unparse_paren(line_levels)
    else:
        line = re.sub(re_exprs, _sub_paren, line)
    if paren_flg:
        line = '(' + line + ')'
    return line


def validate_op_order(op_order: List[str]) -> None:
    """Check that every entry of ``op_order`` is a key of ``_operator_res``.

    Raises:
        ValueError: if any entry is not a known operator class.
    """
    if not all(op in _operator_res for op in op_order):
        raise ValueError("Illegal operator")


def parse_line(line: str, op_order: List[str]) -> str:
    """Insert explicit parentheses into ``line`` according to ``op_order``.

    Spaces are removed, then the operator classes in ``op_order`` are applied
    in sequence (earlier entries bind tighter) to the top-level text and to
    every parenthesised group.

    Args:
        line: the expression to parenthesise.
        op_order: keys of ``_operator_res``, tightest-binding first.

    Returns:
        The expression with grouping parentheses added.

    Raises:
        ValueError: if the parentheses in ``line`` are not balanced.
        KeyError: if an entry of ``op_order`` is not in ``_operator_res``.
    """
    line = line.replace(' ', '')
    level_lst = _parse_paren(line)
    for level in level_lst:
        for group in level:
            string = group[-1]
            for op in op_order:
                string = _add_paren(string, _operator_res[op])
            group[-1] = string
    return _unparse_paren(level_lst)