# expression.py
"""
Expression module
"""

from .token import Token


class Expression:
    """
    Parses expression statements and runs a set of substitution/formatting rules.
    """

    def __init__(self, resolver, tolist):
        """
        Creates a new expression parser.

        Args:
            resolver: function to call to resolve query column names with database column names.
                      Called as resolver(token) for plain columns and resolver(token, alias) when
                      building alias expressions.
            tolist: outputs expression lists if True, text if False
        """

        self.resolver = resolver
        self.tolist = tolist

    def __call__(self, tokens, alias=False, aliases=None, similar=None):
        """
        Parses and formats a list of tokens as follows:
            - Replaces query column names with database column names
            - Adds similar query placeholders and extracts similar function parameters
            - Rewrites expression and returns

        Args:
            tokens: input expression
            alias: if True, column aliases should be generated and added to aliases dict
            aliases: dict of generated aliases, if present these tokens should NOT be resolved
            similar: list of similar queries, if present new similar queries are appended to this list

        Returns:
            rewritten clause
        """

        # Process a copy of the tokens, the original tokens are still needed to derive alias names
        transformed = self.process(list(tokens), alias, aliases, similar)

        # Re-write alias expression and return
        if alias and not self.tolist:
            return self.buildalias(transformed, tokens, aliases)

        # Re-write input expression and return
        return self.buildlist(transformed) if self.tolist is True else self.buildtext(transformed)

    def process(self, tokens, alias, aliases, similar):
        """
        Replaces query column names with database column names, adds similar query placeholders and
        extracts similar function parameters.

        The tokens list is modified in place. Handlers share a single iterator with this loop, tokens
        a handler consumes are not visited again here.

        Args:
            tokens: input expression
            alias: if True, column aliases should be generated and added to aliases dict
            aliases: dict of generated aliases, if present these tokens should NOT be resolved
            similar: list of similar queries, if present new similar queries are appended to this list

        Returns:
            transformed tokens
        """

        # Create clause index and token iterator. Iterator skips distinct tokens.
        # The generator is lazy, it reads each token from the list at the time it is reached.
        index, iterator = 0, ((x, token) for x, token in enumerate(tokens) if not Token.isdistinct(token))
        for x, token in iterator:
            # Check if separator, increment clause index
            if Token.isseparator(token):
                index += 1

            # Check if token is a square bracket
            elif Token.isbracket(token):
                # Resolve bracket expression
                self.bracket(iterator, tokens, x)

            # Check if token is a similar function
            elif Token.issimilar(tokens, x, similar):
                # Resolve similar expression
                self.similar(iterator, tokens, x, similar)

            # Check if token is a function
            elif Token.isfunction(tokens, x):
                # Resolve function expression
                self.function(iterator, tokens, token, aliases, similar)

            # Check for alias expression
            elif Token.isalias(tokens, x, alias):
                # Process alias expression
                self.alias(iterator, tokens, x, aliases, index)

            # Check for attribute expression
            elif Token.isattribute(tokens, x):
                # Resolve attribute expression
                self.attribute(tokens, x, aliases)

            # Check for compound expression
            elif Token.iscompound(tokens, x):
                # Resolve compound expression
                self.compound(iterator, tokens, x, aliases, similar)

        # Remove replaced tokens (handlers set consumed positions to None)
        return [token for token in tokens if token]

    def buildtext(self, tokens):
        """
        Builds a new expression from tokens. This method applies a set of rules to generate whitespace between tokens.

        Args:
            tokens: input expression

        Returns:
            expression text
        """

        # Rebuild expression
        text = ""
        for token in tokens:
            # Write token with whitespace rules applied. The rule depends on the text built so far,
            # which is why this is an incremental concatenation and not a join.
            text += Token.wrapspace(text, token)

        # Remove any leading/trailing whitespace and return
        return text.strip()

    def buildlist(self, tokens):
        """
        Builds a new expression from tokens. This method returns a list of expression components. These components can be joined together
        on commas to form a text expression.

        Args:
            tokens: input expression

        Returns:
            expression list
        """

        parts, current, parens, brackets = [], [], 0, 0

        for token in tokens:
            # Create new part, only commas outside of parens and brackets split components
            if token == "," and not parens and not brackets:
                parts.append(self.buildtext(current))
                current = []
            else:
                # Track nesting depth and accumulate tokens
                if token == "(":
                    parens += 1
                elif token == ")":
                    parens -= 1
                elif token == "[":
                    brackets += 1
                elif token == "]":
                    brackets -= 1
                elif Token.issortorder(token):
                    # Prefix sort order tokens with a space
                    token = f" {token}"

                current.append(token)

        # Add last part
        if current:
            parts.append(self.buildtext(current))

        return parts

    def buildalias(self, transformed, tokens, aliases):
        """
        Builds new alias text expression from transformed and input tokens.

        Clauses whose index is stored in aliases already have an alias and are kept as is. All other
        clauses are passed to the resolver along with an alias name derived from the original clause text.

        Args:
            transformed: transformed tokens
            tokens: original input tokens
            aliases: dict of column aliases

        Returns:
            alias text expression
        """

        # Convert tokens to expressions
        transformed = self.buildlist(transformed)
        tokens = self.buildlist(tokens)

        expression = []
        for x, token in enumerate(transformed):
            if x not in aliases.values():
                alias = tokens[x]

                # Strip leading/trailing brackets from alias name that doesn't have operators.
                # The operator check runs against each character of the alias text.
                # startswith/endswith are used so that an empty clause is skipped instead of raising an IndexError.
                if not any(Token.isoperator(t) for t in alias) and alias.startswith(("[", "(")) and alias.endswith(("]", ")")):
                    alias = alias[1:-1]

                # Strip leading distinct keyword
                values = alias.split()
                if len(values) > 0 and Token.isdistinct(values[0]):
                    alias = " ".join(values[1:])

                # Resolve alias
                token = self.resolver(token, alias)

            expression.append(token)

        # Build alias text expression
        return ", ".join(expression)

    def bracket(self, iterator, tokens, x):
        """
        Consumes a [bracket] expression.

        All tokens of the expression are cleared from the stream and the last consumed position is set to
        the resolved bracket contents. If the stream ends before the closing bracket, the tokens read so
        far are treated as the bracket contents.

        Args:
            iterator: tokens iterator
            tokens: input tokens
            x: current position
        """

        # Function parameters
        params = []

        # Clear token from stream
        token = tokens[x]
        tokens[x] = None

        # Bracket counter (current token is an open bracket)
        brackets = 1

        # Last valid position read from the stream, receives the resolved expression
        last = x

        # Read until token is a end bracket
        while token and (token != "]" or brackets > 0):
            x, token = next(iterator, (None, None))

            # Stream ended before the closing bracket, stop here. Indexing tokens with None raises a TypeError.
            if x is None:
                break

            last = x

            # Increase/decrease bracket counter
            if token == "[":
                brackets += 1
            elif token == "]":
                brackets -= 1

            # Accumulate tokens, the final closing bracket is not part of the expression
            if token != "]" or brackets > 0:
                params.append(token)

            # Clear token from stream
            tokens[x] = None

        # Set last token to resolved bracket expression, single quotes are doubled
        tokens[last] = self.resolve(self.buildtext(params).replace("'", "''"), None)

    def similar(self, iterator, tokens, x, similar):
        """
        Substitutes a similar() function call with a placeholder that can later be used to add
        embeddings query results as a filter.

        If the stream ends before the closing paren, the parameters read so far are stored and the
        placeholder is written to the last consumed position.

        Args:
            iterator: tokens iterator
            tokens: input tokens
            x: current position
            similar: list where similar function call parameters are stored
        """

        # Function parameters
        params = []

        # Clear token from stream
        token = tokens[x]
        tokens[x] = None

        # Last valid position read from the stream, receives the placeholder
        last = x

        # Read until token is a closing paren
        while token and token != ")":
            x, token = next(iterator, (None, None))

            # Stream ended before the closing paren, stop here. Indexing tokens with None raises a TypeError.
            if x is None:
                break

            last = x

            if token and token not in ["(", ",", ")"]:
                # Strip quotes and accumulate tokens
                params.append(token.replace("'", "").replace('"', ""))

            # Clear token from stream
            tokens[x] = None

        # Add placeholder for embedding similarity results, suffix is the index of this query in similar
        tokens[last] = f"{Token.SIMILAR_TOKEN}{len(similar)}"

        # Save parameters
        similar.append(params)

    def function(self, iterator, tokens, token, aliases, similar):
        """
        Resolves column names within the function's parameters.

        Args:
            iterator: tokens iterator
            tokens: input tokens
            token: current token
            aliases: dict of generated aliases, if present these tokens should NOT be resolved
            similar: list where similar function call parameters are stored
        """

        # Consume function parameters
        while token and token != ")":
            x, token = next(iterator, (None, None))

            # Stream ended before the closing paren, there is no position left to inspect
            if x is None:
                break

            # Check if token is a square bracket
            if Token.isbracket(token):
                # Resolve bracket expression
                self.bracket(iterator, tokens, x)

            # Check if token is a similar function
            elif Token.issimilar(tokens, x, similar):
                # Resolve similar expression
                self.similar(iterator, tokens, x, similar)

            # Check if token is a function
            elif Token.isfunction(tokens, x):
                # Resolve function parameters that are functions
                self.function(iterator, tokens, token, aliases, similar)

            # Check for attribute expression
            elif Token.isattribute(tokens, x):
                # Resolve attributes
                self.attribute(tokens, x, aliases)

            # Check for compound expression
            elif Token.iscompound(tokens, x):
                # Resolve compound expressions
                self.compound(iterator, tokens, x, aliases, similar)

    def alias(self, iterator, tokens, x, aliases, index):
        """
        Reads an alias clause and stores it in aliases.

        Nothing is stored when an alias keyword is the last token in the stream.

        Args:
            iterator: tokens iterator
            tokens: input tokens
            x: current position
            aliases: dict where aliases are stored - stores {alias: clause index}
            index: clause index, used to match aliases with columns
        """

        token = tokens[x]

        # If this is an alias token, get next token
        if token in Token.ALIAS:
            x, token = next(iterator, (None, None))

            # Alias keyword without an alias name, nothing to store
            if x is None:
                return

        # Consume tokens until end of stream or a separator is found. Evaluate next token to prevent consuming here.
        while x + 1 < len(tokens) and not Token.isseparator(Token.get(tokens, x + 1)):
            position, value = next(iterator, (None, None))

            # The iterator skips distinct tokens and can be exhausted even when x + 1 is a valid index.
            # Keep the last token read in that case.
            if position is None:
                break

            x, token = position, value

        # Add normalized alias and clause index
        aliases[Token.normalize(token)] = index

    def attribute(self, tokens, x, aliases):
        """
        Resolves an attribute column name.

        Args:
            tokens: input tokens
            x: current token position
            aliases: dict of generated aliases, if present these tokens should NOT be resolved
        """

        # Resolve attribute expression
        tokens[x] = self.resolve(tokens[x], aliases)

    def compound(self, iterator, tokens, x, aliases, similar):
        """
        Resolves column names in a compound expression (left side <operator(s)> right side).

        Args:
            iterator: tokens iterator
            tokens: input tokens
            x: current token position
            aliases: dict of generated aliases, if present these tokens should NOT be resolved
            similar: list where similar function call parameters are stored
        """

        # Resolve left side (left side already had function processing applied through standard loop).
        # There is no left side at position 0, without this check x - 1 would wrap around to the last token.
        if x > 0 and Token.iscolumn(tokens[x - 1]):
            tokens[x - 1] = self.resolve(tokens[x - 1], aliases)

        # Consume operator(s), handle both single and compound operators, i.e. column NOT LIKE 1
        token = tokens[x]
        while token and Token.isoperator(token):
            x, token = next(iterator, (None, None))

        # Resolve right side, token is None when the stream ended after the operator(s)
        if token and Token.iscolumn(token):
            # Need to process functions since it hasn't gone through the standard loop yet
            if Token.isfunction(tokens, x):
                self.function(iterator, tokens, token, aliases, similar)
            else:
                tokens[x] = self.resolve(token, aliases)

    def resolve(self, token, aliases):
        """
        Resolves this token's value if it is not an alias or a bind parameter.

        Args:
            token: token to resolve
            aliases: dict of generated aliases, if present these tokens should NOT be resolved

        Returns:
            resolved token value
        """

        # Check for alias or bind parameter (bind parameters start with a colon)
        if (aliases and Token.normalize(token) in aliases) or (token.startswith(":")):
            return token

        return self.resolver(token)