# token.py
"""
Token module
"""


class Token:
    """
    Methods to check for token type.

    All predicate methods return a strict bool. Methods taking a single token accept None
    (as returned by Token.get for an out of range position) and return False for it.
    """

    # Similar token replacement
    SIMILAR_TOKEN = "__SIMILAR__"

    # Default distinct token
    DISTINCT = ["distinct"]

    # Default alias token
    ALIAS = ["as"]

    # Default list of comparison operators
    OPERATORS = ["=", "!=", "<>", ">", ">=", "<", "<=", "+", "-", "*", "/", "%", "||", "not", "between", "like", "is", "null"]

    # Default list of logic separators
    LOGIC_SEPARATORS = ["and", "or"]

    # Default list of sort order operators
    SORT_ORDER = ["asc", "desc"]

    @staticmethod
    def get(tokens, x):
        """
        Gets token at position x. This method will validate position is valid within tokens.

        Args:
            tokens: input tokens
            x: position to retrieve

        Returns:
            tokens[x] if x is a valid position, None otherwise
        """

        # Negative positions are rejected on purpose, no wrap-around indexing
        if 0 <= x < len(tokens):
            return tokens[x]

        return None

    @staticmethod
    def isalias(tokens, x, alias):
        """
        Checks if tokens[x] is an alias keyword.

        Args:
            tokens: input tokens
            x: current position
            alias: if column alias processing is enabled

        Returns:
            True if tokens[x] is an alias token, False otherwise
        """

        prior = Token.get(tokens, x - 1)
        token = tokens[x]

        # True if prior token is not a separator, grouping token or distinct token and current token is either a column token or quoted token
        return bool(
            alias
            and x > 0
            and not Token.isseparator(prior)
            and not Token.isgroupstart(prior)
            and not Token.isdistinct(prior)
            and (Token.iscolumn(token) or Token.isquoted(token))
        )

    @staticmethod
    def isattribute(tokens, x):
        """
        Checks if tokens[x] is an attribute.

        Args:
            tokens: input tokens
            x: current position

        Returns:
            True if tokens[x] is an attribute, False otherwise
        """

        # True if token is a column and next token is not an operator
        return Token.iscolumn(tokens[x]) and not Token.isoperator(Token.get(tokens, x + 1))

    @staticmethod
    def isbracket(token):
        """
        Checks if token is an open bracket.

        Args:
            token: token to test

        Returns:
            True if token is an open bracket, False otherwise
        """

        # Token is a bracket
        return token == "["

    @staticmethod
    def iscolumn(token):
        """
        Checks if token is a column name.

        Args:
            token: token to test

        Returns:
            True if this token is a column name token, False otherwise
        """

        # Columns are not operators, logic separators, literals or sort order tokens
        return bool(
            token
            and not Token.isoperator(token)
            and not Token.islogicseparator(token)
            and not Token.isliteral(token)
            and not Token.issortorder(token)
        )

    @staticmethod
    def iscompound(tokens, x):
        """
        Checks if tokens[x] is a compound expression.

        Args:
            tokens: input tokens
            x: current position

        Returns:
            True if tokens[x] is a compound expression, False otherwise
        """

        # Compound expression is an operator with a column token directly before or directly after it
        return Token.isoperator(tokens[x]) and (Token.iscolumn(Token.get(tokens, x - 1)) or Token.iscolumn(Token.get(tokens, x + 1)))

    @staticmethod
    def isdistinct(token):
        """
        Checks if token is the distinct keyword.

        Args:
            token: token to test

        Returns:
            True if this token is a distinct keyword, False otherwise
        """

        # Token is the distinct keyword
        return bool(token) and token.lower() in Token.DISTINCT

    @staticmethod
    def isfunction(tokens, x):
        """
        Checks if tokens[x] is a function.

        Args:
            tokens: input tokens
            x: current position

        Returns:
            True if tokens[x] is a function, False otherwise
        """

        # True if a column token is followed by an open paren
        return Token.iscolumn(tokens[x]) and Token.get(tokens, x + 1) == "("

    @staticmethod
    def isgroupstart(token):
        """
        Checks if token is a group start token.

        Args:
            token: token to test

        Returns:
            True if token is a group start token, False otherwise
        """

        # Token is a paren
        return token == "("

    @staticmethod
    def isliteral(token):
        """
        Checks if token is a literal.

        Args:
            token: token to test

        Returns:
            True if this token is a literal, False otherwise
        """

        # Literals start with a quote, comma, paren or wildcard, or are numeric with at most one decimal point
        return bool(token) and (token.startswith(("'", '"', ",", "(", ")", "*")) or token.replace(".", "", 1).isdigit())

    @staticmethod
    def islogicseparator(token):
        """
        Checks if token is a logic separator token.

        Args:
            token: token to test

        Returns:
            True if this token is a logic separator, False otherwise
        """

        # Token is a logic separator
        return bool(token) and token.lower() in Token.LOGIC_SEPARATORS

    @staticmethod
    def isoperator(token):
        """
        Checks if token is an operator token.

        Args:
            token: token to test

        Returns:
            True if this token is an operator, False otherwise
        """

        # Token is an operator
        return bool(token) and token.lower() in Token.OPERATORS

    @staticmethod
    def isquoted(token):
        """
        Checks if token is quoted.

        Args:
            token: token to test

        Returns:
            True if this token is quoted, False otherwise
        """

        # Token starts and ends with a quote character. None and empty tokens are not quoted.
        return bool(token) and token.startswith(("'", '"')) and token.endswith(("'", '"'))

    @staticmethod
    def isseparator(token):
        """
        Checks if token is a separator token.

        Args:
            token: token to test

        Returns:
            True if this token is a separator, False otherwise
        """

        # Token is a comma
        return token == ","

    @staticmethod
    def issimilar(tokens, x, similar):
        """
        Checks if tokens[x] is a similar() function.

        Args:
            tokens: input tokens
            x: current position
            similar: list where similar function call parameters are stored, can be None in which case similar processing is skipped

        Returns:
            True if tokens[x] is a similar clause
        """

        # True if a "similar" token is followed by an open paren
        return similar is not None and tokens[x].lower() == "similar" and Token.get(tokens, x + 1) == "("

    @staticmethod
    def issortorder(token):
        """
        Checks if token is a sort order token.

        Args:
            token: token to test

        Returns:
            True if this token is a sort order operator, False otherwise
        """

        # Token is a sort order operator
        return bool(token) and token.lower() in Token.SORT_ORDER

    @staticmethod
    def normalize(token):
        """
        Applies a normalization algorithm to the input token as follows:
            - Strip single and double quotes
            - Make lowercase

        Args:
            token: input token

        Returns:
            normalized token
        """

        # Lowercase, replace and return
        return token.lower().replace("'", "").replace('"', "")

    @staticmethod
    def wrapspace(text, token):
        """
        Applies whitespace wrapping rules to token.

        Args:
            text: current text buffer, can be empty or None
            token: token to add

        Returns:
            token with whitespace rules applied
        """

        # Wildcards have no whitespace. Need special case since * is also multiply which does have whitespace.
        if token in ["*"] and (not text or text.endswith((" ", "("))):
            return token

        # Operator whitespace
        if Token.isoperator(token) or Token.islogicseparator(token) or token.lower() in ["in"]:
            # Skip the leading space when the buffer already ends with one. An empty/None buffer gets the leading space.
            return f" {token} " if not text or not text.endswith(" ") else f"{token} "

        # Comma whitespace
        if Token.isseparator(token):
            return f"{token} "

        # No whitespace if any of the following is True
        if not text or text.endswith((" ", "(", "[")) or token in ["(", "[", ")", "]"] or token.startswith("."):
            return token

        # Default is to add leading whitespace
        return f" {token}"