/ src / python / txtai / graph / query.py
query.py
  1  """
  2  Query module
  3  """
  4  
  5  import logging
  6  import re
  7  
  8  try:
  9      from grandcypher import GrandCypher
 10  
 11      GRANDCYPHER = True
 12  except ImportError:
 13      GRANDCYPHER = False
 14  
 15  # Logging configuration
 16  logger = logging.getLogger(__name__)
 17  
 18  
 19  class Query:
 20      """
 21      Runs openCypher graph queries using the GrandCypher library. This class also supports search functions.
 22      """
 23  
 24      # Similar token
 25      SIMILAR = "__SIMILAR__"
 26  
 27      def __init__(self):
 28          """
 29          Create a new graph query instance.
 30          """
 31  
 32          if not GRANDCYPHER:
 33              raise ImportError('GrandCypher is not available - install "graph" extra to enable')
 34  
 35      def __call__(self, graph, query, limit):
 36          """
 37          Runs a graph query.
 38  
 39          Args:
 40              graph: graph instance
 41              query: graph query, can be a full query string or a parsed query dictionary
 42              limit: number of results
 43  
 44          Returns:
 45              results
 46          """
 47  
 48          # Results by attribute and ids filter
 49          attributes, uids = None, None
 50  
 51          # Build the query from a parsed query
 52          if isinstance(query, dict):
 53              query, attributes, uids = self.build(query)
 54  
 55          # Filter graph, if applicable
 56          if uids:
 57              graph = self.filter(graph, attributes, uids)
 58  
 59          # Debug log graph query
 60          logger.debug(query)
 61  
 62          # Run openCypher query
 63          return GrandCypher(graph.backend, limit if limit else 3).run(query)
 64  
 65      def isquery(self, queries):
 66          """
 67          Checks a list of queries to see if all queries are openCypher queries.
 68  
 69          Args:
 70              queries: list of queries to check
 71  
 72          Returns:
 73              True if all queries are openCypher queries
 74          """
 75  
 76          # Check for required graph query clauses
 77          return all(query and query.strip().startswith("MATCH ") and "RETURN " in query for query in queries)
 78  
 79      def parse(self, query):
 80          """
 81          Parses a graph query. This method supports parsing search functions and replacing them with placeholders.
 82  
 83          Args:
 84              query: graph query
 85  
 86          Returns:
 87              parsed query as a dictionary
 88          """
 89  
 90          # Parameters
 91          where, limit, nodes, similar = None, None, [], []
 92  
 93          # Parse where clause
 94          match = re.search(r"where(.+?)return", query, flags=re.DOTALL | re.IGNORECASE)
 95          if match:
 96              where = match.group(1).strip()
 97  
 98          # Parse limit clause
 99          match = re.search(r"limit\s+(\d+)", query, flags=re.DOTALL | re.IGNORECASE)
100          if match:
101              limit = match.group(1)
102  
103          # Parse similar clauses
104          for x, match in enumerate(re.finditer(r"similar\((.+?)\)", query, flags=re.DOTALL | re.IGNORECASE)):
105              # Replace similar clause with placeholder
106              query = query.replace(match.group(0), f"{Query.SIMILAR}{x}")
107  
108              # Parse similar clause parameters
109              params = [param.strip().replace("'", "").replace('"', "") for param in match.group(1).split(",")]
110              nodes.append(params[0])
111              similar.append(params[1:])
112  
113          # Return parsed query
114          return {
115              "query": query,
116              "where": where,
117              "limit": limit,
118              "nodes": nodes,
119              "similar": similar,
120          }
121  
122      def build(self, parse):
123          """
124          Constructs a full query from a parsed query. This method supports substituting placeholders with search results.
125  
126          Args:
127              parse: parsed query
128  
129          Returns:
130              graph query
131          """
132  
133          # Get query. Initialize attributes and uids.
134          query, attributes, uids = parse["query"], {}, {}
135  
136          # Replace similar clause with id query
137          if "results" in parse:
138              for x, result in enumerate(parse["results"]):
139                  # Get query node
140                  node = parse["nodes"][x]
141  
142                  # Add similar match attribute
143                  attribute = f"match_{x}"
144                  clause = f"{node}.{attribute} > 0"
145  
146                  # Replace placeholder with earch results
147                  query = query.replace(f"{Query.SIMILAR}{x}", f"{clause}")
148  
149                  # Add uids and scores
150                  for uid, score in result:
151                      if uid not in uids:
152                          uids[uid] = score
153  
154                  # Add results by attribute matched
155                  attributes[attribute] = result
156  
157          # Return query, results by attribute matched and ids filter
158          return query, attributes, uids.items()
159  
160      def filter(self, graph, attributes, uids):
161          """
162          Filters the input graph by uids. This method also adds similar match attributes.
163  
164          Args:
165              graph: graph instance
166              attributes: results by attribute matched
167              uids: single list with all matching ids
168  
169          Returns:
170              filtered graph
171          """
172  
173          # Filter the graph
174          graph = graph.filter(uids)
175  
176          # Add similar match attributes
177          for attribute, result in attributes.items():
178              for uid, score in result:
179                  graph.addattribute(uid, attribute, score)
180  
181          return graph