Cradicle Explorer

base.py
  1  """
  2  API module
  3  """
  4  
  5  import json
  6  
  7  from .cluster import Cluster
  8  
  9  from ..app import Application
 10  
 11  
 12  class API(Application):
 13      """
 14      Base API template. The API is an extended txtai application, adding the ability to cluster API instances together.
 15  
 16      Downstream applications can extend this base template to add/modify functionality.
 17      """
 18  
 19      def __init__(self, config, loaddata=True):
 20          super().__init__(config, loaddata)
 21  
 22          # Embeddings cluster
 23          self.cluster = None
 24          if self.config.get("cluster"):
 25              self.cluster = Cluster(self.config["cluster"])
 26  
 27      # pylint: disable=W0221
 28      def search(self, query, limit=None, weights=None, index=None, parameters=None, graph=False, request=None):
 29          # When search is invoked via the API, limit is set from the request
 30          # When search is invoked directly, limit is set using the method parameter
 31          limit = self.limit(request.query_params.get("limit") if request and hasattr(request, "query_params") else limit)
 32          weights = self.weights(request.query_params.get("weights") if request and hasattr(request, "query_params") else weights)
 33          index = request.query_params.get("index") if request and hasattr(request, "query_params") else index
 34          parameters = request.query_params.get("parameters") if request and hasattr(request, "query_params") else parameters
 35          graph = request.query_params.get("graph") if request and hasattr(request, "query_params") else graph
 36  
 37          # Decode parameters
 38          parameters = json.loads(parameters) if parameters and isinstance(parameters, str) else parameters
 39  
 40          if self.cluster:
 41              return self.cluster.search(query, limit, weights, index, parameters, graph)
 42  
 43          return super().search(query, limit, weights, index, parameters, graph)
 44  
 45      def batchsearch(self, queries, limit=None, weights=None, index=None, parameters=None, graph=False):
 46          if self.cluster:
 47              return self.cluster.batchsearch(queries, self.limit(limit), weights, index, parameters, graph)
 48  
 49          return super().batchsearch(queries, limit, weights, index, parameters, graph)
 50  
 51      def add(self, documents):
 52          """
 53          Adds a batch of documents for indexing.
 54  
 55          Downstream applications can override this method to also store full documents in an external system.
 56  
 57          Args:
 58              documents: list of {id: value, text: value}
 59  
 60          Returns:
 61              unmodified input documents
 62          """
 63  
 64          if self.cluster:
 65              self.cluster.add(documents)
 66          else:
 67              super().add(documents)
 68  
 69          return documents
 70  
 71      def index(self):
 72          """
 73          Builds an embeddings index for previously batched documents.
 74          """
 75  
 76          if self.cluster:
 77              self.cluster.index()
 78          else:
 79              super().index()
 80  
 81      def upsert(self):
 82          """
 83          Runs an embeddings upsert operation for previously batched documents.
 84          """
 85  
 86          if self.cluster:
 87              self.cluster.upsert()
 88          else:
 89              super().upsert()
 90  
 91      def delete(self, ids):
 92          """
 93          Deletes from an embeddings index. Returns list of ids deleted.
 94  
 95          Args:
 96              ids: list of ids to delete
 97  
 98          Returns:
 99              ids deleted
100          """
101  
102          if self.cluster:
103              return self.cluster.delete(ids)
104  
105          return super().delete(ids)
106  
107      def reindex(self, config, function=None):
108          """
109          Recreates this embeddings index using config. This method only works if document content storage is enabled.
110  
111          Args:
112              config: new config
113              function: optional function to prepare content for indexing
114          """
115  
116          if self.cluster:
117              self.cluster.reindex(config, function)
118          else:
119              super().reindex(config, function)
120  
121      def count(self):
122          """
123          Total number of elements in this embeddings index.
124  
125          Returns:
126              number of elements in embeddings index
127          """
128  
129          if self.cluster:
130              return self.cluster.count()
131  
132          return super().count()
133  
134      def limit(self, limit):
135          """
136          Parses the number of results to return from the request. Allows range of 1-250, with a default of 10.
137  
138          Args:
139              limit: limit parameter
140  
141          Returns:
142              bounded limit
143          """
144  
145          # Return between 1 and 250 results, defaults to 10
146          return max(1, min(250, int(limit) if limit else 10))
147  
148      def weights(self, weights):
149          """
150          Parses the weights parameter from the request.
151  
152          Args:
153              weights: weights parameter
154  
155          Returns:
156              weights
157          """
158  
159          return float(weights) if weights else weights