Cradicle Explorer

/ examples / similarity.py
similarity.py
 1  """
 2  Basic similarity search example. Used in the original txtai demo.
 3  
 4  Requires streamlit to be installed.
 5    pip install streamlit
 6  """
 7  
 8  import os
 9  
10  import streamlit as st
11  
12  from txtai.embeddings import Embeddings
13  
14  
class Application:
    """
    Main application.

    Holds a txtai Embeddings instance and renders a Streamlit page that prints the row
    of user-editable text that best matches a query.
    """

    def __init__(self):
        """
        Creates a new application.
        """

        # Create embeddings model, backed by sentence-transformers & transformers
        self.embeddings = Embeddings({"path": "sentence-transformers/nli-mpnet-base-v2"})

    @staticmethod
    def _rows(text):
        """
        Splits text area content into candidate rows.

        Blank and whitespace-only lines are dropped so they are never scored as candidates.
        splitlines() is used instead of split("\\n") so "\\r\\n" input does not leave a
        trailing "\\r" on each row.

        Args:
            text: raw text, one candidate row per line

        Returns:
            list of non-blank lines in input order
        """

        return [line for line in text.splitlines() if line.strip()]

    def run(self):
        """
        Runs a Streamlit application.

        Renders the title, an editable data area pre-filled with sample rows and a query
        input. When a query and at least one non-blank row are present, writes the best
        matching row.
        """

        st.title("Similarity Search")
        st.markdown("This application runs a basic similarity search that identifies the best matching row for a query.")

        # Sample rows used to pre-fill the data text area
        data = [
            "US tops 5 million confirmed virus cases",
            "Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg",
            "Beijing mobilises invasion craft along coast as Taiwan tensions escalate",
            "The National Park Service warns against sacrificing slower friends in a bear attack",
            "Maine man wins $1M from $25 lottery ticket",
            "Make huge profits without work, earn up to $100,000 a day",
        ]

        text = st.text_area("Data", value="\n".join(data))
        query = st.text_input("Query")

        # One candidate per non-blank line
        rows = self._rows(text)

        # Nothing to search until there is both a query and at least one candidate row
        if not query or not rows:
            return

        # similarity() results are indexed as [rank][0] -> position in rows; take the top result
        uid = self.embeddings.similarity(query, rows)[0][0]
        st.write(rows[uid])
54  
55  
@st.cache(allow_output_mutation=True)
def create():
    """
    Builds the Application instance, wrapped by Streamlit's cache decorator.

    Returns:
        Application
    """

    # NOTE(review): newer Streamlit releases reportedly deprecate st.cache in favour of
    # st.cache_resource - confirm against the Streamlit version this example targets.
    application = Application()
    return application
66  
67  
if __name__ == "__main__":
    # Set TOKENIZERS_PARALLELISM to "false" in the environment before the application is built
    os.environ.update({"TOKENIZERS_PARALLELISM": "false"})

    # Obtain the application through create() and render the page
    app = create()
    app.run()