similarity.py
"""
Basic similarity search example. Used in the original txtai demo.

Requires streamlit to be installed.
  pip install streamlit
"""

import os

import streamlit as st

from txtai.embeddings import Embeddings


class Application:
    """
    Main application.

    Holds a single Embeddings instance and renders a small Streamlit page that
    shows the row of user-supplied text that best matches a query.
    """

    def __init__(self):
        """
        Creates a new application.
        """

        # Create embeddings model, backed by sentence-transformers & transformers
        self.embeddings = Embeddings({"path": "sentence-transformers/nli-mpnet-base-v2"})

    def run(self):
        """
        Runs a Streamlit application.

        Renders a text area pre-filled with sample rows (one row per line) and a
        query box. When both a query and at least one non-blank row are present,
        writes the best matching row to the page.
        """

        st.title("Similarity Search")
        st.markdown("This application runs a basic similarity search that identifies the best matching row for a query.")

        data = [
            "US tops 5 million confirmed virus cases",
            "Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg",
            "Beijing mobilises invasion craft along coast as Taiwan tensions escalate",
            "The National Park Service warns against sacrificing slower friends in a bear attack",
            "Maine man wins $1M from $25 lottery ticket",
            "Make huge profits without work, earn up to $100,000 a day",
        ]

        data = st.text_area("Data", value="\n".join(data))
        query = st.text_input("Query")

        # One candidate per line. Blank lines (e.g. a trailing newline or an
        # emptied text area) are dropped so they can never be scored or returned
        # as the "best match".
        data = [row for row in data.split("\n") if row.strip()]

        # Nothing to search until there is both a query and at least one row
        if query and data:
            # First element of the first result is used as the index of the best
            # matching row in data
            uid = self.embeddings.similarity(query, data)[0][0]
            st.write(data[uid])


def _cache_resource(function):
    """
    Wraps function with Streamlit's resource cache.

    Uses st.cache_resource when this Streamlit version provides it, otherwise
    falls back to the legacy st.cache(allow_output_mutation=True) call.

    Args:
        function: function to cache

    Returns:
        cached function
    """

    decorator = getattr(st, "cache_resource", None)
    if decorator is not None:
        return decorator(function)

    return st.cache(allow_output_mutation=True)(function)


@_cache_resource
def create():
    """
    Creates and caches a Streamlit application.

    Returns:
        Application
    """

    return Application()


if __name__ == "__main__":
    # Set before the application (and its model) is created
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    # Create and run application
    app = create()
    app.run()