images.py
"""
Builds a similarity index for a directory of images

Requires streamlit to be installed.
  pip install streamlit
"""

import glob
import os
import sys

import streamlit as st

from PIL import Image

from txtai.embeddings import Embeddings


class Application:
    """
    Main application. Indexes a directory of images at construction time and serves
    a text-to-image search page through Streamlit.
    """

    def __init__(self, directory):
        """
        Creates a new application.

        Args:
            directory: directory of images
        """

        # The index is built eagerly, so constructing an Application does the heavy work
        self.embeddings = self.build(directory)

    def build(self, directory):
        """
        Builds an image embeddings index.

        Args:
            directory: directory with images

        Returns:
            Embeddings index
        """

        embeddings = Embeddings({"method": "sentence-transformers", "path": "clip-ViT-B-32"})
        embeddings.index(self.images(directory))

        # Update model to support multilingual queries. The images are already indexed
        # at this point; only the model used after this call is swapped.
        embeddings.config["path"] = "sentence-transformers/clip-ViT-B-32-multilingual-v1"
        embeddings.model = embeddings.loadvectors()

        return embeddings

    def images(self, directory):
        """
        Generator that loops over each image in a directory.

        Only names ending in "jpg" or "png" directly inside the directory are matched
        (the search is not recursive), jpg matches first.

        Args:
            directory: directory with images

        Yields:
            (path, image, None) tuples - presumably the (id, data, tags) form consumed
            by Embeddings.index, confirm against the txtai version in use
        """

        for path in glob.glob(directory + "/*jpg") + glob.glob(directory + "/*png"):
            # The file path doubles as the index id, run() opens search hits by that id
            yield (path, Image.open(path), None)

    def run(self):
        """
        Runs a Streamlit application.

        Renders the page header, reads a search query and shows the single best matching
        image. A warning is shown when the search returns no results.
        """

        st.title("Image search")

        st.markdown("This application shows how images and text can be embedded into the same space to support similarity search. ")
        st.markdown(
            "[sentence-transformers](https://github.com/UKPLab/sentence-transformers/tree/master/examples/applications/image-search) "
            + "recently added support for the [OpenAI CLIP model](https://github.com/openai/CLIP). This model embeds text and images into "
            + "the same space, enabling image similarity search. txtai can directly utilize these models."
        )

        query = st.text_input("Search query:")
        if query:
            # Request only the top match
            results = self.embeddings.search(query, 1)
            if results:
                # Each result is unpacked as (id, score); the id is the image path set in images()
                index, _ = results[0]
                st.image(Image.open(index))
            else:
                # An empty result list would otherwise raise IndexError on results[0]
                st.warning("No matching images found.")


@st.cache(allow_output_mutation=True)
def create(directory):
    """
    Creates and caches a Streamlit application.

    Args:
        directory: directory of images to index

    Returns:
        Application
    """

    return Application(directory)


if __name__ == "__main__":
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    if len(sys.argv) < 2:
        # Without a directory there is nothing to index, report it in the page
        # instead of failing with an IndexError on sys.argv[1]
        st.error("Usage: streamlit run images.py <directory of images>")
    else:
        # Create and run application
        app = create(sys.argv[1])
        app.run()