/ examples / images.py
images.py
  1  """
  2  Builds a similarity index for a directory of images
  3  
  4  Requires streamlit to be installed.
  5    pip install streamlit
  6  """
  7  
  8  import glob
  9  import os
 10  import sys
 11  
 12  import streamlit as st
 13  
 14  from PIL import Image
 15  
 16  from txtai.embeddings import Embeddings
 17  
 18  
 19  class Application:
 20      """
 21      Main application
 22      """
 23  
 24      def __init__(self, directory):
 25          """
 26          Creates a new application.
 27  
 28          Args:
 29              directory: directory of images
 30          """
 31  
 32          self.embeddings = self.build(directory)
 33  
 34      def build(self, directory):
 35          """
 36          Builds an image embeddings index.
 37  
 38          Args:
 39              directory: directory with images
 40  
 41          Returns:
 42              Embeddings index
 43          """
 44  
 45          embeddings = Embeddings({"method": "sentence-transformers", "path": "clip-ViT-B-32"})
 46          embeddings.index(self.images(directory))
 47  
 48          # Update model to support multilingual queries
 49          embeddings.config["path"] = "sentence-transformers/clip-ViT-B-32-multilingual-v1"
 50          embeddings.model = embeddings.loadvectors()
 51  
 52          return embeddings
 53  
 54      def images(self, directory):
 55          """
 56          Generator that loops over each image in a directory.
 57  
 58          Args:
 59              directory: directory with images
 60          """
 61  
 62          for path in glob.glob(directory + "/*jpg") + glob.glob(directory + "/*png"):
 63              yield (path, Image.open(path), None)
 64  
 65      def run(self):
 66          """
 67          Runs a Streamlit application.
 68          """
 69  
 70          st.title("Image search")
 71  
 72          st.markdown("This application shows how images and text can be embedded into the same space to support similarity search. ")
 73          st.markdown(
 74              "[sentence-transformers](https://github.com/UKPLab/sentence-transformers/tree/master/examples/applications/image-search) "
 75              + "recently added support for the [OpenAI CLIP model](https://github.com/openai/CLIP). This model embeds text and images into "
 76              + "the same space, enabling image similarity search. txtai can directly utilize these models."
 77          )
 78  
 79          query = st.text_input("Search query:")
 80          if query:
 81              index, _ = self.embeddings.search(query, 1)[0]
 82              st.image(Image.open(index))
 83  
 84  
 85  @st.cache(allow_output_mutation=True)
 86  def create(directory):
 87      """
 88      Creates and caches a Streamlit application.
 89  
 90      Args:
 91          directory: directory of images to index
 92  
 93      Returns:
 94          Application
 95      """
 96  
 97      return Application(directory)
 98  
 99  
if __name__ == "__main__":
    # Turn off tokenizers parallelism before the application (and its models) is created
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    if len(sys.argv) < 2:
        # No directory argument: report it on the page instead of failing with IndexError
        st.error("No image directory given. Usage: streamlit run images.py <image directory>")
    else:
        # Create and run application
        app = create(sys.argv[1])
        app.run()