"""
RAG (Retrieval Augmented Generation) Example.

This demonstrates how to load documents, create embeddings, and query them.
"""

from langchain_community.llms import Ollama
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings
from langchain.chains import RetrievalQA
from langchain.schema import Document

# Connection settings hoisted to constants so the endpoint/model are defined
# in one place instead of being duplicated in each client constructor.
OLLAMA_BASE_URL = "http://192.222.50.154:11434"
OLLAMA_MODEL = "gpt-oss:120b"

# Initialize the Ollama LLM used for answer generation.
llm = Ollama(
    model=OLLAMA_MODEL,
    base_url=OLLAMA_BASE_URL,
)

# Initialize embeddings (using a smaller model for speed)
# Note: You can change this to use OpenAI embeddings or other providers
embeddings = OllamaEmbeddings(
    model=OLLAMA_MODEL,
    base_url=OLLAMA_BASE_URL,
)


def create_vector_db_from_text(texts: list[str]) -> Chroma:
    """Create a vector database from a list of text strings.

    Args:
        texts: Raw text passages to index.

    Returns:
        A Chroma vector store holding the chunked, embedded texts.
    """
    # Wrap each raw string in a Document so the splitter can process it.
    documents = [Document(page_content=text) for text in texts]

    # Split documents into overlapping chunks: overlap preserves context
    # across chunk boundaries while keeping retrieval results focused.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )
    splits = text_splitter.split_documents(documents)

    # Embed the chunks and store them in a Chroma collection.
    vectorstore = Chroma.from_documents(
        documents=splits,
        embedding=embeddings,
        collection_name="my_collection",
    )

    return vectorstore


def query_documents(vectorstore: Chroma, query: str) -> dict:
    """Query the vector database and get an answer.

    Args:
        vectorstore: The Chroma store to retrieve context from.
        query: Natural-language question to answer.

    Returns:
        Chain output dict containing "result" (the answer text) and
        "source_documents" (the retrieved chunks).
    """
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        # k=3: use the three most similar chunks as context for the answer.
        retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
        return_source_documents=True,
    )

    # Fix: call .invoke() — the bare qa_chain({...}) __call__ form is
    # deprecated in modern LangChain in favor of the Runnable interface.
    result = qa_chain.invoke({"query": query})
    return result


def main() -> None:
    """Build a demo vector DB from sample docs and run example queries."""
    # Sample documents about a fictional company
    sample_docs = [
        """
        Acme Corporation is a technology company founded in 2020.
        We specialize in artificial intelligence and machine learning solutions.
        Our main products include AutoML Platform, AI Vision, and NLP Suite.
        """,
        """
        Acme Corporation has offices in San Francisco, New York, and London.
        Our headquarters is located in San Francisco, California.
        We have over 500 employees worldwide.
        """,
        """
        Acme Corporation's AI Vision product can analyze images and videos
        for object detection, facial recognition, and scene understanding.
        It achieves 95% accuracy on standard benchmarks and processes
        1000 images per second.
        """,
        """
        The AutoML Platform by Acme Corporation allows users to build
        machine learning models without writing code. It supports
        classification, regression, and clustering tasks. The platform
        has been used by over 10,000 companies.
        """,
    ]

    print("Creating vector database from documents...")
    vectorstore = create_vector_db_from_text(sample_docs)
    print("Vector database created!\n")

    # Example queries
    queries = [
        "What products does Acme Corporation offer?",
        "Where are Acme Corporation's offices located?",
        "What can AI Vision do?",
    ]

    for query in queries:
        print("=" * 60)
        print(f"Query: {query}")
        print("=" * 60)
        result = query_documents(vectorstore, query)
        print(f"\nAnswer: {result['result']}\n")
        print("Source documents used:")
        for i, doc in enumerate(result['source_documents'], 1):
            # Show a 100-char preview of each retrieved source chunk.
            print(f"{i}. {doc.page_content[:100]}...")
        print()


if __name__ == "__main__":
    main()