rag_quickstart.py
"""
RAG Quick Start
Easy to use way to get started with RAG using YOUR data

For a complete application see this: https://github.com/neuml/rag

TxtAI has many example notebooks covering everything the framework provides
Examples: https://neuml.github.io/txtai/examples

Install TxtAI
  pip install txtai[pipeline-data]
"""

# pylint: disable=C0103
import os

from txtai import Embeddings, RAG
from txtai.pipeline import Textractor

# Step 1: Collect files from local directory
#
# Defaults to "data". Set to wherever your files are.
# Only regular files directly inside `path` are collected (no recursion).
path = "data"
candidates = (os.path.join(path, name) for name in os.listdir(path))
# os.listdir returns entries in arbitrary order; sort so that files are always
# chunked and indexed in the same order from run to run.
files = sorted(candidate for candidate in candidates if os.path.isfile(candidate))

# Step 2: Text Extraction / Chunking
#
# Using section based chunking here. More complex options available such as semantic chunking, iterative chunking etc.
# Documentation: https://neuml.github.io/txtai/pipeline/data/textractor
# Supports Chonkie chunking as well: https://docs.chonkie.ai/oss/chunkers/overview
textractor = Textractor(backend="docling", sections=True)

# Each extracted chunk is paired with the path of the file it came from
chunks = [(f, chunk) for f in files for chunk in textractor(f)]

# Step 3: Build an embeddings database
#
# The `path` parameter sets the vector embeddings model. Supports Hugging Face models, llama.cpp, Ollama, vLLM and more.
# Documentation: https://neuml.github.io/txtai/embeddings/
embeddings = Embeddings(content=True, path="Qwen/Qwen3-Embedding-0.6B", maxlength=2048)
embeddings.index(chunks)

# Step 4: Create RAG pipeline
#
# Combines an embeddings database and an LLM.
# Supports Hugging Face models, llama.cpp, Ollama, vLLM and more
# Documentation: https://neuml.github.io/txtai/pipeline/text/rag

# User prompt template
# The {question} and {context} placeholders are filled in by the RAG pipeline
template = """
Answer the following question using the provided context.

Question:
{question}

Context:
{context}
"""

rag = RAG(
    embeddings,
    "Qwen/Qwen3-0.6B",
    system="You are a friendly assistant",
    template=template,
    output="flatten",
)

# Ask a question against the indexed data and print the generated answer
question = "Summarize the main advancements made by BERT"
print(rag(question, maxlength=2048, stripthink=True))