# haystack/examples/getting_started/rag_custom_data.py

from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.pipeline_utils import build_rag_pipeline, build_indexing_pipeline
from haystack.pipeline_utils.indexing import download_files
# We are model agnostic :) In this getting started guide, you can choose any OpenAI or Hugging Face TGI generation model.
generation_model = "gpt-3.5-turbo"
API_KEY = "sk-..." # ADD YOUR KEY HERE
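
# A minimal sketch of a safer alternative: read the key from an environment variable instead of
# hardcoding it. The variable name OPENAI_API_KEY is an assumption here, not something this
# example requires; if the variable is unset, the placeholder above is kept.
import os

API_KEY = os.environ.get("OPENAI_API_KEY", API_KEY)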

# We support many different document stores. Here, we use a simple and lightweight in-memory store.
document_store = InMemoryDocumentStore()

# Download example files from the web
files = download_files(sources=["http://www.paulgraham.com/superlinear.html"])

# Pipelines are our main abstraction.
# Here we create a pipeline that can index TXT and HTML files. You can also use your own private files.
indexing_pipeline = build_indexing_pipeline(
    document_store=document_store,
    embedding_model="intfloat/e5-base-v2",
    supported_mime_types=["text/plain", "text/html"],  # add "application/pdf" here to index PDFs as well
)
indexing_pipeline.run(files=files) # you can also supply files=[path_to_directory], which is searched recursively
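
# To index your own files instead, point the run at a local directory (a sketch;
# "my_documents/" is a placeholder path, not part of this example):
# indexing_pipeline.run(files=["my_documents/"])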
# RAG pipeline with vector-based retriever + LLM
rag_pipeline = build_rag_pipeline(
    document_store=document_store,
    embedding_model="intfloat/e5-base-v2",
    generation_model=generation_model,
    llm_api_key=API_KEY,
)
# For details, such as which documents were used to generate the answer, inspect the result object
result = rag_pipeline.run(query="What are superlinear returns and why are they important?")
print(result.data)
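
# The result is an answer object: .data holds the generated text. It should also carry the
# retrieved documents, but the exact attribute name (e.g. result.documents) and the document
# fields below are assumptions about this Haystack version, so treat this as a sketch:
# for doc in result.documents:
#     print(doc.meta, doc.content[:100])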