haystack/examples/getting_started/indexing.py

from pathlib import Path

from haystack.document_stores import InMemoryDocumentStore
from haystack.pipeline_utils import build_indexing_pipeline

# We support many different databases. Here we load a simple and lightweight in-memory document store.
document_store = InMemoryDocumentStore()

# Let's now build an indexing pipeline that indexes PDFs and text files from a test folder.
indexing_pipeline = build_indexing_pipeline(
    document_store=document_store, embedding_model="sentence-transformers/all-mpnet-base-v2"
)

# Locate the test folder relative to this script instead of the current working directory, so the example
# runs no matter where it is launched from. A bare "../../test/test_files" only resolves correctly when the
# process is started from this file's own directory.
try:
    example_dir = Path(__file__).resolve().parent
except NameError:
    # `__file__` is undefined when the snippet is pasted into a REPL or notebook;
    # fall back to the working directory, which matches the previous behaviour.
    example_dir = Path.cwd()
test_files_dir = example_dir / "../../test/test_files"

# Every entry of the folder is handed to the pipeline; the run result is printed as-is.
result = indexing_pipeline.run(files=list(test_files_dir.iterdir()))
print(result)
feat: Add Indexing Pipeline (#6424)

* Add build_indexing_pipeline utils function
* Pylint fixes
* Move into another package to avoid circular deps
* Revert change
* Revert haystack/utils/__init__.py change
* Add example
* Use DocumentStore type, remove typing checks

2023-12-04 16:08:53 +01:00
			`from pathlib import Path`

			`from haystack.document_stores import InMemoryDocumentStore`
			`from haystack.pipeline_utils import build_indexing_pipeline`

			`# We support many different databases. Here we load a simple and lightweight in-memory document store.`
			`document_store = InMemoryDocumentStore()`

			`# Let's now build indexing pipeline that indexes PDFs and text files from a test folder.`
			`indexing_pipeline = build_indexing_pipeline(`
			`document_store=document_store, embedding_model="sentence-transformers/all-mpnet-base-v2"`
			`)`
			`result = indexing_pipeline.run(files=list(Path("../../test/test_files").iterdir()))`
			`print(result)`