mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-07-14 12:31:10 +00:00

* Add build_indexing_pipeline utils function * Pylint fixes * Move into another package to avoid circular deps * Revert change * Revert haystack/utils/__init__.py change * Add example * Use DocumentStore type, remove typing checks
15 lines
618 B
Python
15 lines
618 B
Python
"""Example: index local files into an in-memory document store.

Builds an indexing pipeline with ``build_indexing_pipeline``, runs it on every
entry of the ``test/test_files`` folder located two levels above this script's
directory, and prints whatever the pipeline returns.
"""

from pathlib import Path

from haystack.document_stores import InMemoryDocumentStore
from haystack.pipeline_utils import build_indexing_pipeline

# Locate the sample files relative to this script instead of the current
# working directory, so the example runs no matter where it is launched from.
# parents[0] is the script's folder, parents[2] is the equivalent of "../..".
TEST_FILES_DIR = Path(__file__).resolve().parents[2] / "test" / "test_files"

# We support many different databases. Here we load a simple and lightweight in-memory document store.
document_store = InMemoryDocumentStore()

# Let's now build an indexing pipeline that indexes PDFs and text files from a test folder.
indexing_pipeline = build_indexing_pipeline(
    document_store=document_store,
    embedding_model="sentence-transformers/all-mpnet-base-v2",
)

# Hand every entry of the test folder to the pipeline and show its output.
result = indexing_pipeline.run(files=list(TEST_FILES_DIR.iterdir()))
print(result)