Vladimir Blagojevic 008a322023
feat: Add Indexing Pipeline (#6424)
* Add build_indexing_pipeline utils function

* Pylint fixes

* Move into another package to avoid circular deps

* Revert change

* Revert haystack/utils/__init__.py change

* Add example

* Use DocumentStore type, remove typing checks
2023-12-04 16:08:53 +01:00

15 lines
618 B
Python

from pathlib import Path
from haystack.document_stores import InMemoryDocumentStore
from haystack.pipeline_utils import build_indexing_pipeline
# We support many different databases. Here we load a simple and lightweight in-memory document store.
document_store = InMemoryDocumentStore()

# Collect the input files up front so that a wrong working directory fails fast with a clear
# message, before the pipeline is built. The folder is resolved relative to the current working
# directory, so run the script from a location where this relative path exists
# (presumably the directory containing this script -- confirm against the repository layout).
test_files_dir = Path("../../test/test_files")
if not test_files_dir.is_dir():
    raise FileNotFoundError(
        f"Test files folder not found: {test_files_dir.resolve()} (current working directory: {Path.cwd()})"
    )
# iterdir() yields sub-directories as well; keep only regular files, sorted for a reproducible order.
files = sorted(path for path in test_files_dir.iterdir() if path.is_file())

# Let's now build an indexing pipeline that indexes PDFs and text files from a test folder.
indexing_pipeline = build_indexing_pipeline(
    document_store=document_store, embedding_model="sentence-transformers/all-mpnet-base-v2"
)
result = indexing_pipeline.run(files=files)
print(result)