expand example to use bytestream (#6718)

This commit is contained in:
ZanSara 2024-01-11 12:04:25 +01:00 committed by GitHub
parent e1ec4e5e4d
commit 79d67b0338
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -1,8 +1,10 @@
from typing import Dict, Any
from pathlib import Path
from datetime import datetime
import os
from haystack import Pipeline
from haystack.dataclasses import ByteStream
from haystack.components.others import Multiplexer
from haystack.components.converters import PyPDFToDocument, TextFileToDocument
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
@@ -34,9 +36,20 @@ p.connect("joiner.documents", "cleaner.documents")
# Wire the remaining stages in sequence: the cleaner's documents feed the
# splitter, and the splitter's documents feed the writer.
p.connect("cleaner.documents", "splitter.documents")
p.connect("splitter.documents", "writer.documents")
# Add metadata to your files by using ByteStream.
# Every regular file in the current directory is loaded into a ByteStream and
# tagged with its path and its position in the directory listing.
sources = []
# iterdir() is directly iterable, so no intermediate list() copy is needed.
# It yields entries in arbitrary order, and `position` is the index within
# that full listing, so it also counts the non-file entries skipped below.
for position, path in enumerate(Path(".").iterdir()):
    if path.is_file():
        # Create the ByteStream
        source = ByteStream.from_file_path(path)
        # Add the metadata
        source.meta["path"] = path
        source.meta["position"] = position
        sources.append(source)
# Run the pipeline. The router is given the ByteStream objects collected in
# `sources`; the multiplexer is given a `date_added` timestamp taken at run
# time (datetime.now() without a tz argument is naive local time).
result = p.run(
    {
        # Each component name must appear only once: a dict literal silently
        # keeps just the last value for a repeated key.
        "file_type_router": {"sources": sources},
        "metadata_multiplexer": {"value": {"date_added": datetime.now().isoformat()}},
    }
)