mirror of
https://github.com/deepset-ai/haystack.git
synced 2026-01-08 21:28:00 +00:00
expand example to use bytestream (#6718)
This commit is contained in:
parent
e1ec4e5e4d
commit
79d67b0338
@ -1,8 +1,10 @@
|
||||
from typing import Dict, Any
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import os
|
||||
|
||||
from haystack import Pipeline
|
||||
from haystack.dataclasses import ByteStream
|
||||
from haystack.components.others import Multiplexer
|
||||
from haystack.components.converters import PyPDFToDocument, TextFileToDocument
|
||||
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
|
||||
@ -34,9 +36,20 @@ p.connect("joiner.documents", "cleaner.documents")
|
||||
p.connect("cleaner.documents", "splitter.documents")
|
||||
p.connect("splitter.documents", "writer.documents")
|
||||
|
||||
# Add metadata to your files by using ByteStream.
# Every regular file in the current working directory becomes one ByteStream
# that carries its path and its position in the directory listing.
sources = []
# `position` counts every directory entry (sub-directories included), so the
# positions stored on the files may have gaps.
for position, path in enumerate(Path(".").iterdir()):
    if not path.is_file():
        continue
    # Create the ByteStream
    source = ByteStream.from_file_path(path)
    # Add the metadata
    # NOTE(review): `path` is stored as a pathlib.Path object, not a string;
    # confirm downstream components accept non-string meta values.
    source.meta["path"] = path
    source.meta["position"] = position
    sources.append(source)
|
||||
|
||||
# Run the pipeline on the ByteStream sources built above. Only one
# "file_type_router" entry is passed: a repeated key in a dict literal is
# silently overwritten by the last occurrence, so the older entry that listed
# the raw directory paths was dead input and has been dropped.
result = p.run(
    {
        "file_type_router": {"sources": sources},
        # The same "date_added" timestamp (local time, ISO 8601) is handed to
        # the metadata multiplexer for this run.
        "metadata_multiplexer": {"value": {"date_added": datetime.now().isoformat()}},
    }
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user