mirror of
https://github.com/run-llama/llama-hub.git
synced 2025-08-18 13:42:06 +00:00
Account for nested files in s3 reader
This commit is contained in:
parent
cd3219de68
commit
e5dc38be2b
@@ -4,6 +4,7 @@ A loader that fetches a file or iterates through a directory on AWS S3.
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
import tempfile
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional, Union
|
from typing import Any, Dict, List, Optional, Union
|
||||||
|
|
||||||
from gpt_index import download_loader
|
from gpt_index import download_loader
|
||||||
@@ -68,12 +69,16 @@ class S3Reader(BaseReader):
|
|||||||
|
|
||||||
with tempfile.TemporaryDirectory() as temp_dir:
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
if self.key:
|
if self.key:
|
||||||
filepath = f"{temp_dir}/{self.key}"
|
suffix = Path(self.key).suffix
|
||||||
|
filepath = f"{temp_dir}/{next(tempfile._get_candidate_names())}{suffix}"
|
||||||
s3_client.download_file(self.bucket, self.key, filepath)
|
s3_client.download_file(self.bucket, self.key, filepath)
|
||||||
else:
|
else:
|
||||||
bucket = s3.Bucket(self.bucket)
|
bucket = s3.Bucket(self.bucket)
|
||||||
for obj in bucket.objects.filter(Prefix=self.prefix):
|
for obj in bucket.objects.filter(Prefix=self.prefix):
|
||||||
filepath = f"{temp_dir}/{obj.key}"
|
suffix = Path(obj.key).suffix
|
||||||
|
filepath = (
|
||||||
|
f"{temp_dir}/{next(tempfile._get_candidate_names())}{suffix}"
|
||||||
|
)
|
||||||
s3_client.download_file(self.bucket, obj.key, filepath)
|
s3_client.download_file(self.bucket, obj.key, filepath)
|
||||||
|
|
||||||
SimpleDirectoryReader = download_loader("SimpleDirectoryReader")
|
SimpleDirectoryReader = download_loader("SimpleDirectoryReader")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user