Account for nested files in s3 reader

This commit is contained in:
EmptyCrown 2023-02-10 15:44:56 -08:00
parent cd3219de68
commit e5dc38be2b

View File

@@ -4,6 +4,7 @@ A loader that fetches a file or iterates through a directory on AWS S3.
""" """
import tempfile import tempfile
from pathlib import Path
from typing import Any, Dict, List, Optional, Union from typing import Any, Dict, List, Optional, Union
from gpt_index import download_loader from gpt_index import download_loader
@@ -68,12 +69,16 @@ class S3Reader(BaseReader):
with tempfile.TemporaryDirectory() as temp_dir: with tempfile.TemporaryDirectory() as temp_dir:
if self.key: if self.key:
filepath = f"{temp_dir}/{self.key}" suffix = Path(self.key).suffix
filepath = f"{temp_dir}/{next(tempfile._get_candidate_names())}{suffix}"
s3_client.download_file(self.bucket, self.key, filepath) s3_client.download_file(self.bucket, self.key, filepath)
else: else:
bucket = s3.Bucket(self.bucket) bucket = s3.Bucket(self.bucket)
for obj in bucket.objects.filter(Prefix=self.prefix): for obj in bucket.objects.filter(Prefix=self.prefix):
filepath = f"{temp_dir}/{obj.key}" suffix = Path(obj.key).suffix
filepath = (
f"{temp_dir}/{next(tempfile._get_candidate_names())}{suffix}"
)
s3_client.download_file(self.bucket, obj.key, filepath) s3_client.download_file(self.bucket, obj.key, filepath)
SimpleDirectoryReader = download_loader("SimpleDirectoryReader") SimpleDirectoryReader = download_loader("SimpleDirectoryReader")