dify/api/extensions/storage/aws_s3_storage.py

import logging
from collections.abc import Generator

import boto3  # type: ignore
from botocore.client import Config  # type: ignore
from botocore.exceptions import ClientError  # type: ignore

from configs import dify_config
from extensions.storage.base_storage import BaseStorage

logger = logging.getLogger(__name__)


class AwsS3Storage(BaseStorage):
"""Implementation for Amazon Web Services S3 storage."""
def __init__(self):
super().__init__()
self.bucket_name = dify_config.S3_BUCKET_NAME
if dify_config.S3_USE_AWS_MANAGED_IAM:
2024-10-12 18:47:59 +08:00
logger.info("Using AWS managed IAM role for S3")
2024-06-14 15:19:59 +08:00
session = boto3.Session()
region_name = dify_config.S3_REGION
2024-10-12 18:47:59 +08:00
self.client = session.client(service_name="s3", region_name=region_name)
2024-06-14 15:19:59 +08:00
else:
2024-10-12 18:47:59 +08:00
logger.info("Using ak and sk for S3")
2024-06-14 15:19:59 +08:00
self.client = boto3.client(
"s3",
aws_secret_access_key=dify_config.S3_SECRET_KEY,
aws_access_key_id=dify_config.S3_ACCESS_KEY,
endpoint_url=dify_config.S3_ENDPOINT,
region_name=dify_config.S3_REGION,
config=Config(s3={"addressing_style": dify_config.S3_ADDRESS_STYLE}),
)
# create bucket
try:
self.client.head_bucket(Bucket=self.bucket_name)
except ClientError as e:
# if bucket not exists, create it
if e.response["Error"]["Code"] == "404":
self.client.create_bucket(Bucket=self.bucket_name)
# if bucket is not accessible, pass, maybe the bucket is existing but not accessible
elif e.response["Error"]["Code"] == "403":
pass
else:
# other error, raise exception
raise

    def save(self, filename, data):
        self.client.put_object(Bucket=self.bucket_name, Key=filename, Body=data)

    def load_once(self, filename: str) -> bytes:
        try:
            data: bytes = self.client.get_object(Bucket=self.bucket_name, Key=filename)["Body"].read()
        except ClientError as ex:
            if ex.response["Error"]["Code"] == "NoSuchKey":
                raise FileNotFoundError("File not found")
            else:
                raise
        return data

    def load_stream(self, filename: str) -> Generator:
        try:
            response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
            yield from response["Body"].iter_chunks()
        except ClientError as ex:
            if ex.response["Error"]["Code"] == "NoSuchKey":
                raise FileNotFoundError("file not found")
            elif "reached max retries" in str(ex):
                raise ValueError("please do not request the same file too frequently")
            else:
                raise

    def download(self, filename, target_filepath):
        self.client.download_file(self.bucket_name, filename, target_filepath)

    def exists(self, filename):
        try:
            self.client.head_object(Bucket=self.bucket_name, Key=filename)
            return True
        except ClientError:
            # head_object raises ClientError (e.g. 404) when the key is missing or inaccessible
            return False

    def delete(self, filename):
        self.client.delete_object(Bucket=self.bucket_name, Key=filename)
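

# A minimal usage sketch, not part of the original module: it assumes dify_config is
# already populated with valid S3 settings (S3_BUCKET_NAME, S3_ACCESS_KEY, S3_SECRET_KEY,
# S3_ENDPOINT, S3_REGION, S3_ADDRESS_STYLE or S3_USE_AWS_MANAGED_IAM). The key
# "example/hello.txt" and the payload are purely illustrative.
if __name__ == "__main__":
    storage = AwsS3Storage()

    # write an object, confirm it exists, read it back in full and as a stream, then remove it
    storage.save("example/hello.txt", b"hello from dify")
    assert storage.exists("example/hello.txt")
    print(storage.load_once("example/hello.txt"))
    for chunk in storage.load_stream("example/hello.txt"):
        print(len(chunk))
    storage.delete("example/hello.txt")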