| 
									
										
										
										
											2024-10-12 18:47:59 +08:00
										 |  |  | import logging | 
					
						
							| 
									
										
										
										
											2024-04-29 18:22:03 +08:00
										 |  |  | from collections.abc import Generator | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-24 18:38:51 +08:00
										 |  |  | import boto3  # type: ignore | 
					
						
							|  |  |  | from botocore.client import Config  # type: ignore | 
					
						
							|  |  |  | from botocore.exceptions import ClientError  # type: ignore | 
					
						
							| 
									
										
										
										
											2024-04-29 18:22:03 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-22 14:04:59 +08:00
										 |  |  | from configs import dify_config | 
					
						
							| 
									
										
										
										
											2024-04-29 18:22:03 +08:00
										 |  |  | from extensions.storage.base_storage import BaseStorage | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-12 18:47:59 +08:00
										 |  |  | logger = logging.getLogger(__name__) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-29 18:22:03 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-09 14:15:27 +08:00
										 |  |  | class AwsS3Storage(BaseStorage): | 
					
						
							|  |  |  |     """Implementation for Amazon Web Services S3 storage.""" | 
					
						
							| 
									
										
										
										
											2024-08-15 12:54:05 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-22 13:01:37 +08:00
										 |  |  |     def __init__(self): | 
					
						
							|  |  |  |         super().__init__() | 
					
						
							| 
									
										
										
										
											2024-10-22 14:04:59 +08:00
										 |  |  |         self.bucket_name = dify_config.S3_BUCKET_NAME | 
					
						
							|  |  |  |         if dify_config.S3_USE_AWS_MANAGED_IAM: | 
					
						
							| 
									
										
										
										
											2024-10-12 18:47:59 +08:00
										 |  |  |             logger.info("Using AWS managed IAM role for S3") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-14 15:19:59 +08:00
										 |  |  |             session = boto3.Session() | 
					
						
							| 
									
										
										
										
											2024-10-22 14:04:59 +08:00
										 |  |  |             region_name = dify_config.S3_REGION | 
					
						
							| 
									
										
										
										
											2024-10-12 18:47:59 +08:00
										 |  |  |             self.client = session.client(service_name="s3", region_name=region_name) | 
					
						
							| 
									
										
										
										
											2024-06-14 15:19:59 +08:00
										 |  |  |         else: | 
					
						
							| 
									
										
										
										
											2024-10-12 18:47:59 +08:00
										 |  |  |             logger.info("Using ak and sk for S3") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-14 15:19:59 +08:00
										 |  |  |             self.client = boto3.client( | 
					
						
							| 
									
										
										
										
											2024-08-15 12:54:05 +08:00
										 |  |  |                 "s3", | 
					
						
							| 
									
										
										
										
											2024-10-22 14:04:59 +08:00
										 |  |  |                 aws_secret_access_key=dify_config.S3_SECRET_KEY, | 
					
						
							|  |  |  |                 aws_access_key_id=dify_config.S3_ACCESS_KEY, | 
					
						
							|  |  |  |                 endpoint_url=dify_config.S3_ENDPOINT, | 
					
						
							|  |  |  |                 region_name=dify_config.S3_REGION, | 
					
						
							| 
									
										
										
										
											2025-02-07 22:35:24 +08:00
										 |  |  |                 config=Config( | 
					
						
							|  |  |  |                     s3={"addressing_style": dify_config.S3_ADDRESS_STYLE}, | 
					
						
							|  |  |  |                     request_checksum_calculation="when_required", | 
					
						
							|  |  |  |                     response_checksum_validation="when_required", | 
					
						
							|  |  |  |                 ), | 
					
						
							| 
									
										
										
										
											2024-08-15 12:54:05 +08:00
										 |  |  |             ) | 
					
						
							| 
									
										
										
										
											2024-08-22 09:45:42 +08:00
										 |  |  |         # create bucket | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             self.client.head_bucket(Bucket=self.bucket_name) | 
					
						
							|  |  |  |         except ClientError as e: | 
					
						
							|  |  |  |             # if bucket not exists, create it | 
					
						
							|  |  |  |             if e.response["Error"]["Code"] == "404": | 
					
						
							|  |  |  |                 self.client.create_bucket(Bucket=self.bucket_name) | 
					
						
							| 
									
										
										
										
											2024-08-28 13:57:45 +08:00
										 |  |  |             # if bucket is not accessible, pass, maybe the bucket is existing but not accessible | 
					
						
							|  |  |  |             elif e.response["Error"]["Code"] == "403": | 
					
						
							|  |  |  |                 pass | 
					
						
							| 
									
										
										
										
											2024-08-22 09:45:42 +08:00
										 |  |  |             else: | 
					
						
							|  |  |  |                 # other error, raise exception | 
					
						
							|  |  |  |                 raise | 
					
						
							| 
									
										
										
										
											2024-04-29 18:22:03 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def save(self, filename, data): | 
					
						
							|  |  |  |         self.client.put_object(Bucket=self.bucket_name, Key=filename, Body=data) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def load_once(self, filename: str) -> bytes: | 
					
						
							|  |  |  |         try: | 
					
						
							| 
									
										
										
										
											2024-12-24 18:38:51 +08:00
										 |  |  |             data: bytes = self.client.get_object(Bucket=self.bucket_name, Key=filename)["Body"].read() | 
					
						
							| 
									
										
										
										
											2024-04-29 18:22:03 +08:00
										 |  |  |         except ClientError as ex: | 
					
						
							| 
									
										
										
										
											2024-08-15 12:54:05 +08:00
										 |  |  |             if ex.response["Error"]["Code"] == "NoSuchKey": | 
					
						
							| 
									
										
										
										
											2024-04-29 18:22:03 +08:00
										 |  |  |                 raise FileNotFoundError("File not found") | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 raise | 
					
						
							|  |  |  |         return data | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def load_stream(self, filename: str) -> Generator: | 
					
						
							| 
									
										
										
										
											2024-10-25 10:11:25 +08:00
										 |  |  |         try: | 
					
						
							|  |  |  |             response = self.client.get_object(Bucket=self.bucket_name, Key=filename) | 
					
						
							|  |  |  |             yield from response["Body"].iter_chunks() | 
					
						
							|  |  |  |         except ClientError as ex: | 
					
						
							|  |  |  |             if ex.response["Error"]["Code"] == "NoSuchKey": | 
					
						
							| 
									
										
										
										
											2024-12-23 16:28:54 +08:00
										 |  |  |                 raise FileNotFoundError("file not found") | 
					
						
							|  |  |  |             elif "reached max retries" in str(ex): | 
					
						
							|  |  |  |                 raise ValueError("please do not request the same file too frequently") | 
					
						
							| 
									
										
										
										
											2024-10-25 10:11:25 +08:00
										 |  |  |             else: | 
					
						
							|  |  |  |                 raise | 
					
						
							| 
									
										
										
										
											2024-04-29 18:22:03 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def download(self, filename, target_filepath): | 
					
						
							| 
									
										
										
										
											2024-10-15 09:42:39 +09:00
										 |  |  |         self.client.download_file(self.bucket_name, filename, target_filepath) | 
					
						
							| 
									
										
										
										
											2024-04-29 18:22:03 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def exists(self, filename): | 
					
						
							| 
									
										
										
										
											2024-10-15 09:42:39 +09:00
										 |  |  |         try: | 
					
						
							|  |  |  |             self.client.head_object(Bucket=self.bucket_name, Key=filename) | 
					
						
							|  |  |  |             return True | 
					
						
							|  |  |  |         except: | 
					
						
							|  |  |  |             return False | 
					
						
							| 
									
										
										
										
											2024-04-29 18:22:03 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def delete(self, filename): | 
					
						
							|  |  |  |         self.client.delete_object(Bucket=self.bucket_name, Key=filename) |