| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  | import base64 | 
					
						
							|  |  |  | import hashlib | 
					
						
							| 
									
										
										
										
											2024-02-01 18:11:57 +08:00
										 |  |  | import hmac | 
					
						
							|  |  |  | import logging | 
					
						
							|  |  |  | import os | 
					
						
							|  |  |  | import time | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  | from mimetypes import guess_extension, guess_type | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  | from typing import Optional, Union | 
					
						
							| 
									
										
										
										
											2024-02-01 18:11:57 +08:00
										 |  |  | from uuid import uuid4 | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-21 21:22:06 +08:00
										 |  |  | import httpx | 
					
						
							| 
									
										
										
										
											2024-02-06 13:21:13 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-17 22:26:18 +08:00
										 |  |  | from configs import dify_config | 
					
						
							| 
									
										
										
										
											2024-12-21 21:22:06 +08:00
										 |  |  | from core.helper import ssrf_proxy | 
					
						
							| 
									
										
										
										
											2024-02-06 13:21:13 +08:00
										 |  |  | from extensions.ext_database import db | 
					
						
							|  |  |  | from extensions.ext_storage import storage | 
					
						
							| 
									
										
										
										
											2024-02-01 18:11:57 +08:00
										 |  |  | from models.model import MessageFile | 
					
						
							|  |  |  | from models.tools import ToolFile | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  | 
 | 
					
						
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  | class ToolFileManager: | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  |     def sign_file(tool_file_id: str, extension: str) -> str: | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         sign file to get a temporary url | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2024-07-17 22:26:18 +08:00
										 |  |  |         base_url = dify_config.FILES_URL | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |         file_preview_url = f"{base_url}/files/tools/{tool_file_id}{extension}" | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         timestamp = str(int(time.time())) | 
					
						
							|  |  |  |         nonce = os.urandom(16).hex() | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |         data_to_sign = f"file-preview|{tool_file_id}|{timestamp}|{nonce}" | 
					
						
							|  |  |  |         secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  |         sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() | 
					
						
							|  |  |  |         encoded_sign = base64.urlsafe_b64encode(sign).decode() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |         return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}" | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def verify_file(file_id: str, timestamp: str, nonce: str, sign: str) -> bool: | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         verify signature | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |         data_to_sign = f"file-preview|{file_id}|{timestamp}|{nonce}" | 
					
						
							|  |  |  |         secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  |         recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() | 
					
						
							|  |  |  |         recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # verify signature | 
					
						
							|  |  |  |         if sign != recalculated_encoded_sign: | 
					
						
							|  |  |  |             return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         current_time = int(time.time()) | 
					
						
							| 
									
										
										
										
											2024-07-17 22:26:18 +08:00
										 |  |  |         return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							| 
									
										
										
										
											2024-07-17 22:26:18 +08:00
										 |  |  |     def create_file_by_raw( | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |         *, | 
					
						
							|  |  |  |         user_id: str, | 
					
						
							|  |  |  |         tenant_id: str, | 
					
						
							|  |  |  |         conversation_id: Optional[str], | 
					
						
							|  |  |  |         file_binary: bytes, | 
					
						
							|  |  |  |         mimetype: str, | 
					
						
							| 
									
										
										
										
											2024-07-17 22:26:18 +08:00
										 |  |  |     ) -> ToolFile: | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |         extension = guess_extension(mimetype) or ".bin" | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  |         unique_name = uuid4().hex | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |         filename = f"{unique_name}{extension}" | 
					
						
							|  |  |  |         filepath = f"tools/{tenant_id}/{filename}" | 
					
						
							|  |  |  |         storage.save(filepath, file_binary) | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-17 22:26:18 +08:00
										 |  |  |         tool_file = ToolFile( | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |             user_id=user_id, | 
					
						
							|  |  |  |             tenant_id=tenant_id, | 
					
						
							|  |  |  |             conversation_id=conversation_id, | 
					
						
							|  |  |  |             file_key=filepath, | 
					
						
							|  |  |  |             mimetype=mimetype, | 
					
						
							|  |  |  |             name=filename, | 
					
						
							|  |  |  |             size=len(file_binary), | 
					
						
							| 
									
										
										
										
											2024-07-17 22:26:18 +08:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         db.session.add(tool_file) | 
					
						
							|  |  |  |         db.session.commit() | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |         db.session.refresh(tool_file) | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         return tool_file | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  |     @staticmethod | 
					
						
							| 
									
										
										
										
											2024-07-17 22:26:18 +08:00
										 |  |  |     def create_file_by_url( | 
					
						
							|  |  |  |         user_id: str, | 
					
						
							|  |  |  |         tenant_id: str, | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |         conversation_id: str | None, | 
					
						
							| 
									
										
										
										
											2024-07-17 22:26:18 +08:00
										 |  |  |         file_url: str, | 
					
						
							|  |  |  |     ) -> ToolFile: | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  |         # try to download image | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |         try: | 
					
						
							| 
									
										
										
										
											2024-12-21 21:22:06 +08:00
										 |  |  |             response = ssrf_proxy.get(file_url) | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |             response.raise_for_status() | 
					
						
							|  |  |  |             blob = response.content | 
					
						
							| 
									
										
										
										
											2024-12-21 21:22:06 +08:00
										 |  |  |         except httpx.TimeoutException as e: | 
					
						
							|  |  |  |             raise ValueError(f"timeout when downloading file from {file_url}") | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |         mimetype = guess_type(file_url)[0] or "octet/stream" | 
					
						
							|  |  |  |         extension = guess_extension(mimetype) or ".bin" | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  |         unique_name = uuid4().hex | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |         filename = f"{unique_name}{extension}" | 
					
						
							|  |  |  |         filepath = f"tools/{tenant_id}/{filename}" | 
					
						
							|  |  |  |         storage.save(filepath, blob) | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-17 22:26:18 +08:00
										 |  |  |         tool_file = ToolFile( | 
					
						
							|  |  |  |             user_id=user_id, | 
					
						
							|  |  |  |             tenant_id=tenant_id, | 
					
						
							|  |  |  |             conversation_id=conversation_id, | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |             file_key=filepath, | 
					
						
							| 
									
										
										
										
											2024-07-17 22:26:18 +08:00
										 |  |  |             mimetype=mimetype, | 
					
						
							|  |  |  |             original_url=file_url, | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |             name=filename, | 
					
						
							|  |  |  |             size=len(blob), | 
					
						
							| 
									
										
										
										
											2024-07-17 22:26:18 +08:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  |         db.session.add(tool_file) | 
					
						
							|  |  |  |         db.session.commit() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return tool_file | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							| 
									
										
										
										
											2024-02-09 15:21:33 +08:00
										 |  |  |     def get_file_binary(id: str) -> Union[tuple[bytes, str], None]: | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         get file binary | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         :param id: the id of the file | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         :return: the binary of the file, mime type | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |         tool_file = ( | 
					
						
							| 
									
										
										
										
											2024-07-17 22:26:18 +08:00
										 |  |  |             db.session.query(ToolFile) | 
					
						
							|  |  |  |             .filter( | 
					
						
							|  |  |  |                 ToolFile.id == id, | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |             .first() | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if not tool_file: | 
					
						
							|  |  |  |             return None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         blob = storage.load_once(tool_file.file_key) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return blob, tool_file.mimetype | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  |     @staticmethod | 
					
						
							| 
									
										
										
										
											2024-02-09 15:21:33 +08:00
										 |  |  |     def get_file_binary_by_message_file_id(id: str) -> Union[tuple[bytes, str], None]: | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         get file binary | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         :param id: the id of the file | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         :return: the binary of the file, mime type | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |         message_file = ( | 
					
						
							| 
									
										
										
										
											2024-07-17 22:26:18 +08:00
										 |  |  |             db.session.query(MessageFile) | 
					
						
							|  |  |  |             .filter( | 
					
						
							|  |  |  |                 MessageFile.id == id, | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |             .first() | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # Check if message_file is not None | 
					
						
							|  |  |  |         if message_file is not None: | 
					
						
							|  |  |  |             # get tool file id | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |             if message_file.url is not None: | 
					
						
							|  |  |  |                 tool_file_id = message_file.url.split("/")[-1] | 
					
						
							|  |  |  |                 # trim extension | 
					
						
							|  |  |  |                 tool_file_id = tool_file_id.split(".")[0] | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 tool_file_id = None | 
					
						
							| 
									
										
										
										
											2024-07-17 22:26:18 +08:00
										 |  |  |         else: | 
					
						
							|  |  |  |             tool_file_id = None | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |         tool_file = ( | 
					
						
							| 
									
										
										
										
											2024-07-17 22:26:18 +08:00
										 |  |  |             db.session.query(ToolFile) | 
					
						
							|  |  |  |             .filter( | 
					
						
							|  |  |  |                 ToolFile.id == tool_file_id, | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |             .first() | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if not tool_file: | 
					
						
							|  |  |  |             return None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         blob = storage.load_once(tool_file.file_key) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return blob, tool_file.mimetype | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  |     @staticmethod | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |     def get_file_generator_by_tool_file_id(tool_file_id: str): | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         get file binary | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  |         :param tool_file_id: the id of the tool file | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         :return: the binary of the file, mime type | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |         tool_file = ( | 
					
						
							| 
									
										
										
										
											2024-07-17 22:26:18 +08:00
										 |  |  |             db.session.query(ToolFile) | 
					
						
							|  |  |  |             .filter( | 
					
						
							|  |  |  |                 ToolFile.id == tool_file_id, | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |             .first() | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if not tool_file: | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |             return None, None | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |         stream = storage.load_stream(tool_file.file_key) | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |         return stream, tool_file | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
# init tool_file_parser: register ToolFileManager under the "manager" key of
# the ``tool_file_manager`` mapping exposed by core.file.tool_file_parser.
# NOTE(review): this import sits at the bottom of the module, after the class
# definition -- presumably to avoid a circular import; confirm before moving it.
from core.file.tool_file_parser import tool_file_manager

tool_file_manager["manager"] = ToolFileManager