| 
									
										
										
										
											2024-08-11 16:45:15 +08:00
										 |  |  | import re | 
					
						
							| 
									
										
										
										
											2024-07-22 15:29:39 +08:00
										 |  |  | from collections.abc import Mapping, Sequence | 
					
						
							|  |  |  | from typing import Any, Union | 
					
						
							| 
									
										
										
										
											2024-08-11 16:45:15 +08:00
										 |  |  | from urllib.parse import parse_qs, urlparse | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | import requests | 
					
						
							| 
									
										
										
										
											2024-02-06 13:21:13 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-13 14:44:10 +08:00
										 |  |  | from core.file.file_obj import FileBelongsTo, FileExtraConfig, FileTransferMethod, FileType, FileVar | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  | from extensions.ext_database import db | 
					
						
							|  |  |  | from models.account import Account | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  | from models.model import EndUser, MessageFile, UploadFile | 
					
						
							| 
									
										
										
										
											2024-02-01 18:11:57 +08:00
										 |  |  | from services.file_service import IMAGE_EXTENSIONS | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class MessageFileParser: | 
					
						
							|  |  |  |     def __init__(self, tenant_id: str, app_id: str) -> None: | 
					
						
							|  |  |  |         self.tenant_id = tenant_id | 
					
						
							|  |  |  |         self.app_id = app_id | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |     def validate_and_transform_files_arg( | 
					
						
							|  |  |  |         self, files: Sequence[Mapping[str, Any]], file_extra_config: FileExtraConfig, user: Union[Account, EndUser] | 
					
						
							|  |  |  |     ) -> list[FileVar]: | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         validate and transform files arg | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         :param files: | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  |         :param file_extra_config: | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  |         :param user: | 
					
						
							|  |  |  |         :return: | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         for file in files: | 
					
						
							|  |  |  |             if not isinstance(file, dict): | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |                 raise ValueError("Invalid file format, must be dict") | 
					
						
							|  |  |  |             if not file.get("type"): | 
					
						
							|  |  |  |                 raise ValueError("Missing file type") | 
					
						
							|  |  |  |             FileType.value_of(file.get("type")) | 
					
						
							|  |  |  |             if not file.get("transfer_method"): | 
					
						
							|  |  |  |                 raise ValueError("Missing file transfer method") | 
					
						
							|  |  |  |             FileTransferMethod.value_of(file.get("transfer_method")) | 
					
						
							|  |  |  |             if file.get("transfer_method") == FileTransferMethod.REMOTE_URL.value: | 
					
						
							|  |  |  |                 if not file.get("url"): | 
					
						
							|  |  |  |                     raise ValueError("Missing file url") | 
					
						
							|  |  |  |                 if not file.get("url").startswith("http"): | 
					
						
							|  |  |  |                     raise ValueError("Invalid file url") | 
					
						
							|  |  |  |             if file.get("transfer_method") == FileTransferMethod.LOCAL_FILE.value and not file.get("upload_file_id"): | 
					
						
							|  |  |  |                 raise ValueError("Missing file upload_file_id") | 
					
						
							|  |  |  |             if file.get("transform_method") == FileTransferMethod.TOOL_FILE.value and not file.get("tool_file_id"): | 
					
						
							|  |  |  |                 raise ValueError("Missing file tool_file_id") | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # transform files to file objs | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  |         type_file_objs = self._to_file_objs(files, file_extra_config) | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # validate files | 
					
						
							|  |  |  |         new_files = [] | 
					
						
							|  |  |  |         for file_type, file_objs in type_file_objs.items(): | 
					
						
							|  |  |  |             if file_type == FileType.IMAGE: | 
					
						
							|  |  |  |                 # parse and validate files | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  |                 image_config = file_extra_config.image_config | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |                 # check if image file feature is enabled | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  |                 if not image_config: | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  |                     continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 # Validate number of files | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |                 if len(files) > image_config["number_limits"]: | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  |                     raise ValueError(f"Number of image files exceeds the maximum limit {image_config['number_limits']}") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 for file_obj in file_objs: | 
					
						
							|  |  |  |                     # Validate transfer method | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |                     if file_obj.transfer_method.value not in image_config["transfer_methods"]: | 
					
						
							|  |  |  |                         raise ValueError(f"Invalid transfer method: {file_obj.transfer_method.value}") | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |                     # Validate file type | 
					
						
							|  |  |  |                     if file_obj.type != FileType.IMAGE: | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |                         raise ValueError(f"Invalid file type: {file_obj.type}") | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |                     if file_obj.transfer_method == FileTransferMethod.REMOTE_URL: | 
					
						
							|  |  |  |                         # check remote url valid and is image | 
					
						
							|  |  |  |                         result, error = self._check_image_remote_url(file_obj.url) | 
					
						
							|  |  |  |                         if result is False: | 
					
						
							|  |  |  |                             raise ValueError(error) | 
					
						
							|  |  |  |                     elif file_obj.transfer_method == FileTransferMethod.LOCAL_FILE: | 
					
						
							|  |  |  |                         # get upload file from upload_file_id | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |                         upload_file = ( | 
					
						
							|  |  |  |                             db.session.query(UploadFile) | 
					
						
							|  |  |  |                             .filter( | 
					
						
							|  |  |  |                                 UploadFile.id == file_obj.related_id, | 
					
						
							|  |  |  |                                 UploadFile.tenant_id == self.tenant_id, | 
					
						
							|  |  |  |                                 UploadFile.created_by == user.id, | 
					
						
							|  |  |  |                                 UploadFile.created_by_role == ("account" if isinstance(user, Account) else "end_user"), | 
					
						
							|  |  |  |                                 UploadFile.extension.in_(IMAGE_EXTENSIONS), | 
					
						
							|  |  |  |                             ) | 
					
						
							|  |  |  |                             .first() | 
					
						
							|  |  |  |                         ) | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |                         # check upload file is belong to tenant and user | 
					
						
							|  |  |  |                         if not upload_file: | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |                             raise ValueError("Invalid upload file") | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |                     new_files.append(file_obj) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # return all file objs | 
					
						
							|  |  |  |         return new_files | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-15 12:43:18 +08:00
										 |  |  |     def transform_message_files(self, files: list[MessageFile], file_extra_config: FileExtraConfig): | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         transform message files | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         :param files: | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  |         :param file_extra_config: | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  |         :return: | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         # transform files to file objs | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  |         type_file_objs = self._to_file_objs(files, file_extra_config) | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # return all file objs | 
					
						
							|  |  |  |         return [file_obj for file_objs in type_file_objs.values() for file_obj in file_objs] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |     def _to_file_objs( | 
					
						
							|  |  |  |         self, files: list[Union[dict, MessageFile]], file_extra_config: FileExtraConfig | 
					
						
							|  |  |  |     ) -> dict[FileType, list[FileVar]]: | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         transform files to file objs | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         :param files: | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  |         :param file_extra_config: | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  |         :return: | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  |         type_file_objs: dict[FileType, list[FileVar]] = { | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  |             # Currently only support image | 
					
						
							|  |  |  |             FileType.IMAGE: [] | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if not files: | 
					
						
							|  |  |  |             return type_file_objs | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # group by file type and convert file args or message files to FileObj | 
					
						
							|  |  |  |         for file in files: | 
					
						
							| 
									
										
										
										
											2024-01-24 00:13:04 +08:00
										 |  |  |             if isinstance(file, MessageFile): | 
					
						
							|  |  |  |                 if file.belongs_to == FileBelongsTo.ASSISTANT.value: | 
					
						
							|  |  |  |                     continue | 
					
						
							| 
									
										
										
										
											2024-01-23 19:58:23 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  |             file_obj = self._to_file_obj(file, file_extra_config) | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  |             if file_obj.type not in type_file_objs: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             type_file_objs[file_obj.type].append(file_obj) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return type_file_objs | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-15 12:43:18 +08:00
										 |  |  |     def _to_file_obj(self, file: Union[dict, MessageFile], file_extra_config: FileExtraConfig): | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         transform file to file obj | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         :param file: | 
					
						
							|  |  |  |         :return: | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         if isinstance(file, dict): | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |             transfer_method = FileTransferMethod.value_of(file.get("transfer_method")) | 
					
						
							| 
									
										
										
										
											2024-05-27 22:01:11 +08:00
										 |  |  |             if transfer_method != FileTransferMethod.TOOL_FILE: | 
					
						
							|  |  |  |                 return FileVar( | 
					
						
							|  |  |  |                     tenant_id=self.tenant_id, | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |                     type=FileType.value_of(file.get("type")), | 
					
						
							| 
									
										
										
										
											2024-05-27 22:01:11 +08:00
										 |  |  |                     transfer_method=transfer_method, | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |                     url=file.get("url") if transfer_method == FileTransferMethod.REMOTE_URL else None, | 
					
						
							|  |  |  |                     related_id=file.get("upload_file_id") if transfer_method == FileTransferMethod.LOCAL_FILE else None, | 
					
						
							|  |  |  |                     extra_config=file_extra_config, | 
					
						
							| 
									
										
										
										
											2024-05-27 22:01:11 +08:00
										 |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  |             return FileVar( | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  |                 tenant_id=self.tenant_id, | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |                 type=FileType.value_of(file.get("type")), | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  |                 transfer_method=transfer_method, | 
					
						
							| 
									
										
										
										
											2024-05-27 22:01:11 +08:00
										 |  |  |                 url=None, | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |                 related_id=file.get("tool_file_id"), | 
					
						
							|  |  |  |                 extra_config=file_extra_config, | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  |             ) | 
					
						
							|  |  |  |         else: | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  |             return FileVar( | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  |                 id=file.id, | 
					
						
							|  |  |  |                 tenant_id=self.tenant_id, | 
					
						
							|  |  |  |                 type=FileType.value_of(file.type), | 
					
						
							|  |  |  |                 transfer_method=FileTransferMethod.value_of(file.transfer_method), | 
					
						
							|  |  |  |                 url=file.url, | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  |                 related_id=file.upload_file_id or None, | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |                 extra_config=file_extra_config, | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  |             ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _check_image_remote_url(self, url): | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             headers = { | 
					
						
							| 
									
										
										
										
											2024-09-12 14:00:36 +08:00
										 |  |  |                 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)" | 
					
						
							|  |  |  |                 " Chrome/91.0.4472.124 Safari/537.36" | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-11 16:45:15 +08:00
										 |  |  |             def is_s3_presigned_url(url): | 
					
						
							|  |  |  |                 try: | 
					
						
							|  |  |  |                     parsed_url = urlparse(url) | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |                     if "amazonaws.com" not in parsed_url.netloc: | 
					
						
							| 
									
										
										
										
											2024-08-11 16:45:15 +08:00
										 |  |  |                         return False | 
					
						
							|  |  |  |                     query_params = parse_qs(parsed_url.query) | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |                     required_params = ["Signature", "Expires"] | 
					
						
							| 
									
										
										
										
											2024-08-11 16:45:15 +08:00
										 |  |  |                     for param in required_params: | 
					
						
							|  |  |  |                         if param not in query_params: | 
					
						
							|  |  |  |                             return False | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |                     if not query_params["Expires"][0].isdigit(): | 
					
						
							| 
									
										
										
										
											2024-08-11 16:45:15 +08:00
										 |  |  |                         return False | 
					
						
							| 
									
										
										
										
											2024-09-10 17:00:20 +08:00
										 |  |  |                     signature = query_params["Signature"][0] | 
					
						
							|  |  |  |                     if not re.match(r"^[A-Za-z0-9+/]+={0,2}$", signature): | 
					
						
							| 
									
										
										
										
											2024-08-11 16:45:15 +08:00
										 |  |  |                         return False | 
					
						
							|  |  |  |                     return True | 
					
						
							|  |  |  |                 except Exception: | 
					
						
							|  |  |  |                     return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             if is_s3_presigned_url(url): | 
					
						
							|  |  |  |                 response = requests.get(url, headers=headers, allow_redirects=True) | 
					
						
							|  |  |  |                 if response.status_code in {200, 304}: | 
					
						
							|  |  |  |                     return True, "" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  |             response = requests.head(url, headers=headers, allow_redirects=True) | 
					
						
							| 
									
										
										
										
											2024-07-05 21:10:33 +08:00
										 |  |  |             if response.status_code in {200, 304}: | 
					
						
							| 
									
										
										
										
											2023-11-13 22:05:46 +08:00
										 |  |  |                 return True, "" | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 return False, "URL does not exist." | 
					
						
							|  |  |  |         except requests.RequestException as e: | 
					
						
							|  |  |  |             return False, f"Error checking URL: {e}" |