mirror of
				https://github.com/langgenius/dify.git
				synced 2025-10-31 10:53:02 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			86 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			86 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import mimetypes
 | |
| import os
 | |
| import platform
 | |
| import re
 | |
| import urllib.parse
 | |
| import warnings
 | |
| from uuid import uuid4
 | |
| 
 | |
| import httpx
 | |
| 
 | |
# python-magic is optional: when it cannot be imported, emit a single
# platform-specific installation hint and leave `magic` bound to None so
# callers can test `magic is not None` before using it.
try:
    import magic
except ImportError:
    warnings.warn(
        {
            "Windows": "To use python-magic guess MIMETYPE, you need to run `pip install python-magic-bin`",
            "Darwin": "To use python-magic guess MIMETYPE, you need to run `brew install libmagic`",
            "Linux": "To use python-magic guess MIMETYPE, you need to run `sudo apt-get install libmagic1`",
        }.get(
            platform.system(),
            # Any other operating system gets the generic hint.
            "To use python-magic guess MIMETYPE, you need to install `libmagic`",
        ),
        stacklevel=2,
    )
    magic = None  # type: ignore
 | |
| 
 | |
| from pydantic import BaseModel
 | |
| 
 | |
| 
 | |
# Metadata describing a downloaded file, as produced by
# guess_file_info_from_response. Documented with comments rather than a class
# docstring so the pydantic schema description is left unchanged.
class FileInfo(BaseModel):
    # File name including its extension.
    filename: str
    # Extension including the leading dot (for example ".bin").
    extension: str
    # MIME type string guessed for the file.
    mimetype: str
    # Size in bytes taken from Content-Length; -1 when the header is absent.
    size: int
 | |
| 
 | |
| 
 | |
def _filename_from_content_disposition(content_disposition: str) -> str:
    """Return the filename carried by a Content-Disposition header value.

    The RFC 6266 extended form (``filename*=charset'lang'percent-encoded``) is
    preferred when present; otherwise the plain ``filename=`` parameter is
    used, either quoted or bare. Returns an empty string when no filename
    parameter is found.
    """
    extended = re.search(
        r"filename\*\s*=\s*([^';\s]*)'[^']*'([^;\s]+)",
        content_disposition,
        flags=re.IGNORECASE,
    )
    if extended:
        charset = extended.group(1) or "utf-8"
        encoded = extended.group(2)
        try:
            name = urllib.parse.unquote(encoded, encoding=charset, errors="replace")
        except LookupError:
            # Unknown charset label: decode with the default (UTF-8) instead of failing.
            name = urllib.parse.unquote(encoded)
    else:
        # Stop at the closing quote (quoted form) or at the next ';' (bare
        # form) so neither the quote nor later parameters leak into the name.
        plain = re.search(
            r'filename\s*=\s*(?:"([^"]*)"|([^;]+))',
            content_disposition,
            flags=re.IGNORECASE,
        )
        if plain is None:
            return ""
        quoted, bare = plain.groups()
        name = quoted if quoted is not None else bare

    # The header is supplied by the remote server: keep only the final path
    # component so values such as "../../x" cannot smuggle directories.
    return os.path.basename(name.strip().replace("\\", "/"))


def guess_file_info_from_response(response: httpx.Response) -> FileInfo:
    """Derive filename, extension, MIME type and size for a downloaded file.

    The filename is taken from the last path segment of the response URL, then
    from the Content-Disposition header, and finally from a freshly generated
    UUID. The MIME type is guessed from the filename, then from the URL, then
    from the Content-Type header (parameters such as ``charset`` stripped);
    when it is still ``application/octet-stream`` and python-magic is
    available, the first 1024 bytes of the body are sniffed. A filename
    without an extension gets one guessed from the MIME type, or ``.bin``.

    Args:
        response: The HTTP response to inspect. Its URL and headers are read,
            and its body is read only when content sniffing is needed.

    Returns:
        A FileInfo whose ``size`` is the Content-Length header as an int, or
        -1 when the header is missing or not a valid integer.
    """
    url = str(response.url)

    # Try the last path segment of the URL first.
    filename = os.path.basename(urllib.parse.urlparse(url).path)

    # If the URL has no usable segment, fall back to Content-Disposition.
    if not filename:
        content_disposition = response.headers.get("Content-Disposition")
        if content_disposition:
            filename = _filename_from_content_disposition(content_disposition)

    # If still no filename, generate a unique one.
    if not filename:
        filename = str(uuid4())

    # Guess MIME type from filename first, then URL.
    mimetype, _ = mimetypes.guess_type(filename)
    if mimetype is None:
        mimetype, _ = mimetypes.guess_type(url)
    if mimetype is None:
        # Content-Type may carry parameters ("text/html; charset=utf-8"); only
        # the bare media type is useful for the extension lookup below.
        content_type = response.headers.get("Content-Type", "")
        mimetype = content_type.split(";", 1)[0].strip() or "application/octet-stream"

    # Use python-magic to guess MIME type if still unknown or generic.
    if mimetype == "application/octet-stream" and magic is not None:
        try:
            mimetype = magic.from_buffer(response.content[:1024], mime=True)
        except magic.MagicException:
            # Sniffing is best-effort; keep the generic type on failure.
            pass

    extension = os.path.splitext(filename)[1]

    # Ensure filename has an extension.
    if not extension:
        extension = mimetypes.guess_extension(mimetype) or ".bin"
        filename = f"{filename}{extension}"

    # A malformed Content-Length is treated the same as a missing one.
    try:
        size = int(response.headers.get("Content-Length", -1))
    except ValueError:
        size = -1

    return FileInfo(
        filename=filename,
        extension=extension,
        mimetype=mimetype,
        size=size,
    )
 | 
