mirror of
				https://github.com/langgenius/dify.git
				synced 2025-11-03 20:33:00 +00:00 
			
		
		
		
	fix(http_request): allow content type application/x-javascript (#10862)
				
					
				
			This commit is contained in:
		
							parent
							
								
									f3af7b5f35
								
							
						
					
					
						commit
						25fda7adc5
					
				@ -1,4 +1,6 @@
 | 
				
			|||||||
 | 
					import mimetypes
 | 
				
			||||||
from collections.abc import Sequence
 | 
					from collections.abc import Sequence
 | 
				
			||||||
 | 
					from email.message import Message
 | 
				
			||||||
from typing import Any, Literal, Optional
 | 
					from typing import Any, Literal, Optional
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import httpx
 | 
					import httpx
 | 
				
			||||||
@ -7,14 +9,6 @@ from pydantic import BaseModel, Field, ValidationInfo, field_validator
 | 
				
			|||||||
from configs import dify_config
 | 
					from configs import dify_config
 | 
				
			||||||
from core.workflow.nodes.base import BaseNodeData
 | 
					from core.workflow.nodes.base import BaseNodeData
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NON_FILE_CONTENT_TYPES = (
 | 
					 | 
				
			||||||
    "application/json",
 | 
					 | 
				
			||||||
    "application/xml",
 | 
					 | 
				
			||||||
    "text/html",
 | 
					 | 
				
			||||||
    "text/plain",
 | 
					 | 
				
			||||||
    "application/x-www-form-urlencoded",
 | 
					 | 
				
			||||||
)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
class HttpRequestNodeAuthorizationConfig(BaseModel):
 | 
					class HttpRequestNodeAuthorizationConfig(BaseModel):
 | 
				
			||||||
    type: Literal["basic", "bearer", "custom"]
 | 
					    type: Literal["basic", "bearer", "custom"]
 | 
				
			||||||
@ -93,13 +87,53 @@ class Response:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    @property
    def is_file(self):
        """
        Report whether the response body should be treated as a file.

        The decision is made in this order:

        1. Content-Disposition header (RFC 6266): an ``attachment`` disposition
           or any ``filename`` parameter means the body is a file.
        2. ``application/*`` types: well-known text formats are not files;
           anything else is sniffed from the first 1024 bytes of the body.
        3. MIME type analysis through ``mimetypes`` for everything that is
           still undecided.

        Returns:
            ``True`` when the body looks like a file, ``False`` otherwise.
        """
        import codecs  # local import: only the text-sniffing step below needs it

        # Drop parameters such as "; charset=utf-8" and normalise the case.
        content_type = self.content_type.split(";")[0].strip().lower()
        content_disposition = self.response.headers.get("content-disposition", "")

        # Check if it's explicitly marked as an attachment
        if content_disposition:
            msg = Message()
            msg["content-disposition"] = content_disposition
            disp_type = msg.get_content_disposition()  # 'attachment', 'inline', ... or None
            filename = msg.get_filename()  # filename parameter if present, None otherwise
            if disp_type == "attachment" or filename is not None:
                return True

        # For application types, try to detect if it's a text-based format
        if content_type.startswith("application/"):
            subtype = content_type.partition("/")[2]
            text_subtypes = {"json", "xml", "javascript", "x-www-form-urlencoded", "yaml", "graphql"}
            # Compare whole subtype tokens ("vnd.api+json" -> vnd, api, json) rather
            # than substrings: a substring test lets "xml" match
            # "vnd.openxmlformats-officedocument.*", which would report Office
            # documents as plain text. Subtypes that are not recognised here fall
            # through to the content sniffing below.
            subtype_tokens = subtype.replace("+", "-").replace(".", "-").split("-")
            if subtype in text_subtypes or not text_subtypes.isdisjoint(subtype_tokens):
                return False

            # Try to detect if content is text-based by sampling the first 1024 bytes
            body = self.response.content
            content_sample = body[:1024]
            try:
                # Decode incrementally: when the sample is a truncated prefix, a
                # multi-byte character cut in half at the 1024-byte boundary must
                # not be mistaken for binary data. ``final`` is only True when the
                # sample is the complete body.
                codecs.getincrementaldecoder("utf-8")().decode(content_sample, final=len(body) <= 1024)
            except UnicodeDecodeError:
                # If we can't decode as UTF-8, likely a binary file
                return True

            # Valid UTF-8 that also contains common text patterns: likely not a file
            text_markers = (b"{", b"[", b"<", b"function", b"var ", b"const ", b"let ")
            if any(marker in content_sample for marker in text_markers):
                return False

        # For other types, use MIME type analysis
        main_type, _ = mimetypes.guess_type("dummy" + (mimetypes.guess_extension(content_type) or ""))
        if main_type:
            return main_type.split("/")[0] in ("application", "image", "audio", "video")

        # For unknown types, check if it's a media type
        return any(media_type in content_type for media_type in ("image/", "audio/", "video/"))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @property
 | 
					    @property
 | 
				
			||||||
    def content_type(self) -> str:
 | 
					    def content_type(self) -> str:
 | 
				
			||||||
 | 
				
			|||||||
@ -0,0 +1,140 @@
 | 
				
			|||||||
 | 
					from unittest.mock import Mock, PropertyMock, patch
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import httpx
 | 
				
			||||||
 | 
					import pytest
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from core.workflow.nodes.http_request.entities import Response
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@pytest.fixture
					def mock_response():
					    """Provide a mock shaped like ``httpx.Response`` whose headers start out empty."""
					    return Mock(spec=httpx.Response, headers={})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_is_file_with_attachment_disposition(mock_response):
					    """An ``attachment`` content-disposition marks the response as a file."""
					    headers = {
					        "content-disposition": "attachment; filename=test.pdf",
					        "content-type": "application/pdf",
					    }
					    mock_response.headers = headers
					    assert Response(mock_response).is_file
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_is_file_with_filename_disposition(mock_response):
					    """A ``filename`` parameter marks the response as a file even when it is ``inline``."""
					    headers = {
					        "content-disposition": "inline; filename=test.pdf",
					        "content-type": "application/pdf",
					    }
					    mock_response.headers = headers
					    assert Response(mock_response).is_file
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@pytest.mark.parametrize("content_type", ["application/pdf", "image/jpeg", "audio/mp3", "video/mp4"])
					def test_is_file_with_file_content_types(mock_response, content_type):
					    """Binary payloads served with typical file content types are reported as files."""
					    # 1024 bytes that are not valid UTF-8
					    binary_body = b"\x00\xff" * 512
					    type(mock_response).content = PropertyMock(return_value=binary_body)
					    mock_response.headers = {"content-type": content_type}

					    wrapped = Response(mock_response)
					    assert wrapped.is_file, f"Content type {content_type} should be identified as a file"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@pytest.mark.parametrize(
					    "content_type",
					    [
					        "application/json",
					        "application/xml",
					        "application/javascript",
					        "application/x-www-form-urlencoded",
					        "application/yaml",
					        "application/graphql",
					    ],
					)
					def test_text_based_application_types(mock_response, content_type):
					    """Well-known text formats under ``application/`` are not reported as files."""
					    mock_response.headers = {"content-type": content_type}
					    looks_like_file = Response(mock_response).is_file
					    assert not looks_like_file, f"Content type {content_type} should not be identified as a file"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@pytest.mark.parametrize(
					    ("content", "content_type"),
					    [
					        (b'{"key": "value"}', "application/octet-stream"),
					        (b"[1, 2, 3]", "application/unknown"),
					        (b"function test() {}", "application/x-unknown"),
					        (b"<root>test</root>", "application/binary"),
					        (b"var x = 1;", "application/data"),
					    ],
					)
					def test_content_based_detection(mock_response, content, content_type):
					    """Text-looking bodies are not files, even under generic ``application/`` types."""
					    type(mock_response).content = PropertyMock(return_value=content)
					    mock_response.headers = {"content-type": content_type}

					    looks_like_file = Response(mock_response).is_file
					    assert not looks_like_file, f"Content {content} with type {content_type} should not be identified as a file"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@pytest.mark.parametrize(
					    ("content", "content_type"),
					    [
					        (bytes([0x00, 0xFF] * 512), "application/octet-stream"),
					        (bytes([0x89, 0x50, 0x4E, 0x47]), "application/unknown"),  # leading bytes of a PNG signature
					        (bytes([0xFF, 0xD8, 0xFF]), "application/binary"),  # leading bytes of a JPEG signature
					    ],
					)
					def test_binary_content_detection(mock_response, content, content_type):
					    """Bodies that cannot be decoded as UTF-8 are reported as files."""
					    type(mock_response).content = PropertyMock(return_value=content)
					    mock_response.headers = {"content-type": content_type}

					    looks_like_file = Response(mock_response).is_file
					    assert looks_like_file, f"Binary content with type {content_type} should be identified as a file"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@pytest.mark.parametrize(
					    ("content_type", "expected_main_type"),
					    [
					        ("x-world/x-vrml", "model"),  # VRML 3D model
					        ("font/ttf", "application"),  # TrueType font
					        ("text/csv", "text"),  # CSV text file
					        ("unknown/xyz", None),  # Unknown type
					    ],
					)
					def test_mimetype_based_detection(mock_response, content_type, expected_main_type):
					    """Non-``application/`` types are classified from the (patched) ``mimetypes.guess_type`` result."""
					    mock_response.headers = {"content-type": content_type}
					    type(mock_response).content = PropertyMock(return_value=bytes([0x00]))  # Dummy content

					    # What the patched guess_type reports, and what is_file should conclude from it.
					    guessed = f"{expected_main_type}/subtype" if expected_main_type else None
					    should_be_file = expected_main_type in ("application", "image", "audio", "video")

					    with patch(
					        "core.workflow.nodes.http_request.entities.mimetypes.guess_type",
					        return_value=(guessed, None),
					    ) as mock_guess_type:
					        # Read the property exactly once so the call-count check below stays meaningful.
					        outcome = Response(mock_response).is_file

					        if should_be_file:
					            assert outcome, f"Content type {content_type} should be identified as a file"
					        else:
					            assert not outcome, f"Content type {content_type} should not be identified as a file"

					        # guess_type must have been consulted exactly once
					        mock_guess_type.assert_called_once()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_is_file_with_inline_disposition(mock_response):
					    """A bare ``inline`` disposition leaves the decision to the binary PDF body."""
					    # 1024 bytes that are not valid UTF-8
					    binary_body = b"\x00\xff" * 512
					    type(mock_response).content = PropertyMock(return_value=binary_body)
					    mock_response.headers = {"content-disposition": "inline", "content-type": "application/pdf"}

					    assert Response(mock_response).is_file
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_is_file_with_no_content_disposition(mock_response):
					    """Without a content-disposition header, a binary PDF body is still a file."""
					    # 1024 bytes that are not valid UTF-8
					    binary_body = b"\x00\xff" * 512
					    type(mock_response).content = PropertyMock(return_value=binary_body)
					    mock_response.headers = {"content-type": "application/pdf"}

					    assert Response(mock_response).is_file
 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user