# Mirror of https://github.com/datahub-project/datahub.git (synced 2025-11-03 20:27:50 +00:00)
			
		
		
		
	
		
			
				
	
	
		
			86 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			86 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from datahub.ingestion.source.metadata.business_glossary import clean_url, create_id
 | 
						|
 | 
						|
 | 
						|
def test_clean_url():
    """Check clean_url against a table of representative term names."""
    # Maps each raw input to the cleaned value the test expects back.
    expected_by_input = {
        "Basic Term": "Basic-Term",
        "Term With Spaces": "Term-With-Spaces",
        "Special@#$Characters!": "SpecialCharacters",
        "MixedCase Term": "MixedCase-Term",
        "Multiple   Spaces": "Multiple-Spaces",
        "Term-With-Hyphens": "Term-With-Hyphens",
        "Term.With.Dots": "Term.With.Dots",  # Periods are preserved
        "Term_With_Underscores": "TermWithUnderscores",
        "123 Numeric Term": "123-Numeric-Term",
        "@#$Special At Start": "Special-At-Start",
        "-Leading-Trailing-": "Leading-Trailing",
        "Multiple...Periods": "Multiple.Periods",  # Runs of periods
        "Mixed-Hyphens.Periods": "Mixed-Hyphens.Periods",  # Mixed separators
    }

    for raw, want in expected_by_input.items():
        got = clean_url(raw)
        assert got == want, f"Expected '{want}' for input '{raw}', got '{got}'"
 | 
						|
 | 
						|
 | 
						|
def test_clean_url_edge_cases():
    """Check clean_url on degenerate and boundary inputs."""
    # (raw input, expected cleaned output) pairs.
    edge_cases = (
        ("", ""),  # Empty string
        (" ", ""),  # Single space
        ("   ", ""),  # Multiple spaces
        ("@#$%", ""),  # Only special characters
        ("A", "A"),  # Single character
        ("A B", "A-B"),  # Two characters with space
        ("A.B", "A.B"),  # Period separator
        ("...", ""),  # Only periods
        (".Leading.Trailing.", "Leading.Trailing"),  # Leading/trailing periods
    )

    for raw, want in edge_cases:
        got = clean_url(raw)
        assert got == want, f"Expected '{want}' for input '{raw}', got '{got}'"
 | 
						|
 | 
						|
 | 
						|
def test_create_id_url_cleaning():
    """Check the ids create_id builds from path components that need cleaning."""
    # A component with spaces and a special character.
    assert create_id(["pii", "secure % password"], None, False) == (
        "pii.secure-password"
    )

    # Several components are expected to be joined with periods.
    assert create_id(["Term One", "Term Two", "Term Three"], None, False) == (
        "Term-One.Term-Two.Term-Three"
    )

    # Components that already contain periods.
    assert create_id(["Term.One", "Term.Two"], None, False) == "Term.One.Term.Two"
 | 
						|
 | 
						|
 | 
						|
def test_create_id_with_special_chars():
    """Check how create_id treats non-ASCII and special characters."""
    # Non-ASCII input: the test expects a 32-character alphanumeric id
    # (presumably an auto-generated GUID rather than a cleaned path).
    non_ascii_id = create_id(["pii", "secure パスワード"], None, False)
    assert len(non_ascii_id) == 32  # GUID length
    assert non_ascii_id.isalnum()  # Only alphanumeric characters

    # ASCII special characters other than periods or hyphens.
    ascii_special_id = create_id(["test", "special@#$chars"], None, False)
    assert ascii_special_id == "test.specialchars"
 | 
						|
 | 
						|
 | 
						|
def test_create_id_with_default():
    """Check that create_id returns a supplied default_id unchanged."""
    # A plain custom id first, then a full URN; both should come back as-is.
    for default_id in ("custom-id", "urn:li:glossaryTerm:custom-id"):
        generated = create_id(["any", "path"], default_id, False)
        assert generated == default_id
 |