mirror of
				https://github.com/open-metadata/OpenMetadata.git
				synced 2025-11-03 20:19:31 +00:00 
			
		
		
		
	* Create ometa client once and pass it around & improve pycln config * Fix * Fix * Fix tests * Fix maven ci * Fix tests * Fix tests * Fix tests * Format * Fix DI
		
			
				
	
	
		
			65 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			65 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#  Copyright 2021 Collate
 | 
						|
#  Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
#  you may not use this file except in compliance with the License.
 | 
						|
#  You may obtain a copy of the License at
 | 
						|
#  http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
#  Unless required by applicable law or agreed to in writing, software
 | 
						|
#  distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
#  See the License for the specific language governing permissions and
 | 
						|
#  limitations under the License.
 | 
						|
"""
 | 
						|
Test NER Scanner
 | 
						|
"""
 | 
						|
from unittest import TestCase
 | 
						|
 | 
						|
from metadata.pii.scanners.ner_scanner import NERScanner
 | 
						|
 | 
						|
 | 
						|
class NERScannerTest(TestCase):
    """
    Validate NERScanner.scan against lists of sample values
    """

    @classmethod
    def setUpClass(cls) -> None:
        """Build the single scanner instance shared by every test here."""
        super().setUpClass()
        # Constructed here rather than in the class body so that importing
        # (or merely collecting) this module does not instantiate the scanner.
        cls.ner_scanner = NERScanner()

    def test_scanner_none(self):
        """scan() is expected to return None for the inputs below."""
        self.assertIsNone(self.ner_scanner.scan(list(range(100))))
        # NOTE(review): receiver and argument look swapped. As written,
        # " ".split(<sentence>) evaluates to [" "], so only a single blank
        # string is scanned, not the words of the sentence. Confirm the
        # scanner still returns None for the individual words before
        # changing this to <sentence>.split(" ").
        self.assertIsNone(
            self.ner_scanner.scan(
                " ".split(
                    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nam consequat quam sagittis convallis cursus."
                )
            )
        )

    def test_scanner_sensitive(self):
        """Samples containing e-mail addresses are expected to be PII.Sensitive."""
        # Every value is an e-mail address.
        self.assertEqual(
            self.ner_scanner.scan(
                [
                    "geraldc@gmail.com",
                    "saratimithi@godesign.com",
                    "heroldsean@google.com",
                ]
            ).tag_fqn,
            "PII.Sensitive",
        )
        # Only one value out of three is an e-mail address.
        self.assertEqual(
            self.ner_scanner.scan(
                ["im ok", "saratimithi@godesign.com", "not sensitive"]
            ).tag_fqn,
            "PII.Sensitive",
        )

    def test_scanner_nonsensitive(self):
        """A sample of location-like strings is expected to be PII.NonSensitive."""
        self.assertEqual(
            self.ner_scanner.scan(
                [
                    "Washington",
                    "Alaska",
                    "Netherfield Lea Street",
                ]
            ).tag_fqn,
            "PII.NonSensitive",
        )
 |