mirror of
				https://github.com/open-metadata/OpenMetadata.git
				synced 2025-10-31 02:29:03 +00:00 
			
		
		
		
	 6c5c9088ea
			
		
	
	
		6c5c9088ea
		
	
	
	
	
		
			
			* Remove 'ORGANIZATION' PII Tag as it is no longer supported by our PII detectors. * Update presidio version to fix wrong regex for Indian passport * Increase sample size of Indian passport numbers --------- Co-authored-by: Pere Menal <pere.menal@getcollate.io> (cherry picked from commit 3c6c762d9c0d7036124aae3a4dc90f51d6a674c0)
		
			
				
	
	
		
			32 lines
		
	
	
		
			1.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			32 lines
		
	
	
		
			1.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #  Copyright 2025 Collate
 | |
| #  Licensed under the Collate Community License, Version 1.0 (the "License");
 | |
| #  you may not use this file except in compliance with the License.
 | |
| #  You may obtain a copy of the License at
 | |
| #  https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
 | |
| #  Unless required by applicable law or agreed to in writing, software
 | |
| #  distributed under the License is distributed on an "AS IS" BASIS,
 | |
| #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| #  See the License for the specific language governing permissions and
 | |
| #  limitations under the License.
 | |
| from metadata.pii.algorithms.presidio_utils import (
 | |
|     build_analyzer_engine,
 | |
|     set_presidio_logger_level,
 | |
| )
 | |
| from metadata.pii.algorithms.tags import PIITag
 | |
| from metadata.pii.scanners.ner_scanner import SUPPORTED_LANG
 | |
| 
 | |
| 
 | |
def test_analyzer_supports_all_expected_pii_entities():
    """
    Ensure every value exposed by ``PIITag`` is among the entities the
    analyzer reports as supported for ``SUPPORTED_LANG``.
    """
    set_presidio_logger_level()
    engine = build_analyzer_engine()

    entities = {*PIITag.values()}
    supported_entities = {*engine.get_supported_entities(SUPPORTED_LANG)}
    # Subset check: whatever remains in the difference is what the analyzer
    # is missing, and that difference is reported in the failure message.
    assert entities.issubset(supported_entities), (
        f"Analyzer does not support all expected PII entities. "
        f"{entities - supported_entities}"
    )
 |