| 
									
										
										
										
											2023-05-17 15:58:17 +02:00
										 |  |  | #  Copyright 2021 Collate | 
					
						
							|  |  |  | #  Licensed under the Apache License, Version 2.0 (the "License"); | 
					
						
							|  |  |  | #  you may not use this file except in compliance with the License. | 
					
						
							|  |  |  | #  You may obtain a copy of the License at | 
					
						
							|  |  |  | #  http://www.apache.org/licenses/LICENSE-2.0 | 
					
						
							|  |  |  | #  Unless required by applicable law or agreed to in writing, software | 
					
						
							|  |  |  | #  distributed under the License is distributed on an "AS IS" BASIS, | 
					
						
							|  |  |  | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
					
						
							|  |  |  | #  See the License for the specific language governing permissions and | 
					
						
							|  |  |  | #  limitations under the License. | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | Test NER Scanner | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | from unittest import TestCase | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-04 11:02:57 +02:00
										 |  |  | from metadata.pii.scanners.ner_scanner import NERScanner | 
					
						
							| 
									
										
										
										
											2023-05-17 15:58:17 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class NERScannerTest(TestCase): | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Validate various typical column names | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-18 12:53:22 +02:00
										 |  |  |     ner_scanner = NERScanner() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-17 15:58:17 +02:00
										 |  |  |     def test_scanner_none(self): | 
					
						
							| 
									
										
										
										
											2023-05-18 12:53:22 +02:00
										 |  |  |         self.assertIsNone(self.ner_scanner.scan(list(range(100)))) | 
					
						
							| 
									
										
										
										
											2023-05-17 15:58:17 +02:00
										 |  |  |         self.assertIsNone( | 
					
						
							| 
									
										
										
										
											2023-05-18 12:53:22 +02:00
										 |  |  |             self.ner_scanner.scan( | 
					
						
							| 
									
										
										
										
											2023-05-17 15:58:17 +02:00
										 |  |  |                 " ".split( | 
					
						
							|  |  |  |                     "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nam consequat quam sagittis convallis cursus." | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_scanner_sensitive(self): | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2023-05-18 12:53:22 +02:00
										 |  |  |             self.ner_scanner.scan( | 
					
						
							| 
									
										
										
										
											2023-05-17 15:58:17 +02:00
										 |  |  |                 [ | 
					
						
							|  |  |  |                     "geraldc@gmail.com", | 
					
						
							|  |  |  |                     "saratimithi@godesign.com", | 
					
						
							|  |  |  |                     "heroldsean@google.com", | 
					
						
							|  |  |  |                 ] | 
					
						
							| 
									
										
										
										
											2023-09-06 11:30:46 +02:00
										 |  |  |             ).tag_fqn, | 
					
						
							|  |  |  |             "PII.Sensitive", | 
					
						
							| 
									
										
										
										
											2023-05-17 15:58:17 +02:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2023-05-18 12:53:22 +02:00
										 |  |  |             self.ner_scanner.scan( | 
					
						
							|  |  |  |                 ["im ok", "saratimithi@godesign.com", "not sensitive"] | 
					
						
							| 
									
										
										
										
											2023-09-06 11:30:46 +02:00
										 |  |  |             ).tag_fqn, | 
					
						
							|  |  |  |             "PII.Sensitive", | 
					
						
							| 
									
										
										
										
											2023-05-17 15:58:17 +02:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2023-05-19 18:21:01 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_scanner_nonsensitive(self): | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.ner_scanner.scan( | 
					
						
							|  |  |  |                 [ | 
					
						
							|  |  |  |                     "Washington", | 
					
						
							|  |  |  |                     "Alaska", | 
					
						
							|  |  |  |                     "Netherfield Lea Street", | 
					
						
							|  |  |  |                 ] | 
					
						
							| 
									
										
										
										
											2023-09-06 11:30:46 +02:00
										 |  |  |             ).tag_fqn, | 
					
						
							|  |  |  |             "PII.NonSensitive", | 
					
						
							| 
									
										
										
										
											2023-05-19 18:21:01 +02:00
										 |  |  |         ) |