MINOR - Bump Presidio Analyzer and validate support for legal entities (#17750)

This commit is contained in:
Pere Miquel Brull 2024-09-06 16:07:08 +02:00
parent 478caa51fa
commit 01e4b04573
3 changed files with 42 additions and 2 deletions

View File

@ -316,7 +316,7 @@ plugins: Dict[str, Set[str]] = {
VERSIONS["spacy"],
VERSIONS["pandas"],
VERSIONS["numpy"],
"presidio-analyzer==2.2.32",
"presidio-analyzer==2.2.355",
},
}

View File

@ -51,7 +51,8 @@ class NEREntity(Enum):
UK_NHS = TagType.SENSITIVE.value
# Spain
NIF = TagType.SENSITIVE.value
ES_NIF = TagType.SENSITIVE.value
ES_NIE = TagType.SENSITIVE.value
# Italy
IT_FISCAL_CODE = TagType.SENSITIVE.value
@ -60,8 +61,25 @@ class NEREntity(Enum):
IT_PASSPORT = TagType.SENSITIVE.value
IT_IDENTITY_CARD = TagType.SENSITIVE.value
# Poland
PL_PESEL = TagType.SENSITIVE.value
# Singapore
SG_NRIC_FIN = TagType.SENSITIVE.value
SG_UEN = TagType.SENSITIVE.value
# Australia
AU_ABN = TagType.SENSITIVE.value
AU_ACN = TagType.SENSITIVE.value
AU_TFN = TagType.SENSITIVE.value
AU_MEDICARE = TagType.SENSITIVE.value
# India
IN_PAN = TagType.SENSITIVE.value
IN_AADHAAR = TagType.SENSITIVE.value
IN_VEHICLE_REGISTRATION = TagType.SENSITIVE.value
IN_VOTER = TagType.SENSITIVE.value
IN_PASSPORT = TagType.SENSITIVE.value
# Finland
FI_PERSONAL_IDENTITY_CODE = TagType.SENSITIVE.value

View File

@ -141,3 +141,25 @@ def test_scanner_with_lists(scanner):
).tag_fqn
== "PII.Sensitive"
)
def test_scan_entities(scanner):
    """
    Check that the scanner flags several kinds of identifier samples as sensitive.

    Each group of sample strings is passed to ``scanner.scan`` as one list and
    the ``tag_fqn`` of the result is compared with the sensitive PII tag.

    > NOTE: These lists are randomly generated and not valid IDs for any actual use
    """
    sensitive_fqn = "PII.Sensitive"

    # PAN-style identifiers.
    pan_samples = [
        "AFZPK7190K",
        "BLQSM2938L",
        "CWRTJ5821M",
        "DZXNV9045A",
        "EHYKG6752P",
    ]
    pan_result = scanner.scan(pan_samples)
    assert pan_result.tag_fqn == sensitive_fqn

    # SSN-style identifiers.
    ssn_samples = [
        "123-45-6789",
        "987-65-4321",
        "543-21-0987",
        "678-90-1234",
        "876-54-3210",
    ]
    ssn_result = scanner.scan(ssn_samples)
    assert ssn_result.tag_fqn == sensitive_fqn

    # NIF-style identifiers.
    nif_samples = [
        "12345678A",
        "87654321B",
        "23456789C",
        "98765432D",
        "34567890E",
    ]
    nif_result = scanner.scan(nif_samples)
    assert nif_result.tag_fqn == sensitive_fqn