mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-11-10 07:53:35 +00:00
MINOR - Bump Presidio Analyzer and validate support for legal entities (#17750)
This commit is contained in:
parent
478caa51fa
commit
01e4b04573
@ -316,7 +316,7 @@ plugins: Dict[str, Set[str]] = {
|
|||||||
VERSIONS["spacy"],
|
VERSIONS["spacy"],
|
||||||
VERSIONS["pandas"],
|
VERSIONS["pandas"],
|
||||||
VERSIONS["numpy"],
|
VERSIONS["numpy"],
|
||||||
"presidio-analyzer==2.2.32",
|
"presidio-analyzer==2.2.355",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -51,7 +51,8 @@ class NEREntity(Enum):
|
|||||||
UK_NHS = TagType.SENSITIVE.value
|
UK_NHS = TagType.SENSITIVE.value
|
||||||
|
|
||||||
# Spain
|
# Spain
|
||||||
NIF = TagType.SENSITIVE.value
|
ES_NIF = TagType.SENSITIVE.value
|
||||||
|
ES_NIE = TagType.SENSITIVE.value
|
||||||
|
|
||||||
# Italy
|
# Italy
|
||||||
IT_FISCAL_CODE = TagType.SENSITIVE.value
|
IT_FISCAL_CODE = TagType.SENSITIVE.value
|
||||||
@ -60,8 +61,25 @@ class NEREntity(Enum):
|
|||||||
IT_PASSPORT = TagType.SENSITIVE.value
|
IT_PASSPORT = TagType.SENSITIVE.value
|
||||||
IT_IDENTITY_CARD = TagType.SENSITIVE.value
|
IT_IDENTITY_CARD = TagType.SENSITIVE.value
|
||||||
|
|
||||||
|
# Poland
|
||||||
|
PL_PESEL = TagType.SENSITIVE.value
|
||||||
|
|
||||||
|
# Singapore
|
||||||
|
SG_NRIC_FIN = TagType.SENSITIVE.value
|
||||||
|
SG_UEN = TagType.SENSITIVE.value
|
||||||
|
|
||||||
# Australia
|
# Australia
|
||||||
AU_ABN = TagType.SENSITIVE.value
|
AU_ABN = TagType.SENSITIVE.value
|
||||||
AU_ACN = TagType.SENSITIVE.value
|
AU_ACN = TagType.SENSITIVE.value
|
||||||
AU_TFN = TagType.SENSITIVE.value
|
AU_TFN = TagType.SENSITIVE.value
|
||||||
AU_MEDICARE = TagType.SENSITIVE.value
|
AU_MEDICARE = TagType.SENSITIVE.value
|
||||||
|
|
||||||
|
# India
|
||||||
|
IN_PAN = TagType.SENSITIVE.value
|
||||||
|
IN_AADHAAR = TagType.SENSITIVE.value
|
||||||
|
IN_VEHICLE_REGISTRATION = TagType.SENSITIVE.value
|
||||||
|
IN_VOTER = TagType.SENSITIVE.value
|
||||||
|
IN_PASSPORT = TagType.SENSITIVE.value
|
||||||
|
|
||||||
|
# Finland
|
||||||
|
FI_PERSONAL_IDENTITY_CODE = TagType.SENSITIVE.value
|
||||||
|
|||||||
@ -141,3 +141,25 @@ def test_scanner_with_lists(scanner):
|
|||||||
).tag_fqn
|
).tag_fqn
|
||||||
== "PII.Sensitive"
|
== "PII.Sensitive"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_entities(scanner):
    """
    Check that PAN-, SSN- and NIF-style sample lists are each tagged as sensitive.

    Every list is handed to `scanner.scan` on its own and the resulting
    `tag_fqn` is compared against the sensitive PII tag.

    > NOTE: The sample values below are made up and are not valid IDs for any actual use
    """
    sensitive_fqn = "PII.Sensitive"

    # PAN-style values
    pan_samples = [
        "AFZPK7190K",
        "BLQSM2938L",
        "CWRTJ5821M",
        "DZXNV9045A",
        "EHYKG6752P",
    ]
    pan_result = scanner.scan(pan_samples)
    assert pan_result.tag_fqn == sensitive_fqn

    # SSN-style values
    ssn_samples = [
        "123-45-6789",
        "987-65-4321",
        "543-21-0987",
        "678-90-1234",
        "876-54-3210",
    ]
    ssn_result = scanner.scan(ssn_samples)
    assert ssn_result.tag_fqn == sensitive_fqn

    # NIF-style values
    nif_samples = [
        "12345678A",
        "87654321B",
        "23456789C",
        "98765432D",
        "34567890E",
    ]
    nif_result = scanner.scan(nif_samples)
    assert nif_result.tag_fqn == sensitive_fqn
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user