mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-10-17 11:43:54 +00:00
Ensure recognizers are created (#23645)
* Add the migration classes and data for recognizers This is so that we can run a migration that sets `json->recognizers` of `PII.Sensitive` and `PII.NonSensitive` tags from json values. The issue with normal migrations was that the value of recognizers was too long to be persisted in the server migrations log. Created a common `migration.utils.v1110.MigrationProcessBase` * Ensure building automatically with the right parameters * Update typescript types
This commit is contained in:
parent
c4a4b22295
commit
a6ac42371d
@ -0,0 +1,167 @@
|
||||
[
|
||||
{
|
||||
"name": "DateRecognizer",
|
||||
"displayName": "Date Recognizer",
|
||||
"description": "Recognize date using regex.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "DateRecognizer"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "PhoneRecognizer",
|
||||
"displayName": "Phone Recognizer",
|
||||
"description": "Recognize multi-regional phone numbers.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "PhoneRecognizer"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "UrlRecognizer",
|
||||
"displayName": "Url Recognizer",
|
||||
"description": "Recognize urls using regex.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "UrlRecognizer"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "SpacyRecognizer",
|
||||
"displayName": "Recognizer using spaCy NLP model",
|
||||
"description": "Recognize PII entities using a spaCy NLP model.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "SpacyRecognizer",
|
||||
"supportedEntities": [
|
||||
"DATE_TIME",
|
||||
"NRP",
|
||||
"LOCATION"
|
||||
]
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"displayName": "Date time column name",
|
||||
"name": "date_time",
|
||||
"description": "A regex recognizer for column names",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "pattern",
|
||||
"supportedLanguage": "en",
|
||||
"patterns": [
|
||||
{
|
||||
"name": "date_time_pattern_0",
|
||||
"regex": "^.*(date|time|dob|birthday|dod).*$",
|
||||
"score": 0.6
|
||||
}
|
||||
],
|
||||
"supportedEntity": "DATE_TIME",
|
||||
"regexFlags": {
|
||||
"dotAll": true,
|
||||
"multiline": true,
|
||||
"ignoreCase": true
|
||||
},
|
||||
"context": []
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "column_name"
|
||||
},
|
||||
{
|
||||
"displayName": "Nrp column name",
|
||||
"name": "nrp",
|
||||
"description": "A regex recognizer for column names",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "pattern",
|
||||
"supportedLanguage": "en",
|
||||
"patterns": [
|
||||
{
|
||||
"name": "nrp_pattern_0",
|
||||
"regex": "^.*(gender|nationality).*$",
|
||||
"score": 0.6
|
||||
}
|
||||
],
|
||||
"supportedEntity": "NRP",
|
||||
"regexFlags": {
|
||||
"dotAll": true,
|
||||
"multiline": true,
|
||||
"ignoreCase": true
|
||||
},
|
||||
"context": []
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "column_name"
|
||||
},
|
||||
{
|
||||
"displayName": "Location column name",
|
||||
"name": "location",
|
||||
"description": "A regex recognizer for column names",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "pattern",
|
||||
"supportedLanguage": "en",
|
||||
"patterns": [
|
||||
{
|
||||
"name": "location_pattern_0",
|
||||
"regex": "^.*(address|city|state|county|country|zipcode|zip|postal|zone|borough).*$",
|
||||
"score": 0.6
|
||||
}
|
||||
],
|
||||
"supportedEntity": "LOCATION",
|
||||
"regexFlags": {
|
||||
"dotAll": true,
|
||||
"multiline": true,
|
||||
"ignoreCase": true
|
||||
},
|
||||
"context": []
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "column_name"
|
||||
},
|
||||
{
|
||||
"displayName": "Phone number column name",
|
||||
"name": "phone_number",
|
||||
"description": "A regex recognizer for column names",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "pattern",
|
||||
"supportedLanguage": "en",
|
||||
"patterns": [
|
||||
{
|
||||
"name": "phone_number_pattern_0",
|
||||
"regex": "^.*(phone).*$",
|
||||
"score": 0.6
|
||||
}
|
||||
],
|
||||
"supportedEntity": "PHONE_NUMBER",
|
||||
"regexFlags": {
|
||||
"dotAll": true,
|
||||
"multiline": true,
|
||||
"ignoreCase": true
|
||||
},
|
||||
"context": []
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "column_name"
|
||||
}
|
||||
]
|
741
bootstrap/sql/migrations/native/1.11.0/data/tags/Sensitive.json
Normal file
741
bootstrap/sql/migrations/native/1.11.0/data/tags/Sensitive.json
Normal file
@ -0,0 +1,741 @@
|
||||
[
|
||||
{
|
||||
"name": "EnglishCreditCardRecognizer",
|
||||
"displayName": "English Credit Card Recognizer",
|
||||
"description": "Recognize common credit card numbers using regex + checksum.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "CreditCardRecognizer",
|
||||
"supportedLanguage": "en",
|
||||
"context": [
|
||||
"credit",
|
||||
"card",
|
||||
"visa",
|
||||
"mastercard",
|
||||
"cc",
|
||||
"amex",
|
||||
"discover",
|
||||
"jcb",
|
||||
"diners",
|
||||
"maestro",
|
||||
"instapayment"
|
||||
]
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "SpanishCreditCardRecognizer",
|
||||
"displayName": "Spanish Credit Card Recognizer",
|
||||
"description": "Recognize common credit card numbers using regex + checksum.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "CreditCardRecognizer",
|
||||
"supportedLanguage": "es",
|
||||
"context": [
|
||||
"tarjeta",
|
||||
"credito",
|
||||
"visa",
|
||||
"mastercard",
|
||||
"cc",
|
||||
"amex",
|
||||
"discover",
|
||||
"jcb",
|
||||
"diners",
|
||||
"maestro",
|
||||
"instapayment"
|
||||
]
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "ItalianCreditCardRecognizer",
|
||||
"displayName": "Italian Credit Card Recognizer",
|
||||
"description": "Recognize common credit card numbers using regex + checksum.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "CreditCardRecognizer",
|
||||
"supportedLanguage": "it"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "PolishCreditCardRecognizer",
|
||||
"displayName": "Polish Credit Card Recognizer",
|
||||
"description": "Recognize common credit card numbers using regex + checksum.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "CreditCardRecognizer",
|
||||
"supportedLanguage": "pl"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "UsBankRecognizer",
|
||||
"displayName": "Us Bank Recognizer",
|
||||
"description": "Recognizes US bank number using regex.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "UsBankRecognizer",
|
||||
"supportedLanguage": "en"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "UsLicenseRecognizer",
|
||||
"displayName": "Us License Recognizer",
|
||||
"description": "Recognizes US driver license using regex.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "UsLicenseRecognizer",
|
||||
"supportedLanguage": "en"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "UsItinRecognizer",
|
||||
"displayName": "Us Itin Recognizer",
|
||||
"description": "Recognizes US ITIN (Individual Taxpayer Identification Number) using regex.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "UsItinRecognizer",
|
||||
"supportedLanguage": "en"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "UsPassportRecognizer",
|
||||
"displayName": "Us Passport Recognizer",
|
||||
"description": "Recognizes US Passport number using regex.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "UsPassportRecognizer",
|
||||
"supportedLanguage": "en"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "UsSsnRecognizer",
|
||||
"displayName": "Us Ssn Recognizer",
|
||||
"description": "Recognize US Social Security Number (SSN) using regex.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "UsSsnRecognizer",
|
||||
"supportedLanguage": "en"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "NhsRecognizer",
|
||||
"displayName": "Nhs Recognizer",
|
||||
"description": "Recognizes NHS number using regex and checksum.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "NhsRecognizer",
|
||||
"supportedLanguage": "en"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "UkNinoRecognizer",
|
||||
"displayName": "Uk Nino Recognizer",
|
||||
"description": "Recognizes UK National Insurance Number using regex.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "UkNinoRecognizer",
|
||||
"supportedLanguage": "en"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "SgFinRecognizer",
|
||||
"displayName": "Sg Fin Recognizer",
|
||||
"description": "Recognize SG FIN/NRIC number using regex.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "SgFinRecognizer",
|
||||
"supportedLanguage": "en"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "AuAbnRecognizer",
|
||||
"displayName": "Au Abn Recognizer",
|
||||
"description": "Recognizes Australian Business Number (\\\"ABN\\\").<br/><br/>The Australian Business Number (ABN) is a unique 11 digit identifier issued to all entities registered in the Australian Business Register (ABR). The 11 digit ABN is structured as a 9 digit identifier<br/><br/>with two leading check digits.<br/><br/>The leading check digits are derived using a modulus 89 calculation.<br/><br/>This recognizer identifies ABN using regex, context words and checksum.<br/><br/>Reference: https://abr.business.gov.au/Help/AbnFormat",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "AuAbnRecognizer",
|
||||
"supportedLanguage": "en"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "AuAcnRecognizer",
|
||||
"displayName": "Au Acn Recognizer",
|
||||
"description": "Recognizes Australian Company Number (\\\"ACN\\\").<br/><br/>The Australian Company Number (ACN) is a nine digit number with the last digit being a check digit calculated using a modified modulus 10 calculation.<br/><br/>This recognizer identifies ACN using regex, context words, and checksum.<br/><br/>Reference: https://asic.gov.au/",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "AuAcnRecognizer",
|
||||
"supportedLanguage": "en"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "AuTfnRecognizer",
|
||||
"displayName": "Au Tfn Recognizer",
|
||||
"description": "Recognizes Australian Tax File Numbers (\\\"TFN\\\").<br/><br/>The tax file number (TFN) is a unique identifier issued by the Australian Taxation Office to each taxpaying entity, an individual, company,<br/><br/>superannuation fund, partnership, or trust.<br/><br/>The TFN consists of a nine digit number, usually presented in the format NNN NNN NNN.<br/><br/>TFN includes a check digit for detecting erroneous number based on simple modulo 11.<br/><br/>This recognizer uses regex, context words,<br/><br/>and checksum to identify TFN.<br/><br/>Reference: https://www.ato.gov.au/individuals/tax-file-number/",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "AuTfnRecognizer",
|
||||
"supportedLanguage": "en"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "AuMedicareRecognizer",
|
||||
"displayName": "Au Medicare Recognizer",
|
||||
"description": "Recognizes Australian Medicare number using regex, context words, and checksum.<br/><br/>Medicare number is a unique identifier issued by Australian Government that enables the cardholder to receive rebates of medical expenses under Australia's Medicare system.<br/><br/>It uses a modulus 10 checksum scheme to validate the number.<br/><br/>Reference: https://en.wikipedia.org/wiki/Medicare_card_(Australia)",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "AuMedicareRecognizer",
|
||||
"supportedLanguage": "en"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "InPanRecognizer",
|
||||
"displayName": "In Pan Recognizer",
|
||||
"description": "Recognizes Indian Permanent Account Number (\\\"PAN\\\").<br/><br/>The Permanent Account Number (PAN) is a ten digit alpha-numeric code with the last digit being a check digit calculated using a modified modulus 10 calculation.<br/><br/>This recognizer identifies PAN using regex and context words.<br/><br/>Reference: https://en.wikipedia.org/wiki/Permanent_account_number<br/><br/>https://incometaxindia.gov.in/Forms/tps/1.Permanent%20Account%20Number%20(PAN).pdf",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "InPanRecognizer",
|
||||
"supportedLanguage": "en"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "InAadhaarRecognizer",
|
||||
"displayName": "In Aadhaar Recognizer",
|
||||
"description": "Recognizes Indian UIDAI Person Identification Number (\\\"AADHAAR\\\").<br/><br/>Reference: https://en.wikipedia.org/wiki/Aadhaar<br/><br/>A 12 digit unique number that is issued to each individual by Government of India",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "InAadhaarRecognizer",
|
||||
"supportedLanguage": "en"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "InVehicleRegistrationRecognizer",
|
||||
"displayName": "In Vehicle Registration Recognizer",
|
||||
"description": "Recognizes Indian Vehicle Registration Number issued by RTO.<br/><br/>Reference(s):<br/><br/>https://en.wikipedia.org/wiki/Vehicle_registration_plates_of_India<br/><br/>https://en.wikipedia.org/wiki/Regional_Transport_Office<br/><br/>https://en.wikipedia.org/wiki/List_of_Regional_Transport_Office_districts_in_India<br/><br/>The registration scheme changed over time with multiple formats in play over the years<br/><br/>India has multiple active patterns for registration plates issued to different vehicle categories",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "InVehicleRegistrationRecognizer",
|
||||
"supportedLanguage": "en"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "InPassportRecognizer",
|
||||
"displayName": "In Passport Recognizer",
|
||||
"description": "Recognizes Indian Passport Number.<br/><br/>Indian Passport Number is an eight digit alphanumeric number.<br/><br/>Reference:<br/><br/>https://www.bajajallianz.com/blog/travel-insurance-articles/where-is-passport-number-in-indian-passport.html",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "InPassportRecognizer",
|
||||
"supportedLanguage": "en"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "EsNifRecognizer",
|
||||
"displayName": "Es Nif Recognizer",
|
||||
"description": "Recognize NIF number using regex and checksum.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "EsNifRecognizer",
|
||||
"supportedLanguage": "es"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "EsNieRecognizer",
|
||||
"displayName": "Es Nie Recognizer",
|
||||
"description": "Recognize NIE number using regex and checksum.<br/><br/>Reference(s):<br/><br/>https://es.wikipedia.org/wiki/N%C3%BAmero_de_identidad_de_extranjero<br/><br/>https://www.interior.gob.es/opencms/ca/servicios-al-ciudadano/tramites-y-gestiones/dni/calculo-del-digito-de-control-del-nif-nie/",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "EsNieRecognizer",
|
||||
"supportedLanguage": "es"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "ItDriverLicenseRecognizer",
|
||||
"displayName": "It Driver License Recognizer",
|
||||
"description": "Recognizes IT Driver License using regex.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "ItDriverLicenseRecognizer",
|
||||
"supportedLanguage": "it"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "ItFiscalCodeRecognizer",
|
||||
"displayName": "It Fiscal Code Recognizer",
|
||||
"description": "Recognizes IT Fiscal Code using regex.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "ItFiscalCodeRecognizer",
|
||||
"supportedLanguage": "it"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "ItVatCodeRecognizer",
|
||||
"displayName": "It Vat Code Recognizer",
|
||||
"description": "Recognizes Italian VAT code using regex and checksum.<br/><br/>For more information about italian VAT code:<br/><br/>https://en.wikipedia.org/wiki/VAT_identification_number#:~:text=%5B2%5D)-,Italy,-Partita%20IVA",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "ItVatCodeRecognizer",
|
||||
"supportedLanguage": "it"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "ItIdentityCardRecognizer",
|
||||
"displayName": "It Identity Card Recognizer",
|
||||
"description": "Recognizes Italian Identity Card number using case-insensitive regex.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "ItIdentityCardRecognizer",
|
||||
"supportedLanguage": "it"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "ItPassportRecognizer",
|
||||
"displayName": "It Passport Recognizer",
|
||||
"description": "Recognizes IT Passport number using case-insensitive regex.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "ItPassportRecognizer",
|
||||
"supportedLanguage": "it"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "PlPeselRecognizer",
|
||||
"displayName": "Pl Pesel Recognizer",
|
||||
"description": "Recognize PESEL number using regex and checksum.<br/><br/>For more information about PESEL: https://en.wikipedia.org/wiki/PESEL",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "PlPeselRecognizer",
|
||||
"supportedLanguage": "pl"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "CryptoRecognizer",
|
||||
"displayName": "Crypto Recognizer",
|
||||
"description": "Recognize common crypto account numbers using regex + checksum.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "CryptoRecognizer"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "EmailRecognizer",
|
||||
"displayName": "Email Recognizer",
|
||||
"description": "Recognize email addresses using regex.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "EmailRecognizer"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "IbanRecognizer",
|
||||
"displayName": "Iban Recognizer",
|
||||
"description": "Recognize IBAN code using regex and checksum.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "IbanRecognizer"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "IpRecognizer",
|
||||
"displayName": "Ip Recognizer",
|
||||
"description": "Recognize IP address using regex.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "IpRecognizer"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "MedicalLicenseRecognizer",
|
||||
"displayName": "Medical License Recognizer",
|
||||
"description": "Recognize common Medical license numbers using regex + checksum.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "MedicalLicenseRecognizer"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "InVoterRecognizer",
|
||||
"displayName": "In Voter Recognizer",
|
||||
"description": "Recognize Indian Voter/Election Id(EPIC).<br/><br/>The Elector's Photo Identity Card or Voter id is a ten digit alpha-numeric code issued by Election Commission of India to adult domiciles who have reached the age of 18<br/><br/>Ref: https://en.wikipedia.org/wiki/Voter_ID_(India)",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "InVoterRecognizer"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "AbaRoutingRecognizer",
|
||||
"displayName": "ABA Routing Recognizer",
|
||||
"description": "Recognize American Banking Association (ABA) routing number.<br/><br/>Also known as routing transit number (RTN) and used to identify financial institutions and process transactions.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "AbaRoutingRecognizer"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "FiPersonalIdentityCodeRecognizer",
|
||||
"displayName": "FI Personal Identity Code Recognizer",
|
||||
"description": "Recognizes and validates Finnish Personal Identity Codes (Henkilötunnus).",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "FiPersonalIdentityCodeRecognizer",
|
||||
"supportedLanguage": "fi"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "SgUenRecognizer",
|
||||
"displayName": "Singaporean UEN recognizer",
|
||||
"description": "Recognize Singapore UEN (Unique Entity Number) using regex.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "SgUenRecognizer"
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "SpacyRecognizer",
|
||||
"displayName": "Recognizer using spaCy NLP model",
|
||||
"description": "Recognize PII entities using a spaCy NLP model.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "SpacyRecognizer",
|
||||
"supportedEntities": [
|
||||
"PERSON"
|
||||
]
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"displayName": "US SSN column name",
|
||||
"name": "us_ssn",
|
||||
"description": "A regex recognizer for column names",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "pattern",
|
||||
"supportedLanguage": "en",
|
||||
"patterns": [
|
||||
{
|
||||
"name": "us_ssn_pattern_0",
|
||||
"regex": "^.*(ssn|social).*$",
|
||||
"score": 0.6
|
||||
}
|
||||
],
|
||||
"supportedEntity": "US_SSN",
|
||||
"regexFlags": {
|
||||
"dotAll": true,
|
||||
"multiline": true,
|
||||
"ignoreCase": true
|
||||
},
|
||||
"context": []
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "column_name"
|
||||
},
|
||||
{
|
||||
"displayName": "Credit card column name",
|
||||
"name": "credit_card",
|
||||
"description": "A regex recognizer for column names",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "pattern",
|
||||
"supportedLanguage": "en",
|
||||
"patterns": [
|
||||
{
|
||||
"name": "credit_card_pattern_0",
|
||||
"regex": "^.*(credit).*(card).*$",
|
||||
"score": 0.6
|
||||
}
|
||||
],
|
||||
"supportedEntity": "CREDIT_CARD",
|
||||
"regexFlags": {
|
||||
"dotAll": true,
|
||||
"multiline": true,
|
||||
"ignoreCase": true
|
||||
},
|
||||
"context": []
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "column_name"
|
||||
},
|
||||
{
|
||||
"displayName": "US bank number column name",
|
||||
"name": "us_bank_number",
|
||||
"description": "A regex recognizer for column names",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "pattern",
|
||||
"supportedLanguage": "en",
|
||||
"patterns": [
|
||||
{
|
||||
"name": "us_bank_number_pattern_0",
|
||||
"regex": "\\b(account|acct|acc)[_-]?(number|num|no)\\b",
|
||||
"score": 0.6
|
||||
},
|
||||
{
|
||||
"name": "us_bank_number_pattern_1",
|
||||
"regex": "\\bbank[_-]?(account|number|num|no)?\\b",
|
||||
"score": 0.6
|
||||
}
|
||||
],
|
||||
"supportedEntity": "US_BANK_NUMBER",
|
||||
"regexFlags": {
|
||||
"dotAll": true,
|
||||
"multiline": true,
|
||||
"ignoreCase": true
|
||||
},
|
||||
"context": []
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "column_name"
|
||||
},
|
||||
{
|
||||
"displayName": "Iban code column name",
|
||||
"name": "iban_code",
|
||||
"description": "A regex recognizer for column names",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "pattern",
|
||||
"supportedLanguage": "en",
|
||||
"patterns": [
|
||||
{
|
||||
"name": "iban_code_pattern_0",
|
||||
"regex": "\\b(account|acct|acc)[_-]?(number|num|no)\\b",
|
||||
"score": 0.6
|
||||
},
|
||||
{
|
||||
"name": "iban_code_pattern_1",
|
||||
"regex": "\\bbank[_-]?(account|number|num|no)?\\b",
|
||||
"score": 0.6
|
||||
},
|
||||
{
|
||||
"name": "iban_code_pattern_2",
|
||||
"regex": "\\biban(?:[_]?(number|code))?\\b",
|
||||
"score": 0.6
|
||||
},
|
||||
{
|
||||
"name": "iban_code_pattern_3",
|
||||
"regex": "\\bbank[_]?iban\\b",
|
||||
"score": 0.6
|
||||
},
|
||||
{
|
||||
"name": "iban_code_pattern_4",
|
||||
"regex": "\\binternational[_]?(account|bank[_]?number)\\b",
|
||||
"score": 0.6
|
||||
}
|
||||
],
|
||||
"supportedEntity": "IBAN_CODE",
|
||||
"regexFlags": {
|
||||
"dotAll": true,
|
||||
"multiline": true,
|
||||
"ignoreCase": true
|
||||
},
|
||||
"context": []
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "column_name"
|
||||
},
|
||||
{
|
||||
"displayName": "Email address column name",
|
||||
"name": "email_address",
|
||||
"description": "A regex recognizer for column names",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "pattern",
|
||||
"supportedLanguage": "en",
|
||||
"patterns": [
|
||||
{
|
||||
"name": "email_address_pattern_0",
|
||||
"regex": "^(email|e-mail|mail)(.*address)?$",
|
||||
"score": 0.6
|
||||
}
|
||||
],
|
||||
"supportedEntity": "EMAIL_ADDRESS",
|
||||
"regexFlags": {
|
||||
"dotAll": true,
|
||||
"multiline": true,
|
||||
"ignoreCase": true
|
||||
},
|
||||
"context": []
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "column_name"
|
||||
},
|
||||
{
|
||||
"displayName": "Person column name",
|
||||
"name": "person",
|
||||
"description": "A regex recognizer for column names",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "pattern",
|
||||
"supportedLanguage": "en",
|
||||
"patterns": [
|
||||
{
|
||||
"name": "person_pattern_0",
|
||||
"regex": "^.*(user|client|person|first|last|maiden|nick).*(name).*$",
|
||||
"score": 0.6
|
||||
}
|
||||
],
|
||||
"supportedEntity": "PERSON",
|
||||
"regexFlags": {
|
||||
"dotAll": true,
|
||||
"multiline": true,
|
||||
"ignoreCase": true
|
||||
},
|
||||
"context": []
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "column_name"
|
||||
}
|
||||
]
|
@ -219,6 +219,8 @@ class PresidioRecognizerFactory:
|
||||
args["supported_language"] = supported_language
|
||||
if context := config.context:
|
||||
args["context"] = context
|
||||
if supported_entities := config.supportedEntities:
|
||||
args["supported_entities"] = [entity.value for entity in supported_entities]
|
||||
|
||||
return predefined_class(**args)
|
||||
|
||||
|
@ -14,7 +14,8 @@ from metadata.pii.tag_processor import TagAnalyzerGenerator, TagProcessor
|
||||
def create_pii_processor(
|
||||
metadata: OpenMetadata[Any, Any], openmetadata_config: OpenMetadataWorkflowConfig
|
||||
) -> AutoClassificationProcessor:
|
||||
if getattr(openmetadata_config.processor, "type") == "tag-pii-processor":
|
||||
processor_type = getattr(openmetadata_config.processor, "type", "tag-pii-processor")
|
||||
if processor_type == "tag-pii-processor":
|
||||
return TagProcessor(
|
||||
config=parse_workflow_config_gracefully(openmetadata_config.model_dump()),
|
||||
metadata=metadata,
|
||||
|
@ -68,7 +68,7 @@ def build_auto_classification_workflow_config(
|
||||
config={},
|
||||
),
|
||||
processor=Processor(
|
||||
type="orm-profiler",
|
||||
type="tag-pii-processor",
|
||||
config={},
|
||||
),
|
||||
workflowConfig=WorkflowConfig(
|
||||
|
@ -74,6 +74,8 @@ public interface MigrationProcess {
|
||||
|
||||
String getPostDDLScriptFilePath();
|
||||
|
||||
String getMigrationsDir();
|
||||
|
||||
// Handle non-transactional SQL here, e.g. changes to the table structure (DDL)
|
||||
Map<String, QueryStatus> runSchemaChanges(boolean isForceMigration);
|
||||
|
||||
|
@ -91,6 +91,11 @@ public class MigrationProcessImpl implements MigrationProcess {
|
||||
return migrationFile.getPostDDLScriptFile();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getMigrationsDir() {
|
||||
return migrationFile.getDirPath();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, QueryStatus> runSchemaChanges(boolean isForceMigration) {
|
||||
return performSqlExecutionAndUpdate(
|
||||
|
@ -0,0 +1,18 @@
|
||||
package org.openmetadata.service.migration.mysql.v1110;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.openmetadata.service.migration.utils.MigrationFile;
|
||||
import org.openmetadata.service.migration.utils.v1110.MigrationProcessBase;
|
||||
|
||||
@Slf4j
|
||||
public class Migration extends MigrationProcessBase {
|
||||
public Migration(MigrationFile migrationFile) {
|
||||
super(migrationFile);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getQueryFormat() {
|
||||
return "UPDATE tag SET json = JSON_SET(json, '$.recognizers', CAST('%s' AS JSON)) "
|
||||
+ "WHERE JSON_EXTRACT(json, '$.fullyQualifiedName') = '%s'";
|
||||
}
|
||||
}
|
@ -0,0 +1,18 @@
|
||||
package org.openmetadata.service.migration.postgres.v1110;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.openmetadata.service.migration.utils.MigrationFile;
|
||||
import org.openmetadata.service.migration.utils.v1110.MigrationProcessBase;
|
||||
|
||||
@Slf4j
|
||||
public class Migration extends MigrationProcessBase {
|
||||
public Migration(MigrationFile migrationFile) {
|
||||
super(migrationFile);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getQueryFormat() {
|
||||
return "UPDATE tag SET json = jsonb_set(json, '{recognizers}', '%s'::jsonb) "
|
||||
+ "WHERE json->>'fullyQualifiedName' = '%s'";
|
||||
}
|
||||
}
|
@ -154,6 +154,10 @@ public class MigrationFile implements Comparable<MigrationFile> {
|
||||
return postDDLScripts;
|
||||
}
|
||||
|
||||
public String getDirPath() {
|
||||
return this.dir.getAbsolutePath();
|
||||
}
|
||||
|
||||
private int[] convertToNumber(String version) {
|
||||
final String[] split = version.split("\\-")[0].split("\\.");
|
||||
int[] numbers = new int[split.length];
|
||||
|
@ -0,0 +1,77 @@
|
||||
package org.openmetadata.service.migration.utils.v1110;
|
||||
|
||||
import static org.openmetadata.service.util.EntityUtil.hash;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.Map;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.openmetadata.service.migration.QueryStatus;
|
||||
import org.openmetadata.service.migration.api.MigrationProcessImpl;
|
||||
import org.openmetadata.service.migration.utils.MigrationFile;
|
||||
|
||||
@Slf4j
|
||||
public abstract class MigrationProcessBase extends MigrationProcessImpl {
|
||||
private static final Map<String, String> PATH_BY_TAG =
|
||||
Map.of(
|
||||
"PII.Sensitive", "data/tags/Sensitive.json",
|
||||
"PII.NonSensitive", "data/tags/NonSensitive.json");
|
||||
|
||||
public MigrationProcessBase(MigrationFile migrationFile) {
|
||||
super(migrationFile);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, QueryStatus> runPostDDLScripts(boolean isForceMigration) {
|
||||
Map<String, QueryStatus> result = super.runPostDDLScripts(isForceMigration);
|
||||
|
||||
PATH_BY_TAG.forEach(
|
||||
(tagFqn, relativePath) -> {
|
||||
try {
|
||||
updateTagRecognizers(tagFqn, relativePath, result, isForceMigration);
|
||||
} catch (Exception e) {
|
||||
LOG.error("Failed to update recognizers for tag: {}", tagFqn, e);
|
||||
}
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private void updateTagRecognizers(
|
||||
String tagFqn,
|
||||
String relativePath,
|
||||
Map<String, QueryStatus> results,
|
||||
Boolean isForceMigration)
|
||||
throws IOException {
|
||||
Path dataPath = Paths.get(this.getMigrationsDir(), relativePath);
|
||||
|
||||
if (!Files.exists(dataPath)) {
|
||||
LOG.warn("Tag data file not found: {}", dataPath);
|
||||
return;
|
||||
}
|
||||
|
||||
String jsonContent = Files.readString(dataPath);
|
||||
|
||||
String queryFormat = getQueryFormat();
|
||||
String updateQuery = String.format(queryFormat, jsonContent.replace("'", "''"), tagFqn);
|
||||
|
||||
String truncatedQuery = String.format(queryFormat, "[ ... data truncated ... ]", tagFqn);
|
||||
|
||||
try {
|
||||
handle.execute(updateQuery);
|
||||
migrationDAO.upsertServerMigrationSQL(getVersion(), truncatedQuery, hash(truncatedQuery));
|
||||
results.put(
|
||||
updateQuery, new QueryStatus(QueryStatus.Status.SUCCESS, "Successfully Executed Query"));
|
||||
} catch (Exception e) {
|
||||
String message = String.format("Failed to run sql: [%s] due to [%s]", truncatedQuery, e);
|
||||
results.put(truncatedQuery, new QueryStatus(QueryStatus.Status.FAILURE, message));
|
||||
if (!isForceMigration) {
|
||||
throw new RuntimeException(message, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected abstract String getQueryFormat();
|
||||
}
|
@ -63,6 +63,24 @@
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"name": "SpacyRecognizer",
|
||||
"displayName": "Recognizer using spaCy NLP model",
|
||||
"description": "Recognize PII entities using a spaCy NLP model.",
|
||||
"enabled": true,
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "SpacyRecognizer",
|
||||
"supportedEntities": [
|
||||
"DATE_TIME",
|
||||
"NRP",
|
||||
"LOCATION"
|
||||
]
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
},
|
||||
{
|
||||
"displayName": "Date time column name",
|
||||
"name": "date_time",
|
||||
@ -724,7 +742,10 @@
|
||||
"isSystemDefault": true,
|
||||
"recognizerConfig": {
|
||||
"type": "predefined",
|
||||
"name": "SpacyRecognizer"
|
||||
"name": "SpacyRecognizer",
|
||||
"supportedEntities": [
|
||||
"PERSON"
|
||||
]
|
||||
},
|
||||
"confidenceThreshold": 0.6,
|
||||
"target": "content"
|
||||
|
@ -70,6 +70,13 @@
|
||||
"type": "string"
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"supportedEntities": {
|
||||
"description": "PII (Personally Identifiable Information) tags for classification and detection of sensitive data",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "piiEntity.json"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["type", "name"],
|
||||
|
@ -289,6 +289,11 @@ export interface RecognizerConfig {
|
||||
* Name of the recognizer (defaults to class name if not provided)
|
||||
*/
|
||||
name?: Name;
|
||||
/**
|
||||
* PII (Personally Identifiable Information) tags for classification and detection of
|
||||
* sensitive data
|
||||
*/
|
||||
supportedEntities?: PIIEntity[];
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -261,6 +261,11 @@ export interface RecognizerConfig {
|
||||
* Name of the recognizer (defaults to class name if not provided)
|
||||
*/
|
||||
name?: Name;
|
||||
/**
|
||||
* PII (Personally Identifiable Information) tags for classification and detection of
|
||||
* sensitive data
|
||||
*/
|
||||
supportedEntities?: PIIEntity[];
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -369,6 +369,11 @@ export interface RecognizerConfig {
|
||||
* Name of the recognizer (defaults to class name if not provided)
|
||||
*/
|
||||
name?: Name;
|
||||
/**
|
||||
* PII (Personally Identifiable Information) tags for classification and detection of
|
||||
* sensitive data
|
||||
*/
|
||||
supportedEntities?: PIIEntity[];
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -435,6 +435,11 @@ export interface RecognizerConfig {
|
||||
* Name of the recognizer (defaults to class name if not provided)
|
||||
*/
|
||||
name?: Name;
|
||||
/**
|
||||
* PII (Personally Identifiable Information) tags for classification and detection of
|
||||
* sensitive data
|
||||
*/
|
||||
supportedEntities?: PIIEntity[];
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -23,6 +23,11 @@ export interface PredefinedRecognizer {
|
||||
* Name of the recognizer (defaults to class name if not provided)
|
||||
*/
|
||||
name: Name;
|
||||
/**
|
||||
* PII (Personally Identifiable Information) tags for classification and detection of
|
||||
* sensitive data
|
||||
*/
|
||||
supportedEntities?: PIIEntity[];
|
||||
/**
|
||||
* Language supported by this recognizer (ISO 639-1 code)
|
||||
*/
|
||||
@ -77,3 +82,49 @@ export enum Name {
|
||||
UsPassportRecognizer = "UsPassportRecognizer",
|
||||
UsSsnRecognizer = "UsSsnRecognizer",
|
||||
}
|
||||
|
||||
/**
|
||||
* Enum of PII (Personally Identifiable Information) tags for classification and detection
|
||||
* of sensitive data. Based on Presidio supported entities
|
||||
* (https://microsoft.github.io/presidio/supported_entities/).
|
||||
*/
|
||||
export enum PIIEntity {
|
||||
AuAbn = "AU_ABN",
|
||||
AuAcn = "AU_ACN",
|
||||
AuMedicare = "AU_MEDICARE",
|
||||
AuTfn = "AU_TFN",
|
||||
CreditCard = "CREDIT_CARD",
|
||||
Crypto = "CRYPTO",
|
||||
DateTime = "DATE_TIME",
|
||||
EmailAddress = "EMAIL_ADDRESS",
|
||||
EsNie = "ES_NIE",
|
||||
EsNif = "ES_NIF",
|
||||
FiPersonalIdentityCode = "FI_PERSONAL_IDENTITY_CODE",
|
||||
IPAddress = "IP_ADDRESS",
|
||||
IbanCode = "IBAN_CODE",
|
||||
InAadhaar = "IN_AADHAAR",
|
||||
InPan = "IN_PAN",
|
||||
InPassport = "IN_PASSPORT",
|
||||
InVehicleRegistration = "IN_VEHICLE_REGISTRATION",
|
||||
InVoter = "IN_VOTER",
|
||||
ItDriverLicense = "IT_DRIVER_LICENSE",
|
||||
ItFiscalCode = "IT_FISCAL_CODE",
|
||||
ItIdentityCard = "IT_IDENTITY_CARD",
|
||||
ItPassport = "IT_PASSPORT",
|
||||
ItVatCode = "IT_VAT_CODE",
|
||||
Location = "LOCATION",
|
||||
MedicalLicense = "MEDICAL_LICENSE",
|
||||
Nrp = "NRP",
|
||||
Person = "PERSON",
|
||||
PhoneNumber = "PHONE_NUMBER",
|
||||
PlPesel = "PL_PESEL",
|
||||
SgNricFin = "SG_NRIC_FIN",
|
||||
SgUen = "SG_UEN",
|
||||
URL = "URL",
|
||||
UkNhs = "UK_NHS",
|
||||
UsBankNumber = "US_BANK_NUMBER",
|
||||
UsDriverLicense = "US_DRIVER_LICENSE",
|
||||
UsItin = "US_ITIN",
|
||||
UsPassport = "US_PASSPORT",
|
||||
UsSsn = "US_SSN",
|
||||
}
|
||||
|
@ -206,6 +206,11 @@ export interface RecognizerConfig {
|
||||
* Name of the recognizer (defaults to class name if not provided)
|
||||
*/
|
||||
name?: Name;
|
||||
/**
|
||||
* PII (Personally Identifiable Information) tags for classification and detection of
|
||||
* sensitive data
|
||||
*/
|
||||
supportedEntities?: PIIEntity[];
|
||||
}
|
||||
|
||||
/**
|
||||
|
Loading…
x
Reference in New Issue
Block a user