From c101c7cf3060a5cd2ea016f0276abce4119d9910 Mon Sep 17 00:00:00 2001 From: Aniket Katkar Date: Wed, 27 Nov 2024 14:51:59 +0530 Subject: [PATCH] Minor: Auto classification UI (#18785) * Add the auto classification pipeline option in the service ingestion * Localization changes for other languages * Improve the logic for getSupportedPipelineTypes function and add unit tests for newly added logic * Add playwright tests for the auto classification feature * Improve the getSupportedPipelineTypes function logic to reduce the cognitive complexity * update md docs * Add classificationFilterPattern in the UI schema form order * fix logs from backend for auto classification * Changes to view the auto classification logs * Fix the sonar errors --------- Co-authored-by: Pere Miquel Brull --- .../sdk/PipelineServiceClientInterface.java | 2 + .../e2e/Features/AutoClassification.spec.ts | 118 ++++++++++++++++ .../entity/ingestion/MySqlIngestionClass.ts | 19 +-- .../entity/ingestion/ServiceBaseClass.ts | 8 +- .../ui/playwright/utils/autoClassification.ts | 88 ++++++++++++ .../Database/workflows/autoClassification.md | 126 ++++++++++++++++++ .../en-US/Database/workflows/profiler.md | 22 +-- .../ui/src/constants/Services.constant.ts | 2 + .../ui/src/locale/languages/de-de.json | 1 + .../ui/src/locale/languages/en-us.json | 1 + .../ui/src/locale/languages/es-es.json | 1 + .../ui/src/locale/languages/fr-fr.json | 1 + .../ui/src/locale/languages/gl-es.json | 1 + .../ui/src/locale/languages/he-he.json | 1 + .../ui/src/locale/languages/ja-jp.json | 1 + .../ui/src/locale/languages/nl-nl.json | 1 + .../ui/src/locale/languages/pr-pr.json | 1 + .../ui/src/locale/languages/pt-br.json | 1 + .../ui/src/locale/languages/pt-pt.json | 1 + .../ui/src/locale/languages/ru-ru.json | 1 + .../ui/src/locale/languages/th-th.json | 1 + .../ui/src/locale/languages/zh-cn.json | 1 + .../LogsViewerPage.interfaces.ts | 1 + .../pages/LogsViewerPage/LogsViewerPage.tsx | 23 ++-- 
.../ui/src/utils/IngestionUtils.test.tsx | 101 ++++++++++++++ .../resources/ui/src/utils/IngestionUtils.tsx | 50 +++---- .../src/utils/IngestionWorkflowUtils.test.ts | 5 + .../ui/src/utils/IngestionWorkflowUtils.ts | 7 + 28 files changed, 518 insertions(+), 68 deletions(-) create mode 100644 openmetadata-ui/src/main/resources/ui/playwright/e2e/Features/AutoClassification.spec.ts create mode 100644 openmetadata-ui/src/main/resources/ui/playwright/utils/autoClassification.ts create mode 100644 openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/workflows/autoClassification.md create mode 100644 openmetadata-ui/src/main/resources/ui/src/utils/IngestionUtils.test.tsx diff --git a/openmetadata-spec/src/main/java/org/openmetadata/sdk/PipelineServiceClientInterface.java b/openmetadata-spec/src/main/java/org/openmetadata/sdk/PipelineServiceClientInterface.java index e4065fc8853..3de12be69c7 100644 --- a/openmetadata-spec/src/main/java/org/openmetadata/sdk/PipelineServiceClientInterface.java +++ b/openmetadata-spec/src/main/java/org/openmetadata/sdk/PipelineServiceClientInterface.java @@ -52,6 +52,8 @@ public interface PipelineServiceClientInterface { "ingestion_task", PipelineType.PROFILER.toString(), "profiler_task", + PipelineType.AUTO_CLASSIFICATION.toString(), + "auto_classification_task", PipelineType.LINEAGE.toString(), "lineage_task", PipelineType.DBT.toString(), diff --git a/openmetadata-ui/src/main/resources/ui/playwright/e2e/Features/AutoClassification.spec.ts b/openmetadata-ui/src/main/resources/ui/playwright/e2e/Features/AutoClassification.spec.ts new file mode 100644 index 00000000000..b08cd8b4a1b --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/playwright/e2e/Features/AutoClassification.spec.ts @@ -0,0 +1,118 @@ +/* + * Copyright 2024 Collate. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import test from '@playwright/test'; +import { PLAYWRIGHT_INGESTION_TAG_OBJ } from '../../constant/config'; +import MysqlIngestionClass from '../../support/entity/ingestion/MySqlIngestionClass'; +import { addAndTriggerAutoClassificationPipeline } from '../../utils/autoClassification'; +import { redirectToHomePage } from '../../utils/common'; +import { settingClick, SettingOptionsType } from '../../utils/sidebar'; + +const mysqlService = new MysqlIngestionClass(['sensitive_customers']); + +// use the admin user to login +test.use({ + storageState: 'playwright/.auth/admin.json', + trace: process.env.PLAYWRIGHT_IS_OSS ? 'off' : 'on-first-retry', + video: process.env.PLAYWRIGHT_IS_OSS ? 
'on' : 'off', +}); + +test.describe.configure({ + // 11 minutes max for ingestion tests + timeout: 11 * 60 * 1000, +}); + +test.describe('Auto Classification', PLAYWRIGHT_INGESTION_TAG_OBJ, async () => { + test('should be able to auto classify data', async ({ page }) => { + await redirectToHomePage(page); + await settingClick( + page, + mysqlService.category as unknown as SettingOptionsType + ); + + // Create and ingest service data + await mysqlService.createService(page); + + await addAndTriggerAutoClassificationPipeline(page, mysqlService); + + // Check if the classification is successful + const getDatabases = page.waitForResponse( + (response) => + response.url().includes('/api/v1/databases?service=') && + response.request().method() === 'GET' && + response.status() === 200 + ); + + // Click on databases tab + await page.click('.ant-tabs-nav-list [data-testid="databases"]'); + + await getDatabases; + + // Click on the database name + await page + .getByTestId('child-asset-name-link') + .getByText('default') + .click(); + + await page.waitForSelector('[data-testid="cypress_integrations_test_db"]'); + + // Click on the database schema name + await page.getByTestId('cypress_integrations_test_db').click(); + + await page.waitForSelector('[data-testid="sensitive_customers"]'); + + // Click on the table name + await page.getByTestId('sensitive_customers').click(); + + // Verify the sensitive tags + await test + .expect( + page.locator( + `[data-row-key*="user_name"] [data-testid="tag-PII.Sensitive"] ` + ) + ) + .toBeAttached(); + + await test + .expect( + page.locator(`[data-row-key*="SSN"] [data-testid="tag-PII.Sensitive"] `) + ) + .toBeAttached(); + + await test + .expect( + page.locator( + `[data-row-key*="DWH_X10"] [data-testid="tag-PII.Sensitive"] ` + ) + ) + .toBeAttached(); + + mysqlService.name; + + // Verify the non sensitive tags + await test + .expect( + page.locator( + `[data-row-key*="address"] [data-testid="tag-PII.NonSensitive"] ` + ) + ) + 
.toBeAttached(); + + // Delete the created service + await settingClick( + page, + mysqlService.category as unknown as SettingOptionsType + ); + await mysqlService.deleteService(page); + }); +}); diff --git a/openmetadata-ui/src/main/resources/ui/playwright/support/entity/ingestion/MySqlIngestionClass.ts b/openmetadata-ui/src/main/resources/ui/playwright/support/entity/ingestion/MySqlIngestionClass.ts index 1a222ae717e..d71358f6058 100644 --- a/openmetadata-ui/src/main/resources/ui/playwright/support/entity/ingestion/MySqlIngestionClass.ts +++ b/openmetadata-ui/src/main/resources/ui/playwright/support/entity/ingestion/MySqlIngestionClass.ts @@ -33,17 +33,18 @@ import { import ServiceBaseClass from './ServiceBaseClass'; class MysqlIngestionClass extends ServiceBaseClass { - name: string; + name = ''; tableFilter: string[]; profilerTable = 'alert_entity'; - constructor() { - super( - Services.Database, - `pw-mysql-with-%-${uuid()}`, - 'Mysql', - 'bot_entity' - ); - this.tableFilter = ['bot_entity', 'alert_entity', 'chart_entity']; + constructor(tableFilter?: string[]) { + const serviceName = `pw-mysql-with-%-${uuid()}`; + super(Services.Database, serviceName, 'Mysql', 'bot_entity'); + this.name = serviceName; + this.tableFilter = tableFilter ?? 
[ + 'bot_entity', + 'alert_entity', + 'chart_entity', + ]; } async createService(page: Page) { diff --git a/openmetadata-ui/src/main/resources/ui/playwright/support/entity/ingestion/ServiceBaseClass.ts b/openmetadata-ui/src/main/resources/ui/playwright/support/entity/ingestion/ServiceBaseClass.ts index 6b0afb54bcb..4de563f6313 100644 --- a/openmetadata-ui/src/main/resources/ui/playwright/support/entity/ingestion/ServiceBaseClass.ts +++ b/openmetadata-ui/src/main/resources/ui/playwright/support/entity/ingestion/ServiceBaseClass.ts @@ -98,10 +98,10 @@ class ServiceBaseClass { await testConnection(page); } - await this.submitService(this.serviceName, page); + await this.submitService(page); if (this.shouldAddIngestion) { - await this.addIngestionPipeline(this.serviceName, page); + await this.addIngestionPipeline(page); } } @@ -149,7 +149,7 @@ class ServiceBaseClass { // Handle validate ingestion details in respective service here } - async addIngestionPipeline(serviceName: string, page: Page) { + async addIngestionPipeline(page: Page) { await page.click('[data-testid="add-ingestion-button"]'); // Add ingestion page @@ -191,7 +191,7 @@ class ServiceBaseClass { await this.handleIngestionRetry('metadata', page); } - async submitService(serviceName: string, page: Page) { + async submitService(page: Page) { await page.click('[data-testid="submit-btn"]'); await page.waitForSelector('[data-testid="success-line"]', { state: 'visible', diff --git a/openmetadata-ui/src/main/resources/ui/playwright/utils/autoClassification.ts b/openmetadata-ui/src/main/resources/ui/playwright/utils/autoClassification.ts new file mode 100644 index 00000000000..0a0190261da --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/playwright/utils/autoClassification.ts @@ -0,0 +1,88 @@ +/* + * Copyright 2024 Collate. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { Page } from '@playwright/test'; +import MysqlIngestionClass from '../support/entity/ingestion/MySqlIngestionClass'; +import { getApiContext, toastNotification } from './common'; +import { visitServiceDetailsPage } from './service'; + +export const addAndTriggerAutoClassificationPipeline = async ( + page: Page, + mysqlService: MysqlIngestionClass +) => { + const { apiContext } = await getApiContext(page); + + await visitServiceDetailsPage( + page, + { + type: mysqlService.category, + name: mysqlService.name, + displayName: mysqlService.name, + }, + true + ); + + // Add auto classification ingestion + await page.click('[data-testid="ingestions"]'); + + await page.click('[data-testid="add-new-ingestion-button"]'); + + await page.waitForSelector('[data-menu-id*="autoClassification"]'); + + await page.click('[data-menu-id*="autoClassification"]'); + + // Fill the auto classification form details + await page.waitForSelector('#root\\/tableFilterPattern\\/includes'); + + await mysqlService.fillIngestionDetails(page); + + await page.click('#root\\/enableAutoClassification'); + + await page.click('[data-testid="submit-btn"]'); + + // Make sure we create ingestion with None schedule to avoid conflict between Airflow and Argo behavior + await mysqlService.scheduleIngestion(page); + + await page.click('[data-testid="view-service-button"]'); + + // Header available once page loads + await page.getByTestId('loader').waitFor({ state: 'detached' }); + await page.getByTestId('ingestions').click(); + await page + .getByLabel('Ingestions') + 
.getByTestId('loader') + .waitFor({ state: 'detached' }); + + const response = await apiContext + .get( + `/api/v1/services/ingestionPipelines?service=${encodeURIComponent( + mysqlService.name + )}&pipelineType=autoClassification&serviceType=databaseService&limit=1` + ) + .then((res) => res.json()); + + // need manual wait to settle down the deployed pipeline, before triggering the pipeline + await page.waitForTimeout(3000); + + await page.click( + `[data-row-key*="${response.data[0].name}"] [data-testid="more-actions"]` + ); + await page.getByTestId('run-button').click(); + + await toastNotification(page, `Pipeline triggered successfully!`); + + // need manual wait to make sure we are awaiting on latest run results + await page.waitForTimeout(2000); + + await mysqlService.handleIngestionRetry('autoClassification', page); +}; diff --git a/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/workflows/autoClassification.md b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/workflows/autoClassification.md new file mode 100644 index 00000000000..fc83c0ff351 --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/workflows/autoClassification.md @@ -0,0 +1,126 @@ +# Auto Classification + +Auto Classification Pipeline Configuration. + +The main goal of this pipeline is bringing in Sample Data from your sources, as well as using NLP models to +automatically classify your data based on PII (Personally Identifiable Information) and other sensitive information. + +## Configuration + +$$section +### Database Filter Pattern $(id="databaseFilterPattern") + +Database filter patterns to control whether to include database as part of metadata ingestion. + +**Include**: Explicitly include databases by adding a list of regular expressions to the `Include` field. OpenMetadata will include all databases with names matching one or more of the supplied regular expressions. All other databases will be excluded. 
+ +For example, to include only those databases whose name starts with the word `demo`, add the regex pattern in the include field as `^demo.*`. + +**Exclude**: Explicitly exclude databases by adding a list of regular expressions to the `Exclude` field. OpenMetadata will exclude all databases with names matching one or more of the supplied regular expressions. All other databases will be included. + +For example, to exclude all databases with the name containing the word `demo`, add the regex pattern in the exclude field as `.*demo.*`. + +Checkout [this](https://docs.open-metadata.org/connectors/ingestion/workflows/metadata/filter-patterns/database#database-filter-pattern) document for further examples on database filter patterns. +$$ + +$$section +### Schema Filter Pattern $(id="schemaFilterPattern") + +Schema filter patterns are used to control whether to include schemas as part of metadata ingestion. + +**Include**: Explicitly include schemas by adding a list of regular expressions to the `Include` field. OpenMetadata will include all schemas with names matching one or more of the supplied regular expressions. All other schemas will be excluded. + +For example, to include only those schemas whose name starts with the word `demo`, add the regex pattern in the include field as `^demo.*`. + +**Exclude**: Explicitly exclude schemas by adding a list of regular expressions to the `Exclude` field. OpenMetadata will exclude all schemas with names matching one or more of the supplied regular expressions. All other schemas will be included. + +For example, to exclude all schemas with the name containing the word `demo`, add regex pattern in the exclude field as `.*demo.*`. + +Checkout [this](https://docs.open-metadata.org/connectors/ingestion/workflows/metadata/filter-patterns/database#database-filter-pattern) document for further examples on schema filter patterns. 
+$$ + +$$section +### Table Filter Pattern $(id="tableFilterPattern") + +Table filter patterns are used to control whether to include tables as part of metadata ingestion. + +**Include**: Explicitly include tables by adding a list of regular expressions to the `Include` field. OpenMetadata will include all tables with names matching one or more of the supplied regular expressions. All other tables will be excluded. + +For example, to include only those tables whose name starts with the word `demo`, add the regex pattern in the include field as `^demo.*`. + +**Exclude**: Explicitly exclude tables by adding a list of regular expressions to the `Exclude` field. OpenMetadata will exclude all tables with names matching one or more of the supplied regular expressions. All other tables will be included. + +For example, to exclude all tables with the name containing the word `demo`, add the regex pattern in the exclude field as `.*demo.*`. + +Checkout [this](https://docs.open-metadata.org/connectors/ingestion/workflows/metadata/filter-patterns/database#table-filter-pattern) document for further examples on table filter patterns. +$$ + +$$section +### Enable Debug Logs $(id="enableDebugLog") + +Set the `Enable Debug Log` toggle to set the logging level of the process to debug. You can check these logs in the Ingestion tab of the service and dig deeper into any errors you might find. +$$ + +$$section +### Include Views $(id="includeViews") +If activated the profiler will compute metric for view entity types. Note that it can have a negative impact on the profiler performance. +$$ + +$$section +### Use FQN For Filtering Views $(id="useFqnForFiltering") +Set this flag when you want to apply the filters on Fully Qualified Names (e.g `service_name.db_name.schema_name.table_name`) instead of applying them to the raw name of the asset (e.g `table_name`). 
+ +This flag is useful in scenarios when you have different schemas with same name in multiple databases, or tables with same name in different schemas, and you want to filter out only one of them. + +Check out [this](https://docs.open-metadata.org/connectors/ingestion/workflows/metadata/filter-patterns/database#table-filter-pattern) document for further examples on how to use this field. +$$ + + +$$section +### Store Sample Data $(id="storeSampleData") + +Set the Store Sample Data toggle to control whether to store sample data as part of Auto Classification workflow. If this is enabled, 100 rows will be ingested by default. You can update the number of rows in the "DatabaseServiceProfilerPipeline Advanced Config" section (i.e. `Sample Data Rows Count` setting). + +If disabled, OpenMetadata will not store any sample data, but will still use it on-the-fly to compute the Auto Classification. +$$ + +$$section +### Enable Auto Classification $(id="enableAutoClassification") + +Set the Enable Auto Classification toggle to control whether to automatically tag columns that might contain sensitive information. + +Use the `Confidence` setting to set the confidence level when inferring the tags for a column. +$$ + +$$section +### PII Inference Confidence Level $(id="confidence") +Confidence level to use when inferring whether the classification should be applied to a column or not (between 0 and 100). A number closer to 100 will yield fewer false positives but potentially more false negatives. +$$ + +$$section +### Profile Sample Type $(id="profileSampleType") +The sample type can be set to either: + +* **Percentage**: this will use a percentage to sample the table (e.g. if table has 100 rows, and we set sample percentage to 50%, the profiler will use 50 random rows to compute the metrics). +* **Row Count**: this will use a number of rows to sample the table (e.g. if table has 100 rows, and we set row count to 10, the profiler will use 10 random rows to compute the metrics).
+ $$ + +$$section +### Profile Sample $(id="profileSample") +Percentage of data or number of rows to use when sampling tables to compute the profiler metrics. By default (i.e. if left blank), the profiler will run against the entire table. +$$ + +$$section +### Sampling Method Type $(id="samplingMethodType") + +**This parameter is effective for Snowflake only** + +The sampling method type can be set to **BERNOULLI** or **SYSTEM**. You can find the difference between the two values in the Snowflake documentation. When you choose **BERNOULLI**, it will scan all rows in the table even if a small value is set for the **Profile Sample**. However, it has fewer restrictions than **SYSTEM**. + +If no option is chosen, the default is **BERNOULLI**. +$$ + +$$section +### Sample Data Rows Count $(id="sampleDataCount") +Set the number of rows to ingest when the `Store Sample Data` toggle is on. Defaults to 50. +$$ diff --git a/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/workflows/profiler.md b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/workflows/profiler.md index 253fa8542c2..4f716d3e44f 100644 --- a/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/workflows/profiler.md +++ b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/workflows/profiler.md @@ -74,27 +74,12 @@ This Flag is useful in scenarios when you have different schemas with same name Checkout [this](https://docs.open-metadata.org/connectors/ingestion/workflows/metadata/filter-patterns/database#table-filter-pattern) document for further examples on how to use this field. $$ -$$section -### Ingest Sample Data $(id="generateSampleData") - -Set the Ingest Sample Data toggle to control whether to ingest sample data as part of profiler ingestion. If this is enabled, 100 rows will be ingested by default. You can update the number of rows in the "DatabaseServiceProfilerPipeline Advanced Config" section (i.e. `Sample Data Rows Count` setting).
-$$ - $$section ### Compute Metrics $(id="computeMetrics") Set the `Compute Metrics` toggle off to not perform any metric computation during the profiler ingestion workflow. Used in combination with `Ingest Sample Data` toggle on allows you to only ingest sample data. $$ -$$section -### Auto Tag PII $(id="processPiiSensitive") - -Set the `Auto Tag PII` toggle to control whether to automatically tag columns that might contain sensitive information as part of profiler ingestion. - -If `Ingest Sample Data` is enabled, OpenMetadata will leverage machine learning to infer which column may contain PII sensitive data. If disabled, OpenMetadata will infer this information from the column name. Use the `Confidence` setting in the "DatabaseServiceProfilerPipeline Advanced Config" to set the confience level when infering the PII status of a column. -$$ - - $$section ### Profile Sample Type $(id="profileSampleType") The sample type can be set to either: @@ -115,12 +100,7 @@ $$section The sampling method type can be set to **BERNOULLI** or **SYSTEM**. You can find the difference of two values in the document of the Snowflake. When you choice **BERNOULLI**, it will scan full rows in the table even though small value is set at the **Profile Sample**. However, it has less restlictions than **SYSTEM**. -If no option is choiced, the default is **BERNOULLI**. -$$ - -$$section -### PII Inference Confidence Level $(id="confidence") -Confidence level to use when infering whether a column shoul be flagged as PII or not (between 0 and 100). A number closer to 100 will yield less false positive but potentially more false negative. +If no option is chosen, the default is **BERNOULLI**. 
$$ $$section diff --git a/openmetadata-ui/src/main/resources/ui/src/constants/Services.constant.ts b/openmetadata-ui/src/main/resources/ui/src/constants/Services.constant.ts index da8b7ef85f8..7b415b948dc 100644 --- a/openmetadata-ui/src/main/resources/ui/src/constants/Services.constant.ts +++ b/openmetadata-ui/src/main/resources/ui/src/constants/Services.constant.ts @@ -323,6 +323,7 @@ export const INGESTION_WORKFLOW_UI_SCHEMA = { 'databaseFilterPattern', 'schemaFilterPattern', 'tableFilterPattern', + 'classificationFilterPattern', 'enableDebugLog', '*', ], @@ -493,5 +494,6 @@ export const SERVICE_INGESTION_PIPELINE_TYPES = [ PipelineType.Usage, PipelineType.Lineage, PipelineType.Profiler, + PipelineType.AutoClassification, PipelineType.Dbt, ]; diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/de-de.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/de-de.json index 075c327fd96..e2a9564b212 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/de-de.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/de-de.json @@ -115,6 +115,7 @@ "authentication-uri": "Authentifizierungs-URI", "authority": "Behörde", "authorize-app": "Authorize {{app}}", + "auto-classification": "Automatische Klassifizierung", "auto-pii-confidence-score": "Auto PII-Vertrauensscore", "auto-tag-pii-uppercase": "Auto PII-Tag", "automatically-generate": "Automatisch generieren", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/en-us.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/en-us.json index e55e0e28aa0..8d150c97f42 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/en-us.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/en-us.json @@ -115,6 +115,7 @@ "authentication-uri": "Authentication URI", "authority": "Authority", "authorize-app": "Authorize {{app}}", + "auto-classification": "Auto Classification", "auto-pii-confidence-score": "Auto PII 
Confidence Score", "auto-tag-pii-uppercase": "Auto Tag PII", "automatically-generate": "Automatically Generate", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/es-es.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/es-es.json index adce69ecd72..399adb1786d 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/es-es.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/es-es.json @@ -115,6 +115,7 @@ "authentication-uri": "URI de autenticación", "authority": "Autoridad", "authorize-app": "Autorizar {{app}}", + "auto-classification": "Clasificación automática", "auto-pii-confidence-score": "Nivel de Confianza de Auto PII", "auto-tag-pii-uppercase": "Etiqueta de información personal identificable automática", "automatically-generate": "Generar automáticamente", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/fr-fr.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/fr-fr.json index 51f87d2283b..6df0cd1ca7e 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/fr-fr.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/fr-fr.json @@ -115,6 +115,7 @@ "authentication-uri": "URI d'Authentification", "authority": "Autorité", "authorize-app": "Authorize {{app}}", + "auto-classification": "Classification Automatique", "auto-pii-confidence-score": "Score de Confiance Auto PII", "auto-tag-pii-uppercase": "Balise Auto PII", "automatically-generate": "Générer Automatiquement", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/gl-es.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/gl-es.json index 5c24cc04e4d..2762cad96c0 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/gl-es.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/gl-es.json @@ -115,6 +115,7 @@ "authentication-uri": "URI de autenticación", "authority": "Autoridade", "authorize-app": "Autorizar 
{{app}}", + "auto-classification": "Clasificación automática", "auto-pii-confidence-score": "Puntuación de confianza automática de PII", "auto-tag-pii-uppercase": "Etiquetado automático de PII", "automatically-generate": "Xerar automaticamente", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/he-he.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/he-he.json index f0e13a3ee9a..68169146c27 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/he-he.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/he-he.json @@ -115,6 +115,7 @@ "authentication-uri": "URI אימות", "authority": "רשות", "authorize-app": "אמת את {{app}}", + "auto-classification": "סיווג אוטומטי", "auto-pii-confidence-score": "ציון ביטחון PII אוטומטי", "auto-tag-pii-uppercase": "תיוג PII אוטומטי", "automatically-generate": "צור באופן אוטומטי", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ja-jp.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ja-jp.json index b2b02f7b9c8..8d610313d94 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ja-jp.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ja-jp.json @@ -115,6 +115,7 @@ "authentication-uri": "認証URI", "authority": "Authority", "authorize-app": "Authorize {{app}}", + "auto-classification": "自動分類", "auto-pii-confidence-score": "Auto PII Confidence Score", "auto-tag-pii-uppercase": "自動PIIタグ", "automatically-generate": "自動生成", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/nl-nl.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/nl-nl.json index 67fdab03b92..8ea7a551228 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/nl-nl.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/nl-nl.json @@ -115,6 +115,7 @@ "authentication-uri": "Authenticatie-URI", "authority": "Autoriteit", "authorize-app": "Applicatie autoriseren 
{{app}}", + "auto-classification": "Automatische classificatie", "auto-pii-confidence-score": "Automatische PII-vertrouwensscore", "auto-tag-pii-uppercase": "Automatisch taggen van PII", "automatically-generate": "Automatisch genereren", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/pr-pr.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/pr-pr.json index 5595dac5602..1c1e47b9ec7 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/pr-pr.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/pr-pr.json @@ -115,6 +115,7 @@ "authentication-uri": "آدرس URI احراز هویت", "authority": "مرجع", "authorize-app": "مجوز دادن به {{app}}", + "auto-classification": "طبقه‌بندی خودکار", "auto-pii-confidence-score": "امتیاز اعتماد PII خودکار", "auto-tag-pii-uppercase": "برچسب PII خودکار", "automatically-generate": "تولید خودکار", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-br.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-br.json index 5afc59b10c7..4f42b5d45c7 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-br.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-br.json @@ -115,6 +115,7 @@ "authentication-uri": "URI de Autenticação", "authority": "Autoridade", "authorize-app": "Autorizar {{app}}", + "auto-classification": "Classificação Automática", "auto-pii-confidence-score": "Pontuação de Confiança Automática PII", "auto-tag-pii-uppercase": "Auto Tag PII", "automatically-generate": "Gerar Automaticamente", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-pt.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-pt.json index 531e4268c43..ba4ba1eea95 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-pt.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-pt.json @@ -115,6 +115,7 @@ "authentication-uri": "URI de 
Autenticação", "authority": "Autoridade", "authorize-app": "Autorizar {{app}}", + "auto-classification": "Classificação Automática", "auto-pii-confidence-score": "Pontuação de Confiança Automática PII", "auto-tag-pii-uppercase": "Etiqueta Automática PII", "automatically-generate": "Gerar Automaticamente", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ru-ru.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ru-ru.json index cee404982cd..29c89cd19c5 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ru-ru.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ru-ru.json @@ -115,6 +115,7 @@ "authentication-uri": "URI аутентификации", "authority": "Власть", "authorize-app": "Authorize {{app}}", + "auto-classification": "Автоклассификация", "auto-pii-confidence-score": "Оценка достоверности Auto PII", "auto-tag-pii-uppercase": "Автотег PII", "automatically-generate": "Автоматически генерировать", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/th-th.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/th-th.json index 6a2db111906..a874fea3583 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/th-th.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/th-th.json @@ -115,6 +115,7 @@ "authentication-uri": "URI การรับรอง", "authority": "อำนาจ", "authorize-app": "อนุญาต {{app}}", + "auto-classification": "การจำแนกประเภทอัตโนมัติ", "auto-pii-confidence-score": "คะแนนความมั่นใจ PII อัตโนมัติ", "auto-tag-pii-uppercase": "แท็ก PII อัตโนมัติ", "automatically-generate": "สร้างโดยอัตโนมัติ", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-cn.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-cn.json index 09d93a1eec5..820c976160b 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-cn.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-cn.json @@ 
-115,6 +115,7 @@ "authentication-uri": "鉴权 URI", "authority": "授权", "authorize-app": "授权{{app}}", + "auto-classification": "自动分类", "auto-pii-confidence-score": "自动计算 PII 信任值", "auto-tag-pii-uppercase": "自动标记 PII", "automatically-generate": "自动生成", diff --git a/openmetadata-ui/src/main/resources/ui/src/pages/LogsViewerPage/LogsViewerPage.interfaces.ts b/openmetadata-ui/src/main/resources/ui/src/pages/LogsViewerPage/LogsViewerPage.interfaces.ts index f3c6e41797e..661a63cb565 100644 --- a/openmetadata-ui/src/main/resources/ui/src/pages/LogsViewerPage/LogsViewerPage.interfaces.ts +++ b/openmetadata-ui/src/main/resources/ui/src/pages/LogsViewerPage/LogsViewerPage.interfaces.ts @@ -21,6 +21,7 @@ export interface IngestionPipelineLogByIdInterface { data_insight_task?: string; dbt_task?: string; elasticsearch_reindex_task?: string; + auto_classification_task?: string; total?: string; after?: string; } diff --git a/openmetadata-ui/src/main/resources/ui/src/pages/LogsViewerPage/LogsViewerPage.tsx b/openmetadata-ui/src/main/resources/ui/src/pages/LogsViewerPage/LogsViewerPage.tsx index a195ad44608..530ce44a358 100644 --- a/openmetadata-ui/src/main/resources/ui/src/pages/LogsViewerPage/LogsViewerPage.tsx +++ b/openmetadata-ui/src/main/resources/ui/src/pages/LogsViewerPage/LogsViewerPage.tsx @@ -112,40 +112,45 @@ const LogsViewerPage = () => { switch (pipelineType || ingestionDetails?.pipelineType) { case PipelineType.Metadata: - setLogs(logs.concat(res.data?.ingestion_task || '')); + setLogs(logs.concat(res.data?.ingestion_task ?? '')); break; case PipelineType.Application: - setLogs(logs.concat(res.data?.application_task || '')); + setLogs(logs.concat(res.data?.application_task ?? '')); break; case PipelineType.Profiler: - setLogs(logs.concat(res.data?.profiler_task || '')); + setLogs(logs.concat(res.data?.profiler_task ?? '')); break; case PipelineType.Usage: - setLogs(logs.concat(res.data?.usage_task || '')); + setLogs(logs.concat(res.data?.usage_task ?? 
'')); break; case PipelineType.Lineage: - setLogs(logs.concat(res.data?.lineage_task || '')); + setLogs(logs.concat(res.data?.lineage_task ?? '')); break; case PipelineType.Dbt: - setLogs(logs.concat(res.data?.dbt_task || '')); + setLogs(logs.concat(res.data?.dbt_task ?? '')); break; case PipelineType.TestSuite: - setLogs(logs.concat(res.data?.test_suite_task || '')); + setLogs(logs.concat(res.data?.test_suite_task ?? '')); break; case PipelineType.DataInsight: - setLogs(logs.concat(res.data?.data_insight_task || '')); + setLogs(logs.concat(res.data?.data_insight_task ?? '')); break; case PipelineType.ElasticSearchReindex: - setLogs(logs.concat(res.data?.elasticsearch_reindex_task || '')); + setLogs(logs.concat(res.data?.elasticsearch_reindex_task ?? '')); + + break; + + case PipelineType.AutoClassification: + setLogs(logs.concat(res.data?.auto_classification_task ?? '')); break; diff --git a/openmetadata-ui/src/main/resources/ui/src/utils/IngestionUtils.test.tsx b/openmetadata-ui/src/main/resources/ui/src/utils/IngestionUtils.test.tsx new file mode 100644 index 00000000000..451eacaa839 --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/src/utils/IngestionUtils.test.tsx @@ -0,0 +1,101 @@ +/* + * Copyright 2024 Collate. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { PipelineType } from '../generated/api/services/ingestionPipelines/createIngestionPipeline'; +import { DatabaseServiceType } from '../generated/entity/services/databaseService'; +import { MetadataServiceType } from '../generated/entity/services/metadataService'; +import { ServicesType } from '../interface/service.interface'; +import { getSupportedPipelineTypes } from './IngestionUtils'; + +describe('getSupportedPipelineTypes', () => { + it('should return only return metadata pipeline types if config is undefined', () => { + const serviceDetails = {}; + const result = getSupportedPipelineTypes(serviceDetails as ServicesType); + + expect(result).toEqual([PipelineType.Metadata]); + }); + + it('should return supported pipeline types based on config', () => { + const serviceDetails: ServicesType = { + id: '', + name: '', + serviceType: DatabaseServiceType.Athena, + connection: { + config: { + supportsMetadataExtraction: true, + supportsUsageExtraction: true, + supportsLineageExtraction: true, + supportsProfiler: true, + supportsDBTExtraction: true, + supportsViewLineageExtraction: true, + }, + }, + }; + const result = getSupportedPipelineTypes(serviceDetails); + + expect(result).toEqual([ + PipelineType.Metadata, + PipelineType.Usage, + PipelineType.Lineage, + PipelineType.Profiler, + PipelineType.AutoClassification, + PipelineType.Dbt, + ]); + }); + + it('should return empty array if no pipeline types are supported', () => { + const serviceDetails = { + id: '', + name: '', + serviceType: DatabaseServiceType.Athena, + connection: { + config: {}, + }, + }; + const result = getSupportedPipelineTypes(serviceDetails); + + expect(result).toEqual([]); + }); + + it('should include DataInsight if supportsDataInsightExtraction is true', () => { + const serviceDetails: ServicesType = { + id: '', + name: '', + serviceType: MetadataServiceType.Alation, + connection: { + config: { + supportsDataInsightExtraction: true, + }, + }, + }; + const result = 
getSupportedPipelineTypes(serviceDetails); + + expect(result).toContain(PipelineType.DataInsight); + }); + + it('should include ElasticSearchReindex if supportsElasticSearchReindexingExtraction is true', () => { + const serviceDetails = { + id: '', + name: '', + serviceType: MetadataServiceType.AlationSink, + connection: { + config: { + supportsElasticSearchReindexingExtraction: true, + }, + }, + }; + const result = getSupportedPipelineTypes(serviceDetails); + + expect(result).toContain(PipelineType.ElasticSearchReindex); + }); +}); diff --git a/openmetadata-ui/src/main/resources/ui/src/utils/IngestionUtils.tsx b/openmetadata-ui/src/main/resources/ui/src/utils/IngestionUtils.tsx index e3ac7317cf7..9e50c736e00 100644 --- a/openmetadata-ui/src/main/resources/ui/src/utils/IngestionUtils.tsx +++ b/openmetadata-ui/src/main/resources/ui/src/utils/IngestionUtils.tsx @@ -14,7 +14,7 @@ import { Typography } from 'antd'; import { ExpandableConfig } from 'antd/lib/table/interface'; import { t } from 'i18next'; -import { isEmpty, isUndefined, startCase } from 'lodash'; +import { isEmpty, isUndefined, startCase, uniq } from 'lodash'; import { ServiceTypes } from 'Models'; import React from 'react'; import ErrorPlaceHolder from '../components/common/ErrorWithPlaceholder/ErrorPlaceHolder'; @@ -44,7 +44,6 @@ import { IngestionPipeline, StepSummary, } from '../generated/entity/services/ingestionPipelines/ingestionPipeline'; -import { Connection as MetadataConnection } from '../generated/entity/services/metadataService'; import { SearchSourceAlias } from '../interface/search.interface'; import { DataObj, ServicesType } from '../interface/service.interface'; import { Transi18next } from './CommonUtils'; @@ -141,32 +140,33 @@ export const getBreadCrumbsArray = ( }; export const getSupportedPipelineTypes = (serviceDetails: ServicesType) => { - let pipelineType = []; + const pipelineType: PipelineType[] = []; const config = serviceDetails?.connection?.config as Connection; - if (config) 
{ - config?.supportsMetadataExtraction && - pipelineType.push(PipelineType.Metadata); - config?.supportsUsageExtraction && pipelineType.push(PipelineType.Usage); - (config?.supportsLineageExtraction || - config?.supportsViewLineageExtraction) && - pipelineType.push(PipelineType.Lineage); - config?.supportsProfiler && pipelineType.push(PipelineType.Profiler); - config?.supportsDBTExtraction && pipelineType.push(PipelineType.Dbt); - (config as MetadataConnection)?.supportsDataInsightExtraction && - pipelineType.push(PipelineType.DataInsight); - (config as MetadataConnection)?.supportsElasticSearchReindexingExtraction && - pipelineType.push(PipelineType.ElasticSearchReindex); - } else { - pipelineType = [ - PipelineType.Metadata, - PipelineType.Usage, - PipelineType.Lineage, - PipelineType.Profiler, - PipelineType.Dbt, - ]; + + if (isUndefined(config)) { + return [PipelineType.Metadata]; } - return pipelineType; + const pipelineMapping: { [key: string]: PipelineType[] } = { + supportsMetadataExtraction: [PipelineType.Metadata], + supportsUsageExtraction: [PipelineType.Usage], + supportsLineageExtraction: [PipelineType.Lineage], + supportsViewLineageExtraction: [PipelineType.Lineage], + supportsProfiler: [PipelineType.Profiler, PipelineType.AutoClassification], + supportsDBTExtraction: [PipelineType.Dbt], + supportsDataInsightExtraction: [PipelineType.DataInsight], + supportsElasticSearchReindexingExtraction: [ + PipelineType.ElasticSearchReindex, + ], + }; + + Object.keys(pipelineMapping).forEach((key) => { + if (config[key as keyof Connection]) { + pipelineType.push(...pipelineMapping[key]); + } + }); + + return uniq(pipelineType); }; export const getIngestionTypes = ( diff --git a/openmetadata-ui/src/main/resources/ui/src/utils/IngestionWorkflowUtils.test.ts b/openmetadata-ui/src/main/resources/ui/src/utils/IngestionWorkflowUtils.test.ts index 2b9df829bfd..ae816511033 100644 --- a/openmetadata-ui/src/main/resources/ui/src/utils/IngestionWorkflowUtils.test.ts +++ 
b/openmetadata-ui/src/main/resources/ui/src/utils/IngestionWorkflowUtils.test.ts @@ -104,10 +104,15 @@ describe('Ingestion Workflow tests', () => { WorkflowType.Usage, ServiceCategory.PIPELINE_SERVICES ); + const autoClassificationSchema = getSchemaByWorkflowType( + WorkflowType.AutoClassification, + ServiceCategory.DATABASE_SERVICES + ); expect(metadataSchema).toBeDefined(); expect(profilerSchema).toBeDefined(); expect(usageSchema).toBeDefined(); + expect(autoClassificationSchema).toBeDefined(); }); it('should getSchemaByWorkflowType return a default object with for an unknown workflow type', () => { diff --git a/openmetadata-ui/src/main/resources/ui/src/utils/IngestionWorkflowUtils.ts b/openmetadata-ui/src/main/resources/ui/src/utils/IngestionWorkflowUtils.ts index ec38b82d1df..88edea79e9f 100644 --- a/openmetadata-ui/src/main/resources/ui/src/utils/IngestionWorkflowUtils.ts +++ b/openmetadata-ui/src/main/resources/ui/src/utils/IngestionWorkflowUtils.ts @@ -19,6 +19,7 @@ import { } from '../generated/api/services/ingestionPipelines/createIngestionPipeline'; import apiServiceMetadataPipeline from '../jsons/ingestionSchemas/apiServiceMetadataPipeline.json'; import dashboardMetadataPipeline from '../jsons/ingestionSchemas/dashboardServiceMetadataPipeline.json'; +import databaseAutoClassificationPipeline from '../jsons/ingestionSchemas/databaseServiceAutoClassificationPipeline.json'; import databaseMetadataPipeline from '../jsons/ingestionSchemas/databaseServiceMetadataPipeline.json'; import databaseProfilerPipeline from '../jsons/ingestionSchemas/databaseServiceProfilerPipeline.json'; import databaseLineagePipeline from '../jsons/ingestionSchemas/databaseServiceQueryLineagePipeline.json'; @@ -98,6 +99,12 @@ export const getSchemaByWorkflowType = ( ...databaseProfilerPipeline, }; + break; + case WorkflowType.AutoClassification: + schema = { + ...databaseAutoClassificationPipeline, + }; + break; case WorkflowType.Usage: schema = {