mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-31 04:14:34 +00:00
Minor: Auto classification UI (#18785)
* Add the auto classification pipeline option in the service ingestion * Localization changes for other languages * Improve the logic for getSupportedPipelineTypes function and add unit tests for newly added logic * Add playwright tests for the auto classification feature * Improve the getSupportedPipelineTypes function logic to reduce the cognitive complexity * update md docs * Add classificationFilterPattern in the UI schema form order * fix logs from backend for auto classification * Changes to view the auto classification logs * Fix the sonar errors --------- Co-authored-by: Pere Miquel Brull <peremiquelbrull@gmail.com>
This commit is contained in:
parent
e8031bcc0e
commit
c101c7cf30
@ -52,6 +52,8 @@ public interface PipelineServiceClientInterface {
|
||||
"ingestion_task",
|
||||
PipelineType.PROFILER.toString(),
|
||||
"profiler_task",
|
||||
PipelineType.AUTO_CLASSIFICATION.toString(),
|
||||
"auto_classification_task",
|
||||
PipelineType.LINEAGE.toString(),
|
||||
"lineage_task",
|
||||
PipelineType.DBT.toString(),
|
||||
|
@ -0,0 +1,118 @@
|
||||
/*
|
||||
* Copyright 2024 Collate.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import test from '@playwright/test';
|
||||
import { PLAYWRIGHT_INGESTION_TAG_OBJ } from '../../constant/config';
|
||||
import MysqlIngestionClass from '../../support/entity/ingestion/MySqlIngestionClass';
|
||||
import { addAndTriggerAutoClassificationPipeline } from '../../utils/autoClassification';
|
||||
import { redirectToHomePage } from '../../utils/common';
|
||||
import { settingClick, SettingOptionsType } from '../../utils/sidebar';
|
||||
|
||||
// Service under test; the table filter is restricted to `sensitive_customers`.
const mysqlService = new MysqlIngestionClass(['sensitive_customers']);

// Any non-empty PLAYWRIGHT_IS_OSS value switches recording from trace to video.
const isOssRun = Boolean(process.env.PLAYWRIGHT_IS_OSS);

// use the admin user to login
test.use({
  storageState: 'playwright/.auth/admin.json',
  trace: isOssRun ? 'off' : 'on-first-retry',
  video: isOssRun ? 'on' : 'off',
});

// 11 minutes max for ingestion tests
const INGESTION_TEST_TIMEOUT_MS = 11 * 60 * 1000;

test.describe.configure({ timeout: INGESTION_TEST_TIMEOUT_MS });
|
||||
|
||||
test.describe('Auto Classification', PLAYWRIGHT_INGESTION_TAG_OBJ, async () => {
  /**
   * End-to-end flow: create the MySQL service, add and run an auto
   * classification pipeline, then open the `sensitive_customers` table and
   * check that the expected PII tags are attached to its columns.
   */
  test('should be able to auto classify data', async ({ page }) => {
    await redirectToHomePage(page);
    await settingClick(
      page,
      mysqlService.category as unknown as SettingOptionsType
    );

    // Create and ingest service data
    await mysqlService.createService(page);

    await addAndTriggerAutoClassificationPipeline(page, mysqlService);

    // Check if the classification is successful.
    // The response listener is registered before the click that triggers the
    // request so the response cannot be missed.
    const getDatabases = page.waitForResponse(
      (response) =>
        response.url().includes('/api/v1/databases?service=') &&
        response.request().method() === 'GET' &&
        response.status() === 200
    );

    // Click on databases tab
    await page.click('.ant-tabs-nav-list [data-testid="databases"]');

    await getDatabases;

    // Click on the database name
    await page
      .getByTestId('child-asset-name-link')
      .getByText('default')
      .click();

    await page.waitForSelector('[data-testid="cypress_integrations_test_db"]');

    // Click on the database schema name
    await page.getByTestId('cypress_integrations_test_db').click();

    await page.waitForSelector('[data-testid="sensitive_customers"]');

    // Click on the table name
    await page.getByTestId('sensitive_customers').click();

    // Verify the sensitive tags
    for (const columnKey of ['user_name', 'SSN', 'DWH_X10']) {
      await test
        .expect(
          page.locator(
            `[data-row-key*="${columnKey}"] [data-testid="tag-PII.Sensitive"]`
          )
        )
        .toBeAttached();
    }

    // Verify the non sensitive tags
    await test
      .expect(
        page.locator(
          `[data-row-key*="address"] [data-testid="tag-PII.NonSensitive"]`
        )
      )
      .toBeAttached();

    // Delete the created service
    await settingClick(
      page,
      mysqlService.category as unknown as SettingOptionsType
    );
    await mysqlService.deleteService(page);
  });
});
|
@ -33,17 +33,18 @@ import {
|
||||
import ServiceBaseClass from './ServiceBaseClass';
|
||||
|
||||
class MysqlIngestionClass extends ServiceBaseClass {
|
||||
name: string;
|
||||
name = '';
|
||||
tableFilter: string[];
|
||||
profilerTable = 'alert_entity';
|
||||
constructor() {
|
||||
super(
|
||||
Services.Database,
|
||||
`pw-mysql-with-%-${uuid()}`,
|
||||
'Mysql',
|
||||
'bot_entity'
|
||||
);
|
||||
this.tableFilter = ['bot_entity', 'alert_entity', 'chart_entity'];
|
||||
constructor(tableFilter?: string[]) {
|
||||
const serviceName = `pw-mysql-with-%-${uuid()}`;
|
||||
super(Services.Database, serviceName, 'Mysql', 'bot_entity');
|
||||
this.name = serviceName;
|
||||
this.tableFilter = tableFilter ?? [
|
||||
'bot_entity',
|
||||
'alert_entity',
|
||||
'chart_entity',
|
||||
];
|
||||
}
|
||||
|
||||
async createService(page: Page) {
|
||||
|
@ -98,10 +98,10 @@ class ServiceBaseClass {
|
||||
await testConnection(page);
|
||||
}
|
||||
|
||||
await this.submitService(this.serviceName, page);
|
||||
await this.submitService(page);
|
||||
|
||||
if (this.shouldAddIngestion) {
|
||||
await this.addIngestionPipeline(this.serviceName, page);
|
||||
await this.addIngestionPipeline(page);
|
||||
}
|
||||
}
|
||||
|
||||
@ -149,7 +149,7 @@ class ServiceBaseClass {
|
||||
// Handle validate ingestion details in respective service here
|
||||
}
|
||||
|
||||
async addIngestionPipeline(serviceName: string, page: Page) {
|
||||
async addIngestionPipeline(page: Page) {
|
||||
await page.click('[data-testid="add-ingestion-button"]');
|
||||
|
||||
// Add ingestion page
|
||||
@ -191,7 +191,7 @@ class ServiceBaseClass {
|
||||
await this.handleIngestionRetry('metadata', page);
|
||||
}
|
||||
|
||||
async submitService(serviceName: string, page: Page) {
|
||||
async submitService(page: Page) {
|
||||
await page.click('[data-testid="submit-btn"]');
|
||||
await page.waitForSelector('[data-testid="success-line"]', {
|
||||
state: 'visible',
|
||||
|
@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright 2024 Collate.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { Page } from '@playwright/test';
|
||||
import MysqlIngestionClass from '../support/entity/ingestion/MySqlIngestionClass';
|
||||
import { getApiContext, toastNotification } from './common';
|
||||
import { visitServiceDetailsPage } from './service';
|
||||
|
||||
/**
 * Adds an auto classification ingestion pipeline to the given MySQL service
 * through the UI, triggers a run from the ingestions table and waits for the
 * run to be handled by `handleIngestionRetry`.
 *
 * @param page - Playwright page used to drive the UI.
 * @param mysqlService - Service helper providing the service name, category
 * and the shared ingestion form helpers.
 * @throws Error when the API returns no auto classification pipeline for the
 * service, since there would be no row to trigger.
 */
export const addAndTriggerAutoClassificationPipeline = async (
  page: Page,
  mysqlService: MysqlIngestionClass
) => {
  const { apiContext } = await getApiContext(page);

  await visitServiceDetailsPage(
    page,
    {
      type: mysqlService.category,
      name: mysqlService.name,
      displayName: mysqlService.name,
    },
    true
  );

  // Add auto classification ingestion
  await page.click('[data-testid="ingestions"]');
  await page.click('[data-testid="add-new-ingestion-button"]');
  await page.waitForSelector('[data-menu-id*="autoClassification"]');
  await page.click('[data-menu-id*="autoClassification"]');

  // Fill the auto classification form details
  await page.waitForSelector('#root\\/tableFilterPattern\\/includes');
  await mysqlService.fillIngestionDetails(page);
  await page.click('#root\\/enableAutoClassification');
  await page.click('[data-testid="submit-btn"]');

  // Make sure we create ingestion with None schedule to avoid conflict between Airflow and Argo behavior
  await mysqlService.scheduleIngestion(page);

  await page.click('[data-testid="view-service-button"]');

  // Header available once page loads
  await page.getByTestId('loader').waitFor({ state: 'detached' });
  await page.getByTestId('ingestions').click();
  await page
    .getByLabel('Ingestions')
    .getByTestId('loader')
    .waitFor({ state: 'detached' });

  // Look up the created pipeline to locate its row in the ingestions table
  const pipelinesResponse = await apiContext.get(
    `/api/v1/services/ingestionPipelines?service=${encodeURIComponent(
      mysqlService.name
    )}&pipelineType=autoClassification&serviceType=databaseService&limit=1`
  );
  const pipelines = await pipelinesResponse.json();
  const pipelineName = pipelines?.data?.[0]?.name;

  // Fail with a descriptive message instead of a TypeError on `data[0]`
  if (!pipelineName) {
    throw new Error(
      `No auto classification pipeline found for service "${mysqlService.name}"`
    );
  }

  // need manual wait to settle down the deployed pipeline, before triggering the pipeline
  await page.waitForTimeout(3000);

  await page.click(
    `[data-row-key*="${pipelineName}"] [data-testid="more-actions"]`
  );
  await page.getByTestId('run-button').click();

  await toastNotification(page, `Pipeline triggered successfully!`);

  // need manual wait to make sure we are awaiting on latest run results
  await page.waitForTimeout(2000);

  await mysqlService.handleIngestionRetry('autoClassification', page);
};
|
@ -0,0 +1,126 @@
|
||||
# Auto Classification
|
||||
|
||||
Auto Classification Pipeline Configuration.
|
||||
|
||||
The main goal of this pipeline is bringing in Sample Data from your sources, as well as using NLP models to
|
||||
automatically classify your data based on PII (Personally Identifiable Information) and other sensitive information.
|
||||
|
||||
## Configuration
|
||||
|
||||
$$section
|
||||
### Database Filter Pattern $(id="databaseFilterPattern")
|
||||
|
||||
Database filter patterns are used to control whether to include databases as part of metadata ingestion.
|
||||
|
||||
**Include**: Explicitly include databases by adding a list of regular expressions to the `Include` field. OpenMetadata will include all databases with names matching one or more of the supplied regular expressions. All other databases will be excluded.
|
||||
|
||||
For example, to include only those databases whose name starts with the word `demo`, add the regex pattern in the include field as `^demo.*`.
|
||||
|
||||
**Exclude**: Explicitly exclude databases by adding a list of regular expressions to the `Exclude` field. OpenMetadata will exclude all databases with names matching one or more of the supplied regular expressions. All other databases will be included.
|
||||
|
||||
For example, to exclude all databases with the name containing the word `demo`, add the regex pattern in the exclude field as `.*demo.*`.
|
||||
|
||||
Checkout [this](https://docs.open-metadata.org/connectors/ingestion/workflows/metadata/filter-patterns/database#database-filter-pattern) document for further examples on database filter patterns.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Schema Filter Pattern $(id="schemaFilterPattern")
|
||||
|
||||
Schema filter patterns are used to control whether to include schemas as part of metadata ingestion.
|
||||
|
||||
**Include**: Explicitly include schemas by adding a list of regular expressions to the `Include` field. OpenMetadata will include all schemas with names matching one or more of the supplied regular expressions. All other schemas will be excluded.
|
||||
|
||||
For example, to include only those schemas whose name starts with the word `demo`, add the regex pattern in the include field as `^demo.*`.
|
||||
|
||||
**Exclude**: Explicitly exclude schemas by adding a list of regular expressions to the `Exclude` field. OpenMetadata will exclude all schemas with names matching one or more of the supplied regular expressions. All other schemas will be included.
|
||||
|
||||
For example, to exclude all schemas with the name containing the word `demo`, add regex pattern in the exclude field as `.*demo.*`.
|
||||
|
||||
Checkout [this](https://docs.open-metadata.org/connectors/ingestion/workflows/metadata/filter-patterns/database#schema-filter-pattern) document for further examples on schema filter patterns.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Table Filter Pattern $(id="tableFilterPattern")
|
||||
|
||||
Table filter patterns are used to control whether to include tables as part of metadata ingestion.
|
||||
|
||||
**Include**: Explicitly include tables by adding a list of regular expressions to the `Include` field. OpenMetadata will include all tables with names matching one or more of the supplied regular expressions. All other tables will be excluded.
|
||||
|
||||
For example, to include only those tables whose name starts with the word `demo`, add the regex pattern in the include field as `^demo.*`.
|
||||
|
||||
**Exclude**: Explicitly exclude tables by adding a list of regular expressions to the `Exclude` field. OpenMetadata will exclude all tables with names matching one or more of the supplied regular expressions. All other tables will be included.
|
||||
|
||||
For example, to exclude all tables with the name containing the word `demo`, add the regex pattern in the exclude field as `.*demo.*`.
|
||||
|
||||
Checkout [this](https://docs.open-metadata.org/connectors/ingestion/workflows/metadata/filter-patterns/database#table-filter-pattern) document for further examples on table filter patterns.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Enable Debug Logs $(id="enableDebugLog")
|
||||
|
||||
Set the `Enable Debug Log` toggle to set the logging level of the process to debug. You can check these logs in the Ingestion tab of the service and dig deeper into any errors you might find.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Include Views $(id="includeViews")
|
||||
If activated, the profiler will compute metrics for view entity types. Note that this can have a negative impact on the profiler performance.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Use FQN For Filtering Views $(id="useFqnForFiltering")
|
||||
Set this flag when you want to apply the filters on Fully Qualified Names (e.g `service_name.db_name.schema_name.table_name`) instead of applying them to the raw name of the asset (e.g `table_name`).
|
||||
|
||||
This flag is useful in scenarios where you have different schemas with the same name in multiple databases, or tables with the same name in different schemas, and you want to filter out only one of them.
|
||||
|
||||
Checkout [this](https://docs.open-metadata.org/connectors/ingestion/workflows/metadata/filter-patterns/database#table-filter-pattern) document for further examples on how to use this field.
|
||||
$$
|
||||
|
||||
|
||||
$$section
|
||||
### Store Sample Data $(id="storeSampleData")
|
||||
|
||||
Set the Store Sample Data toggle to control whether to store sample data as part of Auto Classification workflow. If this is enabled, 100 rows will be ingested by default. You can update the number of rows in the "DatabaseServiceProfilerPipeline Advanced Config" section (i.e. `Sample Data Rows Count` setting).
|
||||
|
||||
If disabled, OpenMetadata will not store any sample data, but will still use it on-the-fly to compute the Auto Classification.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Enable Auto Classification $(id="enableAutoClassification")
|
||||
|
||||
Set the Enable Auto Classification toggle to control whether to automatically tag columns that might contain sensitive information.
|
||||
|
||||
Use the `Confidence` setting to set the confidence level when inferring the tags for a column.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### PII Inference Confidence Level $(id="confidence")
|
||||
Confidence level to use when inferring whether the classification should be applied to a column or not (between 0 and 100). A number closer to 100 will yield fewer false positives but potentially more false negatives.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Profile Sample Type $(id="profileSampleType")
|
||||
The sample type can be set to either:
|
||||
|
||||
* **Percentage**: this will use a percentage to sample the table (e.g. if table has 100 rows, and we set sample percentage to 50%, the profiler will use 50 random rows to compute the metrics).
|
||||
* **Row Count**: this will use a number of rows to sample the table (e.g. if table has 100 rows, and we set row count to 10, the profiler will use 10 random rows to compute the metrics).
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Profile Sample $(id="profileSample")
|
||||
Percentage of data or number of rows to use when sampling tables to compute the profiler metrics. By default (i.e. if left blank), the profiler will run against the entire table.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Sampling Method Type $(id="samplingMethodType")
|
||||
|
||||
**This parameter is effective for Snowflake only**
|
||||
|
||||
The sampling method type can be set to **BERNOULLI** or **SYSTEM**. You can find the difference between the two values in the Snowflake documentation. When you choose **BERNOULLI**, it will scan full rows in the table even though a small value is set at the **Profile Sample**. However, it has fewer restrictions than **SYSTEM**.
|
||||
|
||||
If no option is chosen, the default is **BERNOULLI**.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Sample Data Rows Count $(id="sampleDataCount")
|
||||
Set the number of rows to ingest when the `Store Sample Data` toggle is on. Defaults to 50.
|
||||
$$
|
@ -74,27 +74,12 @@ This Flag is useful in scenarios when you have different schemas with same name
|
||||
Checkout [this](https://docs.open-metadata.org/connectors/ingestion/workflows/metadata/filter-patterns/database#table-filter-pattern) document for further examples on how to use this field.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Ingest Sample Data $(id="generateSampleData")
|
||||
|
||||
Set the Ingest Sample Data toggle to control whether to ingest sample data as part of profiler ingestion. If this is enabled, 100 rows will be ingested by default. You can update the number of rows in the "DatabaseServiceProfilerPipeline Advanced Config" section (i.e. `Sample Data Rows Count` setting).
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Compute Metrics $(id="computeMetrics")
|
||||
|
||||
Set the `Compute Metrics` toggle off to not perform any metric computation during the profiler ingestion workflow. Used in combination with `Ingest Sample Data` toggle on allows you to only ingest sample data.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Auto Tag PII $(id="processPiiSensitive")
|
||||
|
||||
Set the `Auto Tag PII` toggle to control whether to automatically tag columns that might contain sensitive information as part of profiler ingestion.
|
||||
|
||||
If `Ingest Sample Data` is enabled, OpenMetadata will leverage machine learning to infer which column may contain PII sensitive data. If disabled, OpenMetadata will infer this information from the column name. Use the `Confidence` setting in the "DatabaseServiceProfilerPipeline Advanced Config" to set the confience level when infering the PII status of a column.
|
||||
$$
|
||||
|
||||
|
||||
$$section
|
||||
### Profile Sample Type $(id="profileSampleType")
|
||||
The sample type can be set to either:
|
||||
@ -115,12 +100,7 @@ $$section
|
||||
|
||||
The sampling method type can be set to **BERNOULLI** or **SYSTEM**. You can find the difference between the two values in the Snowflake documentation. When you choose **BERNOULLI**, it will scan full rows in the table even though a small value is set at the **Profile Sample**. However, it has fewer restrictions than **SYSTEM**.
|
||||
|
||||
If no option is choiced, the default is **BERNOULLI**.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### PII Inference Confidence Level $(id="confidence")
|
||||
Confidence level to use when infering whether a column shoul be flagged as PII or not (between 0 and 100). A number closer to 100 will yield less false positive but potentially more false negative.
|
||||
If no option is chosen, the default is **BERNOULLI**.
|
||||
$$
|
||||
|
||||
$$section
|
||||
|
@ -323,6 +323,7 @@ export const INGESTION_WORKFLOW_UI_SCHEMA = {
|
||||
'databaseFilterPattern',
|
||||
'schemaFilterPattern',
|
||||
'tableFilterPattern',
|
||||
'classificationFilterPattern',
|
||||
'enableDebugLog',
|
||||
'*',
|
||||
],
|
||||
@ -493,5 +494,6 @@ export const SERVICE_INGESTION_PIPELINE_TYPES = [
|
||||
PipelineType.Usage,
|
||||
PipelineType.Lineage,
|
||||
PipelineType.Profiler,
|
||||
PipelineType.AutoClassification,
|
||||
PipelineType.Dbt,
|
||||
];
|
||||
|
@ -115,6 +115,7 @@
|
||||
"authentication-uri": "Authentifizierungs-URI",
|
||||
"authority": "Behörde",
|
||||
"authorize-app": "Authorize {{app}}",
|
||||
"auto-classification": "Automatische Klassifizierung",
|
||||
"auto-pii-confidence-score": "Auto PII-Vertrauensscore",
|
||||
"auto-tag-pii-uppercase": "Auto PII-Tag",
|
||||
"automatically-generate": "Automatisch generieren",
|
||||
|
@ -115,6 +115,7 @@
|
||||
"authentication-uri": "Authentication URI",
|
||||
"authority": "Authority",
|
||||
"authorize-app": "Authorize {{app}}",
|
||||
"auto-classification": "Auto Classification",
|
||||
"auto-pii-confidence-score": "Auto PII Confidence Score",
|
||||
"auto-tag-pii-uppercase": "Auto Tag PII",
|
||||
"automatically-generate": "Automatically Generate",
|
||||
|
@ -115,6 +115,7 @@
|
||||
"authentication-uri": "URI de autenticación",
|
||||
"authority": "Autoridad",
|
||||
"authorize-app": "Autorizar {{app}}",
|
||||
"auto-classification": "Clasificación automática",
|
||||
"auto-pii-confidence-score": "Nivel de Confianza de Auto PII",
|
||||
"auto-tag-pii-uppercase": "Etiqueta de información personal identificable automática",
|
||||
"automatically-generate": "Generar automáticamente",
|
||||
|
@ -115,6 +115,7 @@
|
||||
"authentication-uri": "URI d'Authentification",
|
||||
"authority": "Autorité",
|
||||
"authorize-app": "Authorize {{app}}",
|
||||
"auto-classification": "Classification Automatique",
|
||||
"auto-pii-confidence-score": "Score de Confiance Auto PII",
|
||||
"auto-tag-pii-uppercase": "Balise Auto PII",
|
||||
"automatically-generate": "Générer Automatiquement",
|
||||
|
@ -115,6 +115,7 @@
|
||||
"authentication-uri": "URI de autenticación",
|
||||
"authority": "Autoridade",
|
||||
"authorize-app": "Autorizar {{app}}",
|
||||
"auto-classification": "Clasificación automática",
|
||||
"auto-pii-confidence-score": "Puntuación de confianza automática de PII",
|
||||
"auto-tag-pii-uppercase": "Etiquetado automático de PII",
|
||||
"automatically-generate": "Xerar automaticamente",
|
||||
|
@ -115,6 +115,7 @@
|
||||
"authentication-uri": "URI אימות",
|
||||
"authority": "רשות",
|
||||
"authorize-app": "אמת את {{app}}",
|
||||
"auto-classification": "סיווג אוטומטי",
|
||||
"auto-pii-confidence-score": "ציון ביטחון PII אוטומטי",
|
||||
"auto-tag-pii-uppercase": "תיוג PII אוטומטי",
|
||||
"automatically-generate": "צור באופן אוטומטי",
|
||||
|
@ -115,6 +115,7 @@
|
||||
"authentication-uri": "認証URI",
|
||||
"authority": "Authority",
|
||||
"authorize-app": "Authorize {{app}}",
|
||||
"auto-classification": "自動分類",
|
||||
"auto-pii-confidence-score": "Auto PII Confidence Score",
|
||||
"auto-tag-pii-uppercase": "自動PIIタグ",
|
||||
"automatically-generate": "自動生成",
|
||||
|
@ -115,6 +115,7 @@
|
||||
"authentication-uri": "Authenticatie-URI",
|
||||
"authority": "Autoriteit",
|
||||
"authorize-app": "Applicatie autoriseren {{app}}",
|
||||
"auto-classification": "Automatische classificatie",
|
||||
"auto-pii-confidence-score": "Automatische PII-vertrouwensscore",
|
||||
"auto-tag-pii-uppercase": "Automatisch taggen van PII",
|
||||
"automatically-generate": "Automatisch genereren",
|
||||
|
@ -115,6 +115,7 @@
|
||||
"authentication-uri": "آدرس URI احراز هویت",
|
||||
"authority": "مرجع",
|
||||
"authorize-app": "مجوز دادن به {{app}}",
|
||||
"auto-classification": "طبقهبندی خودکار",
|
||||
"auto-pii-confidence-score": "امتیاز اعتماد PII خودکار",
|
||||
"auto-tag-pii-uppercase": "برچسب PII خودکار",
|
||||
"automatically-generate": "تولید خودکار",
|
||||
|
@ -115,6 +115,7 @@
|
||||
"authentication-uri": "URI de Autenticação",
|
||||
"authority": "Autoridade",
|
||||
"authorize-app": "Autorizar {{app}}",
|
||||
"auto-classification": "Classificação Automática",
|
||||
"auto-pii-confidence-score": "Pontuação de Confiança Automática PII",
|
||||
"auto-tag-pii-uppercase": "Auto Tag PII",
|
||||
"automatically-generate": "Gerar Automaticamente",
|
||||
|
@ -115,6 +115,7 @@
|
||||
"authentication-uri": "URI de Autenticação",
|
||||
"authority": "Autoridade",
|
||||
"authorize-app": "Autorizar {{app}}",
|
||||
"auto-classification": "Classificação Automática",
|
||||
"auto-pii-confidence-score": "Pontuação de Confiança Automática PII",
|
||||
"auto-tag-pii-uppercase": "Etiqueta Automática PII",
|
||||
"automatically-generate": "Gerar Automaticamente",
|
||||
|
@ -115,6 +115,7 @@
|
||||
"authentication-uri": "URI аутентификации",
|
||||
"authority": "Власть",
|
||||
"authorize-app": "Authorize {{app}}",
|
||||
"auto-classification": "Автоклассификация",
|
||||
"auto-pii-confidence-score": "Оценка достоверности Auto PII",
|
||||
"auto-tag-pii-uppercase": "Автотег PII",
|
||||
"automatically-generate": "Автоматически генерировать",
|
||||
|
@ -115,6 +115,7 @@
|
||||
"authentication-uri": "URI การรับรอง",
|
||||
"authority": "อำนาจ",
|
||||
"authorize-app": "อนุญาต {{app}}",
|
||||
"auto-classification": "การจำแนกประเภทอัตโนมัติ",
|
||||
"auto-pii-confidence-score": "คะแนนความมั่นใจ PII อัตโนมัติ",
|
||||
"auto-tag-pii-uppercase": "แท็ก PII อัตโนมัติ",
|
||||
"automatically-generate": "สร้างโดยอัตโนมัติ",
|
||||
|
@ -115,6 +115,7 @@
|
||||
"authentication-uri": "鉴权 URI",
|
||||
"authority": "授权",
|
||||
"authorize-app": "授权{{app}}",
|
||||
"auto-classification": "自动分类",
|
||||
"auto-pii-confidence-score": "自动计算 PII 信任值",
|
||||
"auto-tag-pii-uppercase": "自动标记 PII",
|
||||
"automatically-generate": "自动生成",
|
||||
|
@ -21,6 +21,7 @@ export interface IngestionPipelineLogByIdInterface {
|
||||
data_insight_task?: string;
|
||||
dbt_task?: string;
|
||||
elasticsearch_reindex_task?: string;
|
||||
auto_classification_task?: string;
|
||||
total?: string;
|
||||
after?: string;
|
||||
}
|
||||
|
@ -112,40 +112,45 @@ const LogsViewerPage = () => {
|
||||
|
||||
switch (pipelineType || ingestionDetails?.pipelineType) {
|
||||
case PipelineType.Metadata:
|
||||
setLogs(logs.concat(res.data?.ingestion_task || ''));
|
||||
setLogs(logs.concat(res.data?.ingestion_task ?? ''));
|
||||
|
||||
break;
|
||||
case PipelineType.Application:
|
||||
setLogs(logs.concat(res.data?.application_task || ''));
|
||||
setLogs(logs.concat(res.data?.application_task ?? ''));
|
||||
|
||||
break;
|
||||
case PipelineType.Profiler:
|
||||
setLogs(logs.concat(res.data?.profiler_task || ''));
|
||||
setLogs(logs.concat(res.data?.profiler_task ?? ''));
|
||||
|
||||
break;
|
||||
case PipelineType.Usage:
|
||||
setLogs(logs.concat(res.data?.usage_task || ''));
|
||||
setLogs(logs.concat(res.data?.usage_task ?? ''));
|
||||
|
||||
break;
|
||||
case PipelineType.Lineage:
|
||||
setLogs(logs.concat(res.data?.lineage_task || ''));
|
||||
setLogs(logs.concat(res.data?.lineage_task ?? ''));
|
||||
|
||||
break;
|
||||
case PipelineType.Dbt:
|
||||
setLogs(logs.concat(res.data?.dbt_task || ''));
|
||||
setLogs(logs.concat(res.data?.dbt_task ?? ''));
|
||||
|
||||
break;
|
||||
case PipelineType.TestSuite:
|
||||
setLogs(logs.concat(res.data?.test_suite_task || ''));
|
||||
setLogs(logs.concat(res.data?.test_suite_task ?? ''));
|
||||
|
||||
break;
|
||||
case PipelineType.DataInsight:
|
||||
setLogs(logs.concat(res.data?.data_insight_task || ''));
|
||||
setLogs(logs.concat(res.data?.data_insight_task ?? ''));
|
||||
|
||||
break;
|
||||
|
||||
case PipelineType.ElasticSearchReindex:
|
||||
setLogs(logs.concat(res.data?.elasticsearch_reindex_task || ''));
|
||||
setLogs(logs.concat(res.data?.elasticsearch_reindex_task ?? ''));
|
||||
|
||||
break;
|
||||
|
||||
case PipelineType.AutoClassification:
|
||||
setLogs(logs.concat(res.data?.auto_classification_task ?? ''));
|
||||
|
||||
break;
|
||||
|
||||
|
@ -0,0 +1,101 @@
|
||||
/*
|
||||
* Copyright 2024 Collate.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { PipelineType } from '../generated/api/services/ingestionPipelines/createIngestionPipeline';
|
||||
import { DatabaseServiceType } from '../generated/entity/services/databaseService';
|
||||
import { MetadataServiceType } from '../generated/entity/services/metadataService';
|
||||
import { ServicesType } from '../interface/service.interface';
|
||||
import { getSupportedPipelineTypes } from './IngestionUtils';
|
||||
|
||||
/**
 * Unit tests for `getSupportedPipelineTypes`: each case builds a minimal
 * service object and checks which pipeline types are reported for it.
 */
describe('getSupportedPipelineTypes', () => {
  it('should return only metadata pipeline type if config is undefined', () => {
    // No connection at all: only the metadata pipeline is expected.
    const serviceDetails = {};
    const result = getSupportedPipelineTypes(serviceDetails as ServicesType);

    expect(result).toEqual([PipelineType.Metadata]);
  });

  it('should return supported pipeline types based on config', () => {
    const serviceDetails: ServicesType = {
      id: '',
      name: '',
      serviceType: DatabaseServiceType.Athena,
      connection: {
        config: {
          supportsMetadataExtraction: true,
          supportsUsageExtraction: true,
          supportsLineageExtraction: true,
          supportsProfiler: true,
          supportsDBTExtraction: true,
          supportsViewLineageExtraction: true,
        },
      },
    };
    const result = getSupportedPipelineTypes(serviceDetails);

    // AutoClassification is expected although the config sets no dedicated
    // flag for it.
    expect(result).toEqual([
      PipelineType.Metadata,
      PipelineType.Usage,
      PipelineType.Lineage,
      PipelineType.Profiler,
      PipelineType.AutoClassification,
      PipelineType.Dbt,
    ]);
  });

  it('should return empty array if no pipeline types are supported', () => {
    // A config object exists but sets no `supports*` flag.
    const serviceDetails = {
      id: '',
      name: '',
      serviceType: DatabaseServiceType.Athena,
      connection: {
        config: {},
      },
    };
    const result = getSupportedPipelineTypes(serviceDetails);

    expect(result).toEqual([]);
  });

  it('should include DataInsight if supportsDataInsightExtraction is true', () => {
    const serviceDetails: ServicesType = {
      id: '',
      name: '',
      serviceType: MetadataServiceType.Alation,
      connection: {
        config: {
          supportsDataInsightExtraction: true,
        },
      },
    };
    const result = getSupportedPipelineTypes(serviceDetails);

    expect(result).toContain(PipelineType.DataInsight);
  });

  it('should include ElasticSearchReindex if supportsElasticSearchReindexingExtraction is true', () => {
    const serviceDetails = {
      id: '',
      name: '',
      serviceType: MetadataServiceType.AlationSink,
      connection: {
        config: {
          supportsElasticSearchReindexingExtraction: true,
        },
      },
    };
    const result = getSupportedPipelineTypes(serviceDetails);

    expect(result).toContain(PipelineType.ElasticSearchReindex);
  });
});
|
@ -14,7 +14,7 @@
|
||||
import { Typography } from 'antd';
|
||||
import { ExpandableConfig } from 'antd/lib/table/interface';
|
||||
import { t } from 'i18next';
|
||||
import { isEmpty, isUndefined, startCase } from 'lodash';
|
||||
import { isEmpty, isUndefined, startCase, uniq } from 'lodash';
|
||||
import { ServiceTypes } from 'Models';
|
||||
import React from 'react';
|
||||
import ErrorPlaceHolder from '../components/common/ErrorWithPlaceholder/ErrorPlaceHolder';
|
||||
@ -44,7 +44,6 @@ import {
|
||||
IngestionPipeline,
|
||||
StepSummary,
|
||||
} from '../generated/entity/services/ingestionPipelines/ingestionPipeline';
|
||||
import { Connection as MetadataConnection } from '../generated/entity/services/metadataService';
|
||||
import { SearchSourceAlias } from '../interface/search.interface';
|
||||
import { DataObj, ServicesType } from '../interface/service.interface';
|
||||
import { Transi18next } from './CommonUtils';
|
||||
@ -141,32 +140,33 @@ export const getBreadCrumbsArray = (
|
||||
};
|
||||
|
||||
/**
 * Returns the ingestion pipeline types that can be set up for a service,
 * derived from the `supports*` flags on its connection config.
 *
 * @param serviceDetails - Service whose `connection.config` is inspected.
 * @returns `[PipelineType.Metadata]` when the service has no connection
 * config; otherwise the de-duplicated pipeline types of every truthy flag,
 * in the order the flags are listed in the mapping (an empty array when no
 * flag is set).
 */
export const getSupportedPipelineTypes = (serviceDetails: ServicesType) => {
  const config = serviceDetails?.connection?.config as Connection;

  // Without a connection config only the metadata pipeline is offered.
  if (isUndefined(config)) {
    return [PipelineType.Metadata];
  }

  // Capability flag on the connection config -> pipeline types it enables.
  const pipelineMapping: { [key: string]: PipelineType[] } = {
    supportsMetadataExtraction: [PipelineType.Metadata],
    supportsUsageExtraction: [PipelineType.Usage],
    supportsLineageExtraction: [PipelineType.Lineage],
    supportsViewLineageExtraction: [PipelineType.Lineage],
    // Auto classification is enabled by the same flag as the profiler.
    supportsProfiler: [PipelineType.Profiler, PipelineType.AutoClassification],
    supportsDBTExtraction: [PipelineType.Dbt],
    supportsDataInsightExtraction: [PipelineType.DataInsight],
    supportsElasticSearchReindexingExtraction: [
      PipelineType.ElasticSearchReindex,
    ],
  };

  const pipelineType: PipelineType[] = [];

  Object.keys(pipelineMapping).forEach((key) => {
    if (config[key as keyof Connection]) {
      pipelineType.push(...pipelineMapping[key]);
    }
  });

  // Both lineage flags map to Lineage, so duplicates must be collapsed.
  return uniq(pipelineType);
};
|
||||
|
||||
export const getIngestionTypes = (
|
||||
|
@ -104,10 +104,15 @@ describe('Ingestion Workflow tests', () => {
|
||||
WorkflowType.Usage,
|
||||
ServiceCategory.PIPELINE_SERVICES
|
||||
);
|
||||
const autoClassificationSchema = getSchemaByWorkflowType(
|
||||
WorkflowType.AutoClassification,
|
||||
ServiceCategory.DATABASE_SERVICES
|
||||
);
|
||||
|
||||
expect(metadataSchema).toBeDefined();
|
||||
expect(profilerSchema).toBeDefined();
|
||||
expect(usageSchema).toBeDefined();
|
||||
expect(autoClassificationSchema).toBeDefined();
|
||||
});
|
||||
|
||||
it('should getSchemaByWorkflowType return a default object with for an unknown workflow type', () => {
|
||||
|
@ -19,6 +19,7 @@ import {
|
||||
} from '../generated/api/services/ingestionPipelines/createIngestionPipeline';
|
||||
import apiServiceMetadataPipeline from '../jsons/ingestionSchemas/apiServiceMetadataPipeline.json';
|
||||
import dashboardMetadataPipeline from '../jsons/ingestionSchemas/dashboardServiceMetadataPipeline.json';
|
||||
import databaseAutoClassificationPipeline from '../jsons/ingestionSchemas/databaseServiceAutoClassificationPipeline.json';
|
||||
import databaseMetadataPipeline from '../jsons/ingestionSchemas/databaseServiceMetadataPipeline.json';
|
||||
import databaseProfilerPipeline from '../jsons/ingestionSchemas/databaseServiceProfilerPipeline.json';
|
||||
import databaseLineagePipeline from '../jsons/ingestionSchemas/databaseServiceQueryLineagePipeline.json';
|
||||
@ -98,6 +99,12 @@ export const getSchemaByWorkflowType = (
|
||||
...databaseProfilerPipeline,
|
||||
};
|
||||
|
||||
break;
|
||||
case WorkflowType.AutoClassification:
|
||||
schema = {
|
||||
...databaseAutoClassificationPipeline,
|
||||
};
|
||||
|
||||
break;
|
||||
case WorkflowType.Usage:
|
||||
schema = {
|
||||
|
Loading…
x
Reference in New Issue
Block a user