diff --git a/openmetadata-docs/content/v1.3.x/connectors/ingestion/workflows/data-quality/index.md b/openmetadata-docs/content/v1.3.x/connectors/ingestion/workflows/data-quality/index.md index 9080a1f63e9..25be2a323a8 100644 --- a/openmetadata-docs/content/v1.3.x/connectors/ingestion/workflows/data-quality/index.md +++ b/openmetadata-docs/content/v1.3.x/connectors/ingestion/workflows/data-quality/index.md @@ -61,7 +61,7 @@ Navigate to the entity you want to add a test (we currently support quality test /%} ## Step 2: Select the Test Definition -Select the type of test you want to run and set the parameters (if any) for your test case. If you have select a `column` test, you will need to select which column you want to execute your test against. Give it a name and then submit it. +Select the type of test you want to run and set the parameters (if any) for your test case. If you have selected a `column` test, you will need to select which column you want to execute your test against. Give it a name and then submit it. **Note:** if you have a profiler workflow running, you will be able to visualize some context around your column or table data. diff --git a/openmetadata-docs/content/v1.3.x/connectors/ingestion/workflows/profiler/index.md b/openmetadata-docs/content/v1.3.x/connectors/ingestion/workflows/profiler/index.md index f8303f6c642..2199b32c39c 100644 --- a/openmetadata-docs/content/v1.3.x/connectors/ingestion/workflows/profiler/index.md +++ b/openmetadata-docs/content/v1.3.x/connectors/ingestion/workflows/profiler/index.md @@ -10,7 +10,7 @@ Learn how to configure and run the Profiler Workflow to extract Profiler data an {% note %} -During data profiling for Datalake Profiling, we drop NaN (Not a Number) values from the DataFrame using the dropna() method. However, we make an exception for null values, which are retained. 
This ensures that our computations are accurate while handling missing data +For Datalake Profiling, we drop NaN (Not a Number) values from the DataFrame using the dropna() method to allow metric computation. However, we make an exception for null values, which are retained. This ensures that our computations are accurate while handling missing data {% /note %} @@ -27,7 +27,7 @@ This Pipeline will be in charge of feeding the Profiler tab of the Table Entity, {% image - src="/images/v1.3/features/ingestion/workflows/profiler/profiler-summary-colomn.png" + src="/images/v1.3/features/ingestion/workflows/profiler/profiler-summary-column.png" alt="Column profile summary page" caption="Column profile summary page" /%} diff --git a/openmetadata-docs/content/v1.4.x/menu.md b/openmetadata-docs/content/v1.4.x/menu.md index 874cc3868de..3e93412de08 100644 --- a/openmetadata-docs/content/v1.4.x/menu.md +++ b/openmetadata-docs/content/v1.4.x/menu.md @@ -850,22 +850,6 @@ site_menu: url: /connectors/ingestion/workflows/dbt/setup-multiple-dbt-projects - category: Connectors / Ingestion / Workflows / dbt / dbt troubleshooting url: /connectors/ingestion/workflows/dbt/dbt-troubleshooting - - category: Connectors / Ingestion / Workflows / Profiler - url: /connectors/ingestion/workflows/profiler - - category: Connectors / Ingestion / Workflows / Profiler / Metrics - url: /connectors/ingestion/workflows/profiler/metrics - - category: Connectors / Ingestion / Workflows / Profiler / Sample Data - url: /connectors/ingestion/workflows/profiler/external-sample-data - - category: Connectors / Ingestion / Workflows / Profiler / External Workflow - url: /connectors/ingestion/workflows/profiler/external-workflow - - category: Connectors / Ingestion / Workflows / Data Quality - url: /connectors/ingestion/workflows/data-quality - - category: Connectors / Ingestion / Workflows / Data Quality / Tests - url: /connectors/ingestion/workflows/data-quality/tests - - category: Connectors / Ingestion / 
Workflows / Data Quality / Custom Tests - url: /connectors/ingestion/workflows/data-quality/custom-tests - - category: Connectors / Ingestion / Workflows / Data Quality / Failed Rows Sample (Collate Only) - url: /connectors/ingestion/workflows/data-quality/failed-rows-sample - category: Connectors / Ingestion / Lineage url: /connectors/ingestion/lineage - category: Connectors / Ingestion / Lineage / Edit Data Lineage Manually @@ -891,6 +875,37 @@ site_menu: - category: Connectors / Ingestion / Best Practices url: /connectors/ingestion/best-practices + + - category: Quality & Observability + url: /quality-and-observability + color: violet-70 + icon: openmetadata + + - category: Quality & Observability / Profiler + url: /quality-and-observability/profiler + - category: Quality & Observability / Profiler / Metrics + url: /quality-and-observability/profiler/metrics + - category: Quality & Observability / Profiler / Sample Data + url: /quality-and-observability/profiler/external-sample-data + - category: Quality & Observability / Profiler / External Workflow + url: /quality-and-observability/profiler/external-workflow + - category: Quality & Observability / Profiler / Auto PII Tagging + url: /quality-and-observability/profiler/auto-pii-tagging + - category: Quality & Observability / Data Quality + url: /quality-and-observability/data-quality + - category: Quality & Observability / Data Quality / Tests + url: /quality-and-observability/data-quality/tests + - category: Quality & Observability / Data Quality / Custom Tests + url: /quality-and-observability/data-quality/custom-tests + - category: Quality & Observability / Data Quality / Incident Manager + url: /quality-and-observability/data-quality/incident-manager + - category: Quality & Observability / Data Quality / Alerts + url: /quality-and-observability/data-quality/alerts + - category: Quality & Observability / Data Quality / Root Cause Analysis + url: /quality-and-observability/data-quality/root-cause-analysis + 
isCollateOnly: true + + - category: Main Concepts url: /main-concepts color: violet-70 diff --git a/openmetadata-docs/content/v1.4.x/quality-and-observability/data-quality/alerts.md b/openmetadata-docs/content/v1.4.x/quality-and-observability/data-quality/alerts.md new file mode 100644 index 00000000000..82e821f420a --- /dev/null +++ b/openmetadata-docs/content/v1.4.x/quality-and-observability/data-quality/alerts.md @@ -0,0 +1,60 @@ +--- +title: Alerts +slug: /quality-and-observability/data-quality/alerts +--- + +# Alerts +OpenMetadata provides a native way to get alerted in case of test case failure, allowing you to proactively resolve data incidents. + +## Setting Up Alerts +To set up an alert on a test case or test suite, navigate to the observability menu, select `Alerts`, and click on `Add Alert`. + +{% image + src="/images/v1.4/features/ingestion/workflows/data-quality/alerts-menu.png" + alt="Alerts Menu" + caption="Alerts Menu" + /%} + +### Step 1 - Select a Source +The first step is to select a source. For data quality you have 2 relevant options: +- `Test Case`: it will trigger an alert for the specific test case selected +- `Test Suite`: it will trigger an alert for any test case event linked to the test suite. This is a great way to group alerts and reduce notification fatigue + +{% image + src="/images/v1.4/features/ingestion/workflows/data-quality/alert-source-selection.png" + alt="Alerts Menu" + caption="Alerts Menu" + /%} + + +### Step 2 - Select a Filtering Condition (optional) +**Note:** if you do not set any filter the alert will apply to all test cases or test suites. + +You can filter alerts based on specific conditions to narrow down which test suite/test case should trigger an alert. This is useful for dispatching alerts to different channels/users. 
+ +{% image + src="/images/v1.4/features/ingestion/workflows/data-quality/alerts-filter.png" + alt="Alerts Menu" + caption="Alerts Menu" + /%} + +### Step 3 - Select a Triggering Condition +The trigger section will allow you to set the condition for which an alert should be triggered + +{% image + src="/images/v1.4/features/ingestion/workflows/data-quality/alerts-trigger.png" + alt="Alerts Menu" + caption="Alerts Menu" + /%} + + +### Step 4 - Select a Destination +In the destination section you will be able to select between `internal` and `external` destinations: +- `internal`: allows you to select the destination as an internal user, team or admin. The subscription set to this user, team or admin will be used to dispatch the alert +- `external`: allows you to select an external destination such as a Slack or Teams channel + +{% image + src="/images/v1.4/features/ingestion/workflows/data-quality/alerts-destination.png" + alt="Alerts Menu" + caption="Alerts Menu" + /%} \ No newline at end of file diff --git a/openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/data-quality/custom-tests.md b/openmetadata-docs/content/v1.4.x/quality-and-observability/data-quality/custom-tests.md similarity index 99% rename from openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/data-quality/custom-tests.md rename to openmetadata-docs/content/v1.4.x/quality-and-observability/data-quality/custom-tests.md index 59611a79493..5fdd4065813 100644 --- a/openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/data-quality/custom-tests.md +++ b/openmetadata-docs/content/v1.4.x/quality-and-observability/data-quality/custom-tests.md @@ -1,6 +1,6 @@ --- title: Custom Tests -slug: /connectors/ingestion/workflows/data-quality/custom-tests +slug: /quality-and-observability/data-quality/custom-tests --- diff --git a/openmetadata-docs/content/v1.4.x/quality-and-observability/data-quality/incident-manager.md 
b/openmetadata-docs/content/v1.4.x/quality-and-observability/data-quality/incident-manager.md new file mode 100644 index 00000000000..34acd572fef --- /dev/null +++ b/openmetadata-docs/content/v1.4.x/quality-and-observability/data-quality/incident-manager.md @@ -0,0 +1,58 @@ +--- +title: Incident Manager +slug: /quality-and-observability/data-quality/incident-manager +--- + +# Incident Manager + +## Opening and Triaging Incidents + In v1.1.0 we introduce the ability for users to manage and triage incidents linked to failures. When a test case fails, it will automatically open a new incident and mark it as new. If enough information is available, OpenMetadata will automatically assign a severity to the incident - note that you can override this severity. It indicates that a new failure has happened. + +{% image + src="/images/v1.4/features/ingestion/workflows/data-quality/resolution-workflow-new.png" + alt="Test suite results table" + caption="Test suite results table" + /%} + +Once an incident has been opened you will be able to triage and manage it. You can perform different actions at this stage: +- `ack`: the incident will be marked as acknowledged, informing users that people are aware of the ongoing incident. +- `assign`: the incident will be marked as assigned and a task will be opened for the assignee. +- `resolved`: a new incident can directly be marked as resolved - see section below for more details + +{% image + src="/images/v1.4/features/ingestion/workflows/data-quality/resolution-workflow-ack-form.png" + alt="Test suite results table" + caption="Test suite results table" + /%} +{% image + src="/images/v1.4/features/ingestion/workflows/data-quality/resolution-workflow-ack.png" + alt="Test suite results table" + caption="Test suite results table" + /%} + +When resolving an incident, a user will be required to specify the reason and add a comment. 
This provides context regarding the incident and helps users further understand what might have gone wrong + +{% image + src="/images/v1.4/features/ingestion/workflows/data-quality/resolution-workflow-resolved-form.png" + alt="Test suite results table" + caption="Test suite results table" + /%} + +{% image + src="/images/v1.4/features/ingestion/workflows/data-quality/resolution-workflow-resolved.png" + alt="Test suite results table" + caption="Test suite results table" + /%} + + +## Incidents Context & History + +When clicking on an open incident you will see different information: +**Open Incident:** this section will show you open incidents with the timeline and any comments/collaboration that might have been happening. +**Closed Incidents:** this section will show you incidents that have been resolved in the past with the timeline and any comments/collaboration that might have been happening and the resolution reason. + +{% image + src="/images/v1.4/features/ingestion/workflows/data-quality/incident-management-page.png" + alt="Test suite results table" + caption="Test suite results table" + /%} \ No newline at end of file diff --git a/openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/data-quality/index.md b/openmetadata-docs/content/v1.4.x/quality-and-observability/data-quality/index.md similarity index 72% rename from openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/data-quality/index.md rename to openmetadata-docs/content/v1.4.x/quality-and-observability/data-quality/index.md index 6dce2608191..cf21236eedf 100644 --- a/openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/data-quality/index.md +++ b/openmetadata-docs/content/v1.4.x/quality-and-observability/data-quality/index.md @@ -1,6 +1,6 @@ --- title: Data Quality -slug: /connectors/ingestion/workflows/data-quality +slug: /quality-and-observability/data-quality --- # Data Quality @@ -36,7 +36,7 @@ This section will show you how to configure and run Data Quality pipelines 
with ## Main Concepts ### Test Suite -Test Suites are logical container allowing you to group related Test Cases together from different tables. +Test Suites are logical container allowing you to group related Test Cases together from different tables. This is a great approach to group test case alerts and reduce alerting overload. ### Test Definition Test Definitions are generic tests definition elements specific to a test such as: @@ -52,7 +52,7 @@ Tests cases are actual test that will be ran and executed against your entity. T **Note:** you will need to make sure you have the right permission in OpenMetadata to create a test. ## Step 1: Creating a Test Case -Navigate to the entity you want to add a test (we currently support quality test only for database entity). Go to `Profiler & Data Quality` tab. From there, click on the `Add Test` button in the upper right corner and select the type of test you want to implement +Navigate to the entity you want to add a test to (we currently support quality test only for database entity). Go to `Profiler & Data Quality` tab. From there, click on the `Add Test` button in the upper right corner and select the type of test you want to implement {% image src="/images/v1.4/features/ingestion/workflows/data-quality/add-test-case.png" @@ -61,7 +61,7 @@ Navigate to the entity you want to add a test (we currently support quality test /%} ## Step 2: Select the Test Definition -Select the type of test you want to run and set the parameters (if any) for your test case. If you have select a `column` test, you will need to select which column you want to execute your test against. Give it a name and then submit it. +Select the type of test you want to run and set the parameters (if any) for your test case. If you have selected a `column` test, you will need to select which column you want to execute your test against. Give it a name and then submit it. 
**Note:** if you have a profiler workflow running, you will be able to visualize some context around your column or table data. @@ -87,7 +87,7 @@ If it is the first test you are creating for this entity, you'll need to set an /%} ## Adding Test Suites Through the UI -Test Suites are logical container allowing you to group related Test Cases together from different tables. +Test Suites are logical container allowing you to group related Test Cases together from different tables. This is a great way to group related test cases together and set a single alert for test case failure. **Note:** you will need to make sure you have the right permission in OpenMetadata to create a test. ### Step 1: Creating a Test Suite @@ -158,39 +158,3 @@ In the top panel, click on the white background `Data Quality` button. This will alt="Test suite results table" caption="Test suite results table" /%} - - ## Test Case Resolution Workflow - In v1.1.0 we introduce the ability for user to flag the resolution status of failed test cases. When a test case fail, it will automatically be marked as new. It indicates that a new failure has happened. - -{% image - src="/images/v1.4/features/ingestion/workflows/data-quality/resolution-workflow-new.png" - alt="Test suite results table" - caption="Test suite results table" - /%} - -The next step for a user is to mark the new failure as `ack` (acknowledged) signifying to users that someone is looking into test failure resolution. 
When hovering over the resolution status user will be able to see the time (UTC) and the user who acknowledge the failure - -{% image - src="/images/v1.4/features/ingestion/workflows/data-quality/resolution-workflow-ack-form.png" - alt="Test suite results table" - caption="Test suite results table" - /%} -{% image - src="/images/v1.4/features/ingestion/workflows/data-quality/resolution-workflow-ack.png" - alt="Test suite results table" - caption="Test suite results table" - /%} - - Then the user is able to mark a test as `resolved`. We made it mandatory for users to 1) select a reason and 2) add a comment when resolving failed test so that knowledge can be maintain inside the platform. - -{% image - src="/images/v1.4/features/ingestion/workflows/data-quality/resolution-workflow-resolved-form.png" - alt="Test suite results table" - caption="Test suite results table" - /%} - -{% image - src="/images/v1.4/features/ingestion/workflows/data-quality/resolution-workflow-resolved.png" - alt="Test suite results table" - caption="Test suite results table" - /%} diff --git a/openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/data-quality/failed-rows-sample.md b/openmetadata-docs/content/v1.4.x/quality-and-observability/data-quality/root-cause-analysis.md similarity index 51% rename from openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/data-quality/failed-rows-sample.md rename to openmetadata-docs/content/v1.4.x/quality-and-observability/data-quality/root-cause-analysis.md index 8b26946f412..da441cc1643 100644 --- a/openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/data-quality/failed-rows-sample.md +++ b/openmetadata-docs/content/v1.4.x/quality-and-observability/data-quality/root-cause-analysis.md @@ -1,13 +1,13 @@ --- -title: Failed Rows Sample (Collate only) -slug: /connectors/ingestion/workflows/data-quality/failed-rows-sample +title: Root Cause Analysis +slug: /quality-and-observability/data-quality/root-cause-analysis --- -# 
Failed Rows Sample (Collate Only) +# Root Cause Analysis -Some tests will produce a failed sample upon failure. This allows the platform users to -understand the nature of the failure and take corrective actions. The failed sample will -be a subset of the rows that failed the test. +## Failed Rows Sample + +Some tests will produce a failed sample upon failure. This allows the platform users to understand the nature of the failure and take corrective actions. The failed sample will be a subset of the rows that failed the test. The sample will be collected when the option `computePassedFailedRowCount` is set. @@ -21,12 +21,20 @@ The sample will be collected when the option `computePassedFailedRowCount` is se - [Column Values to Be Between](/connectors/ingestion/workflows/data-quality/tests#column-values-to-be-between) - [Column Values Lengths to Be Between](/connectors/ingestion/workflows/data-quality/tests#column-values-lengths-to-be-between) -## Example +## Deleting Sample Rows +If you wish to delete sample rows, you can do so by clicking on the three dots above the table of sample rows. This will open a window with the `Delete` option. Note that failed sample rows will automatically be deleted upon test success. 
{% image -src="/images/v1.4/connectors/ingestion/workflows/data-quality/data-quality/set_compute_row_count.png" +src="/images/v1.4/features/ingestion/workflows/data-quality/sample-row-failure-deletion.png" alt="set compute row count" /%} -![test definition](/images/v1.4/connectors/ingestion/workflows/data-quality/data-quality/failed_rows_sample_1.png) -![failed rows sampls](/images/v1.4/connectors/ingestion/workflows/data-quality/data-quality/failed_rows_sample_2.png) +## Example + +{% image +src="/images/v1.4/features/ingestion/workflows/data-quality/set_compute_row_count.png" +alt="set compute row count" +/%} + +![test definition](/images/v1.4/features/ingestion/workflows/data-quality/failed_rows_sample_1.png) +![failed rows sampls](/images/v1.4/features/ingestion/workflows/data-quality/failed_rows_sample_2.png) diff --git a/openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/data-quality/tests.md b/openmetadata-docs/content/v1.4.x/quality-and-observability/data-quality/tests.md similarity index 99% rename from openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/data-quality/tests.md rename to openmetadata-docs/content/v1.4.x/quality-and-observability/data-quality/tests.md index 27ec6b5c52f..aef4d52c1bc 100644 --- a/openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/data-quality/tests.md +++ b/openmetadata-docs/content/v1.4.x/quality-and-observability/data-quality/tests.md @@ -1,6 +1,6 @@ --- title: Tests -slug: /connectors/ingestion/workflows/data-quality/tests +slug: /quality-and-observability/data-quality/tests --- # Test diff --git a/openmetadata-docs/content/v1.4.x/quality-and-observability/index.md b/openmetadata-docs/content/v1.4.x/quality-and-observability/index.md new file mode 100644 index 00000000000..0083445a848 --- /dev/null +++ b/openmetadata-docs/content/v1.4.x/quality-and-observability/index.md @@ -0,0 +1,34 @@ +--- +title: Quality & Observability +slug: /quality-and-observability +--- + +# Quality & 
Observability with OpenMetadata + +OpenMetadata offers a simple and easy-to-use solution for quality and observability. With no-code tests, observability metrics, incident management, and root cause analysis (Collate feature), you have a unified solution for discovery, governance, and observability. + +## Observability Metrics + +{%inlineCalloutContainer%} + +{%inlineCallout + icon="celebration" + bold="Observability Metrics (Profiler)" + href="/quality-and-observability/profiler" %} +Deep dive into how to set up observability metrics in OpenMetadata! +{%/inlineCallout%} + +{%/inlineCalloutContainer%} + +## Quality + +{%inlineCalloutContainer%} + +{%inlineCallout + icon="celebration" + bold="Quality" + href="/quality-and-observability/data-quality" %} +Deep dive into how to set up quality tests and alerts, and how to triage and resolve incidents! +{%/inlineCallout%} + +{%/inlineCalloutContainer%} \ No newline at end of file diff --git a/openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/profiler/auto_tagging.md b/openmetadata-docs/content/v1.4.x/quality-and-observability/profiler/auto-pii-tagging.md similarity index 97% rename from openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/profiler/auto_tagging.md rename to openmetadata-docs/content/v1.4.x/quality-and-observability/profiler/auto-pii-tagging.md index a2c55ef4e15..8bea478d152 100644 --- a/openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/profiler/auto_tagging.md +++ b/openmetadata-docs/content/v1.4.x/quality-and-observability/profiler/auto-pii-tagging.md @@ -1,6 +1,6 @@ --- title: Auto PII Tagging -slug: /connectors/ingestion/auto_tagging +slug: /quality-and-observability/profiler/auto-pii-tagging --- # Auto PII Tagging diff --git a/openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/profiler/external_workflow.md b/openmetadata-docs/content/v1.4.x/quality-and-observability/profiler/external_workflow.md similarity index 98% rename from 
openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/profiler/external_workflow.md rename to openmetadata-docs/content/v1.4.x/quality-and-observability/profiler/external_workflow.md index 3d7724ecc08..c2b8067b5a7 100644 --- a/openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/profiler/external_workflow.md +++ b/openmetadata-docs/content/v1.4.x/quality-and-observability/profiler/external_workflow.md @@ -1,6 +1,6 @@ --- title: External Profiler Workflow -slug: /connectors/ingestion/workflows/profiler/external-workflow +slug: /quality-and-observability/profiler/external-workflow --- # External Profiler Workflow diff --git a/openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/profiler/index.md b/openmetadata-docs/content/v1.4.x/quality-and-observability/profiler/index.md similarity index 81% rename from openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/profiler/index.md rename to openmetadata-docs/content/v1.4.x/quality-and-observability/profiler/index.md index b2a2c1649d6..cb244693bd5 100644 --- a/openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/profiler/index.md +++ b/openmetadata-docs/content/v1.4.x/quality-and-observability/profiler/index.md @@ -1,6 +1,6 @@ --- title: Profiler Workflow -slug: /connectors/ingestion/workflows/profiler +slug: /quality-and-observability/profiler --- # Profiler Workflow @@ -10,7 +10,7 @@ Learn how to configure and run the Profiler Workflow to extract Profiler data an {% note %} -During data profiling for Datalake Profiling, we drop NaN (Not a Number) values from the DataFrame using the dropna() method. However, we make an exception for null values, which are retained. This ensures that our computations are accurate while handling missing data +For Datalake Profiling, we drop NaN (Not a Number) values from the DataFrame using the dropna() method to allow metric computation. However, we make an exception for null values, which are retained. 
This ensures that our computations are accurate while handling missing data {% /note %} @@ -27,7 +27,7 @@ This Pipeline will be in charge of feeding the Profiler tab of the Table Entity, {% image - src="/images/v1.4/features/ingestion/workflows/profiler/profiler-summary-colomn.png" + src="/images/v1.4/features/ingestion/workflows/profiler/profiler-summary-column.png" alt="Column profile summary page" caption="Column profile summary page" /%} @@ -55,42 +55,60 @@ Here you can enter the Profiler Ingestion details. #### Profiler Options -**Name** +**Name** Define the name of the Profiler Workflow. While we only support a single workflow for the Metadata and Usage ingestion, users can define different schedules and filters for Profiler workflows. As profiling is a costly task, this enables a fine-grained approach to profiling and running tests by specifying different filters for each pipeline. -**Database filter pattern (Optional)** +**Database filter pattern (Optional)** regex expression to filter databases. -**Schema filter pattern (Optional)** +**Schema filter pattern (Optional)** regex expression to filter schemas. -**Table filter pattern (Optional)** +**Table filter pattern (Optional)** regex expression to filter tables. -**Profile Sample (Optional)** +**Profile Sample (Optional)** Set the sample to be use by the profiler for the specific table. - `Percentage`: Value must be between 0 and 100 exclusive (0 < percentage < 100). This will sample the table based on a percentage - `Row Count`: The table will be sampled based on a number of rows (i.e. `1,000`, `2,000`), etc. ⚠️ This option is currently not support for Druid. Sampling leverage `RANDOM` functions in most database (some have specific sampling functions) and Druid provides neither of these option. We recommend using the partitioning or sample query option if you need to limit the amount of data scanned. 
-**Auto PII Tagging (Optional)** -Configuration to automatically tag columns that might contain sensitive information. +**Enable Debug Log** +Set the Enable Debug Log toggle to set the logging level of the process to debug. You can check these logs in the Ingestion tab of the service and dig deeper into any errors you might find. -- **Confidence (Optional)** +**Include Views** +If activated the profiler will compute metrics for view entity types. Note that it can have a negative impact on the profiler performance. + +**Use FQN For Filtering Views** +Set this flag when you want to apply the filters on Fully Qualified Names (e.g service_name.db_name.schema_name.table_name) instead of applying them to the raw name of the asset (e.g table_name). +This Flag is useful in scenarios when you have different schemas with same name in multiple databases, or tables with same name in different schemas, and you want to filter out only one of them. + +**Generate Sample Data** +Whether the profiler should ingest sample data + +**Compute Metrics** +Set the Compute Metrics toggle off to not perform any metric computation during the profiler ingestion workflow. Used in combination with the `Generate Sample Data` toggle on, this allows you to only ingest sample data. + +**Process Pii Sensitive (Optional)** +Configuration to automatically tag columns that might contain sensitive information. PII data will be inferred from the column name. If `Generate Sample Data` is toggled on OpenMetadata will leverage machine learning to infer which column may contain PII sensitive data. + +**Advanced Configuration** + +**PII Inference Confidence Level (Optional)** If `Auto PII Tagging` is enable, this confidence level will determine the threshold to use for OpenMetadata's NLP model to consider a column as containing PII data. -**Thread Count (Optional)** +**Sample Data Rows Count** +Set the number of rows to ingest when the `Generate Sample Data` toggle is on. Defaults to 50. 
+ +**Thread Count (Optional)** Number of thread to use when computing metrics for the profiler. For Snowflake users we recommend setting it to 1. There is a known issue with one of the dependency (`snowflake-connector-python`) affecting projects with certain environments. -**Timeout in Seconds (Optional)** +**Timeout in Seconds (Optional)** This will set the duration a profiling job against a table should wait before interrupting its execution and moving on to profiling the next table. It is important to note that the profiler will wait for the hanging query to terminiate before killing the execution. If there is a risk for your profiling job to hang, it is important to also set a query/connection timeout on your database engine. The default value for the profiler timeout is 12-hours. -**Ingest Sample Data** -Whether the profiler should ingest sample data - ### 3. Schedule and Deploy After clicking Next, you will be redirected to the Scheduling form. This will be the same as the Metadata and Usage Ingestions. Select your desired schedule and click on Deploy to find the usage pipeline being added to the Service Ingestions. @@ -152,6 +170,34 @@ Once you have picked the `Interval Type` you will need to define the configurati - `Start Range`: the start of the range (inclusive) - `End Range`: the end of the range (inclusive) +### 5. Updating Profiler setting at the platform level +The behavior of the profiler can be configured at the platform level. Navigating to `Settings > Preferences > Profiler Configuration` you will find settings to adjust the behavior of the profiler. + +{% image + src="/images/v1.4/features/ingestion/workflows/profiler/profiler-global-configuration.png" + alt="table profile global settings" + caption="table profile global settings" + /%} + +**Disabling All Metric Computation for a Data Type** +Select the data type you want to disable all metric for. Then toggle disable on. 
When running the profiler all metric computation will be skipped for the data type. + +{% image + src="/images/v1.4/features/ingestion/workflows/profiler/disable-metric-computation.png" + alt="table profile global settings" + caption="table profile global settings" + /%} + +**Disabling Specific Metric Computation for a Data Type** +Select the data type you want to disable a metric for. Then in the `Metric Type` section select the metrics you want to compute (or unselect the ones you don't want to compute). When running the profiler the unselected metrics will not be computed. + +{% image + src="/images/v1.4/features/ingestion/workflows/profiler/disable-specific-metric-computation.png" + alt="table profile global settings" + caption="table profile global settings" + /%} + + ## YAML Configuration In the [connectors](/connectors) section we showcase how to run the metadata ingestion from a JSON file using the Airflow SDK or the CLI via metadata ingest. Running a profiler workflow is also possible using a JSON configuration file. diff --git a/openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/profiler/metrics.md b/openmetadata-docs/content/v1.4.x/quality-and-observability/profiler/metrics.md similarity index 95% rename from openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/profiler/metrics.md rename to openmetadata-docs/content/v1.4.x/quality-and-observability/profiler/metrics.md index afd443f9984..5fd296555f6 100644 --- a/openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/profiler/metrics.md +++ b/openmetadata-docs/content/v1.4.x/quality-and-observability/profiler/metrics.md @@ -1,6 +1,6 @@ --- title: Metrics -slug: /connectors/ingestion/workflows/profiler/metrics +slug: /quality-and-observability/profiler/metrics --- # Metrics @@ -160,6 +160,9 @@ OpenMetadata uses the `QUERY_HISTORY_BY_WAREHOUSE` view of the `INFORMATION_SCHE OpenMetadata will look at the past 24-hours to fetch the operations that were performed against a table. 
+**Important** +For Snowflake, the system will parse the DDL query and attempt to match the `database`, `schema`, and `table` names to entities in OpenMetadata. If the DDL query does not include all three elements, we will not be able to ingest this metric. + ### Redshift OpenMetadata uses `stl_insert`, `stl_delete`, `svv_table_info`, and `stl_querytext` to fetch DML operations as well as the number of rows affected by these operations. You need to make sure the user running the profiler workflow has access to these views and tables. diff --git a/openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/profiler/sample_data.md b/openmetadata-docs/content/v1.4.x/quality-and-observability/profiler/sample_data.md similarity index 99% rename from openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/profiler/sample_data.md rename to openmetadata-docs/content/v1.4.x/quality-and-observability/profiler/sample_data.md index 9b2e571a457..dc67736898d 100644 --- a/openmetadata-docs/content/v1.4.x/connectors/ingestion/workflows/profiler/sample_data.md +++ b/openmetadata-docs/content/v1.4.x/quality-and-observability/profiler/sample_data.md @@ -1,6 +1,6 @@ --- title: External Storage for Sample Data -slug: /connectors/ingestion/workflows/profiler/external-sample-data +slug: /quality-and-observability/profiler/external-sample-data --- # External Storage for Sample Data diff --git a/openmetadata-docs/images/v1.4/connectors/ingestion/workflows/data-quality/data-quality/set_compute_row_count.png b/openmetadata-docs/images/v1.4/connectors/ingestion/workflows/data-quality/data-quality/set_compute_row_count.png deleted file mode 100644 index 556f2e6dcc0..00000000000 Binary files a/openmetadata-docs/images/v1.4/connectors/ingestion/workflows/data-quality/data-quality/set_compute_row_count.png and /dev/null differ diff --git a/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/alert-source-selection.png 
b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/alert-source-selection.png new file mode 100644 index 00000000000..faae02ab4af Binary files /dev/null and b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/alert-source-selection.png differ diff --git a/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/alerts-destination.png b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/alerts-destination.png new file mode 100644 index 00000000000..b478bd09b31 Binary files /dev/null and b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/alerts-destination.png differ diff --git a/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/alerts-filter.png b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/alerts-filter.png new file mode 100644 index 00000000000..209c5a1188d Binary files /dev/null and b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/alerts-filter.png differ diff --git a/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/alerts-menu.png b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/alerts-menu.png new file mode 100644 index 00000000000..be48329a6a7 Binary files /dev/null and b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/alerts-menu.png differ diff --git a/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/alerts-trigger.png b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/alerts-trigger.png new file mode 100644 index 00000000000..78aac09c5b9 Binary files /dev/null and b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/alerts-trigger.png differ diff --git a/openmetadata-docs/images/v1.4/connectors/ingestion/workflows/data-quality/data-quality/failed_rows_sample_1.png 
b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/failed_rows_sample_1.png similarity index 100% rename from openmetadata-docs/images/v1.4/connectors/ingestion/workflows/data-quality/data-quality/failed_rows_sample_1.png rename to openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/failed_rows_sample_1.png diff --git a/openmetadata-docs/images/v1.4/connectors/ingestion/workflows/data-quality/data-quality/failed_rows_sample_2.png b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/failed_rows_sample_2.png similarity index 100% rename from openmetadata-docs/images/v1.4/connectors/ingestion/workflows/data-quality/data-quality/failed_rows_sample_2.png rename to openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/failed_rows_sample_2.png diff --git a/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/incident-management-page.png b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/incident-management-page.png new file mode 100644 index 00000000000..a73bf7342d1 Binary files /dev/null and b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/incident-management-page.png differ diff --git a/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/resolution-workflow-ack-form.png b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/resolution-workflow-ack-form.png index 98e78c9d95d..e590632a3f3 100644 Binary files a/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/resolution-workflow-ack-form.png and b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/resolution-workflow-ack-form.png differ diff --git a/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/sample-row-failure-deletion.png b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/sample-row-failure-deletion.png new file mode 100644 index 
00000000000..b70d3ec6782 Binary files /dev/null and b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/sample-row-failure-deletion.png differ diff --git a/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/set_compute_row_count.png b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/set_compute_row_count.png new file mode 100644 index 00000000000..17c064e6baf Binary files /dev/null and b/openmetadata-docs/images/v1.4/features/ingestion/workflows/data-quality/set_compute_row_count.png differ diff --git a/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/alert-source-selection.png b/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/alert-source-selection.png new file mode 100644 index 00000000000..27cbea57772 Binary files /dev/null and b/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/alert-source-selection.png differ diff --git a/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/alerts-destination.png b/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/alerts-destination.png new file mode 100644 index 00000000000..a89e15c3b09 Binary files /dev/null and b/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/alerts-destination.png differ diff --git a/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/alerts-filter.png b/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/alerts-filter.png new file mode 100644 index 00000000000..209c5a1188d Binary files /dev/null and b/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/alerts-filter.png differ diff --git a/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/alerts-menu.png b/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/alerts-menu.png new file mode 100644 index 00000000000..be48329a6a7 Binary files /dev/null and 
b/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/alerts-menu.png differ diff --git a/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/alerts-trigger.png b/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/alerts-trigger.png new file mode 100644 index 00000000000..9e108ea2ce1 Binary files /dev/null and b/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/alerts-trigger.png differ diff --git a/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/disable-metric-computation.png b/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/disable-metric-computation.png new file mode 100644 index 00000000000..5bcf68ba5ff Binary files /dev/null and b/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/disable-metric-computation.png differ diff --git a/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/disable-specific-metric-computation.png b/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/disable-specific-metric-computation.png new file mode 100644 index 00000000000..6bdce9f0e14 Binary files /dev/null and b/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/disable-specific-metric-computation.png differ diff --git a/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/profiler-global-configuration.png b/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/profiler-global-configuration.png new file mode 100644 index 00000000000..0031f729297 Binary files /dev/null and b/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/profiler-global-configuration.png differ diff --git a/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/sample-row-failure-deletion.png b/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/sample-row-failure-deletion.png new file mode 100644 index 00000000000..0fb2b4dbd57 Binary files /dev/null and 
b/openmetadata-docs/images/v1.4/features/ingestion/workflows/profiler/sample-row-failure-deletion.png differ