Docs: Updating datalake & dbt Cloud docs (#17983)

Co-authored-by: Prajwal Pandit <prajwalpandit@Prajwals-MacBook-Air.local>
Prajwal214 2024-09-25 10:49:44 +05:30 committed by GitHub
parent 8dd6a84d1f
commit 30a091b466
49 changed files with 1307 additions and 815 deletions

View File

@ -1,12 +1,12 @@
{% connectorsListContainer %}
{% connectorInfoCard name="ADLS Datalake" stage="PROD" href="/connectors/database/adls-datalake" platform="OpenMetadata" / %}
{% connectorInfoCard name="Athena" stage="PROD" href="/connectors/database/athena" platform="OpenMetadata" / %}
{% connectorInfoCard name="AzureSQL" stage="PROD" href="/connectors/database/azuresql" platform="OpenMetadata" / %}
{% connectorInfoCard name="BigQuery" stage="PROD" href="/connectors/database/bigquery" platform="OpenMetadata" / %}
{% connectorInfoCard name="BigTable" stage="BETA" href="/connectors/database/bigtable" platform="OpenMetadata" / %}
{% connectorInfoCard name="Clickhouse" stage="PROD" href="/connectors/database/clickhouse" platform="OpenMetadata" / %}
{% connectorInfoCard name="Couchbase" stage="BETA" href="/connectors/database/couchbase" platform="OpenMetadata" / %}
{% connectorInfoCard name="Datalake" stage="PROD" href="/connectors/database/datalake" platform="OpenMetadata" / %}
{% connectorInfoCard name="Databricks" stage="PROD" href="/connectors/database/databricks" platform="OpenMetadata" / %}
{% connectorInfoCard name="DB2" stage="PROD" href="/connectors/database/db2" platform="OpenMetadata" / %}
{% connectorInfoCard name="Delta Lake" stage="PROD" href="/connectors/database/deltalake" platform="OpenMetadata" / %}
@ -14,6 +14,7 @@
{% connectorInfoCard name="Doris" stage="PROD" href="/connectors/database/doris" platform="OpenMetadata" / %}
{% connectorInfoCard name="Druid" stage="PROD" href="/connectors/database/druid" platform="OpenMetadata" / %}
{% connectorInfoCard name="DynamoDB" stage="PROD" href="/connectors/database/dynamodb" platform="OpenMetadata" / %}
{% connectorInfoCard name="GCS Datalake" stage="PROD" href="/connectors/database/gcs-datalake" platform="OpenMetadata" / %}
{% connectorInfoCard name="Glue" stage="PROD" href="/connectors/database/glue" platform="OpenMetadata" / %}
{% connectorInfoCard name="Greenplum" stage="BETA" href="/connectors/database/greenplum" platform="OpenMetadata" / %}
{% connectorInfoCard name="Hive" stage="PROD" href="/connectors/database/hive" platform="OpenMetadata" / %}
@ -34,6 +35,7 @@
{% connectorInfoCard name="SingleStore" stage="PROD" href="/connectors/database/singlestore" platform="OpenMetadata" / %}
{% connectorInfoCard name="Snowflake" stage="PROD" href="/connectors/database/snowflake" platform="OpenMetadata" / %}
{% connectorInfoCard name="SQLite" stage="PROD" href="/connectors/database/sqlite" platform="OpenMetadata" / %}
{% connectorInfoCard name="S3 Datalake" stage="PROD" href="/connectors/database/s3-datalake" platform="OpenMetadata" / %}
{% connectorInfoCard name="Teradata" stage="PROD" href="/connectors/database/teradata" platform="OpenMetadata" / %}
{% connectorInfoCard name="Trino" stage="PROD" href="/connectors/database/trino" platform="OpenMetadata" / %}
{% connectorInfoCard name="Unity Catalog" stage="PROD" href="/connectors/database/unity-catalog" platform="OpenMetadata" / %}

View File

@ -1,6 +1,6 @@
{% connectorsListContainer %}
{% connectorInfoCard name="S3" stage="PROD" href="/connectors/storage/s3" platform="OpenMetadata" / %}
{% connectorInfoCard name="S3 Storage" stage="PROD" href="/connectors/storage/s3" platform="OpenMetadata" / %}
{% connectorInfoCard name="ADLS" stage="PROD" href="/connectors/storage/adls" platform="Collate" / %}
{% connectorInfoCard name="GCS" stage="PROD" href="/connectors/storage/gcs" platform="Collate" / %}

View File

@ -1,12 +1,12 @@
{% connectorsListContainer %}
{% connectorInfoCard name="ADLS Datalake" stage="PROD" href="/connectors/database/adls-datalake" platform="OpenMetadata" / %}
{% connectorInfoCard name="Athena" stage="PROD" href="/connectors/database/athena" platform="OpenMetadata" / %}
{% connectorInfoCard name="AzureSQL" stage="PROD" href="/connectors/database/azuresql" platform="OpenMetadata" / %}
{% connectorInfoCard name="BigQuery" stage="PROD" href="/connectors/database/bigquery" platform="OpenMetadata" / %}
{% connectorInfoCard name="BigTable" stage="BETA" href="/connectors/database/bigtable" platform="OpenMetadata" / %}
{% connectorInfoCard name="Clickhouse" stage="PROD" href="/connectors/database/clickhouse" platform="OpenMetadata" / %}
{% connectorInfoCard name="Couchbase" stage="BETA" href="/connectors/database/couchbase" platform="OpenMetadata" / %}
{% connectorInfoCard name="Datalake" stage="PROD" href="/connectors/database/datalake" platform="OpenMetadata" / %}
{% connectorInfoCard name="Databricks" stage="PROD" href="/connectors/database/databricks" platform="OpenMetadata" / %}
{% connectorInfoCard name="DB2" stage="PROD" href="/connectors/database/db2" platform="OpenMetadata" / %}
{% connectorInfoCard name="Delta Lake" stage="PROD" href="/connectors/database/deltalake" platform="OpenMetadata" / %}
@ -14,6 +14,7 @@
{% connectorInfoCard name="Doris" stage="PROD" href="/connectors/database/doris" platform="OpenMetadata" / %}
{% connectorInfoCard name="Druid" stage="PROD" href="/connectors/database/druid" platform="OpenMetadata" / %}
{% connectorInfoCard name="DynamoDB" stage="PROD" href="/connectors/database/dynamodb" platform="OpenMetadata" / %}
{% connectorInfoCard name="GCS Datalake" stage="PROD" href="/connectors/database/gcs-datalake" platform="OpenMetadata" / %}
{% connectorInfoCard name="Glue" stage="PROD" href="/connectors/database/glue" platform="OpenMetadata" / %}
{% connectorInfoCard name="Greenplum" stage="BETA" href="/connectors/database/greenplum" platform="OpenMetadata" / %}
{% connectorInfoCard name="Hive" stage="PROD" href="/connectors/database/hive" platform="OpenMetadata" / %}
@ -34,6 +35,7 @@
{% connectorInfoCard name="SingleStore" stage="PROD" href="/connectors/database/singlestore" platform="OpenMetadata" / %}
{% connectorInfoCard name="Snowflake" stage="PROD" href="/connectors/database/snowflake" platform="OpenMetadata" / %}
{% connectorInfoCard name="SQLite" stage="PROD" href="/connectors/database/sqlite" platform="OpenMetadata" / %}
{% connectorInfoCard name="S3 Datalake" stage="PROD" href="/connectors/database/s3-datalake" platform="OpenMetadata" / %}
{% connectorInfoCard name="Teradata" stage="PROD" href="/connectors/database/teradata" platform="OpenMetadata" / %}
{% connectorInfoCard name="Trino" stage="PROD" href="/connectors/database/trino" platform="OpenMetadata" / %}
{% connectorInfoCard name="Unity Catalog" stage="PROD" href="/connectors/database/unity-catalog" platform="OpenMetadata" / %}

View File

@ -1,6 +1,6 @@
{% connectorsListContainer %}
{% connectorInfoCard name="S3" stage="PROD" href="/connectors/storage/s3" platform="OpenMetadata" / %}
{% connectorInfoCard name="S3 Storage" stage="PROD" href="/connectors/storage/s3" platform="OpenMetadata" / %}
{% connectorInfoCard name="ADLS" stage="PROD" href="/connectors/storage/adls" platform="Collate" / %}
{% connectorInfoCard name="GCS" stage="PROD" href="/connectors/storage/gcs" platform="Collate" / %}

View File

@ -32,6 +32,12 @@ site_menu:
- category: Connectors / Database
url: /connectors/database
- category: Connectors / Database / ADLS Datalake
url: /connectors/database/adls-datalake
- category: Connectors / Database / ADLS Datalake / Run Externally
url: /connectors/database/adls-datalake/yaml
- category: Connectors / Database / ADLS Datalake / Troubleshooting
url: /connectors/database/adls-datalake/troubleshooting
- category: Connectors / Database / Athena
url: /connectors/database/athena
- category: Connectors / Database / Athena / Run Externally
@ -68,12 +74,6 @@ site_menu:
url: /connectors/database/databricks/yaml
- category: Connectors / Database / Databricks / Troubleshooting
url: /connectors/database/databricks/troubleshooting
- category: Connectors / Database / Datalake
url: /connectors/database/datalake
- category: Connectors / Database / Datalake / Run Externally
url: /connectors/database/datalake/yaml
- category: Connectors / Database / Datalake / Troubleshooting
url: /connectors/database/datalake/troubleshooting
- category: Connectors / Database / DB2
url: /connectors/database/db2
- category: Connectors / Database / DB2 / Run Externally
@ -100,6 +100,10 @@ site_menu:
url: /connectors/database/dynamodb
- category: Connectors / Database / DynamoDB / Run Externally
url: /connectors/database/dynamodb/yaml
- category: Connectors / Database / GCS Datalake
url: /connectors/database/gcs-datalake
- category: Connectors / Database / GCS Datalake / Run Externally
url: /connectors/database/gcs-datalake/yaml
- category: Connectors / Database / Glue
url: /connectors/database/glue
- category: Connectors / Database / Glue / Run Externally
@ -194,6 +198,12 @@ site_menu:
url: /connectors/database/synapse/yaml
- category: Connectors / Database / Synapse / Troubleshooting
url: /connectors/database/synapse/troubleshooting
- category: Connectors / Database / S3 Datalake
url: /connectors/database/s3-datalake
- category: Connectors / Database / S3 Datalake / Run Externally
url: /connectors/database/s3-datalake/yaml
- category: Connectors / Database / S3 Datalake / Troubleshooting
url: /connectors/database/s3-datalake/troubleshooting
- category: Connectors / Database / Trino
url: /connectors/database/trino
- category: Connectors / Database / Trino / Run Externally
@ -307,9 +317,9 @@ site_menu:
url: /connectors/pipeline/dagster
- category: Connectors / Pipeline / Dagster / Run Externally
url: /connectors/pipeline/dagster/yaml
- category: Connectors / Pipeline / DBTCloud
- category: Connectors / Pipeline / dbt Cloud
url: /connectors/pipeline/dbtcloud
- category: Connectors / Pipeline / DBTCloud / Run Externally
- category: Connectors / Pipeline / dbt Cloud / Run Externally
url: /connectors/pipeline/dbtcloud/yaml
- category: Connectors / Pipeline / KafkaConnect
url: /connectors/pipeline/kafkaconnect
@ -361,9 +371,9 @@ site_menu:
- category: Connectors / Storage
url: /connectors/storage
- category: Connectors / Storage / S3
- category: Connectors / Storage / S3 Storage
url: /connectors/storage/s3
- category: Connectors / Storage / S3 / Run Externally
- category: Connectors / Storage / S3 Storage / Run Externally
url: /connectors/storage/s3/yaml
- category: Connectors / Storage / GCS
url: /connectors/storage/gcs

View File

@ -0,0 +1,83 @@
---
title: ADLS Datalake
slug: /connectors/database/adls-datalake
---
{% connectorDetailsHeader
name="ADLS Datalake"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Metadata", "Data Profiler", "Data Quality"]
unavailableFeatures=["Query Usage", "Lineage", "Column-level Lineage", "Owners", "dbt", "Tags", "Stored Procedures"]
/ %}
In this section, we provide guides and references to use the ADLS Datalake connector.
Configure and schedule ADLS Datalake metadata and profiler workflows from the OpenMetadata UI:
- [Requirements](#requirements)
- [Metadata Ingestion](#metadata-ingestion)
- [Data Profiler](/how-to-guides/data-quality-observability/profiler/workflow)
- [Data Quality](/how-to-guides/data-quality-observability/quality)
{% partial file="/v1.5/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/database/adls-datalake/yaml"} /%}
## Requirements
{% note %}
The ADLS Datalake connector supports extracting metadata from file types `JSON`, `CSV`, `TSV` & `Parquet`.
{% /note %}
### ADLS Permissions
To extract metadata from Azure ADLS (Storage Account - StorageV2), you will need an **App Registration** with the following
permissions on the Storage Account:
- Storage Blob Data Contributor
- Storage Queue Data Contributor
## Metadata Ingestion
{% partial
file="/v1.5/connectors/metadata-ingestion-ui.md"
variables={
connector: "Datalake",
selectServicePath: "/images/v1.5/connectors/datalake/select-service.png",
addNewServicePath: "/images/v1.5/connectors/datalake/add-new-service.png",
serviceConnectionPath: "/images/v1.5/connectors/datalake/service-connection.png",
}
/%}
{% stepsContainer %}
{% extraContent parentTagName="stepsContainer" %}
#### Connection Details for Azure
- **Azure Credentials**
- **Client ID** : Client ID of the data storage account
- **Client Secret** : Client Secret of the account
- **Tenant ID** : Tenant ID under which the data storage account falls
- **Account Name** : Account Name of the data Storage
- **Required Roles**
Please make sure the following roles are associated with the data storage account.
- `Storage Blob Data Contributor`
- `Storage Queue Data Contributor`
The current approach for authentication is based on `app registration`. Reach out to us on [Slack](https://slack.open-metadata.org/) if you need support for another authentication mechanism.
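For reference, these UI fields map to the connection configuration used when the connector is run externally (see the [Run Externally](/connectors/database/adls-datalake/yaml) page). A minimal sketch of that mapping, with placeholder values only, is shown below:

```yaml
# Sketch only: placeholder values illustrating how the UI fields above map to the
# ADLS Datalake connection config when run externally. Replace them with your own
# App Registration and storage account details.
configSource:
  securityConfig:
    clientId: client-id          # Application (client) ID
    clientSecret: client-secret  # Client secret value
    tenantId: tenant-id          # Directory (tenant) ID
    accountName: account-name    # ADLS storage account name
  prefix: prefix                 # Optional path prefix to narrow down ingestion
```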
{% partial file="/v1.5/connectors/database/advanced-configuration.md" /%}
{% /extraContent %}
{% partial file="/v1.5/connectors/test-connection.md" /%}
{% partial file="/v1.5/connectors/database/configure-ingestion.md" /%}
{% partial file="/v1.5/connectors/ingestion-schedule-and-deploy.md" /%}
{% /stepsContainer %}
{% partial file="/v1.5/connectors/troubleshooting.md" /%}
{% partial file="/v1.5/connectors/database/related.md" /%}

View File

@ -1,16 +1,11 @@
---
title: Datalake Connector Troubleshooting
slug: /connectors/database/datalake/troubleshooting
title: ADLS Datalake Connector Troubleshooting
slug: /connectors/database/adls-datalake/troubleshooting
---
# Troubleshooting
Learn how to resolve the most common problems people encounter in the Datalake connector.
* **'Access Denied' error when reading from S3 bucket**
Please, ensure you have a Bucket Policy with the permissions explained in the requirement section [here](/connectors/database/datalake).
Learn how to resolve the most common problems people encounter in the ADLS Datalake connector.
#### **'Azure Datalake'** credentials details
@ -20,13 +15,8 @@ Please, ensure you have a Bucket Policy with the permissions explained in the re
- Find and click on your application
- Select `Certificates & Secret` under `Manage` Section
{% image
src="/images/v1.5/connectors/datalake/troubleshoot-clientId.png"
alt="Configure service connection"
caption="Find Client ID" /%}

View File

@ -0,0 +1,114 @@
---
title: Run the ADLS Datalake Connector Externally
slug: /connectors/database/adls-datalake/yaml
---
{% connectorDetailsHeader
name="ADLS Datalake"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Metadata", "Data Profiler", "Data Quality"]
unavailableFeatures=["Query Usage", "Lineage", "Column-level Lineage", "Owners", "dbt", "Tags", "Stored Procedures"]
/ %}
In this section, we provide guides and references to use the ADLS Datalake connector.
Configure and schedule ADLS Datalake metadata and profiler workflows from the OpenMetadata UI:
- [Requirements](#requirements)
- [Metadata Ingestion](#metadata-ingestion)
- [dbt Integration](#dbt-integration)
{% partial file="/v1.5/connectors/external-ingestion-deployment.md" /%}
## Requirements
**Note:** The ADLS Datalake connector supports extracting metadata from file types `JSON`, `CSV`, `TSV` & `Parquet`.
### ADLS Permissions
To extract metadata from Azure ADLS (Storage Account - StorageV2), you will need an **App Registration** with the following
permissions on the Storage Account:
- Storage Blob Data Contributor
- Storage Queue Data Contributor
### Python Requirements
{% partial file="/v1.5/connectors/python-requirements.md" /%}
#### Azure installation
```bash
pip3 install "openmetadata-ingestion[datalake-azure]"
```
## Metadata Ingestion
All connectors are defined as JSON Schemas. Here you can find the structure to create a connection to Datalake.
In order to create and run a Metadata Ingestion workflow, we will follow the steps to create a YAML configuration able to connect to the source, process the Entities if needed, and reach the OpenMetadata server.
The workflow is modeled around the following JSON Schema.
## 1. Define the YAML Config
### This is a sample config for Datalake using Azure:
{% codePreview %}
{% codeInfoContainer %}
#### Source Configuration - Service Connection
{% codeInfo srNumber=9 %}
- **Client ID** : Client ID of the data storage account
- **Client Secret** : Client Secret of the account
- **Tenant ID** : Tenant ID under which the data storage account falls
- **Account Name** : Account Name of the data Storage
{% /codeInfo %}
{% partial file="/v1.5/connectors/yaml/database/source-config-def.md" /%}
{% partial file="/v1.5/connectors/yaml/ingestion-sink-def.md" /%}
{% partial file="/v1.5/connectors/yaml/workflow-config-def.md" /%}
{% /codeInfoContainer %}
{% codeBlock fileName="filename.yaml" %}
```yaml {% isCodeBlock=true %}
# Datalake with Azure
source:
type: datalake
serviceName: local_datalake
serviceConnection:
config:
type: Datalake
configSource:
```
```yaml {% srNumber=9 %}
securityConfig:
clientId: client-id
clientSecret: client-secret
tenantId: tenant-id
accountName: account-name
prefix: prefix
```
{% partial file="/v1.5/connectors/yaml/database/source-config.md" /%}
{% partial file="/v1.5/connectors/yaml/ingestion-sink.md" /%}
{% partial file="/v1.5/connectors/yaml/workflow-config.md" /%}
{% /codeBlock %}
{% /codePreview %}
{% partial file="/v1.5/connectors/yaml/ingestion-cli.md" /%}
## dbt Integration
You can learn more about how to ingest dbt models' definitions and their lineage [here](/connectors/ingestion/workflows/dbt).

View File

@ -1,32 +0,0 @@
---
title: Datalake Connector Troubleshooting
slug: /connectors/database/datalake/troubleshooting
---
# Troubleshooting
Learn how to resolve the most common problems people encounter in the Datalake connector.
* **'Access Denied' error when reading from S3 bucket**
Please, ensure you have a Bucket Policy with the permissions explained in the requirement section [here](/connectors/database/datalake).
#### **'Azure Datalake'** credentials details
##### Where can I find 'Client Secret' from.
- Login to `Azure Portal`
- Find and click on your application
- Select `Certificates & Secret` under `Manage` Section
{% image
src="/images/v1.5/connectors/datalake/troubleshoot-clientId.png"
alt="Configure service connection"
caption="Find Client ID" /%}

View File

@ -1,292 +0,0 @@
---
title: Run the Datalake Connector Externally
slug: /connectors/database/datalake/yaml
---
{% connectorDetailsHeader
name="Datalake"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Metadata", "Data Profiler", "Data Quality"]
unavailableFeatures=["Query Usage", "Lineage", "Column-level Lineage", "Owners", "dbt", "Tags", "Stored Procedures"]
/ %}
In this section, we provide guides and references to use the Datalake connector.
Configure and schedule Datalake metadata and profiler workflows from the OpenMetadata UI:
- [Requirements](#requirements)
- [Metadata Ingestion](#metadata-ingestion)
- [dbt Integration](#dbt-integration)
{% partial file="/v1.5/connectors/external-ingestion-deployment.md" /%}
## Requirements
**Note:** Datalake connector supports extracting metadata from file types `JSON`, `CSV`, `TSV` & `Parquet`.
### S3 Permissions
To execute metadata extraction AWS account should have enough access to fetch required data. The <strong>Bucket Policy</strong> in AWS requires at least these permissions:
```json
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:GetObject",
"s3:ListBucket"
],
"Resource": [
"arn:aws:s3:::<my bucket>",
"arn:aws:s3:::<my bucket>/*"
]
}
]
}
```
### ADLS Permissions
To extract metadata from Azure ADLS (Storage Account - StorageV2), you will need an **App Registration** with the following
permissions on the Storage Account:
- Storage Blob Data Contributor
- Storage Queue Data Contributor
### Python Requirements
{% partial file="/v1.5/connectors/python-requirements.md" /%}
If running OpenMetadata version greater than 0.13, you will need to install the Datalake ingestion for GCS or S3:
#### S3 installation
```bash
pip3 install "openmetadata-ingestion[datalake-s3]"
```
#### GCS installation
```bash
pip3 install "openmetadata-ingestion[datalake-gcp]"
```
#### Azure installation
```bash
pip3 install "openmetadata-ingestion[datalake-azure]"
```
#### If version <0.13
You will be installing the requirements together for S3 and GCS
```bash
pip3 install "openmetadata-ingestion[datalake]"
```
## Metadata Ingestion
All connectors are defined as JSON Schemas. Here you can find the structure to create a connection to Datalake.
In order to create and run a Metadata Ingestion workflow, we will follow the steps to create a YAML configuration able to connect to the source, process the Entities if needed, and reach the OpenMetadata server.
The workflow is modeled around the following JSON Schema.
## 1. Define the YAML Config
#### Source Configuration - Source Config using AWS S3
### This is a sample config for Datalake using AWS S3:
{% codePreview %}
{% codeInfoContainer %}
#### Source Configuration - Service Connection
{% codeInfo srNumber=1 %}
* **awsAccessKeyId**: Enter your secure access key ID for your DynamoDB connection. The specified key ID should be authorized to read all databases you want to include in the metadata ingestion workflow.
* **awsSecretAccessKey**: Enter the Secret Access Key (the passcode key pair to the key ID from above).
* **awsRegion**: Specify the region in which your DynamoDB is located. This setting is required even if you have configured a local AWS profile.
* **schemaFilterPattern** and **tableFilterPattern**: Note that the `schemaFilterPattern` and `tableFilterPattern` both support regex as `include` or `exclude`. E.g.,
{% /codeInfo %}
{% partial file="/v1.5/connectors/yaml/database/source-config-def.md" /%}
{% partial file="/v1.5/connectors/yaml/ingestion-sink-def.md" /%}
{% partial file="/v1.5/connectors/yaml/workflow-config-def.md" /%}
{% /codeInfoContainer %}
{% codeBlock fileName="filename.yaml" %}
```yaml {% isCodeBlock=true %}
source:
type: datalake
serviceName: local_datalake
serviceConnection:
config:
type: Datalake
```
```yaml {% srNumber=1 %}
configSource:
securityConfig:
awsAccessKeyId: aws access key id
awsSecretAccessKey: aws secret access key
awsRegion: aws region
bucketName: bucket name
prefix: prefix
```
{% partial file="/v1.5/connectors/yaml/database/source-config.md" /%}
{% partial file="/v1.5/connectors/yaml/ingestion-sink.md" /%}
{% partial file="/v1.5/connectors/yaml/workflow-config.md" /%}
{% /codeBlock %}
{% /codePreview %}
### This is a sample config for Datalake using GCS:
{% codePreview %}
{% codeInfoContainer %}
#### Source Configuration - Service Connection
{% codeInfo srNumber=5 %}
* **type**: Credentials type, e.g. `service_account`.
* **projectId**
* **privateKey**
* **privateKeyId**
* **clientEmail**
* **clientId**
* **authUri**: [https://accounts.google.com/o/oauth2/auth](https://accounts.google.com/o/oauth2/auth) by default
* **tokenUri**: [https://oauth2.googleapis.com/token](https://oauth2.googleapis.com/token) by default
* **authProviderX509CertUrl**: [https://www.googleapis.com/oauth2/v1/certs](https://www.googleapis.com/oauth2/v1/certs) by default
* **clientX509CertUrl**
* **bucketName**: name of the bucket in GCS
* **Prefix**: prefix in gcp bucket
{% /codeInfo %}
{% partial file="/v1.5/connectors/yaml/database/source-config-def.md" /%}
{% partial file="/v1.5/connectors/yaml/ingestion-sink-def.md" /%}
{% partial file="/v1.5/connectors/yaml/workflow-config-def.md" /%}
{% /codeInfoContainer %}
{% codeBlock fileName="filename.yaml" %}
```yaml {% isCodeBlock=true %}
source:
type: datalake
serviceName: local_datalake
serviceConnection:
config:
type: Datalake
configSource:
securityConfig:
```
```yaml {% srNumber=5 %}
gcpConfig:
type: type of account
projectId: project id
privateKeyId: private key id
privateKey: private key
clientEmail: client email
clientId: client id
authUri: https://accounts.google.com/o/oauth2/auth
tokenUri: https://oauth2.googleapis.com/token
authProviderX509CertUrl: https://www.googleapis.com/oauth2/v1/certs
clientX509CertUrl: clientX509 Certificate Url
bucketName: bucket name
prefix: prefix
```
{% partial file="/v1.5/connectors/yaml/database/source-config.md" /%}
{% partial file="/v1.5/connectors/yaml/ingestion-sink.md" /%}
{% partial file="/v1.5/connectors/yaml/workflow-config.md" /%}
{% /codeBlock %}
{% /codePreview %}
### This is a sample config for Datalake using Azure:
{% codePreview %}
{% codeInfoContainer %}
#### Source Configuration - Service Connection
{% codeInfo srNumber=9 %}
- **Client ID** : Client ID of the data storage account
- **Client Secret** : Client Secret of the account
- **Tenant ID** : Tenant ID under which the data storage account falls
- **Account Name** : Account Name of the data Storage
{% /codeInfo %}
{% partial file="/v1.5/connectors/yaml/database/source-config-def.md" /%}
{% partial file="/v1.5/connectors/yaml/ingestion-sink-def.md" /%}
{% partial file="/v1.5/connectors/yaml/workflow-config-def.md" /%}
{% /codeInfoContainer %}
{% codeBlock fileName="filename.yaml" %}
```yaml {% isCodeBlock=true %}
# Datalake with Azure
source:
type: datalake
serviceName: local_datalake
serviceConnection:
config:
type: Datalake
configSource:
```
```yaml {% srNumber=9 %}
securityConfig:
clientId: client-id
clientSecret: client-secret
tenantId: tenant-id
accountName: account-name
prefix: prefix
```
{% partial file="/v1.5/connectors/yaml/database/source-config.md" /%}
{% partial file="/v1.5/connectors/yaml/ingestion-sink.md" /%}
{% partial file="/v1.5/connectors/yaml/workflow-config.md" /%}
{% /codeBlock %}
{% /codePreview %}
{% partial file="/v1.5/connectors/yaml/ingestion-cli.md" /%}
## dbt Integration
You can learn more about how to ingest dbt models' definitions and their lineage [here](/connectors/ingestion/workflows/dbt).

View File

@ -1,5 +1,5 @@
---
title: Datalake Connector Troubleshooting
title: Domo Database Connector Troubleshooting
slug: /connectors/database/domo-database/troubleshoot
---

View File

@ -0,0 +1,82 @@
---
title: GCS Datalake
slug: /connectors/database/gcs-datalake
---
{% connectorDetailsHeader
name="GCS Datalake"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Metadata", "Data Profiler", "Data Quality"]
unavailableFeatures=["Query Usage", "Lineage", "Column-level Lineage", "Owners", "dbt", "Tags", "Stored Procedures"]
/ %}
In this section, we provide guides and references to use the GCS Datalake connector.
Configure and schedule GCS Datalake metadata and profiler workflows from the OpenMetadata UI:
- [Requirements](#requirements)
- [Metadata Ingestion](#metadata-ingestion)
- [Data Profiler](/how-to-guides/data-quality-observability/profiler/workflow)
- [Data Quality](/how-to-guides/data-quality-observability/quality)
{% partial file="/v1.5/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/database/gcs-datalake/yaml"} /%}
## Requirements
{% note %}
The GCS Datalake connector supports extracting metadata from file types `JSON`, `CSV`, `TSV` & `Parquet`.
{% /note %}
## Metadata Ingestion
{% partial
file="/v1.5/connectors/metadata-ingestion-ui.md"
variables={
connector: "Datalake",
selectServicePath: "/images/v1.5/connectors/datalake/select-service.png",
addNewServicePath: "/images/v1.5/connectors/datalake/add-new-service.png",
serviceConnectionPath: "/images/v1.5/connectors/datalake/service-connection.png",
}
/%}
{% stepsContainer %}
{% extraContent parentTagName="stepsContainer" %}
#### Connection Details for GCS
- **Bucket Name**: A bucket name in a data lake is a unique identifier used to organize and store data objects.
It's similar to a folder name, but it's used for object storage rather than file storage.
- **Prefix**: The prefix of a data source in a data lake refers to the first part of the data path that identifies the source or origin of the data. It's used to organize and categorize data within the data lake, and can help users easily locate and access the data they need.
**GCS Credentials**
We support two ways of authenticating to GCS:
1. Passing the raw credential values from the GCP service account key file. This requires providing the following information from the key file:
1. Credentials type, e.g. `service_account`.
2. Project ID
3. Private Key ID
4. Private Key
5. Client Email
6. Client ID
7. Auth URI, [https://accounts.google.com/o/oauth2/auth](https://accounts.google.com/o/oauth2/auth) by default
8. Token URI, [https://oauth2.googleapis.com/token](https://oauth2.googleapis.com/token) by default
9. Authentication Provider X509 Certificate URL, [https://www.googleapis.com/oauth2/v1/certs](https://www.googleapis.com/oauth2/v1/certs) by default
10. Client X509 Certificate URL
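The same credential fields appear as the `gcpConfig` block when the connector is run externally (see the [Run Externally](/connectors/database/gcs-datalake/yaml) page). A minimal sketch with placeholder values only:

```yaml
# Sketch only: placeholder values illustrating how the credential fields above map
# to the GCS Datalake connection config when run externally. Replace them with the
# values from your GCP service account key file.
configSource:
  securityConfig:
    gcpConfig:
      type: service_account
      projectId: project-id
      privateKeyId: private-key-id
      privateKey: private-key
      clientEmail: client-email
      clientId: client-id
      authUri: https://accounts.google.com/o/oauth2/auth
      tokenUri: https://oauth2.googleapis.com/token
      authProviderX509CertUrl: https://www.googleapis.com/oauth2/v1/certs
      clientX509CertUrl: client-x509-cert-url
  bucketName: bucket-name       # Bucket to ingest from
  prefix: prefix                # Optional path prefix within the bucket
```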
{% partial file="/v1.5/connectors/database/advanced-configuration.md" /%}
{% /extraContent %}
{% partial file="/v1.5/connectors/test-connection.md" /%}
{% partial file="/v1.5/connectors/database/configure-ingestion.md" /%}
{% partial file="/v1.5/connectors/ingestion-schedule-and-deploy.md" /%}
{% /stepsContainer %}
{% partial file="/v1.5/connectors/troubleshooting.md" /%}
{% partial file="/v1.5/connectors/database/related.md" /%}

View File

@ -0,0 +1,133 @@
---
title: Run the GCS Datalake Connector Externally
slug: /connectors/database/gcs-datalake/yaml
---
{% connectorDetailsHeader
name="GCS Datalake"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Metadata", "Data Profiler", "Data Quality"]
unavailableFeatures=["Query Usage", "Lineage", "Column-level Lineage", "Owners", "dbt", "Tags", "Stored Procedures"]
/ %}
In this section, we provide guides and references to use the GCS Datalake connector.
Configure and schedule GCS Datalake metadata and profiler workflows from the OpenMetadata UI:
- [Requirements](#requirements)
- [Metadata Ingestion](#metadata-ingestion)
- [dbt Integration](#dbt-integration)
{% partial file="/v1.5/connectors/external-ingestion-deployment.md" /%}
## Requirements
**Note:** The GCS Datalake connector supports extracting metadata from file types `JSON`, `CSV`, `TSV` & `Parquet`.
### Python Requirements
{% partial file="/v1.5/connectors/python-requirements.md" /%}
If you are running an OpenMetadata version greater than 0.13, you will need to install the Datalake ingestion extra for GCS:
#### GCS installation
```bash
pip3 install "openmetadata-ingestion[datalake-gcp]"
```
#### If version <0.13
You will need to install the combined Datalake requirements, which cover GCS:
```bash
pip3 install "openmetadata-ingestion[datalake]"
```
## Metadata Ingestion
All connectors are defined as JSON Schemas. Here you can find the structure to create a connection to Datalake.
In order to create and run a Metadata Ingestion workflow, we will follow the steps to create a YAML configuration able to connect to the source, process the Entities if needed, and reach the OpenMetadata server.
The workflow is modeled around the following JSON Schema.
## 1. Define the YAML Config
### This is a sample config for Datalake using GCS:
{% codePreview %}
{% codeInfoContainer %}
#### Source Configuration - Service Connection
{% codeInfo srNumber=5 %}
* **type**: Credentials type, e.g. `service_account`.
* **projectId**
* **privateKey**
* **privateKeyId**
* **clientEmail**
* **clientId**
* **authUri**: [https://accounts.google.com/o/oauth2/auth](https://accounts.google.com/o/oauth2/auth) by default
* **tokenUri**: [https://oauth2.googleapis.com/token](https://oauth2.googleapis.com/token) by default
* **authProviderX509CertUrl**: [https://www.googleapis.com/oauth2/v1/certs](https://www.googleapis.com/oauth2/v1/certs) by default
* **clientX509CertUrl**
* **bucketName**: name of the bucket in GCS
* **Prefix**: prefix in gcp bucket
{% /codeInfo %}
{% partial file="/v1.5/connectors/yaml/database/source-config-def.md" /%}
{% partial file="/v1.5/connectors/yaml/ingestion-sink-def.md" /%}
{% partial file="/v1.5/connectors/yaml/workflow-config-def.md" /%}
{% /codeInfoContainer %}
{% codeBlock fileName="filename.yaml" %}
```yaml {% isCodeBlock=true %}
source:
type: datalake
serviceName: local_datalake
serviceConnection:
config:
type: Datalake
configSource:
securityConfig:
```
```yaml {% srNumber=5 %}
gcpConfig:
type: type of account
projectId: project id
privateKeyId: private key id
privateKey: private key
clientEmail: client email
clientId: client id
authUri: https://accounts.google.com/o/oauth2/auth
tokenUri: https://oauth2.googleapis.com/token
authProviderX509CertUrl: https://www.googleapis.com/oauth2/v1/certs
clientX509CertUrl: clientX509 Certificate Url
bucketName: bucket name
prefix: prefix
```
{% partial file="/v1.5/connectors/yaml/database/source-config.md" /%}
{% partial file="/v1.5/connectors/yaml/ingestion-sink.md" /%}
{% partial file="/v1.5/connectors/yaml/workflow-config.md" /%}
{% /codeBlock %}
{% /codePreview %}
{% partial file="/v1.5/connectors/yaml/ingestion-cli.md" /%}
## dbt Integration
You can learn more about how to ingest dbt models' definitions and their lineage [here](/connectors/ingestion/workflows/dbt).

View File

@ -1,30 +1,30 @@
---
title: Datalake
slug: /connectors/database/datalake
title: S3 Datalake
slug: /connectors/database/s3-datalake
---
{% connectorDetailsHeader
name="Datalake"
name="S3 Datalake"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Metadata", "Data Profiler", "Data Quality"]
unavailableFeatures=["Query Usage", "Lineage", "Column-level Lineage", "Owners", "dbt", "Tags", "Stored Procedures"]
/ %}
In this section, we provide guides and references to use the Datalake connector.
In this section, we provide guides and references to use the S3 Datalake connector.
Configure and schedule Datalake metadata and profiler workflows from the OpenMetadata UI:
Configure and schedule S3 Datalake metadata and profiler workflows from the OpenMetadata UI:
- [Requirements](#requirements)
- [Metadata Ingestion](#metadata-ingestion)
- [Data Profiler](/how-to-guides/data-quality-observability/profiler/workflow)
- [Data Quality](/how-to-guides/data-quality-observability/quality)
{% partial file="/v1.5/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/database/datalake/yaml"} /%}
{% partial file="/v1.5/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/database/s3-datalake/yaml"} /%}
## Requirements
{% note %}
The Datalake connector supports extracting metadata from file types `JSON`, `CSV`, `TSV` & `Parquet`.
The S3 Datalake connector supports extracting metadata from file types `JSON`, `CSV`, `TSV` & `Parquet`.
{% /note %}
### S3 Permissions
@ -50,13 +50,6 @@ To execute metadata extraction AWS account should have enough access to fetch re
}
```
### ADLS Permissions
To extract metadata from Azure ADLS (Storage Account - StorageV2), you will need an **App Registration** with the following
permissions on the Storage Account:
- Storage Blob Data Contributor
- Storage Queue Data Contributor
## Metadata Ingestion
{% partial
@ -134,45 +127,6 @@ Find more information about the [Role Session Name](https://docs.aws.amazon.com/
Find more information about [Source Identity](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html#:~:text=Required%3A%20No-,SourceIdentity,-The%20source%20identity).
#### Connection Details for GCS
- **Bucket Name**: A bucket name in DataLake is a unique identifier used to organize and store data objects.
It's similar to a folder name, but it's used for object storage rather than file storage.
- **Prefix**: The prefix of a data source in datalake refers to the first part of the data path that identifies the source or origin of the data. It's used to organize and categorize data within the datalake, and can help users easily locate and access the data they need.
**GCS Credentials**
We support two ways of authenticating to GCS:
1. Passing the raw credential values provided by BigQuery. This requires us to provide the following information, all provided by BigQuery:
1. Credentials type, e.g. `service_account`.
2. Project ID
3. Private Key ID
4. Private Key
5. Client Email
6. Client ID
7. Auth URI, [https://accounts.google.com/o/oauth2/auth](https://accounts.google.com/o/oauth2/auth) by default
8. Token URI, [https://oauth2.googleapis.com/token](https://oauth2.googleapis.com/token) by default
9. Authentication Provider X509 Certificate URL, [https://www.googleapis.com/oauth2/v1/certs](https://www.googleapis.com/oauth2/v1/certs) by default
10. Client X509 Certificate URL
#### Connection Details for Azure
- **Azure Credentials**
- **Client ID** : Client ID of the data storage account
- **Client Secret** : Client Secret of the account
- **Tenant ID** : Tenant ID under which the data storage account falls
- **Account Name** : Account Name of the data Storage
- **Required Roles**
Please make sure the following roles associated with the data storage account.
- `Storage Blob Data Contributor`
- `Storage Queue Data Contributor`
The current approach for authentication is based on `app registration`, reach out to us on [slack](https://slack.open-metadata.org/) if you find the need for another auth system
{% partial file="/v1.5/connectors/database/advanced-configuration.md" /%}

View File

@ -0,0 +1,15 @@
---
title: S3 Datalake Connector Troubleshooting
slug: /connectors/database/s3-datalake/troubleshooting
---
# Troubleshooting
Learn how to resolve the most common problems people encounter in the S3 Datalake connector.
* **'Access Denied' error when reading from S3 bucket**
Please ensure you have a Bucket Policy with the permissions explained in the requirements section [here](/connectors/database/s3-datalake).

View File

@ -0,0 +1,145 @@
---
title: Run the S3 Datalake Connector Externally
slug: /connectors/database/s3-datalake/yaml
---
{% connectorDetailsHeader
name="S3 Datalake"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Metadata", "Data Profiler", "Data Quality"]
unavailableFeatures=["Query Usage", "Lineage", "Column-level Lineage", "Owners", "dbt", "Tags", "Stored Procedures"]
/ %}
In this section, we provide guides and references to use the S3 Datalake connector.
Configure and schedule S3 Datalake metadata and profiler workflows from the OpenMetadata UI:
- [Requirements](#requirements)
- [Metadata Ingestion](#metadata-ingestion)
- [dbt Integration](#dbt-integration)
{% partial file="/v1.5/connectors/external-ingestion-deployment.md" /%}
## Requirements
**Note:** The S3 Datalake connector supports extracting metadata from file types `JSON`, `CSV`, `TSV` & `Parquet`.
### S3 Permissions
To execute metadata extraction, the AWS account should have enough access to fetch the required data. The <strong>Bucket Policy</strong> in AWS requires at least these permissions:
```json
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:GetObject",
"s3:ListBucket"
],
"Resource": [
"arn:aws:s3:::<my bucket>",
"arn:aws:s3:::<my bucket>/*"
]
}
]
}
```
### Python Requirements
{% partial file="/v1.5/connectors/python-requirements.md" /%}
If you are running an OpenMetadata version greater than 0.13, you will need to install the Datalake ingestion extra for S3:
#### S3 installation
```bash
pip3 install "openmetadata-ingestion[datalake-s3]"
```
#### If version <0.13
You will need to install the combined Datalake requirements, which cover S3:
```bash
pip3 install "openmetadata-ingestion[datalake]"
```
## Metadata Ingestion
All connectors are defined as JSON Schemas. Here you can find the structure to create a connection to Datalake.
In order to create and run a Metadata Ingestion workflow, we will follow the steps to create a YAML configuration able to connect to the source, process the Entities if needed, and reach the OpenMetadata server.
The workflow is modeled around the following JSON Schema.
## 1. Define the YAML Config
#### Source Configuration - Source Config using AWS S3
### This is a sample config for Datalake using AWS S3:
{% codePreview %}
{% codeInfoContainer %}
#### Source Configuration - Service Connection
{% codeInfo srNumber=1 %}
* **awsAccessKeyId**: Enter your secure access key ID for your S3 connection. The specified key ID should be authorized to read all the buckets you want to include in the metadata ingestion workflow.
* **awsSecretAccessKey**: Enter the Secret Access Key (the passcode key pair to the key ID from above).
* **awsRegion**: Specify the region in which your S3 bucket is located. This setting is required even if you have configured a local AWS profile.
* **schemaFilterPattern** and **tableFilterPattern**: Note that both `schemaFilterPattern` and `tableFilterPattern` support regex as `include` or `exclude`, as in the sketch below.
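A minimal, illustrative `tableFilterPattern` (placed under `sourceConfig.config`) that only ingests tables whose names match the given regexes:

```yaml
# Illustrative only: include-style table filter using regex patterns.
tableFilterPattern:
  includes:
    - users
    - type_test
```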
{% /codeInfo %}
{% partial file="/v1.5/connectors/yaml/database/source-config-def.md" /%}
{% partial file="/v1.5/connectors/yaml/ingestion-sink-def.md" /%}
{% partial file="/v1.5/connectors/yaml/workflow-config-def.md" /%}
{% /codeInfoContainer %}
{% codeBlock fileName="filename.yaml" %}
```yaml {% isCodeBlock=true %}
source:
type: datalake
serviceName: local_datalake
serviceConnection:
config:
type: Datalake
```
```yaml {% srNumber=1 %}
configSource:
securityConfig:
awsAccessKeyId: aws access key id
awsSecretAccessKey: aws secret access key
awsRegion: aws region
bucketName: bucket name
prefix: prefix
```
{% partial file="/v1.5/connectors/yaml/database/source-config.md" /%}
{% partial file="/v1.5/connectors/yaml/ingestion-sink.md" /%}
{% partial file="/v1.5/connectors/yaml/workflow-config.md" /%}
{% /codeBlock %}
{% /codePreview %}
{% partial file="/v1.5/connectors/yaml/ingestion-cli.md" /%}
## dbt Integration
You can learn more about how to ingest dbt models' definitions and their lineage [here](/connectors/ingestion/workflows/dbt).

View File

@ -4,7 +4,7 @@ slug: /connectors/pipeline/dbtcloud
---
{% connectorDetailsHeader
name="DBTCloud"
name="dbt Cloud"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Pipelines", "Pipeline Status", "Lineage"]
@ -52,9 +52,9 @@ To know more about permissions required refer [here](https://docs.getdbt.com/doc
file="/v1.5/connectors/metadata-ingestion-ui.md"
variables={
connector: "DBTCloud",
selectServicePath: "/images/v1.5/connectors/dbtcloud/select-service.webp",
addNewServicePath: "/images/v1.5/connectors/dbtcloud/add-new-service.webp",
serviceConnectionPath: "/images/v1.5/connectors/dbtcloud/service-connection.webp",
selectServicePath: "/images/v1.5/connectors/dbtcloud/select-service.png",
addNewServicePath: "/images/v1.5/connectors/dbtcloud/add-new-service.png",
serviceConnectionPath: "/images/v1.5/connectors/dbtcloud/service-connection.png",
}
/%}

View File

@ -4,7 +4,7 @@ slug: /connectors/pipeline/dbtcloud/yaml
---
{% connectorDetailsHeader
name="DBTCloud"
name="dbt Cloud"
stage="PROD"
platform="Collate"
availableFeatures=["Pipelines", "Pipeline Status", "Tags"]

View File

@ -19,7 +19,7 @@ in the sources and send that to OpenMetadata. However, what happens with generic
In these systems we can have different types of information:
- Unstructured data, such as images or videos,
- Structured data in single and independent files (which can also be ingested with the [Data Lake connector](/connectors/database/datalake))
- Structured data in single and independent files (which can also be ingested with the [S3 Data Lake connector](/connectors/database/s3-datalake))
- Structured data in partitioned files, e.g., `my_table/year=2022/...parquet`, `my_table/year=2023/...parquet`, etc.
{% note %}

View File

@ -1,10 +1,10 @@
---
title: S3
title: S3 Storage
slug: /connectors/storage/s3
---
{% connectorDetailsHeader
name="S3"
name="S3 Storage"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Metadata", "Structured Containers", "Unstructured Containers"]

View File

@ -1,10 +1,10 @@
---
title: Run the S3 Connector Externally
title: Run the S3 Storage Connector Externally
slug: /connectors/storage/s3/yaml
---
{% connectorDetailsHeader
name="S3"
name="S3 Storage"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Metadata"]

View File

@ -27,7 +27,7 @@ href="/connectors"%}
Refer to the Docs to ingest metadata from multiple sources - Databases, Dashboards, Pipelines, ML Models, Messaging, Storage, as well as Metadata services.
{%/inlineCallout%}
- **Database Services:** [Athena](/connectors/database/athena), [AzureSQL](/connectors/database/azuresql), [BigQuery](/connectors/database/bigquery), [Clickhouse](/connectors/database/clickhouse), [Databricks](/connectors/database/databricks), [Datalake](/connectors/database/datalake), [DB2](/connectors/database/db2), [DeltaLake](/connectors/database/deltalake), [Domo Database](/connectors/database/domo-database), [Druid](/connectors/database/druid), [DynamoDB](/connectors/database/dynamodb), [Glue](/connectors/database/glue), [Hive](/connectors/database/hive), [Impala](/connectors/database/impala), [MariaDB](/connectors/database/mariadb), [MongoDB](/connectors/database/mongodb), [MSSQL](/connectors/database/mssql), [MySQL](/connectors/database/mysql), [Oracle](/connectors/database/oracle), [PinotDB](/connectors/database/pinotdb), [Postgres](/connectors/database/postgres), [Presto](/connectors/database/presto), [Redshift](/connectors/database/redshift), [Salesforce](/connectors/database/salesforce), [SAP Hana](/connectors/database/sap-hana), [SAS](/connectors/database/sas), [SingleStore](/connectors/database/singlestore), [Snowflake](/connectors/database/snowflake), [SQLite](/connectors/database/sqlite), [Trino](/connectors/database/trino), and [Vertica](/connectors/database/vertica).
- **Database Services:** [ADLS Datalake](/connectors/database/adls-datalake), [Athena](/connectors/database/athena), [AzureSQL](/connectors/database/azuresql), [BigQuery](/connectors/database/bigquery), [Clickhouse](/connectors/database/clickhouse), [Databricks](/connectors/database/databricks), [DB2](/connectors/database/db2), [DeltaLake](/connectors/database/deltalake), [Domo Database](/connectors/database/domo-database), [Druid](/connectors/database/druid), [DynamoDB](/connectors/database/dynamodb), [GCS Datalake](/connectors/database/gcs-datalake), [Glue](/connectors/database/glue), [Hive](/connectors/database/hive), [Impala](/connectors/database/impala), [MariaDB](/connectors/database/mariadb), [MongoDB](/connectors/database/mongodb), [MSSQL](/connectors/database/mssql), [MySQL](/connectors/database/mysql), [Oracle](/connectors/database/oracle), [PinotDB](/connectors/database/pinotdb), [Postgres](/connectors/database/postgres), [Presto](/connectors/database/presto), [Redshift](/connectors/database/redshift), [Salesforce](/connectors/database/salesforce), [SAP Hana](/connectors/database/sap-hana), [SAS](/connectors/database/sas), [SingleStore](/connectors/database/singlestore), [Snowflake](/connectors/database/snowflake), [SQLite](/connectors/database/sqlite), [S3 Datalake](/connectors/database/s3-datalake), [Trino](/connectors/database/trino), and [Vertica](/connectors/database/vertica).
- **Dashboard Services:** [Domo Dashboard](/connectors/dashboard/domo-dashboard), [Looker](/connectors/dashboard/looker), [Metabase](/connectors/dashboard/metabase), [Mode](/connectors/dashboard/mode), [PowerBI](/connectors/dashboard/powerbi), [Qlik Sense](/connectors/dashboard/qliksense), [QuickSight](/connectors/dashboard/quicksight), [Redash](/connectors/dashboard/redash), [Superset](/connectors/dashboard/superset), and [Tableau](/connectors/dashboard/tableau).

View File

@ -7,7 +7,7 @@ slug: /how-to-guides/guide-for-data-users/data-ownership
## Data Asset Ownership
In OpenMetadata, either a **team** or an **individual user** can be the owner of a data asset. Owners have access to perform all the operations on a data asset. For example, edit description, tags, glossary terms, etc.
In OpenMetadata, either a **team** or **multiple users** can own a data asset. Owners have access to perform all the operations on a data asset. For example, editing the description, tags, glossary terms, etc.
## Assign Data Ownership

View File

@ -220,6 +220,12 @@ site_menu:
- category: Connectors / Database
url: /connectors/database
- category: Connectors / Database / ADLS Datalake
url: /connectors/database/adls-datalake
- category: Connectors / Database / ADLS Datalake / Run Externally
url: /connectors/database/adls-datalake/yaml
- category: Connectors / Database / ADLS Datalake / Troubleshooting
url: /connectors/database/adls-datalake/troubleshooting
- category: Connectors / Database / Athena
url: /connectors/database/athena
- category: Connectors / Database / Athena / Run Externally
@ -256,12 +262,6 @@ site_menu:
url: /connectors/database/databricks/yaml
- category: Connectors / Database / Databricks / Troubleshooting
url: /connectors/database/databricks/troubleshooting
- category: Connectors / Database / Datalake
url: /connectors/database/datalake
- category: Connectors / Database / Datalake / Run Externally
url: /connectors/database/datalake/yaml
- category: Connectors / Database / Datalake / Troubleshooting
url: /connectors/database/datalake/troubleshooting
- category: Connectors / Database / DB2
url: /connectors/database/db2
- category: Connectors / Database / DB2 / Run Externally
@ -288,6 +288,10 @@ site_menu:
url: /connectors/database/dynamodb
- category: Connectors / Database / DynamoDB / Run Externally
url: /connectors/database/dynamodb/yaml
- category: Connectors / Database / GCS Datalake
url: /connectors/database/gcs-datalake
- category: Connectors / Database / GCS Datalake / Run Externally
url: /connectors/database/gcs-datalake/yaml
- category: Connectors / Database / Glue
url: /connectors/database/glue
- category: Connectors / Database / Glue / Run Externally
@ -388,6 +392,12 @@ site_menu:
url: /connectors/database/synapse/yaml
- category: Connectors / Database / Synapse / Troubleshooting
url: /connectors/database/synapse/troubleshooting
- category: Connectors / Database / S3 Datalake
url: /connectors/database/s3-datalake
- category: Connectors / Database / S3 Datalake / Run Externally
url: /connectors/database/s3-datalake/yaml
- category: Connectors / Database / S3 Datalake / Troubleshooting
url: /connectors/database/s3-datalake/troubleshooting
- category: Connectors / Database / Teradata
url: /connectors/database/teradata
- category: Connectors / Database / Teradata / Run Externally
@ -505,9 +515,9 @@ site_menu:
url: /connectors/pipeline/dagster
- category: Connectors / Pipeline / Dagster / Run Externally
url: /connectors/pipeline/dagster/yaml
- category: Connectors / Pipeline / DBTCloud
- category: Connectors / Pipeline / dbt Cloud
url: /connectors/pipeline/dbtcloud
- category: Connectors / Pipeline / DBTCloud / Run Externally
- category: Connectors / Pipeline / dbt Cloud / Run Externally
url: /connectors/pipeline/dbtcloud/yaml
- category: Connectors / Pipeline / KafkaConnect
url: /connectors/pipeline/kafkaconnect
@ -559,9 +569,9 @@ site_menu:
- category: Connectors / Storage
url: /connectors/storage
- category: Connectors / Storage / S3
- category: Connectors / Storage / S3 Storage
url: /connectors/storage/s3
- category: Connectors / Storage / S3 / Run Externally
- category: Connectors / Storage / S3 Storage / Run Externally
url: /connectors/storage/s3/yaml
- category: Connectors / Storage / GCS
url: /connectors/storage/gcs

View File

@ -43,6 +43,12 @@ site_menu:
- category: Connectors / Database
url: /connectors/database
- category: Connectors / Database / ADLS Datalake
url: /connectors/database/adls-datalake
- category: Connectors / Database / ADLS Datalake / Run Externally
url: /connectors/database/adls-datalake/yaml
- category: Connectors / Database / ADLS Datalake / Troubleshooting
url: /connectors/database/adls-datalake/troubleshooting
- category: Connectors / Database / Athena
url: /connectors/database/athena
- category: Connectors / Database / Athena / Run Externally
@ -79,12 +85,6 @@ site_menu:
url: /connectors/database/databricks/yaml
- category: Connectors / Database / Databricks / Troubleshooting
url: /connectors/database/databricks/troubleshooting
- category: Connectors / Database / Datalake
url: /connectors/database/datalake
- category: Connectors / Database / Datalake / Run Externally
url: /connectors/database/datalake/yaml
- category: Connectors / Database / Datalake / Troubleshooting
url: /connectors/database/datalake/troubleshooting
- category: Connectors / Database / DB2
url: /connectors/database/db2
- category: Connectors / Database / DB2 / Run Externally
@ -111,6 +111,10 @@ site_menu:
url: /connectors/database/dynamodb
- category: Connectors / Database / DynamoDB / Run Externally
url: /connectors/database/dynamodb/yaml
- category: Connectors / Database / GCS Datalake
url: /connectors/database/gcs-datalake
- category: Connectors / Database / GCS Datalake / Run Externally
url: /connectors/database/gcs-datalake/yaml
- category: Connectors / Database / Glue
url: /connectors/database/glue
- category: Connectors / Database / Glue / Run Externally
@ -205,6 +209,12 @@ site_menu:
url: /connectors/database/synapse/yaml
- category: Connectors / Database / Synapse / Troubleshooting
url: /connectors/database/synapse/troubleshooting
- category: Connectors / Database / S3 Datalake
url: /connectors/database/s3-datalake
- category: Connectors / Database / S3 Datalake / Run Externally
url: /connectors/database/s3-datalake/yaml
- category: Connectors / Database / S3 Datalake / Troubleshooting
url: /connectors/database/s3-datalake/troubleshooting
- category: Connectors / Database / Trino
url: /connectors/database/trino
- category: Connectors / Database / Trino / Run Externally
@ -318,9 +328,9 @@ site_menu:
url: /connectors/pipeline/dagster
- category: Connectors / Pipeline / Dagster / Run Externally
url: /connectors/pipeline/dagster/yaml
- category: Connectors / Pipeline / DBTCloud
- category: Connectors / Pipeline / dbt Cloud
url: /connectors/pipeline/dbtcloud
- category: Connectors / Pipeline / DBTCloud / Run Externally
- category: Connectors / Pipeline / dbt Cloud / Run Externally
url: /connectors/pipeline/dbtcloud/yaml
- category: Connectors / Pipeline / KafkaConnect
url: /connectors/pipeline/kafkaconnect
@ -377,9 +387,9 @@ site_menu:
- category: Connectors / Storage
url: /connectors/storage
- category: Connectors / Storage / S3
- category: Connectors / Storage / S3 Storage
url: /connectors/storage/s3
- category: Connectors / Storage / S3 / Run Externally
- category: Connectors / Storage / S3 Storage / Run Externally
url: /connectors/storage/s3/yaml
- category: Connectors / Storage / GCS
url: /connectors/storage/gcs

View File

@ -0,0 +1,83 @@
---
title: ADLS Datalake
slug: /connectors/database/adls-datalake
---
{% connectorDetailsHeader
name="ADLS Datalake"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Metadata", "Data Profiler", "Data Quality"]
unavailableFeatures=["Query Usage", "Lineage", "Column-level Lineage", "Owners", "dbt", "Tags", "Stored Procedures"]
/ %}
In this section, we provide guides and references to use the ADLS Datalake connector.
Configure and schedule ADLS Datalake metadata and profiler workflows from the OpenMetadata UI:
- [Requirements](#requirements)
- [Metadata Ingestion](#metadata-ingestion)
- [Data Profiler](/how-to-guides/data-quality-observability/profiler/workflow)
- [Data Quality](/how-to-guides/data-quality-observability/quality)
{% partial file="/v1.6/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/database/adls-datalake/yaml"} /%}
## Requirements
{% note %}
The ADLS Datalake connector supports extracting metadata from file types `JSON`, `CSV`, `TSV` & `Parquet`.
{% /note %}
### ADLS Permissions
To extract metadata from Azure ADLS (Storage Account - StorageV2), you will need an **App Registration** with the following
permissions on the Storage Account:
- Storage Blob Data Contributor
- Storage Queue Data Contributor
## Metadata Ingestion
{% partial
file="/v1.6/connectors/metadata-ingestion-ui.md"
variables={
connector: "Datalake",
selectServicePath: "/images/v1.6/connectors/datalake/select-service.png",
addNewServicePath: "/images/v1.6/connectors/datalake/add-new-service.png",
serviceConnectionPath: "/images/v1.6/connectors/datalake/service-connection.png",
}
/%}
{% stepsContainer %}
{% extraContent parentTagName="stepsContainer" %}
#### Connection Details for Azure
- **Azure Credentials**
- **Client ID** : Client ID of the data storage account
- **Client Secret** : Client Secret of the account
- **Tenant ID** : Tenant ID under which the data storage account falls
- **Account Name** : Account Name of the data Storage
- **Required Roles**
Please make sure the following roles are associated with the data storage account.
- `Storage Blob Data Contributor`
- `Storage Queue Data Contributor`
The current approach for authentication is based on `app registration`. Reach out to us on [Slack](https://slack.open-metadata.org/) if you need support for another authentication mechanism.
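For reference, these UI fields map to the connection configuration used when the connector is run externally (see the [Run Externally](/connectors/database/adls-datalake/yaml) page). A minimal sketch of that mapping, with placeholder values only, is shown below:

```yaml
# Sketch only: placeholder values illustrating how the UI fields above map to the
# ADLS Datalake connection config when run externally. Replace them with your own
# App Registration and storage account details.
configSource:
  securityConfig:
    clientId: client-id          # Application (client) ID
    clientSecret: client-secret  # Client secret value
    tenantId: tenant-id          # Directory (tenant) ID
    accountName: account-name    # ADLS storage account name
  prefix: prefix                 # Optional path prefix to narrow down ingestion
```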
{% partial file="/v1.6/connectors/database/advanced-configuration.md" /%}
{% /extraContent %}
{% partial file="/v1.6/connectors/test-connection.md" /%}
{% partial file="/v1.6/connectors/database/configure-ingestion.md" /%}
{% partial file="/v1.6/connectors/ingestion-schedule-and-deploy.md" /%}
{% /stepsContainer %}
{% partial file="/v1.6/connectors/troubleshooting.md" /%}
{% partial file="/v1.6/connectors/database/related.md" /%}

View File

@ -0,0 +1,22 @@
---
title: ADLS Datalake Connector Troubleshooting
slug: /connectors/database/adls-datalake/troubleshooting
---
# Troubleshooting
Learn how to resolve the most common problems people encounter in the ADLS Datalake connector.
#### Azure Datalake credentials details
##### Where can I find the Client Secret?
- Log in to the `Azure Portal`
- Find and click on your application
- Select `Certificates & secrets` under the `Manage` section
{% image
src="/images/v1.6/connectors/datalake/troubleshoot-clientId.png"
alt="Configure service connection"
caption="Find Client ID" /%}

View File

@ -0,0 +1,114 @@
---
title: Run the ADLS Datalake Connector Externally
slug: /connectors/database/adls-datalake/yaml
---
{% connectorDetailsHeader
name="ADLS Datalake"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Metadata", "Data Profiler", "Data Quality"]
unavailableFeatures=["Query Usage", "Lineage", "Column-level Lineage", "Owners", "dbt", "Tags", "Stored Procedures"]
/ %}
In this section, we provide guides and references to use the ADLS Datalake connector.
Configure and schedule ADLS Datalake metadata and profiler workflows externally:
- [Requirements](#requirements)
- [Metadata Ingestion](#metadata-ingestion)
- [dbt Integration](#dbt-integration)
{% partial file="/v1.6/connectors/external-ingestion-deployment.md" /%}
## Requirements
**Note:** The ADLS Datalake connector supports extracting metadata from file types `JSON`, `CSV`, `TSV` & `Parquet`.
### ADLS Permissions
To extract metadata from Azure ADLS (Storage Account - StorageV2), you will need an **App Registration** with the following
permissions on the Storage Account:
- Storage Blob Data Contributor
- Storage Queue Data Contributor
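Before running the workflow you can optionally sanity-check that these roles took effect, for example by listing blobs with the Azure CLI. This is a sketch under the assumption that you are authenticated as (or on behalf of) the App Registration and that `<storage-account>` and `<container>` are replaced with your own values.
```bash
# Sketch: --auth-mode login uses Azure AD credentials instead of account keys.
az storage blob list \
  --account-name "<storage-account>" \
  --container-name "<container>" \
  --auth-mode login \
  --output table
```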
### Python Requirements
{% partial file="/v1.6/connectors/python-requirements.md" /%}
#### Azure installation
```bash
pip3 install "openmetadata-ingestion[datalake-azure]"
```
## Metadata Ingestion
All connectors are defined as JSON Schemas. Here you can find the structure to create a connection to ADLS Datalake.
In order to create and run a Metadata Ingestion workflow, we will follow the steps to create a YAML configuration that connects to the source, processes the entities if needed, and sends the results to the OpenMetadata server.
The workflow is modeled around the following JSON Schema.
## 1. Define the YAML Config
### This is a sample config for Datalake using Azure:
{% codePreview %}
{% codeInfoContainer %}
#### Source Configuration - Service Connection
{% codeInfo srNumber=9 %}
- **Client ID** : Client ID of the data storage account
- **Client Secret** : Client Secret of the account
- **Tenant ID** : Tenant ID under which the data storage account falls
- **Account Name** : Account Name of the data Storage
{% /codeInfo %}
{% partial file="/v1.6/connectors/yaml/database/source-config-def.md" /%}
{% partial file="/v1.6/connectors/yaml/ingestion-sink-def.md" /%}
{% partial file="/v1.6/connectors/yaml/workflow-config-def.md" /%}
{% /codeInfoContainer %}
{% codeBlock fileName="filename.yaml" %}
```yaml {% isCodeBlock=true %}
# Datalake with Azure
source:
type: datalake
serviceName: local_datalake
serviceConnection:
config:
type: Datalake
configSource:
```
```yaml {% srNumber=9 %}
securityConfig:
clientId: client-id
clientSecret: client-secret
tenantId: tenant-id
accountName: account-name
prefix: prefix
```
{% partial file="/v1.6/connectors/yaml/database/source-config.md" /%}
{% partial file="/v1.6/connectors/yaml/ingestion-sink.md" /%}
{% partial file="/v1.6/connectors/yaml/workflow-config.md" /%}
{% /codeBlock %}
{% /codePreview %}
{% partial file="/v1.6/connectors/yaml/ingestion-cli.md" /%}
## dbt Integration
You can learn more about how to ingest dbt models' definitions and their lineage [here](/connectors/ingestion/workflows/dbt).

View File

@ -1,292 +0,0 @@
---
title: Run the Datalake Connector Externally
slug: /connectors/database/datalake/yaml
---
{% connectorDetailsHeader
name="Datalake"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Metadata", "Data Profiler", "Data Quality"]
unavailableFeatures=["Query Usage", "Lineage", "Column-level Lineage", "Owners", "dbt", "Tags", "Stored Procedures"]
/ %}
In this section, we provide guides and references to use the Datalake connector.
Configure and schedule Datalake metadata and profiler workflows from the OpenMetadata UI:
- [Requirements](#requirements)
- [Metadata Ingestion](#metadata-ingestion)
- [dbt Integration](#dbt-integration)
{% partial file="/v1.5/connectors/external-ingestion-deployment.md" /%}
## Requirements
**Note:** Datalake connector supports extracting metadata from file types `JSON`, `CSV`, `TSV` & `Parquet`.
### S3 Permissions
To execute metadata extraction, the AWS account should have enough access to fetch the required data. The **Bucket Policy** in AWS requires at least these permissions:
```json
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:GetObject",
"s3:ListBucket"
],
"Resource": [
"arn:aws:s3:::<my bucket>",
"arn:aws:s3:::<my bucket>/*"
]
}
]
}
```
### ADLS Permissions
To extract metadata from Azure ADLS (Storage Account - StorageV2), you will need an **App Registration** with the following
permissions on the Storage Account:
- Storage Blob Data Contributor
- Storage Queue Data Contributor
### Python Requirements
{% partial file="/v1.5/connectors/python-requirements.md" /%}
If running OpenMetadata version greater than 0.13, you will need to install the Datalake ingestion for GCS or S3:
#### S3 installation
```bash
pip3 install "openmetadata-ingestion[datalake-s3]"
```
#### GCS installation
```bash
pip3 install "openmetadata-ingestion[datalake-gcp]"
```
#### Azure installation
```bash
pip3 install "openmetadata-ingestion[datalake-azure]"
```
#### If version <0.13
You will be installing the requirements together for S3 and GCS
```bash
pip3 install "openmetadata-ingestion[datalake]"
```
## Metadata Ingestion
All connectors are defined as JSON Schemas. Here you can find the structure to create a connection to Datalake.
In order to create and run a Metadata Ingestion workflow, we will follow the steps to create a YAML configuration able to connect to the source, process the Entities if needed, and reach the OpenMetadata server.
The workflow is modeled around the following JSON Schema.
## 1. Define the YAML Config
#### Source Configuration - Source Config using AWS S3
### This is a sample config for Datalake using AWS S3:
{% codePreview %}
{% codeInfoContainer %}
#### Source Configuration - Service Connection
{% codeInfo srNumber=1 %}
* **awsAccessKeyId**: Enter your secure access key ID for your S3 connection. The specified key ID should be authorized to read the buckets you want to include in the metadata ingestion workflow.
* **awsSecretAccessKey**: Enter the Secret Access Key (the passcode key pair to the key ID from above).
* **awsRegion**: Specify the region in which your S3 bucket is located. This setting is required even if you have configured a local AWS profile.
* **schemaFilterPattern** and **tableFilterPattern**: Note that the `schemaFilterPattern` and `tableFilterPattern` both support regex as `include` or `exclude`.
{% /codeInfo %}
{% partial file="/v1.5/connectors/yaml/database/source-config-def.md" /%}
{% partial file="/v1.5/connectors/yaml/ingestion-sink-def.md" /%}
{% partial file="/v1.5/connectors/yaml/workflow-config-def.md" /%}
{% /codeInfoContainer %}
{% codeBlock fileName="filename.yaml" %}
```yaml {% isCodeBlock=true %}
source:
type: datalake
serviceName: local_datalake
serviceConnection:
config:
type: Datalake
```
```yaml {% srNumber=1 %}
configSource:
securityConfig:
awsAccessKeyId: aws access key id
awsSecretAccessKey: aws secret access key
awsRegion: aws region
bucketName: bucket name
prefix: prefix
```
{% partial file="/v1.5/connectors/yaml/database/source-config.md" /%}
{% partial file="/v1.5/connectors/yaml/ingestion-sink.md" /%}
{% partial file="/v1.5/connectors/yaml/workflow-config.md" /%}
{% /codeBlock %}
{% /codePreview %}
### This is a sample config for Datalake using GCS:
{% codePreview %}
{% codeInfoContainer %}
#### Source Configuration - Service Connection
{% codeInfo srNumber=5 %}
* **type**: Credentials type, e.g. `service_account`.
* **projectId**
* **privateKey**
* **privateKeyId**
* **clientEmail**
* **clientId**
* **authUri**: [https://accounts.google.com/o/oauth2/auth](https://accounts.google.com/o/oauth2/auth) by default
* **tokenUri**: [https://oauth2.googleapis.com/token](https://oauth2.googleapis.com/token) by default
* **authProviderX509CertUrl**: [https://www.googleapis.com/oauth2/v1/certs](https://www.googleapis.com/oauth2/v1/certs) by default
* **clientX509CertUrl**
* **bucketName**: name of the bucket in GCS
* **Prefix**: prefix in gcp bucket
{% /codeInfo %}
{% partial file="/v1.5/connectors/yaml/database/source-config-def.md" /%}
{% partial file="/v1.5/connectors/yaml/ingestion-sink-def.md" /%}
{% partial file="/v1.5/connectors/yaml/workflow-config-def.md" /%}
{% /codeInfoContainer %}
{% codeBlock fileName="filename.yaml" %}
```yaml {% isCodeBlock=true %}
source:
type: datalake
serviceName: local_datalake
serviceConnection:
config:
type: Datalake
configSource:
securityConfig:
```
```yaml {% srNumber=5 %}
gcpConfig:
type: type of account
projectId: project id
privateKeyId: private key id
privateKey: private key
clientEmail: client email
clientId: client id
authUri: https://accounts.google.com/o/oauth2/auth
tokenUri: https://oauth2.googleapis.com/token
authProviderX509CertUrl: https://www.googleapis.com/oauth2/v1/certs
clientX509CertUrl: clientX509 Certificate Url
bucketName: bucket name
prefix: prefix
```
{% partial file="/v1.5/connectors/yaml/database/source-config.md" /%}
{% partial file="/v1.5/connectors/yaml/ingestion-sink.md" /%}
{% partial file="/v1.5/connectors/yaml/workflow-config.md" /%}
{% /codeBlock %}
{% /codePreview %}
### This is a sample config for Datalake using Azure:
{% codePreview %}
{% codeInfoContainer %}
#### Source Configuration - Service Connection
{% codeInfo srNumber=9 %}
- **Client ID** : Client ID of the data storage account
- **Client Secret** : Client Secret of the account
- **Tenant ID** : Tenant ID under which the data storage account falls
- **Account Name** : Account Name of the data Storage
{% /codeInfo %}
{% partial file="/v1.5/connectors/yaml/database/source-config-def.md" /%}
{% partial file="/v1.5/connectors/yaml/ingestion-sink-def.md" /%}
{% partial file="/v1.5/connectors/yaml/workflow-config-def.md" /%}
{% /codeInfoContainer %}
{% codeBlock fileName="filename.yaml" %}
```yaml {% isCodeBlock=true %}
# Datalake with Azure
source:
type: datalake
serviceName: local_datalake
serviceConnection:
config:
type: Datalake
configSource:
```
```yaml {% srNumber=9 %}
securityConfig:
clientId: client-id
clientSecret: client-secret
tenantId: tenant-id
accountName: account-name
prefix: prefix
```
{% partial file="/v1.5/connectors/yaml/database/source-config.md" /%}
{% partial file="/v1.5/connectors/yaml/ingestion-sink.md" /%}
{% partial file="/v1.5/connectors/yaml/workflow-config.md" /%}
{% /codeBlock %}
{% /codePreview %}
{% partial file="/v1.5/connectors/yaml/ingestion-cli.md" /%}
## dbt Integration
You can learn more about how to ingest dbt models' definitions and their lineage [here](/connectors/ingestion/workflows/dbt).

View File

@ -1,5 +1,5 @@
---
title: Datalake Connector Troubleshooting
title: Domo Database Connector Troubleshooting
slug: /connectors/database/domo-database/troubleshoot
---

View File

@ -0,0 +1,82 @@
---
title: GCS Datalake
slug: /connectors/database/gcs-datalake
---
{% connectorDetailsHeader
name="GCS Datalake"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Metadata", "Data Profiler", "Data Quality"]
unavailableFeatures=["Query Usage", "Lineage", "Column-level Lineage", "Owners", "dbt", "Tags", "Stored Procedures"]
/ %}
In this section, we provide guides and references to use the GCS Datalake connector.
Configure and schedule GCS Datalake metadata and profiler workflows from the OpenMetadata UI:
- [Requirements](#requirements)
- [Metadata Ingestion](#metadata-ingestion)
- [Data Profiler](/how-to-guides/data-quality-observability/profiler/workflow)
- [Data Quality](/how-to-guides/data-quality-observability/quality)
{% partial file="/v1.6/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/database/gcs-datalake/yaml"} /%}
## Requirements
{% note %}
The GCS Datalake connector supports extracting metadata from file types `JSON`, `CSV`, `TSV` & `Parquet`.
{% /note %}
## Metadata Ingestion
{% partial
file="/v1.6/connectors/metadata-ingestion-ui.md"
variables={
connector: "Datalake",
selectServicePath: "/images/v1.6/connectors/datalake/select-service.png",
addNewServicePath: "/images/v1.6/connectors/datalake/add-new-service.png",
serviceConnectionPath: "/images/v1.6/connectors/datalake/service-connection.png",
}
/%}
{% stepsContainer %}
{% extraContent parentTagName="stepsContainer" %}
#### Connection Details for GCS
- **Bucket Name**: A bucket name in a datalake is a unique identifier used to organize and store data objects.
It's similar to a folder name, but it's used for object storage rather than file storage.
- **Prefix**: The prefix of a data source is the first part of the data path that identifies the source or origin of the data. It's used to organize and categorize data within the datalake, and helps users easily locate and access the data they need. For example, for objects stored under `gs://my-bucket/landing/sales/`, the bucket name is `my-bucket` and the prefix is `landing/sales`.
**GCS Credentials**
We support two ways of authenticating to GCS:
1. Passing the raw credential values provided by Google Cloud. This requires us to provide the following information, all of which comes from your service account key file (see the sample key file after this list):
1. Credentials type, e.g. `service_account`.
2. Project ID
3. Private Key ID
4. Private Key
5. Client Email
6. Client ID
7. Auth URI, [https://accounts.google.com/o/oauth2/auth](https://accounts.google.com/o/oauth2/auth) by default
8. Token URI, [https://oauth2.googleapis.com/token](https://oauth2.googleapis.com/token) by default
9. Authentication Provider X509 Certificate URL, [https://www.googleapis.com/oauth2/v1/certs](https://www.googleapis.com/oauth2/v1/certs) by default
10. Client X509 Certificate URL
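These fields correspond one-to-one with the contents of a Google Cloud service account key file. As an illustration only, with every value below being a placeholder, such a key file looks like this:
```json
{
  "type": "service_account",
  "project_id": "my-project-id",
  "private_key_id": "0123456789abcdef",
  "private_key": "-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n",
  "client_email": "ingestion@my-project-id.iam.gserviceaccount.com",
  "client_id": "123456789012345678901",
  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
  "token_uri": "https://oauth2.googleapis.com/token",
  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/ingestion%40my-project-id.iam.gserviceaccount.com"
}
```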
{% partial file="/v1.6/connectors/database/advanced-configuration.md" /%}
{% /extraContent %}
{% partial file="/v1.6/connectors/test-connection.md" /%}
{% partial file="/v1.6/connectors/database/configure-ingestion.md" /%}
{% partial file="/v1.6/connectors/ingestion-schedule-and-deploy.md" /%}
{% /stepsContainer %}
{% partial file="/v1.6/connectors/troubleshooting.md" /%}
{% partial file="/v1.6/connectors/database/related.md" /%}

View File

@ -0,0 +1,133 @@
---
title: Run the GCS Datalake Connector Externally
slug: /connectors/database/gcs-datalake/yaml
---
{% connectorDetailsHeader
name="GCS Datalake"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Metadata", "Data Profiler", "Data Quality"]
unavailableFeatures=["Query Usage", "Lineage", "Column-level Lineage", "Owners", "dbt", "Tags", "Stored Procedures"]
/ %}
In this section, we provide guides and references to use the GCS Datalake connector.
Configure and schedule GCS Datalake metadata and profiler workflows externally:
- [Requirements](#requirements)
- [Metadata Ingestion](#metadata-ingestion)
- [dbt Integration](#dbt-integration)
{% partial file="/v1.6/connectors/external-ingestion-deployment.md" /%}
## Requirements
**Note:** The GCS Datalake connector supports extracting metadata from file types `JSON`, `CSV`, `TSV` & `Parquet`.
### Python Requirements
{% partial file="/v1.6/connectors/python-requirements.md" /%}
If running an OpenMetadata version greater than 0.13, you will need to install the Datalake ingestion support for GCS:
#### GCS installation
```bash
pip3 install "openmetadata-ingestion[datalake-gcp]"
```
#### If version <0.13
You will be installing the requirements for GCS
```bash
pip3 install "openmetadata-ingestion[datalake]"
```
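Before running the ingestion you can optionally confirm that the service account can actually see the bucket, for instance with `gsutil`. This is a sketch that assumes the Google Cloud SDK is installed, you have activated the same service account (e.g. with `gcloud auth activate-service-account --key-file=key.json`), and `<bucket-name>` and `<prefix>` are placeholders for your own values.
```bash
# Sketch: list objects under the configured bucket and prefix.
gsutil ls "gs://<bucket-name>/<prefix>"
```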
## Metadata Ingestion
All connectors are defined as JSON Schemas. Here you can find the structure to create a connection to GCS Datalake.
In order to create and run a Metadata Ingestion workflow, we will follow the steps to create a YAML configuration that connects to the source, processes the entities if needed, and sends the results to the OpenMetadata server.
The workflow is modeled around the following JSON Schema.
## 1. Define the YAML Config
### This is a sample config for Datalake using GCS:
{% codePreview %}
{% codeInfoContainer %}
#### Source Configuration - Service Connection
{% codeInfo srNumber=5 %}
* **type**: Credentials type, e.g. `service_account`.
* **projectId**
* **privateKey**
* **privateKeyId**
* **clientEmail**
* **clientId**
* **authUri**: [https://accounts.google.com/o/oauth2/auth](https://accounts.google.com/o/oauth2/auth) by default
* **tokenUri**: [https://oauth2.googleapis.com/token](https://oauth2.googleapis.com/token) by default
* **authProviderX509CertUrl**: [https://www.googleapis.com/oauth2/v1/certs](https://www.googleapis.com/oauth2/v1/certs) by default
* **clientX509CertUrl**
* **bucketName**: name of the bucket in GCS
* **Prefix**: prefix in gcp bucket
{% /codeInfo %}
{% partial file="/v1.6/connectors/yaml/database/source-config-def.md" /%}
{% partial file="/v1.6/connectors/yaml/ingestion-sink-def.md" /%}
{% partial file="/v1.6/connectors/yaml/workflow-config-def.md" /%}
{% /codeInfoContainer %}
{% codeBlock fileName="filename.yaml" %}
```yaml {% isCodeBlock=true %}
source:
type: datalake
serviceName: local_datalake
serviceConnection:
config:
type: Datalake
configSource:
securityConfig:
```
```yaml {% srNumber=5 %}
gcpConfig:
type: type of account
projectId: project id
privateKeyId: private key id
privateKey: private key
clientEmail: client email
clientId: client id
authUri: https://accounts.google.com/o/oauth2/auth
tokenUri: https://oauth2.googleapis.com/token
authProviderX509CertUrl: https://www.googleapis.com/oauth2/v1/certs
clientX509CertUrl: clientX509 Certificate Url
bucketName: bucket name
prefix: prefix
```
{% partial file="/v1.6/connectors/yaml/database/source-config.md" /%}
{% partial file="/v1.6/connectors/yaml/ingestion-sink.md" /%}
{% partial file="/v1.6/connectors/yaml/workflow-config.md" /%}
{% /codeBlock %}
{% /codePreview %}
{% partial file="/v1.6/connectors/yaml/ingestion-cli.md" /%}
## dbt Integration
You can learn more about how to ingest dbt models' definitions and their lineage [here](/connectors/ingestion/workflows/dbt).

View File

@ -1,30 +1,30 @@
---
title: Datalake
slug: /connectors/database/datalake
title: S3 Datalake
slug: /connectors/database/s3-datalake
---
{% connectorDetailsHeader
name="Datalake"
name="S3 Datalake"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Metadata", "Data Profiler", "Data Quality"]
unavailableFeatures=["Query Usage", "Lineage", "Column-level Lineage", "Owners", "dbt", "Tags", "Stored Procedures"]
/ %}
In this section, we provide guides and references to use the Datalake connector.
In this section, we provide guides and references to use the S3 Datalake connector.
Configure and schedule Datalake metadata and profiler workflows from the OpenMetadata UI:
Configure and schedule S3 Datalake metadata and profiler workflows from the OpenMetadata UI:
- [Requirements](#requirements)
- [Metadata Ingestion](#metadata-ingestion)
- [Data Profiler](/how-to-guides/data-quality-observability/profiler/workflow)
- [Data Quality](/how-to-guides/data-quality-observability/quality)
{% partial file="/v1.5/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/database/datalake/yaml"} /%}
{% partial file="/v1.6/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/database/s3-datalake/yaml"} /%}
## Requirements
{% note %}
The Datalake connector supports extracting metadata from file types `JSON`, `CSV`, `TSV` & `Parquet`.
The S3 Datalake connector supports extracting metadata from file types `JSON`, `CSV`, `TSV` & `Parquet`.
{% /note %}
### S3 Permissions
@ -50,22 +50,15 @@ To execute metadata extraction AWS account should have enough access to fetch re
}
```
### ADLS Permissions
To extract metadata from Azure ADLS (Storage Account - StorageV2), you will need an **App Registration** with the following
permissions on the Storage Account:
- Storage Blob Data Contributor
- Storage Queue Data Contributor
## Metadata Ingestion
{% partial
file="/v1.5/connectors/metadata-ingestion-ui.md"
file="/v1.6/connectors/metadata-ingestion-ui.md"
variables={
connector: "Datalake",
selectServicePath: "/images/v1.5/connectors/datalake/select-service.png",
addNewServicePath: "/images/v1.5/connectors/datalake/add-new-service.png",
serviceConnectionPath: "/images/v1.5/connectors/datalake/service-connection.png",
selectServicePath: "/images/v1.6/connectors/datalake/select-service.png",
addNewServicePath: "/images/v1.6/connectors/datalake/add-new-service.png",
serviceConnectionPath: "/images/v1.6/connectors/datalake/service-connection.png",
}
/%}
@ -134,58 +127,19 @@ Find more information about the [Role Session Name](https://docs.aws.amazon.com/
Find more information about [Source Identity](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html#:~:text=Required%3A%20No-,SourceIdentity,-The%20source%20identity).
#### Connection Details for GCS
- **Bucket Name**: A bucket name in DataLake is a unique identifier used to organize and store data objects.
It's similar to a folder name, but it's used for object storage rather than file storage.
- **Prefix**: The prefix of a data source in datalake refers to the first part of the data path that identifies the source or origin of the data. It's used to organize and categorize data within the datalake, and can help users easily locate and access the data they need.
**GCS Credentials**
We support two ways of authenticating to GCS:
1. Passing the raw credential values provided by BigQuery. This requires us to provide the following information, all provided by BigQuery:
1. Credentials type, e.g. `service_account`.
2. Project ID
3. Private Key ID
4. Private Key
5. Client Email
6. Client ID
7. Auth URI, [https://accounts.google.com/o/oauth2/auth](https://accounts.google.com/o/oauth2/auth) by default
8. Token URI, [https://oauth2.googleapis.com/token](https://oauth2.googleapis.com/token) by default
9. Authentication Provider X509 Certificate URL, [https://www.googleapis.com/oauth2/v1/certs](https://www.googleapis.com/oauth2/v1/certs) by default
10. Client X509 Certificate URL
#### Connection Details for Azure
- **Azure Credentials**
- **Client ID** : Client ID of the data storage account
- **Client Secret** : Client Secret of the account
- **Tenant ID** : Tenant ID under which the data storage account falls
- **Account Name** : Account Name of the data Storage
- **Required Roles**
Please make sure the following roles associated with the data storage account.
- `Storage Blob Data Contributor`
- `Storage Queue Data Contributor`
The current approach for authentication is based on `app registration`, reach out to us on [slack](https://slack.open-metadata.org/) if you find the need for another auth system
{% partial file="/v1.5/connectors/database/advanced-configuration.md" /%}
{% partial file="/v1.6/connectors/database/advanced-configuration.md" /%}
{% /extraContent %}
{% partial file="/v1.5/connectors/test-connection.md" /%}
{% partial file="/v1.6/connectors/test-connection.md" /%}
{% partial file="/v1.5/connectors/database/configure-ingestion.md" /%}
{% partial file="/v1.6/connectors/database/configure-ingestion.md" /%}
{% partial file="/v1.5/connectors/ingestion-schedule-and-deploy.md" /%}
{% partial file="/v1.6/connectors/ingestion-schedule-and-deploy.md" /%}
{% /stepsContainer %}
{% partial file="/v1.5/connectors/troubleshooting.md" /%}
{% partial file="/v1.6/connectors/troubleshooting.md" /%}
{% partial file="/v1.5/connectors/database/related.md" /%}
{% partial file="/v1.6/connectors/database/related.md" /%}

View File

@ -0,0 +1,15 @@
---
title: S3 Datalake Connector Troubleshooting
slug: /connectors/database/s3-datalake/troubleshooting
---
# Troubleshooting
Learn how to resolve the most common problems people encounter in the S3 Datalake connector.
* **'Access Denied' error when reading from S3 bucket**
Please ensure you have a Bucket Policy with the permissions explained in the requirements section [here](/connectors/database/s3-datalake).
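As a quick sanity check, you can verify that the same credentials the connector uses can list and read the bucket with the AWS CLI. This is a sketch only; `<my bucket>` and the object key are placeholders.
```bash
# Sketch: run with the same credentials (keys or profile) configured for the connector.
aws s3 ls "s3://<my bucket>/"
aws s3api head-object --bucket "<my bucket>" --key "<path/to/some/object.csv>"
```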

View File

@ -0,0 +1,145 @@
---
title: Run the S3 Datalake Connector Externally
slug: /connectors/database/s3-datalake/yaml
---
{% connectorDetailsHeader
name="S3 Datalake"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Metadata", "Data Profiler", "Data Quality"]
unavailableFeatures=["Query Usage", "Lineage", "Column-level Lineage", "Owners", "dbt", "Tags", "Stored Procedures"]
/ %}
In this section, we provide guides and references to use the S3 Datalake connector.
Configure and schedule S3 Datalake metadata and profiler workflows externally:
- [Requirements](#requirements)
- [Metadata Ingestion](#metadata-ingestion)
- [dbt Integration](#dbt-integration)
{% partial file="/v1.6/connectors/external-ingestion-deployment.md" /%}
## Requirements
**Note:** The S3 Datalake connector supports extracting metadata from file types `JSON`, `CSV`, `TSV` & `Parquet`.
### S3 Permissions
To execute metadata extraction, the AWS account should have enough access to fetch the required data. The **Bucket Policy** in AWS requires at least these permissions:
```json
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:GetObject",
"s3:ListBucket"
],
"Resource": [
"arn:aws:s3:::<my bucket>",
"arn:aws:s3:::<my bucket>/*"
]
}
]
}
```
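For reference, once the policy above is saved to a file it can be attached to the bucket with the AWS CLI, as in the sketch below. It assumes the caller is allowed to manage bucket policies and that `<my bucket>` and `policy.json` are replaced with your own values.
```bash
# Sketch: attach the Bucket Policy shown above (saved as policy.json) to the bucket.
aws s3api put-bucket-policy --bucket "<my bucket>" --policy file://policy.json
```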
### Python Requirements
{% partial file="/v1.6/connectors/python-requirements.md" /%}
If running an OpenMetadata version greater than 0.13, you will need to install the Datalake ingestion support for S3:
#### S3 installation
```bash
pip3 install "openmetadata-ingestion[datalake-s3]"
```
#### If version <0.13
You will be installing the requirements for S3
```bash
pip3 install "openmetadata-ingestion[datalake]"
```
## Metadata Ingestion
All connectors are defined as JSON Schemas. Here you can find the structure to create a connection to S3 Datalake.
In order to create and run a Metadata Ingestion workflow, we will follow the steps to create a YAML configuration that connects to the source, processes the entities if needed, and sends the results to the OpenMetadata server.
The workflow is modeled around the following JSON Schema.
## 1. Define the YAML Config
#### Source Configuration - Source Config using AWS S3
### This is a sample config for Datalake using AWS S3:
{% codePreview %}
{% codeInfoContainer %}
#### Source Configuration - Service Connection
{% codeInfo srNumber=1 %}
* **awsAccessKeyId**: Enter your secure access key ID for your S3 connection. The specified key ID should be authorized to read the buckets you want to include in the metadata ingestion workflow.
* **awsSecretAccessKey**: Enter the Secret Access Key (the passcode key pair to the key ID from above).
* **awsRegion**: Specify the region in which your S3 bucket is located. This setting is required even if you have configured a local AWS profile.
* **schemaFilterPattern** and **tableFilterPattern**: Note that the `schemaFilterPattern` and `tableFilterPattern` both support regex as `include` or `exclude`.
{% /codeInfo %}
{% partial file="/v1.6/connectors/yaml/database/source-config-def.md" /%}
{% partial file="/v1.6/connectors/yaml/ingestion-sink-def.md" /%}
{% partial file="/v1.6/connectors/yaml/workflow-config-def.md" /%}
{% /codeInfoContainer %}
{% codeBlock fileName="filename.yaml" %}
```yaml {% isCodeBlock=true %}
source:
type: datalake
serviceName: local_datalake
serviceConnection:
config:
type: Datalake
```
```yaml {% srNumber=1 %}
configSource:
securityConfig:
awsAccessKeyId: aws access key id
awsSecretAccessKey: aws secret access key
awsRegion: aws region
bucketName: bucket name
prefix: prefix
```
{% partial file="/v1.6/connectors/yaml/database/source-config.md" /%}
{% partial file="/v1.6/connectors/yaml/ingestion-sink.md" /%}
{% partial file="/v1.6/connectors/yaml/workflow-config.md" /%}
{% /codeBlock %}
{% /codePreview %}
{% partial file="/v1.6/connectors/yaml/ingestion-cli.md" /%}
## dbt Integration
You can learn more about how to ingest dbt models' definitions and their lineage [here](/connectors/ingestion/workflows/dbt).

View File

@ -4,7 +4,7 @@ slug: /connectors/pipeline/dbtcloud
---
{% connectorDetailsHeader
name="DBTCloud"
name="dbt Cloud"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Pipelines", "Pipeline Status", "Lineage"]
@ -52,9 +52,9 @@ To know more about permissions required refer [here](https://docs.getdbt.com/doc
file="/v1.5/connectors/metadata-ingestion-ui.md"
variables={
connector: "DBTCloud",
selectServicePath: "/images/v1.6/connectors/dbtcloud/select-service.webp",
addNewServicePath: "/images/v1.6/connectors/dbtcloud/add-new-service.webp",
serviceConnectionPath: "/images/v1.6/connectors/dbtcloud/service-connection.webp",
selectServicePath: "/images/v1.6/connectors/dbtcloud/select-service.png",
addNewServicePath: "/images/v1.6/connectors/dbtcloud/add-new-service.png",
serviceConnectionPath: "/images/v1.6/connectors/dbtcloud/service-connection.png",
}
/%}

View File

@ -4,7 +4,7 @@ slug: /connectors/pipeline/dbtcloud/yaml
---
{% connectorDetailsHeader
name="DBTCloud"
name="dbt Cloud"
stage="PROD"
platform="Collate"
availableFeatures=["Pipelines", "Pipeline Status", "Tags"]

View File

@ -19,7 +19,7 @@ in the sources and send that to OpenMetadata. However, what happens with generic
In these systems we can have different types of information:
- Unstructured data, such as images or videos,
- Structured data in single and independent files (which can also be ingested with the [Data Lake connector](/connectors/database/datalake))
- Structured data in single and independent files (which can also be ingested with the [S3 Data Lake connector](/connectors/database/s3-datalake))
- Structured data in partitioned files, e.g., `my_table/year=2022/...parquet`, `my_table/year=2023/...parquet`, etc.
{% note %}

View File

@ -1,10 +1,10 @@
---
title: S3
title: S3 Storage
slug: /connectors/storage/s3
---
{% connectorDetailsHeader
name="S3"
name="S3 Storage"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Metadata", "Structured Containers", "Unstructured Containers"]

View File

@ -1,10 +1,10 @@
---
title: Run the S3 Connector Externally
title: Run the S3 Storage Connector Externally
slug: /connectors/storage/s3/yaml
---
{% connectorDetailsHeader
name="S3"
name="S3 Storage"
stage="PROD"
platform="OpenMetadata"
availableFeatures=["Metadata"]

View File

@ -27,7 +27,7 @@ href="/connectors"%}
Refer to the Docs to ingest metadata from multiple sources - Databases, Dashboards, Pipelines, ML Models, Messaging, Storage, as well as Metadata services.
{%/inlineCallout%}
- **Database Services:** [Athena](/connectors/database/athena), [AzureSQL](/connectors/database/azuresql), [BigQuery](/connectors/database/bigquery), [Clickhouse](/connectors/database/clickhouse), [Databricks](/connectors/database/databricks), [Datalake](/connectors/database/datalake), [DB2](/connectors/database/db2), [DeltaLake](/connectors/database/deltalake), [Domo Database](/connectors/database/domo-database), [Druid](/connectors/database/druid), [DynamoDB](/connectors/database/dynamodb), [Glue](/connectors/database/glue), [Hive](/connectors/database/hive), [Impala](/connectors/database/impala), [MariaDB](/connectors/database/mariadb), [MongoDB](/connectors/database/mongodb), [MSSQL](/connectors/database/mssql), [MySQL](/connectors/database/mysql), [Oracle](/connectors/database/oracle), [PinotDB](/connectors/database/pinotdb), [Postgres](/connectors/database/postgres), [Presto](/connectors/database/presto), [Redshift](/connectors/database/redshift), [Salesforce](/connectors/database/salesforce), [SAP Hana](/connectors/database/sap-hana), [SAS](/connectors/database/sas), [SingleStore](/connectors/database/singlestore), [Snowflake](/connectors/database/snowflake), [SQLite](/connectors/database/sqlite), [Trino](/connectors/database/trino), and [Vertica](/connectors/database/vertica).
- **Database Services:** [ADLS Datalake](/connectors/database/adls-datalake), [Athena](/connectors/database/athena), [AzureSQL](/connectors/database/azuresql), [BigQuery](/connectors/database/bigquery), [Clickhouse](/connectors/database/clickhouse), [Databricks](/connectors/database/databricks), [DB2](/connectors/database/db2), [DeltaLake](/connectors/database/deltalake), [Domo Database](/connectors/database/domo-database), [Druid](/connectors/database/druid), [DynamoDB](/connectors/database/dynamodb), [GCS Datalake](/connectors/database/gcs-datalake), [Glue](/connectors/database/glue), [Hive](/connectors/database/hive), [Impala](/connectors/database/impala), [MariaDB](/connectors/database/mariadb), [MongoDB](/connectors/database/mongodb), [MSSQL](/connectors/database/mssql), [MySQL](/connectors/database/mysql), [Oracle](/connectors/database/oracle), [PinotDB](/connectors/database/pinotdb), [Postgres](/connectors/database/postgres), [Presto](/connectors/database/presto), [Redshift](/connectors/database/redshift), [Salesforce](/connectors/database/salesforce), [SAP Hana](/connectors/database/sap-hana), [SAS](/connectors/database/sas), [SingleStore](/connectors/database/singlestore), [Snowflake](/connectors/database/snowflake), [SQLite](/connectors/database/sqlite), [S3 Datalake](/connectors/database/s3-datalake), [Trino](/connectors/database/trino), and [Vertica](/connectors/database/vertica).
- **Dashboard Services:** [Domo Dashboard](/connectors/dashboard/domo-dashboard), [Looker](/connectors/dashboard/looker), [Metabase](/connectors/dashboard/metabase), [Mode](/connectors/dashboard/mode), [PowerBI](/connectors/dashboard/powerbi), [Qlik Sense](/connectors/dashboard/qliksense), [QuickSight](/connectors/dashboard/quicksight), [Redash](/connectors/dashboard/redash), [Superset](/connectors/dashboard/superset), and [Tableau](/connectors/dashboard/tableau).

View File

@ -7,7 +7,7 @@ slug: /how-to-guides/guide-for-data-users/data-ownership
## Data Asset Ownership
In OpenMetadata, either a **team** or an **individual user** can be the owner of a data asset. Owners have access to perform all the operations on a data asset. For example, edit description, tags, glossary terms, etc.
In OpenMetadata, either a **team** or **multiple users** can own a data asset. Owners have access to perform all the operations on a data asset. For example, edit description, tags, glossary terms, etc.
## Assign Data Ownership

View File

@ -226,6 +226,12 @@ site_menu:
url: /connectors/api/rest/yaml
- category: Connectors / Database
url: /connectors/database
- category: Connectors / Database / ADLS Datalake
url: /connectors/database/adls-datalake
- category: Connectors / Database / ADLS Datalake / Run Externally
url: /connectors/database/adls-datalake/yaml
- category: Connectors / Database / ADLS Datalake / Troubleshooting
url: /connectors/database/adls-datalake/troubleshooting
- category: Connectors / Database / Athena
url: /connectors/database/athena
- category: Connectors / Database / Athena / Run Externally
@ -262,12 +268,6 @@ site_menu:
url: /connectors/database/databricks/yaml
- category: Connectors / Database / Databricks / Troubleshooting
url: /connectors/database/databricks/troubleshooting
- category: Connectors / Database / Datalake
url: /connectors/database/datalake
- category: Connectors / Database / Datalake / Run Externally
url: /connectors/database/datalake/yaml
- category: Connectors / Database / Datalake / Troubleshooting
url: /connectors/database/datalake/troubleshooting
- category: Connectors / Database / DB2
url: /connectors/database/db2
- category: Connectors / Database / DB2 / Run Externally
@ -294,6 +294,10 @@ site_menu:
url: /connectors/database/dynamodb
- category: Connectors / Database / DynamoDB / Run Externally
url: /connectors/database/dynamodb/yaml
- category: Connectors / Database / GCS Datalake
url: /connectors/database/gcs-datalake
- category: Connectors / Database / GCS Datalake / Run Externally
url: /connectors/database/gcs-datalake/yaml
- category: Connectors / Database / Glue
url: /connectors/database/glue
- category: Connectors / Database / Glue / Run Externally
@ -388,6 +392,12 @@ site_menu:
url: /connectors/database/synapse/yaml
- category: Connectors / Database / Synapse / Troubleshooting
url: /connectors/database/synapse/troubleshooting
- category: Connectors / Database / S3 Datalake
url: /connectors/database/s3-datalake
- category: Connectors / Database / S3 Datalake / Run Externally
url: /connectors/database/s3-datalake/yaml
- category: Connectors / Database / S3 Datalake / Troubleshooting
url: /connectors/database/s3-datalake/troubleshooting
- category: Connectors / Database / Teradata
url: /connectors/database/teradata
- category: Connectors / Database / Teradata / Run Externally
@ -505,9 +515,9 @@ site_menu:
url: /connectors/pipeline/dagster
- category: Connectors / Pipeline / Dagster / Run Externally
url: /connectors/pipeline/dagster/yaml
- category: Connectors / Pipeline / DBTCloud
- category: Connectors / Pipeline / dbt Cloud
url: /connectors/pipeline/dbtcloud
- category: Connectors / Pipeline / DBTCloud / Run Externally
- category: Connectors / Pipeline / dbt Cloud / Run Externally
url: /connectors/pipeline/dbtcloud/yaml
- category: Connectors / Pipeline / KafkaConnect
url: /connectors/pipeline/kafkaconnect
@ -568,9 +578,9 @@ site_menu:
- category: Connectors / Storage
url: /connectors/storage
- category: Connectors / Storage / S3
- category: Connectors / Storage / S3 Storage
url: /connectors/storage/s3
- category: Connectors / Storage / S3 / Run Externally
- category: Connectors / Storage / S3 Storage / Run Externally
url: /connectors/storage/s3/yaml
- category: Connectors / Storage / GCS
url: /connectors/storage/gcs

Binary files not shown (6 new images added: 101 KiB, 187 KiB, and 293 KiB, two of each size).