Fixed #8418 added new connector couchbase (#12836)

This commit is contained in:
vanshika18 2023-08-29 11:16:32 +05:30 committed by GitHub
parent 407900e5b4
commit 3cc15e6d0b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 969 additions and 2 deletions

View File

@ -0,0 +1,23 @@
source:
type: couchbase
serviceName: local_couchbase
serviceConnection:
config:
type: Couchbase
bucket: bucket
username: username
password: password
hostport: hostport
sourceConfig:
config:
type: DatabaseMetadata
sink:
type: metadata-rest
config: {}
workflowConfig:
loggerLevel: DEBUG
openMetadataServerConfig:
hostPort: http://localhost:8585/api
authProvider: openmetadata
securityConfig:
"jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"

View File

@ -198,6 +198,7 @@ plugins: Dict[str, Set[str]] = {
"looker": {"looker-sdk>=22.20.0", "lkml~=1.3"},
"mlflow": {"mlflow-skinny~=1.30", "alembic~=1.10.2"},
"mongo": {"pymongo~=4.3", VERSIONS["pandas"]},
"couchbase": {"couchbase~=4.1"},
"mssql": {"sqlalchemy-pytds~=0.3"},
"mssql-odbc": {VERSIONS["pyodbc"]},
"mysql": {VERSIONS["pymysql"]},

View File

@ -0,0 +1,23 @@
source:
type: couchbase
serviceName: local_couchbase
serviceConnection:
config:
type: Couchbase
bucket: bucket
username: username
password: password
hostport: hostport
sourceConfig:
config:
type: DatabaseMetadata
sink:
type: metadata-rest
config: {}
workflowConfig:
loggerLevel: DEBUG
openMetadataServerConfig:
hostPort: http://localhost:8585/api
authProvider: openmetadata
securityConfig:
"jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"

View File

@ -0,0 +1,88 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Source connection handler
"""
from functools import partial
from typing import Any, Optional
from pydantic import BaseModel
from metadata.generated.schema.entity.automations.workflow import (
Workflow as AutomationWorkflow,
)
from metadata.generated.schema.entity.services.connections.database.couchbaseConnection import (
CouchbaseConnection,
)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
def get_connection(connection: CouchbaseConnection):
    """
    Build a Couchbase cluster client from the service connection.

    The configured username and password are wrapped in a
    ``PasswordAuthenticator`` and the cluster is reached at
    ``<scheme>://<hostport>``. The object returned by
    ``Cluster.connect`` is handed back to the caller.
    """
    # pylint: disable=import-outside-toplevel
    from couchbase.auth import PasswordAuthenticator
    from couchbase.cluster import Cluster
    from couchbase.options import ClusterOptions

    user = connection.username
    secret = connection.password.get_secret_value()
    authenticator = PasswordAuthenticator(user, secret)

    connection_string = f"{connection.scheme.value}://{connection.hostport}"
    return Cluster.connect(connection_string, ClusterOptions(authenticator))
def test_connection(
    metadata: OpenMetadata,
    client: Any,
    service_connection: CouchbaseConnection,
    automation_workflow: Optional[AutomationWorkflow] = None,
) -> None:
    """
    Run the Couchbase connection checks, either from a metadata
    workflow or from an Automation Workflow.

    Two steps are registered and passed to ``test_connection_steps``:
    ``GetDatabases`` lists the buckets and remembers the name of the first
    one; ``GetCollections`` opens that bucket and lists its scopes.
    """
    # pylint: disable=import-outside-toplevel
    from couchbase.cluster import Cluster

    class SchemaHolder(BaseModel):
        # Carries the bucket name found by the first step over to the second.
        database: Optional[str]

    holder = SchemaHolder()

    def test_get_databases(client: Cluster, holder: SchemaHolder):
        # Only the first bucket is needed; stop iterating right after it.
        for bucket_settings in client.buckets().get_all_buckets():
            holder.database = bucket_settings.name
            break

    def test_get_collections(client: Cluster, holder: SchemaHolder):
        client.bucket(holder.database).collections().get_all_scopes()

    steps = {}
    steps["GetDatabases"] = partial(test_get_databases, client, holder)
    steps["GetCollections"] = partial(test_get_collections, client, holder)

    test_connection_steps(
        metadata=metadata,
        test_fn=steps,
        service_type=service_connection.type.value,
        automation_workflow=automation_workflow,
    )

View File

@ -0,0 +1,129 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Couchbase source methods.
"""
import traceback
from typing import Dict, Iterable, List
from metadata.generated.schema.entity.services.connections.database.couchbaseConnection import (
CouchbaseConnection,
)
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
OpenMetadataConnection,
)
from metadata.generated.schema.metadataIngestion.workflow import (
Source as WorkflowSource,
)
from metadata.ingestion.api.source import InvalidSourceException
from metadata.ingestion.source.database.common_nosql_source import (
SAMPLE_SIZE,
CommonNoSQLSource,
)
from metadata.ingestion.source.database.couchbase.queries import (
COUCHBASE_GET_DATA,
COUCHBASE_SQL_STATEMENT,
)
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
class CouchbaseSource(CommonNoSQLSource):
    """
    Implements the necessary methods to extract
    Database metadata from a Couchbase source.

    Bucket names are yielded as database names, scope names are returned
    as schema names and collection names are returned as table names.
    """

    def __init__(self, config: WorkflowSource, metadata_config: OpenMetadataConnection):
        super().__init__(config, metadata_config)
        # NOTE(review): connection_obj is presumably set by the base class
        # from the connection handler -- confirm against CommonNoSQLSource.
        self.couchbase = self.connection_obj

    @classmethod
    def create(cls, config_dict, metadata_config: OpenMetadataConnection):
        """
        Parse the workflow source config and build the source.

        Raises:
            InvalidSourceException: if the service connection is not a
                CouchbaseConnection.
        """
        config: WorkflowSource = WorkflowSource.parse_obj(config_dict)
        connection: CouchbaseConnection = config.serviceConnection.__root__.config
        if not isinstance(connection, CouchbaseConnection):
            raise InvalidSourceException(
                f"Expected CouchbaseConnection, but got {connection}"
            )
        return cls(config, metadata_config)

    def get_database_names(self) -> Iterable[str]:
        """
        Yield the configured bucket name or, when no bucket is configured,
        the name of every bucket reported by the cluster's bucket manager.

        Errors are logged at debug level and end the iteration.
        """
        try:
            configured_bucket = self.service_connection.bucket
            if configured_bucket:
                yield configured_bucket
            else:
                for bucket in self.couchbase.buckets().get_all_buckets():
                    yield bucket.name
        except Exception as exp:
            logger.debug(f"Failed to fetch bucket name: {exp}")
            logger.debug(traceback.format_exc())

    def get_schema_name_list(self) -> List[str]:
        """
        Return the scope names of the bucket currently held in the context.

        The scope objects are also cached in ``self.context.scope_dict``
        (keyed by scope name) so that ``get_table_name_list`` can read the
        collections without another round-trip. Returns an empty list on
        failure.
        """
        # Bound up-front so the error message below can never hit an
        # UnboundLocalError when reading the context itself fails.
        database_name = None
        try:
            database_name = self.context.database.name.__root__
            bucket = self.couchbase.bucket(database_name)
            # Fetch the scopes once so the cached dict and the returned
            # names are guaranteed to describe the same snapshot.
            scopes = list(bucket.collections().get_all_scopes())
            self.context.scope_dict = {scope.name: scope for scope in scopes}
            return [scope.name for scope in scopes]
        except Exception as exp:
            logger.debug(
                f"Failed to list scope for bucket names [{database_name}]: {exp}"
            )
            logger.debug(traceback.format_exc())
        return []

    def get_table_name_list(self, schema_name: str) -> List[str]:
        """
        Return the collection names of the given scope, read from the
        scope objects cached by ``get_schema_name_list``.

        Returns an empty list on failure (including an unknown scope).
        """
        try:
            scope_object = self.context.scope_dict.get(schema_name)
            return [collection.name for collection in scope_object.collections]
        except Exception as exp:
            logger.debug(
                f"Failed to list collection names for scope [{schema_name}]: {exp}"
            )
            logger.debug(traceback.format_exc())
        return []

    def get_table_columns_dict(self, schema_name: str, table_name: str) -> List[Dict]:
        """
        Return up to ``SAMPLE_SIZE`` sample documents of a collection.

        The index lookup built from ``COUCHBASE_SQL_STATEMENT`` runs first;
        the sample query is only issued once that lookup yields a non-empty
        row. Returns an empty list when no such row exists or on failure.
        """
        try:
            database_name = self.context.database.name.__root__
            index_query = COUCHBASE_SQL_STATEMENT.format(table_name=table_name)
            index_result = self.couchbase.query(index_query)
            for row in index_result.rows():
                if len(row) > 0:
                    sample_query = COUCHBASE_GET_DATA.format(
                        database_name=database_name,
                        schema_name=schema_name,
                        table_name=table_name,
                        sample_size=SAMPLE_SIZE,
                    )
                    return list(self.couchbase.query(sample_query).rows())
        except Exception as exp:
            logger.debug(f"Failed to list column names for table [{table_name}]: {exp}")
            logger.debug(traceback.format_exc())
        # Always hand back a list, as the return annotation promises.
        return []

View File

@ -0,0 +1,23 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SQL Queries used during ingestion
"""
import textwrap
# Looks up primary indexes whose keyspace_id matches the given table name.
COUCHBASE_SQL_STATEMENT = textwrap.dedent(
    " SELECT * FROM system:indexes"
    " WHERE keyspace_id = '{table_name}'"
    " AND is_primary = TRUE "
)

# Reads up to {sample_size} documents from `bucket`.`scope`.`collection`.
COUCHBASE_GET_DATA = textwrap.dedent(
    " select crc.* from"
    " `{database_name}`.`{schema_name}`.`{table_name}`"
    " crc limit {sample_size} "
)

View File

@ -0,0 +1,226 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Test Couchbase using the topology
"""
from unittest import TestCase
from unittest.mock import patch
from metadata.generated.schema.api.data.createTable import CreateTableRequest
from metadata.generated.schema.entity.data.database import Database
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
from metadata.generated.schema.entity.data.table import Column, DataType, TableType
from metadata.generated.schema.entity.services.databaseService import (
DatabaseConnection,
DatabaseService,
DatabaseServiceType,
)
from metadata.generated.schema.metadataIngestion.workflow import (
OpenMetadataWorkflowConfig,
)
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.ingestion.source.database.couchbase.metadata import CouchbaseSource
mock_couch_config = {
"source": {
"type": "couchbase",
"serviceName": "local_couchbase",
"serviceConnection": {
"config": {
"type": "Couchbase",
"bucket": "default",
"username": "username",
"password": "password",
"hostport": "localhost",
},
},
"sourceConfig": {"config": {"type": "DatabaseMetadata"}},
},
"sink": {"type": "metadata-rest", "config": {}},
"workflowConfig": {
"openMetadataServerConfig": {
"hostPort": "http://localhost:8585/api",
"authProvider": "openmetadata",
"securityConfig": {
"jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
},
}
},
}
MOCK_DATABASE_SERVICE = DatabaseService(
id="85811038-099a-11ed-861d-0242ac120002",
name="local_couchbase",
connection=DatabaseConnection(),
serviceType=DatabaseServiceType.Couchbase,
)
MOCK_DATABASE = Database(
id="2aaa012e-099a-11ed-861d-0242ac120002",
name="default",
fullyQualifiedName="local_couchbase.default",
displayName="default",
description="",
service=EntityReference(
id="85811038-099a-11ed-861d-0242ac120002",
type="databaseService",
),
)
MOCK_DATABASE_SCHEMA = DatabaseSchema(
id="2aaa012e-099a-11ed-861d-0242ac120056",
name="default",
fullyQualifiedName="local_couchbase.default.default",
displayName="default",
description="",
database=EntityReference(
id="2aaa012e-099a-11ed-861d-0242ac120002",
type="database",
),
service=EntityReference(
id="85811038-099a-11ed-861d-0242ac120002",
type="databaseService",
),
)
MOCK_JSON_TABLE_DATA = [
{
"name": "mayur",
"age": 25,
"is_married": False,
"address": {"line": "random address"},
},
{"name": "onkar", "age": 26, "is_married": True},
]
MOCK_CREATE_TABLE = [
CreateTableRequest(
name="random_table",
tableType=TableType.Regular,
columns=[
Column(
name="name",
displayName="name",
dataType=DataType.STRING,
dataTypeDisplay=DataType.STRING.value,
),
Column(
name="age",
displayName="age",
dataType=DataType.INT,
dataTypeDisplay=DataType.INT.value,
),
Column(
name="is_married",
displayName="is_married",
dataType=DataType.BOOLEAN,
dataTypeDisplay=DataType.BOOLEAN.value,
),
Column(
name="address",
displayName="address",
dataType=DataType.RECORD,
dataTypeDisplay=DataType.RECORD.value,
children=[
Column(
name="line",
dataType=DataType.STRING,
dataTypeDisplay=DataType.STRING.value,
)
],
),
],
tableConstraints=None,
databaseSchema="local_couchbase.default.default",
)
]
EXPECTED_DATABASE_NAMES = ["default"]
EXPECTED_DATABASE_SCHEMA_NAMES = [
"random_schema",
"random1_schema",
]
MOCK_DATABASE_SCHEMA_NAMES = [
"random_schema",
"random1_schema",
]
EXPECTED_TABLE_NAMES = [
("random_table", TableType.Regular),
("random1_table", TableType.Regular),
]
MOCK_TABLE_NAMES = [
"random_table",
"random1_table",
]
def custom_column_compare(self, other):
    """
    Relaxed equality for columns: two columns match when their ``name``,
    ``description`` and ``children`` are equal; other attributes are ignored.
    """
    same_name = self.name == other.name
    if not same_name:
        return same_name

    same_description = self.description == other.description
    if not same_description:
        return same_description

    return self.children == other.children
class CouchbaseUnitTest(TestCase):
    """
    Unit tests for CouchbaseSource that run without a Couchbase cluster:
    the connection helpers are patched while the source is built, and the
    methods that would query the cluster are patched per test.
    """

    @patch(
        "metadata.ingestion.source.database.couchbase.metadata.CouchbaseSource.test_connection"
    )
    @patch("metadata.ingestion.source.database.couchbase.connection.get_connection")
    def __init__(self, methodName, get_connection, test_connection) -> None:
        super().__init__(methodName)
        # Both mocks only need to be inert; no real client is created.
        get_connection.return_value = False
        test_connection.return_value = False
        self.config = OpenMetadataWorkflowConfig.parse_obj(mock_couch_config)
        self.couch_source = CouchbaseSource.create(
            mock_couch_config["source"],
            self.config.workflowConfig.openMetadataServerConfig,
        )
        # Seed the context with the entities the source methods read.
        self.couch_source.context.__dict__["database_service"] = MOCK_DATABASE_SERVICE
        self.couch_source.context.__dict__["database"] = MOCK_DATABASE
        self.couch_source.context.__dict__["database_schema"] = MOCK_DATABASE_SCHEMA

    def test_database_names(self):
        """The configured bucket is the only database name yielded."""
        assert EXPECTED_DATABASE_NAMES == list(self.couch_source.get_database_names())

    def test_database_schema_names(self):
        """Scope names returned by the source are yielded as schema names."""
        with patch.object(
            CouchbaseSource,
            "get_schema_name_list",
            return_value=MOCK_DATABASE_SCHEMA_NAMES,
        ):
            assert EXPECTED_DATABASE_SCHEMA_NAMES == list(
                self.couch_source.get_database_schema_names()
            )

    def test_table_names(self):
        """Collection names are yielded as regular tables."""
        with patch.object(
            CouchbaseSource, "get_table_name_list", return_value=MOCK_TABLE_NAMES
        ):
            assert EXPECTED_TABLE_NAMES == list(
                self.couch_source.get_tables_name_and_type()
            )

    def test_yield_tables(self):
        """Sample documents are turned into the expected CreateTableRequest."""
        # Scope the relaxed Column comparison to this test only; assigning
        # Column.__eq__ directly would leak into every test that runs later
        # in the same process.
        with patch.object(Column, "__eq__", custom_column_compare), patch.object(
            CouchbaseSource, "get_table_columns_dict", return_value=MOCK_JSON_TABLE_DATA
        ):
            assert MOCK_CREATE_TABLE == list(
                self.couch_source.yield_table(EXPECTED_TABLE_NAMES[0])
            )

View File

@ -0,0 +1,81 @@
---
title: Couchbase
slug: /connectors/database/couchbase
---
# Couchbase
{% multiTablesWrapper %}
| Feature | Status |
| :----------------- | :--------------------------- |
| Stage | PROD |
| Metadata | {% icon iconName="check" /%} |
| Query Usage | {% icon iconName="cross" /%} |
| Data Profiler | {% icon iconName="cross" /%} |
| Data Quality | {% icon iconName="cross" /%} |
| Lineage | {% icon iconName="cross" /%} |
| DBT | {% icon iconName="cross" /%} |
| Supported Versions | -- |
| Feature | Status |
| :----------- | :--------------------------- |
| Lineage | {% icon iconName="cross" /%} |
| Table-level | {% icon iconName="cross" /%} |
| Column-level | {% icon iconName="cross" /%} |
{% /multiTablesWrapper %}
In this section, we provide guides and references to use the Couchbase connector.
Configure and schedule Couchbase metadata workflows from the OpenMetadata UI:
- [Requirements](#requirements)
- [Metadata Ingestion](#metadata-ingestion)
{% partial file="/v1.2.0/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/database/couchbase/yaml"} /%}
## Requirements
{%inlineCallout icon="description" bold="OpenMetadata 0.12 or later" href="/deployment"%}
To deploy OpenMetadata, check the Deployment guides.
{%/inlineCallout%}
## Metadata Ingestion
{% partial
file="/v1.2.0/connectors/metadata-ingestion-ui.md"
variables={
connector: "Couchbase",
selectServicePath: "/images/v1.2.0/connectors/couchbase/select-service.png",
addNewServicePath: "/images/v1.2.0/connectors/couchbase/add-new-service.png",
serviceConnectionPath: "/images/v1.2.0/connectors/couchbase/service-connection.png",
}
/%}
{% stepsContainer %}
{% extraContent parentTagName="stepsContainer" %}
#### Connection Details
- **Username**: Username to connect to Couchbase.
- **Password**: Password to connect to Couchbase.
- **Hostport**: If couchbase is hosted on cloud then the hostport parameter specifies the connection string and if you are using couchbase server then the hostport parameter specifies hostname of the Couchbase. This should be specified as a string in the format `hostname` or `xyz.cloud.couchbase.com`. E.g., `localhost`.
- **bucketName**: Optional name of the bucket to ingest. If left blank, we will ingest all the buckets.
{% partial file="/v1.2.0/connectors/database/advanced-configuration.md" /%}
{% /extraContent %}
{% partial file="/v1.2.0/connectors/test-connection.md" /%}
{% partial file="/v1.2.0/connectors/database/configure-ingestion.md" /%}
{% partial file="/v1.2.0/connectors/ingestion-schedule-and-deploy.md" /%}
{% /stepsContainer %}
{% partial file="/v1.2.0/connectors/troubleshooting.md" /%}
{% partial file="/v1.2.0/connectors/database/related.md" /%}

View File

@ -0,0 +1,226 @@
---
title: Run the Couchbase Connector Externally
slug: /connectors/database/couchbase/yaml
---
# Run the Couchbase Connector Externally
{% multiTablesWrapper %}
| Feature | Status |
| :----------------- | :--------------------------- |
| Stage | PROD |
| Metadata | {% icon iconName="check" /%} |
| Query Usage | {% icon iconName="cross" /%} |
| Data Profiler | {% icon iconName="cross" /%} |
| Data Quality | {% icon iconName="cross" /%} |
| Lineage | {% icon iconName="cross" /%} |
| DBT | {% icon iconName="cross" /%} |
| Supported Versions | -- |
| Feature | Status |
| :----------- | :--------------------------- |
| Lineage | {% icon iconName="cross" /%} |
| Table-level | {% icon iconName="cross" /%} |
| Column-level | {% icon iconName="cross" /%} |
{% /multiTablesWrapper %}
In this section, we provide guides and references to use the Couchbase connector.
Configure and schedule Couchbase metadata workflows from the OpenMetadata UI:
- [Requirements](#requirements)
- [Metadata Ingestion](#metadata-ingestion)
{% partial file="/v1.2.0/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/database/couchbase/yaml"} /%}
{% partial file="/v1.2.0/connectors/external-ingestion-deployment.md" /%}
## Requirements
{%inlineCallout icon="description" bold="OpenMetadata 0.12 or later" href="/deployment"%}
To deploy OpenMetadata, check the Deployment guides.
{%/inlineCallout%}
### Python Requirements
To run the Couchbase ingestion, you will need to install:
```bash
pip3 install "openmetadata-ingestion[couchbase]"
```
## Metadata Ingestion
All connectors are defined as JSON Schemas.
[Here](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/couchbaseConnection.json)
you can find the structure to create a connection to Couchbase.
In order to create and run a Metadata Ingestion workflow, we will follow
the steps to create a YAML configuration able to connect to the source,
process the Entities if needed, and reach the OpenMetadata server.
The workflow is modeled around the following
[JSON Schema](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/workflow.json)
### 1. Define the YAML Config
This is a sample config for Couchbase:
{% codePreview %}
{% codeInfoContainer %}
#### Source Configuration - Service Connection
{% codeInfo srNumber=1 %}
**username**: Username to connect to Couchbase.
{% /codeInfo %}
{% codeInfo srNumber=2 %}
**password**: Password to connect to Couchbase.
{% /codeInfo %}
{% codeInfo srNumber=3 %}
**hostport**: If couchbase is hosted on cloud then the hostport parameter specifies the connection string and if you are using couchbase server then the hostport parameter specifies hostname of the Couchbase. This should be specified as a string in the format `hostname` or `xyz.cloud.couchbase.com`. E.g., `localhost`.
{% /codeInfo %}
{% codeInfo srNumber=4 %}
**bucketName**: Optional name of the bucket to ingest. If left blank, we will ingest all the buckets.
{% /codeInfo %}
#### Source Configuration - Source Config
{% codeInfo srNumber=5 %}
The `sourceConfig` is defined [here](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/databaseServiceMetadataPipeline.json):
**markDeletedTables**: To flag tables as soft-deleted if they are not present anymore in the source system.
**includeTables**: true or false, to ingest table data. Default is true.
**includeViews**: true or false, to ingest views definitions.
**databaseFilterPattern**, **schemaFilterPattern**, **tableFilterPattern**: Note that the filter supports regex as include or exclude. You can find examples [here](/connectors/ingestion/workflows/metadata/filter-patterns/database)
{% /codeInfo %}
#### Sink Configuration
{% codeInfo srNumber=6 %}
To send the metadata to OpenMetadata, it needs to be specified as `type: metadata-rest`.
{% /codeInfo %}
{% partial file="/v1.2.0/connectors/workflow-config.md" /%}
#### Advanced Configuration
{% codeInfo srNumber=7 %}
**Connection Options (Optional)**: Enter the details for any additional connection options that can be sent to Couchbase during the connection. These details must be added as Key-Value pairs.
{% /codeInfo %}
{% /codeInfoContainer %}
{% codeBlock fileName="filename.yaml" %}
```yaml
source:
type: couchbase
serviceName: local_couchbase
serviceConnection:
config:
type: Couchbase
```yaml {% srNumber=1 %}
username: username
```
```yaml {% srNumber=2 %}
password: password
```
```yaml {% srNumber=3 %}
hostport: localhost
```
```yaml {% srNumber=4 %}
bucket: custom_bucket_name
```
```yaml {% srNumber=5 %}
sourceConfig:
config:
type: DatabaseMetadata
markDeletedTables: true
includeTables: true
includeViews: true
# includeTags: true
# databaseFilterPattern:
# includes:
# - database1
# - database2
# excludes:
# - database3
# - database4
# schemaFilterPattern:
# includes:
# - schema1
# - schema2
# excludes:
# - schema3
# - schema4
# tableFilterPattern:
# includes:
# - users
# - type_test
# excludes:
# - table3
# - table4
```
```yaml {% srNumber=6 %}
sink:
type: metadata-rest
config: {}
```
{% partial file="/v1.2.0/connectors/workflow-config-yaml.md" /%}
{% /codeBlock %}
{% /codePreview %}
### 2. Run with the CLI
First, we will need to save the YAML file. Afterward, and with all requirements installed, we can run:
```bash
metadata ingest -c <path-to-yaml>
```
Note that from connector to connector, this recipe will always be the same. By updating the YAML configuration,
you will be able to extract metadata from different sources.
## Related
{% tilesContainer %}
{% tile
title="Ingest with Airflow"
description="Configure the ingestion using Airflow SDK"
link="/connectors/database/mongodb/airflow"
/ %}
{% /tilesContainer %}

View File

@ -289,6 +289,10 @@ site_menu:
url: /connectors/database/mongodb
- category: Connectors / Database / MongoDB / Run Externally
url: /connectors/database/mongodb/yaml
- category: Connectors / Database / Couchbase
url: /connectors/database/couchbase
- category: Connectors / Database / Couchbase / Run Externally
url: /connectors/database/couchbase/yaml
- category: Connectors / Database / MSSQL
url: /connectors/database/mssql
- category: Connectors / Database / MSSQL / Run Externally
@ -1058,7 +1062,13 @@ site_menu:
url: /main-concepts/metadata-standard/schemas/entity/services/connections/database/mongodb/mongodbvalues
- category: Main Concepts / Metadata Standard / Schemas / Entity / Services / Connections / Database / MongoDBConnection
url: /main-concepts/metadata-standard/schemas/entity/services/connections/database/mongodbconnection
- category: Main Concepts / Metadata Standard / Schemas / Entity / Services / Connections / Database / MssqlConnection
- category: Main Concepts / Metadata Standard / Schemas / Entity / Services / Connections / Database /
Couchbase
url: /main-concepts/metadata-standard/schemas/entity/services/connections/database/couchbase
CouchbaseConnection
url: /main-concepts/metadata-standard/schemas/entity/services/connections/database/couchbaseconnection
- category: Main Concepts / Metadata Standard / Schemas / Entity / Services / Connections / Database /
MssqlConnection
url: /main-concepts/metadata-standard/schemas/entity/services/connections/database/mssqlconnection
- category: Main Concepts / Metadata Standard / Schemas / Entity / Services / Connections / Database / MysqlConnection
url: /main-concepts/metadata-standard/schemas/entity/services/connections/database/mysqlconnection

Binary file not shown.

After

Width:  |  Height:  |  Size: 237 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 361 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 432 KiB

View File

@ -0,0 +1,22 @@
{
"name": "Couchbase",
"fullyQualifiedName": "Couchbase",
"displayName": "Couchbase Test Connection",
"description": "This Test Connection validates the access against the database and basic metadata extraction of collections.",
"steps": [
{
"name": "GetDatabases",
"description": "List all the databases available to the user.",
"errorMessage": "Failed to fetch databases, please validate if the user has enough privilege to fetch databases.",
"mandatory": true
},
{
"name": "GetCollections",
"description": "List all the collections available within a randomly chosen database available to the user.",
"errorMessage": "Failed to fetch collections, please validate if the user has `listCollection` privilege on available databases",
"mandatory": true
}
]
}

View File

@ -0,0 +1,60 @@
{
"$id": "https://open-metadata.org/schema/entity/services/connections/database/couchbaseConnection.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Couchbase Connection",
"description": "Couchbase Connection Config",
"type": "object",
"javaType": "org.openmetadata.schema.services.connections.database.CouchbaseConnection",
"definitions": {
"couchbaseType": {
"description": "Service type.",
"type": "string",
"enum": ["Couchbase"],
"default": "Couchbase"
},
"couchbaseScheme": {
"description": "Couchbase driver scheme options.",
"type": "string",
"enum": ["couchbase"],
"default": "couchbase"
}
},
"properties": {
"type": {
"title": "Service Type",
"description": "Service Type",
"$ref": "#/definitions/couchbaseType",
"default": "Couchbase"
},
"scheme": {
"title": "Connection Scheme",
"description": "Couchbase driver scheme options.",
"$ref": "#/definitions/couchbaseScheme",
"default": "couchbase"
},
"bucket": {
"title": "Connection Bucket",
"description": "Couchbase connection Bucket options.",
"type": "string"
},
"username": {
"title": "Username",
"description": "Username to connect to Couchbase. This user should have privileges to read all the metadata in Couchbase.",
"type": "string"
},
"password": {
"title": "Password",
"description": "Password to connect to Couchbase.",
"type": "string",
"format": "password"
},
"hostport": {
"title": "Hostport",
"description": "Hostname of the Couchbase service.",
"type": "string"
}
},
"additionalProperties": false,
"required": ["hostport", "username", "password"]
}

View File

@ -47,7 +47,8 @@
"CustomDatabase",
"Dbt",
"SapHana",
"MongoDB"
"MongoDB",
"Couchbase"
],
"javaEnums": [
{
@ -148,6 +149,9 @@
},
{
"name": "MongoDB"
},
{
"name": "Couchbase"
}
]
},
@ -253,6 +257,9 @@
},
{
"$ref": "./connections/database/mongoDBConnection.json"
},
{
"$ref": "./connections/database/couchbaseConnection.json"
}
]
}

View File

@ -0,0 +1,34 @@
# Couchbase
In this section, we provide guides and references to use the Couchbase connector. You can view the full documentation for Couchbase [here](https://docs.open-metadata.org/connectors/database/couchbase).
## Requirements
To extract metadata, the user used in the connection needs to have all necessary access permission.
You can find further information on the Couchbase connector [here](https://docs.open-metadata.org/connectors/database/couchbase).
## Connection Details
$$section
### Username $(id="username")
Username to connect to Couchbase.
$$
$$section
### Password $(id="password")
Password to connect to Couchbase.
$$
$$section
### Hostport $(id="hostport")
This parameter specifies the hostname/endpoint of your client connection to the Couchbase instance.
$$
$$section
### Bucket Name $(id="bucket")
In OpenMetadata, the Database Service hierarchy works as follows:
```
Database Service > Bucket > Schema > Table
```
In the case of Couchbase, if you don't provide a bucket name then by default it will ingest all available buckets.
$$

View File

@ -0,0 +1 @@
<svg viewBox="0 0 256 256" xmlns="http://www.w3.org/2000/svg" preserveAspectRatio="xMidYMid"><path d="M128 0C57.426 0 0 57.233 0 128c0 70.574 57.233 128 128 128 70.574 0 128-57.233 128-128S198.574 0 128 0zm86.429 150.429c0 7.734-4.447 14.502-13.148 16.048-15.082 2.707-46.792 4.254-73.281 4.254-26.49 0-58.2-1.547-73.281-4.254-8.7-1.546-13.148-8.314-13.148-16.048v-49.885c0-7.734 5.994-14.888 13.148-16.049 4.447-.773 14.888-1.546 23.01-1.546 3.093 0 5.606 2.32 5.606 5.994v34.997l44.858-.967 44.858.967V88.943c0-3.674 2.514-5.994 5.608-5.994 8.12 0 18.562.773 23.009 1.546 7.347 1.16 13.148 8.315 13.148 16.049-.387 16.435-.387 33.257-.387 49.885z" fill="#ED2226"/></svg>

After

Width:  |  Height:  |  Size: 672 B

View File

@ -32,6 +32,7 @@ import athena from '../assets/img/service-icon-athena.png';
import atlas from '../assets/img/service-icon-atlas.svg';
import azuresql from '../assets/img/service-icon-azuresql.png';
import clickhouse from '../assets/img/service-icon-clickhouse.png';
import couchbase from '../assets/img/service-icon-couchbase.svg';
import dagster from '../assets/img/service-icon-dagster.png';
import databrick from '../assets/img/service-icon-databrick.png';
import datalake from '../assets/img/service-icon-datalake.png';
@ -174,6 +175,7 @@ export const MS_AZURE = msAzure;
export const SPLINE = spline;
export const MONGODB = mongodb;
export const QLIK_SENSE = qlikSense;
export const COUCHBASE = couchbase;
export const PLUS = plus;
export const NOSERVICE = noService;
@ -382,6 +384,7 @@ export const BETA_SERVICES = [
PipelineServiceType.Spline,
DatabaseServiceType.MongoDB,
DashboardServiceType.QlikSense,
DatabaseServiceType.Couchbase,
];
export const TEST_CONNECTION_INITIAL_MESSAGE = i18n.t(

View File

@ -18,6 +18,7 @@ import athenaConnection from '../jsons/connectionSchemas/connections/database/at
import azureSQLConnection from '../jsons/connectionSchemas/connections/database/azureSQLConnection.json';
import bigQueryConnection from '../jsons/connectionSchemas/connections/database/bigQueryConnection.json';
import clickhouseConnection from '../jsons/connectionSchemas/connections/database/clickhouseConnection.json';
import couchbaseConnection from '../jsons/connectionSchemas/connections/database/couchbaseConnection.json';
import customDatabaseConnection from '../jsons/connectionSchemas/connections/database/customDatabaseConnection.json';
import databricksConnection from '../jsons/connectionSchemas/connections/database/databricksConnection.json';
import DatalakeConnection from '../jsons/connectionSchemas/connections/database/datalakeConnection.json';
@ -200,6 +201,11 @@ export const getDatabaseConfig = (type: DatabaseServiceType) => {
break;
}
case DatabaseServiceType.Couchbase: {
schema = couchbaseConnection;
break;
}
case DatabaseServiceType.PinotDB: {
schema = pinotConnection;

View File

@ -32,6 +32,7 @@ import {
AZURESQL,
BIGQUERY,
CLICKHOUSE,
COUCHBASE,
CUSTOM_STORAGE_DEFAULT,
DAGSTER,
DASHBOARD_DEFAULT,
@ -212,6 +213,9 @@ export const serviceTypeLogo = (type: string) => {
case DatabaseServiceType.MongoDB:
return MONGODB;
case DatabaseServiceType.Couchbase:
return COUCHBASE;
case MessagingServiceType.Kafka:
return KAFKA;