diff --git a/ingestion/examples/workflows/couchbase.yaml b/ingestion/examples/workflows/couchbase.yaml new file mode 100644 index 00000000000..3e32bc6cf72 --- /dev/null +++ b/ingestion/examples/workflows/couchbase.yaml @@ -0,0 +1,23 @@ +source: + type: couchbase + serviceName: local_couchbase + serviceConnection: + config: + type: Couchbase + bucket: bucket + username: username + password: password + hostport: hostport + sourceConfig: + config: + type: DatabaseMetadata +sink: + type: metadata-rest + config: {} +workflowConfig: + loggerLevel: DEBUG + openMetadataServerConfig: + hostPort: http://localhost:8585/api + authProvider: openmetadata + securityConfig: + "jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg" diff --git a/ingestion/setup.py b/ingestion/setup.py index c3bd2947e71..28429f38df9 100644 --- a/ingestion/setup.py +++ b/ingestion/setup.py @@ -198,6 +198,7 @@ plugins: Dict[str, Set[str]] = { "looker": {"looker-sdk>=22.20.0", "lkml~=1.3"}, "mlflow": {"mlflow-skinny~=1.30", "alembic~=1.10.2"}, "mongo": {"pymongo~=4.3", VERSIONS["pandas"]}, + "couchbase": {"couchbase~=4.1"}, "mssql": {"sqlalchemy-pytds~=0.3"}, "mssql-odbc": {VERSIONS["pyodbc"]}, "mysql": {VERSIONS["pymysql"]}, diff --git a/ingestion/src/metadata/examples/workflows/couchbase.yaml b/ingestion/src/metadata/examples/workflows/couchbase.yaml new file mode 100644 index 00000000000..3e32bc6cf72 --- /dev/null +++ b/ingestion/src/metadata/examples/workflows/couchbase.yaml 
@@ -0,0 +1,23 @@ +source: + type: couchbase + serviceName: local_couchbase + serviceConnection: + config: + type: Couchbase + bucket: bucket + username: username + password: password + hostport: hostport + sourceConfig: + config: + type: DatabaseMetadata +sink: + type: metadata-rest + config: {} +workflowConfig: + loggerLevel: DEBUG + openMetadataServerConfig: + hostPort: http://localhost:8585/api + authProvider: openmetadata + securityConfig: + "jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg" diff --git a/ingestion/src/metadata/ingestion/source/database/couchbase/__init__.py b/ingestion/src/metadata/ingestion/source/database/couchbase/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ingestion/src/metadata/ingestion/source/database/couchbase/connection.py b/ingestion/src/metadata/ingestion/source/database/couchbase/connection.py new file mode 100644 index 00000000000..616a095bd41 --- /dev/null +++ b/ingestion/src/metadata/ingestion/source/database/couchbase/connection.py @@ -0,0 +1,88 @@ +# Copyright 2021 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
"""
Source connection handler
"""
from functools import partial
from typing import Any, Optional

from pydantic import BaseModel

from metadata.generated.schema.entity.automations.workflow import (
    Workflow as AutomationWorkflow,
)
from metadata.generated.schema.entity.services.connections.database.couchbaseConnection import (
    CouchbaseConnection,
)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata


def get_connection(connection: CouchbaseConnection):
    """
    Create a Couchbase cluster client from the service connection.

    The connection URL is built as ``<scheme>://<hostport>`` and the client
    authenticates with the configured username and password.

    Args:
        connection: Couchbase service connection configuration.

    Returns:
        The object returned by ``Cluster.connect``.
    """
    # pylint: disable=import-outside-toplevel
    # The couchbase SDK ships as an optional ingestion plugin, so it is
    # imported lazily rather than at module import time.
    from couchbase.auth import PasswordAuthenticator
    from couchbase.cluster import Cluster
    from couchbase.options import ClusterOptions

    auth = PasswordAuthenticator(
        connection.username, connection.password.get_secret_value()
    )
    url = f"{connection.scheme.value}://{connection.hostport}"
    couchbase_cluster = Cluster.connect(url, ClusterOptions(auth))
    return couchbase_cluster


def test_connection(
    metadata: OpenMetadata,
    client: Any,
    service_connection: CouchbaseConnection,
    automation_workflow: Optional[AutomationWorkflow] = None,
) -> None:
    """
    Test connection. This can be executed either as part
    of a metadata workflow or during an Automation Workflow.

    Two steps are registered:

    * ``GetDatabases``: list the buckets and remember the first one.
    * ``GetCollections``: list the scopes of the remembered bucket.

    Args:
        metadata: OpenMetadata client forwarded to ``test_connection_steps``.
        client: Couchbase cluster client (as returned by ``get_connection``).
        service_connection: Couchbase service connection configuration.
        automation_workflow: Optional automation workflow forwarded to
            ``test_connection_steps``.
    """

    # pylint: disable=import-outside-toplevel
    from couchbase.cluster import Cluster

    class SchemaHolder(BaseModel):
        # Explicit default: relying on ``Optional[...]`` to imply ``None`` only
        # works on pydantic v1; without it ``SchemaHolder()`` needs a value.
        database: Optional[str] = None

    holder = SchemaHolder()

    def test_get_databases(client: Cluster, holder: SchemaHolder):
        buckets = client.buckets()
        # Only one bucket is needed for the follow-up step.
        first_bucket = next(iter(buckets.get_all_buckets()), None)
        if first_bucket is not None:
            holder.database = first_bucket.name

    def test_get_collections(client: Cluster, holder: SchemaHolder):
        # Fail with an explicit message instead of calling ``bucket(None)``
        # when the previous step did not find any bucket.
        if holder.database is None:
            raise ValueError(
                "No bucket available to the user; cannot list scopes/collections."
            )
        database = client.bucket(holder.database)
        collection_manager = database.collections()
        collection_manager.get_all_scopes()

    test_fn = {
        "GetDatabases": partial(test_get_databases, client, holder),
        "GetCollections": partial(test_get_collections, client, holder),
    }

    test_connection_steps(
        metadata=metadata,
        test_fn=test_fn,
        service_type=service_connection.type.value,
        automation_workflow=automation_workflow,
    )
"""
Couchbase source methods.
"""

import traceback
from typing import Dict, Iterable, List

from metadata.generated.schema.entity.services.connections.database.couchbaseConnection import (
    CouchbaseConnection,
)
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
    OpenMetadataConnection,
)
from metadata.generated.schema.metadataIngestion.workflow import (
    Source as WorkflowSource,
)
from metadata.ingestion.api.source import InvalidSourceException
from metadata.ingestion.source.database.common_nosql_source import (
    SAMPLE_SIZE,
    CommonNoSQLSource,
)
from metadata.ingestion.source.database.couchbase.queries import (
    COUCHBASE_GET_DATA,
    COUCHBASE_SQL_STATEMENT,
)
from metadata.utils.logger import ingestion_logger

logger = ingestion_logger()


class CouchbaseSource(CommonNoSQLSource):
    """
    Implements the necessary methods to extract
    Database metadata from Couchbase Source

    Couchbase buckets are reported as databases, scopes as schemas and
    collections as tables.
    """

    def __init__(self, config: WorkflowSource, metadata_config: OpenMetadataConnection):
        super().__init__(config, metadata_config)
        self.couchbase = self.connection_obj

    @classmethod
    def create(cls, config_dict, metadata_config: OpenMetadataConnection):
        """
        Build the source from a workflow ``source`` config dictionary.

        Raises:
            InvalidSourceException: if the service connection is not a
                ``CouchbaseConnection``.
        """
        config: WorkflowSource = WorkflowSource.parse_obj(config_dict)
        connection: CouchbaseConnection = config.serviceConnection.__root__.config
        if not isinstance(connection, CouchbaseConnection):
            raise InvalidSourceException(
                f"Expected CouchbaseConnection, but got {connection}"
            )
        return cls(config, metadata_config)

    def get_database_names(self) -> Iterable[str]:
        """
        Yield the configured bucket name, or every bucket name of the
        cluster when no bucket is configured.
        """
        try:
            configured_bucket = self.service_connection.bucket
            if configured_bucket:
                yield configured_bucket
            else:
                buckets = self.couchbase.buckets()
                for bucket in buckets.get_all_buckets():
                    yield bucket.name
        except Exception as exp:
            logger.debug(f"Failed to fetch bucket name: {exp}")
            logger.debug(traceback.format_exc())

    def get_schema_name_list(self) -> List[str]:
        """
        Method to get list of schema names available within NoSQL db
        need to be overridden by sources

        Lists the scopes of the bucket currently held in the context and
        caches them in ``self.context.scope_dict`` (scope name -> scope) for
        ``get_table_name_list``. Returns an empty list on failure.
        """
        # Bound before the ``try`` so the error handler can always format it,
        # even when reading the name from the context is what failed.
        database_name = None
        try:
            database_name = self.context.database.name.__root__
            bucket = self.couchbase.bucket(database_name)
            collection_manager = bucket.collections()
            # Fetch the scopes once and reuse them for both the cache and the
            # returned names.
            scopes = list(collection_manager.get_all_scopes())
            self.context.scope_dict = {scope.name: scope for scope in scopes}
            return [scope.name for scope in scopes]
        except Exception as exp:
            logger.debug(
                f"Failed to list scope for bucket names [{database_name}]: {exp}"
            )
            logger.debug(traceback.format_exc())
        return []

    def get_table_name_list(self, schema_name: str) -> List[str]:
        """
        Method to get list of table names available within schema db

        Reads the collections of the scope cached by
        ``get_schema_name_list``. Returns an empty list on failure
        (including an unknown scope name).
        """
        try:
            scope_object = self.context.scope_dict.get(schema_name)
            return [collection.name for collection in scope_object.collections]
        except Exception as exp:
            logger.debug(
                f"Failed to list collection names for scope [{schema_name}]: {exp}"
            )
            logger.debug(traceback.format_exc())
        return []

    def get_table_columns_dict(self, schema_name: str, table_name: str) -> List[Dict]:
        """
        Method to get actual data available within table
        need to be overridden by sources

        Samples up to ``SAMPLE_SIZE`` documents from the collection, but only
        when ``system:indexes`` reports a primary index for a keyspace named
        ``table_name``. Returns an empty list otherwise or on failure.
        """
        try:
            database_name = self.context.database.name.__root__
            index_query = COUCHBASE_SQL_STATEMENT.format(table_name=table_name)
            index_rows = self.couchbase.query(index_query).rows()
            # Stop at the first non-empty index row; its content is not used.
            if any(len(row) > 0 for row in index_rows):
                query_coln = COUCHBASE_GET_DATA.format(
                    database_name=database_name,
                    schema_name=schema_name,
                    table_name=table_name,
                    sample_size=SAMPLE_SIZE,
                )
                query_iter = self.couchbase.query(query_coln)
                return list(query_iter.rows())
        except Exception as exp:
            logger.debug(f"Failed to list column names for table [{table_name}]: {exp}")
            logger.debug(traceback.format_exc())
        return []
"""
SQL Queries used during ingestion
"""

import textwrap

# Selects the primary-index entries from `system:indexes` whose keyspace is
# named `{table_name}`. The template is filled in with `str.format`.
# NOTE(review): the filter uses only the collection name, not the bucket or
# scope -- confirm this is intended when several scopes share a name.
COUCHBASE_SQL_STATEMENT = textwrap.dedent(
    """ SELECT * FROM system:indexes WHERE keyspace_id = '{table_name}' AND is_primary = TRUE """
)

# Selects up to `{sample_size}` documents from the fully qualified
# `bucket`.`scope`.`collection` keyspace. The template is filled in with
# `str.format`.
COUCHBASE_GET_DATA = textwrap.dedent(
    """ select crc.* from `{database_name}`.`{schema_name}`.`{table_name}` crc limit {sample_size} """
)
+ +""" +Test Couchbase using the topology +""" + +from unittest import TestCase +from unittest.mock import patch + +from metadata.generated.schema.api.data.createTable import CreateTableRequest +from metadata.generated.schema.entity.data.database import Database +from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema +from metadata.generated.schema.entity.data.table import Column, DataType, TableType +from metadata.generated.schema.entity.services.databaseService import ( + DatabaseConnection, + DatabaseService, + DatabaseServiceType, +) +from metadata.generated.schema.metadataIngestion.workflow import ( + OpenMetadataWorkflowConfig, +) +from metadata.generated.schema.type.entityReference import EntityReference +from metadata.ingestion.source.database.couchbase.metadata import CouchbaseSource + +mock_couch_config = { + "source": { + "type": "couchbase", + "serviceName": "local_couchbase", + "serviceConnection": { + "config": { + "type": "Couchbase", + "bucket": "default", + "username": "username", + "password": "password", + "hostport": "localhost", + }, + }, + "sourceConfig": {"config": {"type": "DatabaseMetadata"}}, + }, + "sink": {"type": "metadata-rest", "config": {}}, + "workflowConfig": { + "openMetadataServerConfig": { + "hostPort": "http://localhost:8585/api", + "authProvider": "openmetadata", + "securityConfig": { + "jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg" + }, + } + }, +} + 
MOCK_DATABASE_SERVICE = DatabaseService(
    id="85811038-099a-11ed-861d-0242ac120002",
    name="local_couchbase",
    connection=DatabaseConnection(),
    serviceType=DatabaseServiceType.Couchbase,
)

MOCK_DATABASE = Database(
    id="2aaa012e-099a-11ed-861d-0242ac120002",
    name="default",
    fullyQualifiedName="local_couchbase.default",
    displayName="default",
    description="",
    service=EntityReference(
        id="85811038-099a-11ed-861d-0242ac120002",
        type="databaseService",
    ),
)

MOCK_DATABASE_SCHEMA = DatabaseSchema(
    id="2aaa012e-099a-11ed-861d-0242ac120056",
    name="default",
    fullyQualifiedName="local_couchbase.default.default",
    displayName="default",
    description="",
    database=EntityReference(
        id="2aaa012e-099a-11ed-861d-0242ac120002",
        type="database",
    ),
    service=EntityReference(
        id="85811038-099a-11ed-861d-0242ac120002",
        type="databaseService",
    ),
)

# Sample documents returned by the mocked `get_table_columns_dict`.
MOCK_JSON_TABLE_DATA = [
    {
        "name": "mayur",
        "age": 25,
        "is_married": False,
        "address": {"line": "random address"},
    },
    {"name": "onkar", "age": 26, "is_married": True},
]

# Table request expected to be produced from MOCK_JSON_TABLE_DATA.
MOCK_CREATE_TABLE = [
    CreateTableRequest(
        name="random_table",
        tableType=TableType.Regular,
        columns=[
            Column(
                name="name",
                displayName="name",
                dataType=DataType.STRING,
                dataTypeDisplay=DataType.STRING.value,
            ),
            Column(
                name="age",
                displayName="age",
                dataType=DataType.INT,
                dataTypeDisplay=DataType.INT.value,
            ),
            Column(
                name="is_married",
                displayName="is_married",
                dataType=DataType.BOOLEAN,
                dataTypeDisplay=DataType.BOOLEAN.value,
            ),
            Column(
                name="address",
                displayName="address",
                dataType=DataType.RECORD,
                dataTypeDisplay=DataType.RECORD.value,
                children=[
                    Column(
                        name="line",
                        dataType=DataType.STRING,
                        dataTypeDisplay=DataType.STRING.value,
                    )
                ],
            ),
        ],
        tableConstraints=None,
        databaseSchema="local_couchbase.default.default",
    )
]

EXPECTED_DATABASE_NAMES = ["default"]

EXPECTED_DATABASE_SCHEMA_NAMES = [
    "random_schema",
    "random1_schema",
]

MOCK_DATABASE_SCHEMA_NAMES = [
    "random_schema",
    "random1_schema",
]

EXPECTED_TABLE_NAMES = [
    ("random_table", TableType.Regular),
    ("random1_table", TableType.Regular),
]

MOCK_TABLE_NAMES = [
    "random_table",
    "random1_table",
]


def custom_column_compare(self, other):
    """Compare two columns by name, description and children only."""
    return (
        self.name == other.name
        and self.description == other.description
        and self.children == other.children
    )


class CouchbaseUnitTest(TestCase):
    """Unit tests for CouchbaseSource with the Couchbase connection mocked."""

    @patch(
        "metadata.ingestion.source.database.couchbase.metadata.CouchbaseSource.test_connection"
    )
    @patch("metadata.ingestion.source.database.couchbase.connection.get_connection")
    def __init__(self, methodName, get_connection, test_connection) -> None:
        super().__init__(methodName)
        # No real cluster is available: both the client factory and the
        # connection test are replaced by mocks.
        get_connection.return_value = False
        test_connection.return_value = False

        self.config = OpenMetadataWorkflowConfig.parse_obj(mock_couch_config)
        self.couch_source = CouchbaseSource.create(
            mock_couch_config["source"],
            self.config.workflowConfig.openMetadataServerConfig,
        )
        self.couch_source.context.__dict__["database_service"] = MOCK_DATABASE_SERVICE
        self.couch_source.context.__dict__["database"] = MOCK_DATABASE
        self.couch_source.context.__dict__["database_schema"] = MOCK_DATABASE_SCHEMA

    def test_database_names(self):
        assert EXPECTED_DATABASE_NAMES == list(self.couch_source.get_database_names())

    def test_database_schema_names(self):
        with patch.object(
            CouchbaseSource,
            "get_schema_name_list",
            return_value=MOCK_DATABASE_SCHEMA_NAMES,
        ):
            assert EXPECTED_DATABASE_SCHEMA_NAMES == list(
                self.couch_source.get_database_schema_names()
            )

    def test_table_names(self):
        with patch.object(
            CouchbaseSource, "get_table_name_list", return_value=MOCK_TABLE_NAMES
        ):
            assert EXPECTED_TABLE_NAMES == list(
                self.couch_source.get_tables_name_and_type()
            )

    def test_yield_tables(self):
        # Scope the relaxed Column equality to this test. Assigning
        # ``Column.__eq__`` directly would leak into every other test that
        # runs in the same process.
        with patch.object(Column, "__eq__", custom_column_compare), patch.object(
            CouchbaseSource, "get_table_columns_dict", return_value=MOCK_JSON_TABLE_DATA
        ):
            assert MOCK_CREATE_TABLE == list(
                self.couch_source.yield_table(EXPECTED_TABLE_NAMES[0])
            )
+{%/inlineCallout%} + +## Metadata Ingestion + +{% partial + file="/v1.2.0/connectors/metadata-ingestion-ui.md" + variables={ + connector: "Couchbase", + selectServicePath: "/images/v1.2.0/connectors/couchbase/select-service.png", + addNewServicePath: "/images/v1.2.0/connectors/couchbase/add-new-service.png", + serviceConnectionPath: "/images/v1.2.0/connectors/couchbase/service-connection.png", +} +/%} + +{% stepsContainer %} +{% extraContent parentTagName="stepsContainer" %} + +#### Connection Details + +- **Username**: Username to connect to Couchbase. +- **Password**: Password to connect to Couchbase. +- **Hostport**: If couchbase is hosted on cloud then the hostport parameter specifies the connection string and if you are using couchbase server then the hostport parameter specifies hostname of the Couchbase. This should be specified as a string in the format `hostname` or `xyz.cloud.couchbase.com`. E.g., `localhost`. +- **bucketName**: Optional name to give to the bucket in OpenMetadata. If left blank, we will ingest all the bucket names. 
+ +{% partial file="/v1.2.0/connectors/database/advanced-configuration.md" /%} + +{% /extraContent %} + +{% partial file="/v1.2.0/connectors/test-connection.md" /%} + +{% partial file="/v1.2.0/connectors/database/configure-ingestion.md" /%} + +{% partial file="/v1.2.0/connectors/ingestion-schedule-and-deploy.md" /%} + +{% /stepsContainer %} + +{% partial file="/v1.2.0/connectors/troubleshooting.md" /%} + +{% partial file="/v1.2.0/connectors/database/related.md" /%} diff --git a/openmetadata-docs/content/v1.2.0-SNAPSHOT/connectors/database/couchbase/yaml.md b/openmetadata-docs/content/v1.2.0-SNAPSHOT/connectors/database/couchbase/yaml.md new file mode 100644 index 00000000000..a6825ad3369 --- /dev/null +++ b/openmetadata-docs/content/v1.2.0-SNAPSHOT/connectors/database/couchbase/yaml.md @@ -0,0 +1,226 @@ +--- +title: Run the Couchbase Connector Externally +slug: /connectors/database/couchbase/yaml +--- + +# Run the Couchbase Connector Externally + +{% multiTablesWrapper %} + +| Feature | Status | +| :----------------- | :--------------------------- | +| Stage | PROD | +| Metadata | {% icon iconName="check" /%} | +| Query Usage | {% icon iconName="cross" /%} | +| Data Profiler | {% icon iconName="cross" /%} | +| Data Quality | {% icon iconName="cross" /%} | +| Lineage | {% icon iconName="cross" /%} | +| DBT | {% icon iconName="cross" /%} | +| Supported Versions | -- | + +| Feature | Status | +| :----------- | :--------------------------- | +| Lineage | {% icon iconName="cross" /%} | +| Table-level | {% icon iconName="cross" /%} | +| Column-level | {% icon iconName="cross" /%} | + +{% /multiTablesWrapper %} + +In this section, we provide guides and references to use the Couchbase connector. 
+ +Configure and schedule Couchbase metadata workflows from the OpenMetadata UI: + +- [Requirements](#requirements) +- [Metadata Ingestion](#metadata-ingestion) + +{% partial file="/v1.2.0/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/database/couchbase/yaml"} /%} + +{% partial file="/v1.2.0/connectors/external-ingestion-deployment.md" /%} + +## Requirements + +{%inlineCallout icon="description" bold="OpenMetadata 0.12 or later" href="/deployment"%} +To deploy OpenMetadata, check the Deployment guides. +{%/inlineCallout%} + +### Python Requirements + +To run the Couchbase ingestion, you will need to install: + +```bash +pip3 install "openmetadata-ingestion[couchbase]" +``` + +## Metadata Ingestion + +All connectors are defined as JSON Schemas. +[Here](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/couchbaseConnection.json) +you can find the structure to create a connection to Couchbase. + +In order to create and run a Metadata Ingestion workflow, we will follow +the steps to create a YAML configuration able to connect to the source, +process the Entities if needed, and reach the OpenMetadata server. + +The workflow is modeled around the following +[JSON Schema](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/workflow.json) + +### 1. Define the YAML Config + +This is a sample config for Couchbase: + +{% codePreview %} + +{% codeInfoContainer %} + +#### Source Configuration - Service Connection + +{% codeInfo srNumber=1 %} + +**username**: Username to connect to Couchbase. + +{% /codeInfo %} + +{% codeInfo srNumber=2 %} + +**password**: Password to connect to Couchbase. 
+ +{% /codeInfo %} + +{% codeInfo srNumber=3 %} + +**hostport**: If couchbase is hosted on cloud then the hostport parameter specifies the connection string and if you are using couchbase server then the hostport parameter specifies hostname of the Couchbase. This should be specified as a string in the format `hostname` or `xyz.cloud.couchbase.com`. E.g., `localhost`. + +{% /codeInfo %} + +{% codeInfo srNumber=4 %} + +**bucket**: Optional name to give to the bucket in OpenMetadata. If left blank, we will ingest all the bucket names. + +{% /codeInfo %} + +#### Source Configuration - Source Config + +{% codeInfo srNumber=5 %} + +The `sourceConfig` is defined [here](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/databaseServiceMetadataPipeline.json): + +**markDeletedTables**: To flag tables as soft-deleted if they are not present anymore in the source system. + +**includeTables**: true or false, to ingest table data. Default is true. + +**includeViews**: true or false, to ingest views definitions. + +**databaseFilterPattern**, **schemaFilterPattern**, **tableFilterPattern**: Note that the filter supports regex as include or exclude. You can find examples [here](/connectors/ingestion/workflows/metadata/filter-patterns/database) + +{% /codeInfo %} + +#### Sink Configuration + +{% codeInfo srNumber=6 %} + +To send the metadata to OpenMetadata, it needs to be specified as `type: metadata-rest`. + +{% /codeInfo %} + +{% partial file="/v1.2.0/connectors/workflow-config.md" /%} + +#### Advanced Configuration + +{% codeInfo srNumber=7 %} + +**Connection Options (Optional)**: Enter the details for any additional connection options that can be sent to Couchbase during the connection. These details must be added as Key-Value pairs. 
+ +{% /codeInfo %} + + +{% /codeInfoContainer %} + +{% codeBlock fileName="filename.yaml" %} + +```yaml +source: + type: couchbase + serviceName: local_couchbase + serviceConnection: + config: + type: Couchbase + +```yaml {% srNumber=1 %} + username: username +``` +```yaml {% srNumber=2 %} + password: password +``` +```yaml {% srNumber=3 %} + hostport: localhost +``` + +```yaml {% srNumber=4 %} + bucket: custom_bucket_name +``` + +```yaml {% srNumber=5 %} + sourceConfig: + config: + type: DatabaseMetadata + markDeletedTables: true + includeTables: true + includeViews: true + # includeTags: true + # databaseFilterPattern: + # includes: + # - database1 + # - database2 + # excludes: + # - database3 + # - database4 + # schemaFilterPattern: + # includes: + # - schema1 + # - schema2 + # excludes: + # - schema3 + # - schema4 + # tableFilterPattern: + # includes: + # - users + # - type_test + # excludes: + # - table3 + # - table4 +``` + +```yaml {% srNumber=6 %} +sink: + type: metadata-rest + config: {} +``` + +{% partial file="/v1.2.0/connectors/workflow-config-yaml.md" /%} + +{% /codeBlock %} + +{% /codePreview %} + +### 2. Run with the CLI + +First, we will need to save the YAML file. Afterward, and with all requirements installed, we can run: + +```bash +metadata ingest -c +``` + +Note that from connector to connector, this recipe will always be the same. By updating the YAML configuration, +you will be able to extract metadata from different sources. 
+ +## Related + +{% tilesContainer %} + +{% tile + title="Ingest with Airflow" + description="Configure the ingestion using Airflow SDK" + link="/connectors/database/mongodb/airflow" + / %} + +{% /tilesContainer %} diff --git a/openmetadata-docs/content/v1.2.0-SNAPSHOT/menu.md b/openmetadata-docs/content/v1.2.0-SNAPSHOT/menu.md index 36461582de3..4e95b4f534e 100644 --- a/openmetadata-docs/content/v1.2.0-SNAPSHOT/menu.md +++ b/openmetadata-docs/content/v1.2.0-SNAPSHOT/menu.md @@ -289,6 +289,10 @@ site_menu: url: /connectors/database/mongodb - category: Connectors / Database / MongoDB / Run Externally url: /connectors/database/mongodb/yaml + - category: Connectors / Database / Couchbase + url: /connectors/database/couchbase + - category: Connectors / Database / Couchbase / Run Externally + url: /connectors/database/couchbase/yaml - category: Connectors / Database / MSSQL url: /connectors/database/mssql - category: Connectors / Database / MSSQL / Run Externally @@ -1058,7 +1062,13 @@ site_menu: url: /main-concepts/metadata-standard/schemas/entity/services/connections/database/mongodb/mongodbvalues - category: Main Concepts / Metadata Standard / Schemas / Entity / Services / Connections / Database / MongoDBConnection url: /main-concepts/metadata-standard/schemas/entity/services/connections/database/mongodbconnection - - category: Main Concepts / Metadata Standard / Schemas / Entity / Services / Connections / Database / MssqlConnection + - category: Main Concepts / Metadata Standard / Schemas / Entity / Services / Connections / Database / + Couchbase + url: /main-concepts/metadata-standard/schemas/entity/services/connections/database/couchbase + CouchbaseConnection + url: /main-concepts/metadata-standard/schemas/entity/services/connections/database/couchbaseconnection + - category: Main Concepts / Metadata Standard / Schemas / Entity / Services / Connections / Database / + MssqlConnection url: 
/main-concepts/metadata-standard/schemas/entity/services/connections/database/mssqlconnection - category: Main Concepts / Metadata Standard / Schemas / Entity / Services / Connections / Database / MysqlConnection url: /main-concepts/metadata-standard/schemas/entity/services/connections/database/mysqlconnection diff --git a/openmetadata-docs/images/v1.2.0/connectors/couchbase/add-new-service.png b/openmetadata-docs/images/v1.2.0/connectors/couchbase/add-new-service.png new file mode 100644 index 00000000000..dede21cc8d9 Binary files /dev/null and b/openmetadata-docs/images/v1.2.0/connectors/couchbase/add-new-service.png differ diff --git a/openmetadata-docs/images/v1.2.0/connectors/couchbase/select-service.png b/openmetadata-docs/images/v1.2.0/connectors/couchbase/select-service.png new file mode 100644 index 00000000000..addce30c931 Binary files /dev/null and b/openmetadata-docs/images/v1.2.0/connectors/couchbase/select-service.png differ diff --git a/openmetadata-docs/images/v1.2.0/connectors/couchbase/service-connection.png b/openmetadata-docs/images/v1.2.0/connectors/couchbase/service-connection.png new file mode 100644 index 00000000000..e76e76c541f Binary files /dev/null and b/openmetadata-docs/images/v1.2.0/connectors/couchbase/service-connection.png differ diff --git a/openmetadata-service/src/main/resources/json/data/testConnections/database/couchbase.json b/openmetadata-service/src/main/resources/json/data/testConnections/database/couchbase.json new file mode 100644 index 00000000000..7ee5c6713d7 --- /dev/null +++ b/openmetadata-service/src/main/resources/json/data/testConnections/database/couchbase.json @@ -0,0 +1,22 @@ +{ + "name": "Couchbase", + "fullyQualifiedName": "Couchbase", + "displayName": "Couchbase Test Connection", + "description": "This Test Connection validates the access against the database and basic metadata extraction of collections.", + "steps": [ + { + "name": "GetDatabases", + "description": "List all the databases available to the 
user.", + "errorMessage": "Failed to fetch databases, please validate if the user has enough privilege to fetch databases.", + "mandatory": true + }, + { + "name": "GetCollections", + "description": "List all the collections available within a randomly chosen database available to the user.", + "errorMessage": "Failed to fetch collections, please validate if the user has `listCollection` privilege on available databases", + "mandatory": true + } + ] + } + + \ No newline at end of file diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/couchbaseConnection.json b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/couchbaseConnection.json new file mode 100644 index 00000000000..c9a6e3698bf --- /dev/null +++ b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/couchbaseConnection.json @@ -0,0 +1,60 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/database/couchbaseConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Couchbase Connection", + "description": "Couchbase Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.database.CouchbaseConnection", + "definitions": { + "couchbaseType": { + "description": "Service type.", + "type": "string", + "enum": ["Couchbase"], + "default": "Couchbase" + }, + "couchbaseScheme": { + "description": "Couchbase driver scheme options.", + "type": "string", + "enum": ["couchbase"], + "default": "couchbase" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/couchbaseType", + "default": "Couchbase" + }, + "scheme": { + "title": "Connection Scheme", + "description": "Couchbase driver scheme options.", + "$ref": "#/definitions/couchbaseScheme", + "default": "couchbase" + }, + + "bucket": { + "title": "Connection Bucket", + "description": "Couchbase 
connection Bucket options.", + "type": "string" + }, + "username": { + "title": "Username", + "description": "Username to connect to Couchbase. This user should have privileges to read all the metadata in Couchbase.", + "type": "string" + }, + "password": { + "title": "Password", + "description": "Password to connect to Couchbase.", + "type": "string", + "format": "password" + }, + "hostport": { + "title": "Hostport", + "description": "Hostname of the Couchbase service.", + "type": "string" + } + }, + "additionalProperties": false, + "required": ["hostport", "username", "password"] +} \ No newline at end of file diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/services/databaseService.json b/openmetadata-spec/src/main/resources/json/schema/entity/services/databaseService.json index 3dd12c51975..6fea33451ef 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/services/databaseService.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/services/databaseService.json @@ -47,7 +47,8 @@ "CustomDatabase", "Dbt", "SapHana", - "MongoDB" + "MongoDB", + "Couchbase" ], "javaEnums": [ { @@ -148,6 +149,9 @@ }, { "name": "MongoDB" + }, + { + "name": "Couchbase" } ] }, @@ -253,6 +257,9 @@ }, { "$ref": "./connections/database/mongoDBConnection.json" + }, + { + "$ref": "./connections/database/couchbaseConnection.json" } ] } diff --git a/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/Couchbase.md b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/Couchbase.md new file mode 100644 index 00000000000..b5fa2c6c815 --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/Couchbase.md @@ -0,0 +1,34 @@ +# Couchbase +In this section, we provide guides and references to use the Couchbase connector. You can view the full documentation for Couchbase [here](https://docs.open-metadata.org/connectors/database/couchbase). 
+## Requirements +To extract metadata, the user used in the connection needs to have all the necessary access permissions. + +You can find further information on the Couchbase connector in the [docs](https://docs.open-metadata.org/connectors/database/couchbase). + +## Connection Details + +$$section +### Username $(id="username") +Username to connect to Couchbase. +$$ + +$$section +### Password $(id="password") +Password to connect to Couchbase. +$$ + +$$section +### Hostport $(id="hostport") + +This parameter specifies the hostname/endpoint of your client connection to the Couchbase instance. + +$$section +### Bucket Name $(id="bucket") +In OpenMetadata, the Database Service hierarchy works as follows: +``` +Database Service > Bucket > Schema > Table +``` +In the case of Couchbase, if you don't provide a bucket name then by default it will ingest all available buckets. +$$ + + diff --git a/openmetadata-ui/src/main/resources/ui/src/assets/img/service-icon-couchbase.svg b/openmetadata-ui/src/main/resources/ui/src/assets/img/service-icon-couchbase.svg new file mode 100644 index 00000000000..0189d6db882 --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/src/assets/img/service-icon-couchbase.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/openmetadata-ui/src/main/resources/ui/src/constants/Services.constant.ts b/openmetadata-ui/src/main/resources/ui/src/constants/Services.constant.ts index 18b6cfcd69b..f0eb9f21951 100644 --- a/openmetadata-ui/src/main/resources/ui/src/constants/Services.constant.ts +++ b/openmetadata-ui/src/main/resources/ui/src/constants/Services.constant.ts @@ -32,6 +32,7 @@ import athena from '../assets/img/service-icon-athena.png'; import atlas from '../assets/img/service-icon-atlas.svg'; import azuresql from '../assets/img/service-icon-azuresql.png'; import clickhouse from '../assets/img/service-icon-clickhouse.png'; +import couchbase from '../assets/img/service-icon-couchbase.svg'; import dagster from 
'../assets/img/service-icon-dagster.png'; import databrick from '../assets/img/service-icon-databrick.png'; import datalake from '../assets/img/service-icon-datalake.png'; @@ -174,6 +175,7 @@ export const MS_AZURE = msAzure; export const SPLINE = spline; export const MONGODB = mongodb; export const QLIK_SENSE = qlikSense; +export const COUCHBASE = couchbase; export const PLUS = plus; export const NOSERVICE = noService; @@ -382,6 +384,7 @@ export const BETA_SERVICES = [ PipelineServiceType.Spline, DatabaseServiceType.MongoDB, DashboardServiceType.QlikSense, + DatabaseServiceType.Couchbase, ]; export const TEST_CONNECTION_INITIAL_MESSAGE = i18n.t( diff --git a/openmetadata-ui/src/main/resources/ui/src/utils/DatabaseServiceUtils.ts b/openmetadata-ui/src/main/resources/ui/src/utils/DatabaseServiceUtils.ts index 7a54c43be45..0356f00ab66 100644 --- a/openmetadata-ui/src/main/resources/ui/src/utils/DatabaseServiceUtils.ts +++ b/openmetadata-ui/src/main/resources/ui/src/utils/DatabaseServiceUtils.ts @@ -18,6 +18,7 @@ import athenaConnection from '../jsons/connectionSchemas/connections/database/at import azureSQLConnection from '../jsons/connectionSchemas/connections/database/azureSQLConnection.json'; import bigQueryConnection from '../jsons/connectionSchemas/connections/database/bigQueryConnection.json'; import clickhouseConnection from '../jsons/connectionSchemas/connections/database/clickhouseConnection.json'; +import couchbaseConnection from '../jsons/connectionSchemas/connections/database/couchbaseConnection.json'; import customDatabaseConnection from '../jsons/connectionSchemas/connections/database/customDatabaseConnection.json'; import databricksConnection from '../jsons/connectionSchemas/connections/database/databricksConnection.json'; import DatalakeConnection from '../jsons/connectionSchemas/connections/database/datalakeConnection.json'; @@ -200,6 +201,11 @@ export const getDatabaseConfig = (type: DatabaseServiceType) => { break; } + case 
DatabaseServiceType.Couchbase: { + schema = couchbaseConnection; + + break; + } case DatabaseServiceType.PinotDB: { schema = pinotConnection; diff --git a/openmetadata-ui/src/main/resources/ui/src/utils/ServiceUtils.tsx b/openmetadata-ui/src/main/resources/ui/src/utils/ServiceUtils.tsx index a94d188a66f..6f4fa2bc33f 100644 --- a/openmetadata-ui/src/main/resources/ui/src/utils/ServiceUtils.tsx +++ b/openmetadata-ui/src/main/resources/ui/src/utils/ServiceUtils.tsx @@ -32,6 +32,7 @@ import { AZURESQL, BIGQUERY, CLICKHOUSE, + COUCHBASE, CUSTOM_STORAGE_DEFAULT, DAGSTER, DASHBOARD_DEFAULT, @@ -212,6 +213,9 @@ export const serviceTypeLogo = (type: string) => { case DatabaseServiceType.MongoDB: return MONGODB; + case DatabaseServiceType.Couchbase: + return COUCHBASE; + case MessagingServiceType.Kafka: return KAFKA;