From cfc02ee196608d4f3a8ce5edb34a9bc2d44d0622 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 6 Apr 2021 15:38:25 -0700 Subject: [PATCH] feat(ingest): add Oracle db support (#2347) --- metadata-ingestion/README.md | 23 +++++++++++++++++++ metadata-ingestion/setup.py | 2 ++ .../src/datahub/ingestion/source/oracle.py | 19 +++++++++++++++ 3 files changed, 44 insertions(+) create mode 100644 metadata-ingestion/src/datahub/ingestion/source/oracle.py diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index 675427fc00..0b1c964480 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -41,6 +41,7 @@ We use a plugin architecture so that you can install only the dependencies you a | mssql | `pip install 'acryl-datahub[mssql]'` | SQL Server source | | mysql | `pip install 'acryl-datahub[mysql]'` | MySQL source | | postgres | `pip install 'acryl-datahub[postgres]'` | Postgres source | +| oracle | `pip install 'acryl-datahub[oracle]'` | Oracle source | | snowflake | `pip install 'acryl-datahub[snowflake]'` | Snowflake source | | mongodb | `pip install 'acryl-datahub[mongodb]'` | MongoDB source | | ldap | `pip install 'acryl-datahub[ldap]'` ([extra requirements]) | LDAP source | @@ -264,6 +265,28 @@ source: # options is same as above ``` +### Oracle `oracle` + +Extracts: + +- List of databases, schema, and tables +- Column types associated with each table + +```yml +source: + type: oracle + config: + # For more details on authentication, see the documentation: + # https://docs.sqlalchemy.org/en/14/dialects/oracle.html#dialect-oracle-cx_oracle-connect and + # https://cx-oracle.readthedocs.io/en/latest/user_guide/connection_handling.html#connection-strings. 
+ username: user + password: pass + host_port: localhost:1521 + database: dbname + # table_pattern/schema_pattern is same as above + # options is same as above +``` + ### Google BigQuery `bigquery` Extracts: diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 320043fe32..469a0a47d2 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -74,6 +74,7 @@ plugins: Dict[str, Set[str]] = { "mysql": sql_common | {"pymysql>=1.0.2"}, "postgres": sql_common | {"psycopg2-binary", "GeoAlchemy2"}, "snowflake": sql_common | {"snowflake-sqlalchemy"}, + "oracle": sql_common | {"cx_Oracle"}, "ldap": {"python-ldap>=2.4"}, "druid": sql_common | {"pydruid>=0.6.2"}, "mongodb": {"pymongo>=3.11"}, @@ -176,6 +177,7 @@ setuptools.setup( "mongodb = datahub.ingestion.source.mongodb:MongoDBSource", "mssql = datahub.ingestion.source.mssql:SQLServerSource", "mysql = datahub.ingestion.source.mysql:MySQLSource", + "oracle = datahub.ingestion.source.oracle:OracleSource", "postgres = datahub.ingestion.source.postgres:PostgresSource", "snowflake = datahub.ingestion.source.snowflake:SnowflakeSource", ], diff --git a/metadata-ingestion/src/datahub/ingestion/source/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/oracle.py new file mode 100644 index 0000000000..0d493e51f7 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/oracle.py @@ -0,0 +1,19 @@ +# This import verifies that the dependencies are available. +import cx_Oracle # noqa: F401 + +from .sql_common import BasicSQLAlchemyConfig, SQLAlchemySource + + +class OracleConfig(BasicSQLAlchemyConfig): + # defaults + scheme = "oracle+cx_oracle" + + +class OracleSource(SQLAlchemySource): + def __init__(self, config, ctx): + super().__init__(config, ctx, "oracle") + + @classmethod + def create(cls, config_dict, ctx): + config = OracleConfig.parse_obj(config_dict) + return cls(config, ctx)