feat(ingest): support Oracle service names (#2676)

This commit is contained in:
Harshal Sheth 2021-06-11 17:27:34 -07:00 committed by GitHub
parent 1857f85242
commit 1b539220d5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 57 additions and 0 deletions

View File

@ -387,6 +387,8 @@ Extracts:
- List of databases, schema, and tables
- Column types associated with each table
Using the Oracle source requires that you've also installed the correct drivers; see the [cx_Oracle docs](https://cx-oracle.readthedocs.io/en/latest/user_guide/installation.html). The easiest option is the [Oracle Instant Client](https://www.oracle.com/database/technologies/instant-client.html).
```yml
source:
type: oracle
@ -398,6 +400,7 @@ source:
password: pass
host_port: localhost:1521
database: dbname
service_name: svc # alternative to database; specify one or the other, not both
# table_pattern/schema_pattern is same as above
# options is same as above
```
@ -730,6 +733,8 @@ sink:
connection:
bootstrap: "localhost:9092"
producer_config: {} # passed to https://docs.confluent.io/platform/current/clients/confluent-kafka-python/index.html#serializingproducer
schema_registry_url: "http://localhost:8081"
schema_registry_config: {} # passed to https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#confluent_kafka.schema_registry.SchemaRegistryClient
```
### Console `console`

View File

@ -144,6 +144,7 @@ base_dev_requirements = {
"looker",
"glue",
"hive",
"oracle",
"datahub-kafka",
"datahub-rest",
# airflow is added below

View File

@ -1,5 +1,8 @@
from typing import Optional
# This import verifies that the dependencies are available.
import cx_Oracle # noqa: F401
import pydantic
from .sql_common import BasicSQLAlchemyConfig, SQLAlchemySource
@ -8,6 +11,23 @@ class OracleConfig(BasicSQLAlchemyConfig):
# defaults: URL scheme prefix used when building the SQLAlchemy URL
scheme = "oracle+cx_oracle"
# Optional Oracle service name. The validator below rejects configs that
# set it together with 'database'; get_sql_alchemy_url appends it to the
# URL as a `service_name` query parameter.
service_name: Optional[str]
@pydantic.validator("service_name")
def check_service_name(cls, v, values):
    """Reject configs that set both 'database' and 'service_name'.

    Returns the service name unchanged when there is no conflict.

    Raises:
        ValueError: if a truthy 'database' was already parsed and a truthy
            service name is supplied as well.
    """
    has_conflict = bool(v) and bool(values.get("database"))
    if not has_conflict:
        return v
    raise ValueError("specify one of 'database' and 'service_name', but not both")
def get_sql_alchemy_url(self):
    """Return the SQLAlchemy connection URL for this config.

    Starts from the URL built by the parent class and, when
    ``service_name`` is set, appends it as a ``service_name`` query
    parameter.

    Raises:
        ValueError: if both ``database`` and ``service_name`` are set.
    """
    url = super().get_sql_alchemy_url()
    if not self.service_name:
        return url
    # Raise explicitly instead of asserting: assert statements are removed
    # under ``python -O``, which would let a config holding both values
    # (e.g. mutated after validation) through unchecked.
    if self.database:
        raise ValueError(
            "specify one of 'database' and 'service_name', but not both"
        )
    return f"{url}/?service_name={self.service_name}"
class OracleSource(SQLAlchemySource):
def __init__(self, config, ctx):

View File

@ -0,0 +1,31 @@
import pytest
from datahub.ingestion.source.oracle import OracleConfig
def test_oracle_config():
    """Check the service-name URL and the database/service_name conflict."""
    common = {
        "username": "user",
        "password": "password",
        "host_port": "host:1521",
    }

    # A service name on its own ends up as a query parameter on the URL.
    with_service = OracleConfig.parse_obj({**common, "service_name": "svc01"})
    expected_url = "oracle+cx_oracle://user:password@host:1521/?service_name=svc01"
    assert with_service.get_sql_alchemy_url() == expected_url

    # Supplying both 'database' and 'service_name' must fail validation.
    conflicting = dict(common, database="db", service_name="svc01")
    with pytest.raises(ValueError):
        OracleConfig.parse_obj(conflicting)