mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-03 04:10:43 +00:00
feat(ingest): add generic sqlalchemy source (#2389)
This commit is contained in:
parent
3ddf163a91
commit
fb6f74b1da
@ -41,8 +41,9 @@ We use a plugin architecture so that you can install only the dependencies you a
|
|||||||
| hive | `pip install 'acryl-datahub[hive]'` | Hive source |
|
| hive | `pip install 'acryl-datahub[hive]'` | Hive source |
|
||||||
| mssql | `pip install 'acryl-datahub[mssql]'` | SQL Server source |
|
| mssql | `pip install 'acryl-datahub[mssql]'` | SQL Server source |
|
||||||
| mysql | `pip install 'acryl-datahub[mysql]'` | MySQL source |
|
| mysql | `pip install 'acryl-datahub[mysql]'` | MySQL source |
|
||||||
| postgres | `pip install 'acryl-datahub[postgres]'` | Postgres source |
|
|
||||||
| oracle | `pip install 'acryl-datahub[oracle]'` | Oracle source |
|
| oracle | `pip install 'acryl-datahub[oracle]'` | Oracle source |
|
||||||
|
| postgres | `pip install 'acryl-datahub[postgres]'` | Postgres source |
|
||||||
|
| sqlalchemy | `pip install 'acryl-datahub[sqlalchemy]'` | Generic SQLAlchemy source |
|
||||||
| snowflake | `pip install 'acryl-datahub[snowflake]'` | Snowflake source |
|
| snowflake | `pip install 'acryl-datahub[snowflake]'` | Snowflake source |
|
||||||
| mongodb | `pip install 'acryl-datahub[mongodb]'` | MongoDB source |
|
| mongodb | `pip install 'acryl-datahub[mongodb]'` | MongoDB source |
|
||||||
| ldap | `pip install 'acryl-datahub[ldap]'` ([extra requirements]) | LDAP source |
|
| ldap | `pip install 'acryl-datahub[ldap]'` ([extra requirements]) | LDAP source |
|
||||||
@ -369,6 +370,28 @@ source:
|
|||||||
# options is same as above
|
# options is same as above
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Other databases using SQLAlchemy `sqlalchemy`
|
||||||
|
|
||||||
|
The `sqlalchemy` source is useful if we don't have a pre-built source for your chosen
|
||||||
|
database system, but there is an [SQLAlchemy dialect](https://docs.sqlalchemy.org/en/14/dialects/)
|
||||||
|
defined elsewhere. In order to use this, you must `pip install` the required dialect packages yourself.
|
||||||
|
|
||||||
|
Extracts:
|
||||||
|
|
||||||
|
- List of schemas and tables
|
||||||
|
- Column types associated with each table
|
||||||
|
|
||||||
|
```yml
|
||||||
|
source:
|
||||||
|
type: sqlalchemy
|
||||||
|
config:
|
||||||
|
# See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls
|
||||||
|
platform: "my_platform" # required; name of the data platform behind this connection
connect_uri: "dialect+driver://username:password@host:port/database"
|
||||||
|
options: {} # same as above
|
||||||
|
schema_pattern: {} # same as above
|
||||||
|
table_pattern: {} # same as above
|
||||||
|
```
|
||||||
|
|
||||||
### MongoDB `mongodb`
|
### MongoDB `mongodb`
|
||||||
|
|
||||||
Extracts:
|
Extracts:
|
||||||
|
|||||||
@ -161,6 +161,7 @@ setuptools.setup(
|
|||||||
"console_scripts": ["datahub = datahub.entrypoints:datahub"],
|
"console_scripts": ["datahub = datahub.entrypoints:datahub"],
|
||||||
"datahub.ingestion.source.plugins": [
|
"datahub.ingestion.source.plugins": [
|
||||||
"file = datahub.ingestion.source.mce_file:MetadataFileSource",
|
"file = datahub.ingestion.source.mce_file:MetadataFileSource",
|
||||||
|
"sqlalchemy = datahub.ingestion.source.sql_generic:SQLAlchemyGenericSource",
|
||||||
"athena = datahub.ingestion.source.athena:AthenaSource",
|
"athena = datahub.ingestion.source.athena:AthenaSource",
|
||||||
"bigquery = datahub.ingestion.source.bigquery:BigQuerySource",
|
"bigquery = datahub.ingestion.source.bigquery:BigQuerySource",
|
||||||
"dbt = datahub.ingestion.source.dbt:DBTSource",
|
"dbt = datahub.ingestion.source.dbt:DBTSource",
|
||||||
|
|||||||
@ -0,0 +1,20 @@
|
|||||||
|
from datahub.ingestion.api.common import PipelineContext
|
||||||
|
from .sql_common import SQLAlchemyConfig, SQLAlchemySource
|
||||||
|
|
||||||
|
|
||||||
|
class SQLAlchemyGenericConfig(SQLAlchemyConfig):
    """Configuration for the generic SQLAlchemy source.

    Extends ``SQLAlchemyConfig`` with two string fields, both declared
    without a default value.
    """

    # Platform name; SQLAlchemyGenericSource forwards it to its base class.
    platform: str
    # Connection URI; returned unchanged by get_sql_alchemy_url().
    connect_uri: str

    def get_sql_alchemy_url(self):
        """Return the configured ``connect_uri`` exactly as supplied."""
        url = self.connect_uri
        return url
|
||||||
|
|
||||||
|
|
||||||
|
class SQLAlchemyGenericSource(SQLAlchemySource):
    """SQLAlchemy-based source whose platform name comes from its config."""

    def __init__(self, config: SQLAlchemyGenericConfig, ctx: PipelineContext):
        """Initialise the base source.

        ``config.platform`` is passed to ``SQLAlchemySource.__init__`` as the
        third positional argument, after ``config`` and ``ctx``.
        """
        platform = config.platform
        super().__init__(config, ctx, platform)

    @classmethod
    def create(cls, config_dict, ctx):
        """Build a source instance from a raw configuration mapping.

        ``config_dict`` is parsed with ``SQLAlchemyGenericConfig.parse_obj``;
        the resulting config and ``ctx`` are handed to the constructor.
        """
        return cls(SQLAlchemyGenericConfig.parse_obj(config_dict), ctx)
|
||||||
Loading…
x
Reference in New Issue
Block a user