mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-02 03:39:03 +00:00
feat(ingest): add generic sqlalchemy source (#2389)
This commit is contained in:
parent
3ddf163a91
commit
fb6f74b1da
@ -41,8 +41,9 @@ We use a plugin architecture so that you can install only the dependencies you a
|
||||
| hive | `pip install 'acryl-datahub[hive]'` | Hive source |
|
||||
| mssql | `pip install 'acryl-datahub[mssql]'` | SQL Server source |
|
||||
| mysql | `pip install 'acryl-datahub[mysql]'` | MySQL source |
|
||||
| postgres | `pip install 'acryl-datahub[postgres]'` | Postgres source |
|
||||
| oracle | `pip install 'acryl-datahub[oracle]'` | Oracle source |
|
||||
| postgres | `pip install 'acryl-datahub[postgres]'` | Postgres source |
|
||||
| sqlalchemy | `pip install 'acryl-datahub[sqlalchemy]'` | Generic SQLAlchemy source |
|
||||
| snowflake | `pip install 'acryl-datahub[snowflake]'` | Snowflake source |
|
||||
| mongodb | `pip install 'acryl-datahub[mongodb]'` | MongoDB source |
|
||||
| ldap | `pip install 'acryl-datahub[ldap]'` ([extra requirements]) | LDAP source |
|
||||
@ -369,6 +370,28 @@ source:
|
||||
# options is same as above
|
||||
```
|
||||
|
||||
### Other databases using SQLAlchemy `sqlalchemy`
|
||||
|
||||
The `sqlalchemy` source is useful if we don't have a pre-built source for your chosen
|
||||
database system, but there is an [SQLAlchemy dialect](https://docs.sqlalchemy.org/en/14/dialects/)
|
||||
defined elsewhere. In order to use this, you must `pip install` the required dialect packages yourself.
|
||||
|
||||
Extracts:
|
||||
|
||||
- List of schemas and tables
|
||||
- Column types associated with each table
|
||||
|
||||
```yml
|
||||
source:
|
||||
type: sqlalchemy
|
||||
config:
|
||||
# See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls
|
||||
connect_uri: "dialect+driver://username:password@host:port/database"
platform: "example_platform" # required: name of the platform being ingested
|
||||
options: {} # same as above
|
||||
schema_pattern: {} # same as above
|
||||
table_pattern: {} # same as above
|
||||
```
|
||||
|
||||
### MongoDB `mongodb`
|
||||
|
||||
Extracts:
|
||||
|
||||
@ -161,6 +161,7 @@ setuptools.setup(
|
||||
"console_scripts": ["datahub = datahub.entrypoints:datahub"],
|
||||
"datahub.ingestion.source.plugins": [
|
||||
"file = datahub.ingestion.source.mce_file:MetadataFileSource",
|
||||
"sqlalchemy = datahub.ingestion.source.sql_generic:SQLAlchemyGenericSource",
|
||||
"athena = datahub.ingestion.source.athena:AthenaSource",
|
||||
"bigquery = datahub.ingestion.source.bigquery:BigQuerySource",
|
||||
"dbt = datahub.ingestion.source.dbt:DBTSource",
|
||||
|
||||
@ -0,0 +1,20 @@
|
||||
from datahub.ingestion.api.common import PipelineContext
|
||||
from .sql_common import SQLAlchemyConfig, SQLAlchemySource
|
||||
|
||||
|
||||
class SQLAlchemyGenericConfig(SQLAlchemyConfig):
    """Configuration for the generic SQLAlchemy source.

    Extends ``SQLAlchemyConfig`` with the two settings this source needs:
    the platform name and the connection URI.
    """

    # Platform name; SQLAlchemyGenericSource forwards it to its base class.
    platform: str
    # SQLAlchemy connection URL, handed back verbatim by get_sql_alchemy_url().
    connect_uri: str

    def get_sql_alchemy_url(self) -> str:
        """Return the configured ``connect_uri`` without modification."""
        url = self.connect_uri
        return url
|
||||
|
||||
|
||||
class SQLAlchemyGenericSource(SQLAlchemySource):
    """Ingestion source driven by a caller-provided SQLAlchemy connection URI."""

    def __init__(self, config: SQLAlchemyGenericConfig, ctx: PipelineContext):
        # The platform is read from the config and forwarded to the base source.
        platform_name = config.platform
        super().__init__(config, ctx, platform_name)

    @classmethod
    def create(cls, config_dict, ctx):
        """Build the source from a raw config mapping.

        ``config_dict`` is parsed into a ``SQLAlchemyGenericConfig`` via
        ``parse_obj``; the parsed config and ``ctx`` are then passed to the
        constructor.
        """
        return cls(SQLAlchemyGenericConfig.parse_obj(config_dict), ctx)
|
||||
Loading…
x
Reference in New Issue
Block a user