mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-13 19:17:17 +00:00
feat(ingest): add example crawler for MS SQL (#1803)
Also fix the incorrect assumption that every column has a comment, and add a sample docker-compose file for MS SQL
This commit is contained in:
parent
a13c1e10e6
commit
03e3d49445
@ -54,6 +54,6 @@ You can integrate any data platform to DataHub easily. As long as you have a way
|
|||||||
We have provided example [ETL ingestion](architecture/metadata-ingestion.md) scripts for:
|
We have provided example [ETL ingestion](architecture/metadata-ingestion.md) scripts for:
|
||||||
- Hive
|
- Hive
|
||||||
- Kafka
|
- Kafka
|
||||||
- RDBMS (MySQL, Oracle, Postgres etc)
|
- RDBMS (MySQL, Oracle, Postgres, MS SQL etc)
|
||||||
- Data warehouse (Snowflake, BigQuery etc)
|
- Data warehouse (Snowflake, BigQuery etc)
|
||||||
- LDAP
|
- LDAP
|
||||||
|
|||||||
@ -8,10 +8,6 @@ from sqlalchemy import create_engine
|
|||||||
from sqlalchemy import types
|
from sqlalchemy import types
|
||||||
from sqlalchemy.engine import reflection
|
from sqlalchemy.engine import reflection
|
||||||
|
|
||||||
URL = 'mysql+pymysql://datahub:datahub@localhost:3306' # e.g. mysql+pymysql://username:password@hostname:port
|
|
||||||
OPTIONS = {} # e.g. {"encoding": "latin1"}
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class KafkaConfig:
|
class KafkaConfig:
|
||||||
avsc_path = '../../metadata-events/mxe-schemas/src/renamed/avro/com/linkedin/mxe/MetadataChangeEvent.avsc'
|
avsc_path = '../../metadata-events/mxe-schemas/src/renamed/avro/com/linkedin/mxe/MetadataChangeEvent.avsc'
|
||||||
@ -57,7 +53,7 @@ def build_dataset_mce(platform, dataset_name, columns):
|
|||||||
"fieldPath": column["name"],
|
"fieldPath": column["name"],
|
||||||
"nativeDataType": repr(column["type"]),
|
"nativeDataType": repr(column["type"]),
|
||||||
"type": { "type":get_column_type(column["type"]) },
|
"type": { "type":get_column_type(column["type"]) },
|
||||||
"description": column["comment"]
|
"description": column.get("comment", None)
|
||||||
})
|
})
|
||||||
|
|
||||||
schema_metadata = {
|
schema_metadata = {
|
||||||
|
|||||||
12
metadata-ingestion/sql-etl/mssql.yml
Normal file
12
metadata-ingestion/sql-etl/mssql.yml
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
version: '3.1'
|
||||||
|
|
||||||
|
services:
|
||||||
|
|
||||||
|
mssql:
|
||||||
|
image: mcr.microsoft.com/mssql/server
|
||||||
|
restart: always
|
||||||
|
environment:
|
||||||
|
ACCEPT_EULA: Y
|
||||||
|
SA_PASSWORD: DatahubR0cks
|
||||||
|
ports:
|
||||||
|
- "1433:1433"
|
||||||
8
metadata-ingestion/sql-etl/mssql_etl.py
Normal file
8
metadata-ingestion/sql-etl/mssql_etl.py
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
from common import run
|
||||||
|
|
||||||
|
# See https://github.com/m32/sqlalchemy-tds for more details
|
||||||
|
URL = '' # e.g. mssql+pytds://username:password@hostname:port
|
||||||
|
OPTIONS = {}
|
||||||
|
PLATFORM = 'mssql'
|
||||||
|
|
||||||
|
run(URL, OPTIONS, PLATFORM)
|
||||||
1
metadata-ingestion/sql-etl/mssql_etl.txt
Normal file
1
metadata-ingestion/sql-etl/mssql_etl.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
sqlalchemy-pytds==0.3
|
||||||
Loading…
x
Reference in New Issue
Block a user