mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-12 02:25:43 +00:00
feat(ingest): add example crawler for MS SQL (#1803)
Also fix the incorrect assumption on column comments & add sample docker-compose file
This commit is contained in:
parent
a13c1e10e6
commit
03e3d49445
@ -54,6 +54,6 @@ You can integrate any data platform to DataHub easily. As long as you have a way
|
||||
We have provided example [ETL ingestion](architecture/metadata-ingestion.md) scripts for:
|
||||
- Hive
|
||||
- Kafka
|
||||
- RDBMS (MySQL, Oracle, Postgres etc)
|
||||
- RDBMS (MySQL, Oracle, Postgres, MS SQL etc)
|
||||
- Data warehouse (Snowflake, BigQuery etc)
|
||||
- LDAP
|
||||
|
||||
@ -8,10 +8,6 @@ from sqlalchemy import create_engine
|
||||
from sqlalchemy import types
|
||||
from sqlalchemy.engine import reflection
|
||||
|
||||
URL = 'mysql+pymysql://datahub:datahub@localhost:3306' # e.g. mysql+pymysql://username:password@hostname:port
|
||||
OPTIONS = {} # e.g. {"encoding": "latin1"}
|
||||
|
||||
|
||||
@dataclass
|
||||
class KafkaConfig:
|
||||
avsc_path = '../../metadata-events/mxe-schemas/src/renamed/avro/com/linkedin/mxe/MetadataChangeEvent.avsc'
|
||||
@ -57,7 +53,7 @@ def build_dataset_mce(platform, dataset_name, columns):
|
||||
"fieldPath": column["name"],
|
||||
"nativeDataType": repr(column["type"]),
|
||||
"type": { "type":get_column_type(column["type"]) },
|
||||
"description": column["comment"]
|
||||
"description": column.get("comment", None)
|
||||
})
|
||||
|
||||
schema_metadata = {
|
||||
|
||||
12
metadata-ingestion/sql-etl/mssql.yml
Normal file
12
metadata-ingestion/sql-etl/mssql.yml
Normal file
@ -0,0 +1,12 @@
|
||||
# Sample docker-compose file that starts a Microsoft SQL Server container,
# for use with the MS SQL example ingestion script.
version: '3.1'

services:

  # Service is named after the database it actually runs.
  mssql:
    image: mcr.microsoft.com/mssql/server
    restart: always
    environment:
      # Quoted so that parsers applying YAML 1.1 boolean rules keep it a string.
      ACCEPT_EULA: "Y"
      # Sample password only; change it for anything beyond local testing.
      SA_PASSWORD: DatahubR0cks
    ports:
      # Publish the container's port 1433 on the same host port.
      - "1433:1433"
|
||||
8
metadata-ingestion/sql-etl/mssql_etl.py
Normal file
8
metadata-ingestion/sql-etl/mssql_etl.py
Normal file
@ -0,0 +1,8 @@
|
||||
"""Example MS SQL ingestion script.

Delegates all of the work to ``run`` from the sibling ``common`` module,
passing it the connection URL, the options and the platform name defined
below. Fill in ``URL`` before running.

See https://github.com/m32/sqlalchemy-tds for more details on the
``mssql+pytds`` connection scheme.
"""
from common import run

# Connection string, e.g. mssql+pytds://username:password@hostname:port
URL = ''

# Extra options passed to ``run`` alongside the URL (none by default).
OPTIONS = {}

# Platform name passed to ``run``.
PLATFORM = 'mssql'

run(URL, OPTIONS, PLATFORM)
|
||||
1
metadata-ingestion/sql-etl/mssql_etl.txt
Normal file
1
metadata-ingestion/sql-etl/mssql_etl.txt
Normal file
@ -0,0 +1 @@
|
||||
sqlalchemy-pytds==0.3
|
||||
Loading…
x
Reference in New Issue
Block a user