feat(ingest): add example crawler for MS SQL (#1803)

Also fix the incorrect assumption about column comments and add a sample docker-compose file
Mars Lan 2020-08-12 08:51:39 -07:00 committed by GitHub
parent a13c1e10e6
commit 03e3d49445
5 changed files with 23 additions and 6 deletions

@@ -54,6 +54,6 @@ You can integrate any data platform to DataHub easily. As long as you have a way
 We have provided example [ETL ingestion](architecture/metadata-ingestion.md) scripts for:
 - Hive
 - Kafka
-- RDBMS (MySQL, Oracle, Postgres etc)
+- RDBMS (MySQL, Oracle, Postgres, MS SQL etc)
 - Data warehouse (Snowflake, BigQuery etc)
 - LDAP

@@ -8,10 +8,6 @@ from sqlalchemy import create_engine
 from sqlalchemy import types
 from sqlalchemy.engine import reflection
-URL = 'mysql+pymysql://datahub:datahub@localhost:3306' # e.g. mysql+pymysql://username:password@hostname:port
-OPTIONS = {} # e.g. {"encoding": "latin1"}
 @dataclass
 class KafkaConfig:
     avsc_path = '../../metadata-events/mxe-schemas/src/renamed/avro/com/linkedin/mxe/MetadataChangeEvent.avsc'
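The URL and OPTIONS constants move out of the shared module here, while KafkaConfig and its avsc_path point at the MetadataChangeEvent Avro schema, which suggests the crawler emits MCEs to a Kafka topic. A minimal sketch of that producing step, assuming confluent-kafka's AvroProducer plus a broker address, schema registry URL, and topic name that are not shown in this diff:

# Sketch only: broker address, schema registry URL, and topic name are assumptions.
from confluent_kafka import avro
from confluent_kafka.avro import AvroProducer

AVSC_PATH = '../../metadata-events/mxe-schemas/src/renamed/avro/com/linkedin/mxe/MetadataChangeEvent.avsc'

def make_mce_producer():
    # Load the MCE Avro schema referenced by KafkaConfig.avsc_path.
    value_schema = avro.load(AVSC_PATH)
    return AvroProducer(
        {'bootstrap.servers': 'localhost:9092',           # assumption
         'schema.registry.url': 'http://localhost:8081'},  # assumption
        default_value_schema=value_schema)

def emit_mce(producer, mce):
    # Topic name is an assumption; adjust to match your DataHub deployment.
    producer.produce(topic='MetadataChangeEvent_v4', value=mce)
    producer.flush()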
@@ -57,7 +53,7 @@ def build_dataset_mce(platform, dataset_name, columns):
         "fieldPath": column["name"],
         "nativeDataType": repr(column["type"]),
         "type": { "type":get_column_type(column["type"]) },
-        "description": column["comment"]
+        "description": column.get("comment", None)
     })
   schema_metadata = {
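The change above is the "incorrect assumption" fix mentioned in the commit message: SQLAlchemy's inspector does not guarantee a "comment" key in every column dict it returns (some dialects may omit it), so indexing with column["comment"] can raise KeyError. A minimal illustration, with a hand-written column dict standing in for inspector output:

# Illustration only: a column dict shaped like Inspector.get_columns() output,
# minus the optional "comment" key that some dialects never report.
column = {"name": "id", "type": "INTEGER", "nullable": False}

# Old behaviour: raises KeyError when the dialect omits comments.
try:
    description = column["comment"]
except KeyError:
    description = None

# New behaviour: falls back to None without raising.
description = column.get("comment", None)

print(description)  # None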

@@ -0,0 +1,12 @@
+version: '3.1'
+services:
+  mssql:
+    image: mcr.microsoft.com/mssql/server
+    restart: always
+    environment:
+      ACCEPT_EULA: Y
+      SA_PASSWORD: DatahubR0cks
+    ports:
+      - "1433:1433"

@@ -0,0 +1,8 @@
+from common import run
+
+# See https://github.com/m32/sqlalchemy-tds for more details
+URL = '' # e.g. mssql+pytds://username:password@hostname:port
+OPTIONS = {}
+PLATFORM = 'mssql'
+
+run(URL, OPTIONS, PLATFORM)
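This script only supplies the connection URL, engine options, and platform name; the shared run() helper in the common module (partially visible in the hunks above) presumably performs the reflection and MCE construction. A rough sketch of that flow, under those assumptions and using only names already visible in this diff plus SQLAlchemy's inspector:

# Sketch of what a run(url, options, platform) helper might look like,
# inferred from the imports and build_dataset_mce() shown in the hunks above.
from sqlalchemy import create_engine
from sqlalchemy.engine import reflection

from common import build_dataset_mce  # defined in the shared module above

def run(url, options, platform):
    engine = create_engine(url, **options)
    inspector = reflection.Inspector.from_engine(engine)
    for schema in inspector.get_schema_names():
        for table in inspector.get_table_names(schema):
            columns = inspector.get_columns(table, schema)
            # Dataset naming below is an assumption for illustration.
            mce = build_dataset_mce(platform, f'{schema}.{table}', columns)
            # ... emit the MCE (e.g. to Kafka), as configured in the shared module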

@@ -0,0 +1 @@
+sqlalchemy-pytds==0.3