mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-24 16:38:19 +00:00
feature(ingest): add bigquery ETL script (#1711)
Also fix minor issues in the common script
This commit is contained in:
parent
fa9fe5e110
commit
221c9af220
8
metadata-ingestion/sql-etl/bigquery_etl.py
Normal file
8
metadata-ingestion/sql-etl/bigquery_etl.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from common import run
|
||||
|
||||
# See https://github.com/mxmzdlv/pybigquery/ for more details
|
||||
URL = '' # e.g. bigquery://project_id
|
||||
OPTIONS = {} # e.g. {"credentials_path": "/path/to/keyfile.json"}
|
||||
PLATFORM = 'bigquery'
|
||||
|
||||
run(URL, OPTIONS, PLATFORM)
|
||||
1
metadata-ingestion/sql-etl/bigquery_etl.txt
Normal file
1
metadata-ingestion/sql-etl/bigquery_etl.txt
Normal file
@@ -0,0 +1 @@
|
||||
pybigquery==0.4.15
|
||||
@@ -55,7 +55,7 @@ def build_dataset_mce(platform, dataset_name, columns):
|
||||
for column in columns:
|
||||
fields.append({
|
||||
"fieldPath": column["name"],
|
||||
"nativeDataType": str(column["type"]),
|
||||
"nativeDataType": repr(column["type"]),
|
||||
"type": { "type":get_column_type(column["type"]) }
|
||||
})
|
||||
|
||||
@@ -102,7 +102,7 @@ def run(url, options, platform, kafka_config = KafkaConfig()):
|
||||
engine = create_engine(url, **options)
|
||||
inspector = reflection.Inspector.from_engine(engine)
|
||||
for schema in inspector.get_schema_names():
|
||||
for table in inspector.get_table_names(schema):
|
||||
columns = inspector.get_columns(table, schema)
|
||||
mce = build_dataset_mce(platform, f'{schema}.{table}', columns)
|
||||
produce_dataset_mce(mce, kafka_config)
|
||||
for table in inspector.get_table_names(schema):
|
||||
columns = inspector.get_columns(table, schema)
|
||||
mce = build_dataset_mce(platform, f'{schema}.{table}', columns)
|
||||
produce_dataset_mce(mce, kafka_config)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from common import run
|
||||
|
||||
# See https://github.com/PyMySQL/PyMySQL for more details
|
||||
URL = '' # e.g. mysql+pymysql://username:password@hostname:port
|
||||
OPTIONS = {} # e.g. {"encoding": "latin1"}
|
||||
PLATFORM = 'mysql'
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user