mirror of
https://github.com/datahub-project/datahub.git
synced 2025-09-26 01:23:16 +00:00
Classified the ETL jobs in metadata-ingestion.
This commit is contained in:
parent
28b876f323
commit
81a38f87f7
@ -34,7 +34,7 @@ optional arguments:
|
|||||||
```
|
```
|
||||||
|
|
||||||
## Bootstrapping Data Hub
|
## Bootstrapping Data Hub
|
||||||
If you want to quickly ingest lots of sample data and test Data Hub in action, you can run below command:
|
To quickly ingest lots of sample data and test Data Hub in action, use the mce-cli by running the command below:
|
||||||
```
|
```
|
||||||
➜ python mce_cli.py produce -d bootstrap_mce.dat
|
➜ python mce_cli.py produce -d bootstrap_mce.dat
|
||||||
Producing MetadataChangeEvent records to topic MetadataChangeEvent. ^c to exit.
|
Producing MetadataChangeEvent records to topic MetadataChangeEvent. ^c to exit.
|
||||||
@ -45,7 +45,7 @@ Flushing records...
|
|||||||
This will bootstrap Data Hub with sample datasets and sample users.
|
This will bootstrap Data Hub with sample datasets and sample users.
|
||||||
|
|
||||||
## Ingest metadata from LDAP server to Data Hub
|
## Ingest metadata from LDAP server to Data Hub
|
||||||
The ldap_etl.py provides you ETL channel to communicate with your LDAP server.
|
The ldap_etl provides an ETL channel to communicate with your LDAP server.
|
||||||
```
|
```
|
||||||
➜ Configure your LDAP server environment variables in the file
|
➜ Configure your LDAP server environment variables in the file
|
||||||
LDAPSERVER # Your server host.
|
LDAPSERVER # Your server host.
|
||||||
|
@ -14,10 +14,10 @@ PAGESIZE = PAGESIZE
|
|||||||
ATTRLIST = ['cn', 'title', 'mail', 'sAMAccountName', 'department','manager']
|
ATTRLIST = ['cn', 'title', 'mail', 'sAMAccountName', 'department','manager']
|
||||||
SEARCHFILTER='SEARCHFILTER'
|
SEARCHFILTER='SEARCHFILTER'
|
||||||
|
|
||||||
AVROLOADPATH = 'AVROLOADPATH'
|
AVROLOADPATH = '../../metadata-events/mxe-schemas/src/renamed/avro/com/linkedin/mxe/MetadataChangeEvent.avsc'
|
||||||
KAFKATOPIC = 'KAFKATOPIC'
|
KAFKATOPIC = 'MetadataChangeEvent'
|
||||||
BOOTSTRAP = 'BOOTSTRAP'
|
BOOTSTRAP = 'localhost:9092'
|
||||||
SCHEMAREGISTRY = 'SCHEMAREGISTRY'
|
SCHEMAREGISTRY = 'http://localhost:8081'
|
||||||
|
|
||||||
def create_controls(pagesize):
|
def create_controls(pagesize):
|
||||||
"""
|
"""
|
@ -1,7 +1,8 @@
|
|||||||
|
#! /usr/bin/python
|
||||||
import argparse
|
import argparse
|
||||||
from confluent_kafka import avro
|
from confluent_kafka import avro
|
||||||
|
|
||||||
record_schema = avro.load("../metadata-events/mxe-schemas/src/renamed/avro/com/linkedin/mxe/MetadataChangeEvent.avsc")
|
record_schema = avro.load("../../metadata-events/mxe-schemas/src/renamed/avro/com/linkedin/mxe/MetadataChangeEvent.avsc")
|
||||||
topic = "MetadataChangeEvent"
|
topic = "MetadataChangeEvent"
|
||||||
|
|
||||||
class MetadataChangeEvent(object):
|
class MetadataChangeEvent(object):
|
Loading…
x
Reference in New Issue
Block a user