mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-24 01:50:06 +00:00
Streamline serde
This commit is contained in:
parent
761b27893b
commit
ae88fbf727
@@ -10,7 +10,6 @@ from confluent_kafka import SerializingProducer
|
||||
from confluent_kafka.serialization import StringSerializer
|
||||
from confluent_kafka.schema_registry import SchemaRegistryClient
|
||||
from confluent_kafka.schema_registry.avro import AvroSerializer
|
||||
-from gometa.metadata import json_converter
|
||||
from gometa.metadata.schema_classes import SCHEMA_JSON_STR
|
||||
from gometa.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
||||
|
||||
@@ -44,32 +43,25 @@ class DatahubKafkaSink(Sink):
|
||||
self.config = config
|
||||
self.report = SinkReport()
|
||||
|
||||
-mce_schema = MetadataChangeEvent.RECORD_SCHEMA
|
||||
-
|
||||
-producer_config = {
|
||||
-"bootstrap.servers": self.config.connection.bootstrap,
|
||||
-"schema.registry.url": self.config.connection.schema_registry_url,
|
||||
-**self.config.connection.producer_config,
|
||||
-}
|
||||
|
||||
schema_registry_conf = {
|
||||
'url': self.config.connection.schema_registry_url,
|
||||
**self.config.connection.schema_registry_config,
|
||||
}
|
||||
schema_registry_client = SchemaRegistryClient(schema_registry_conf)
|
||||
|
||||
-def convert_mce_to_dict(mce, ctx):
|
||||
-tuple_encoding = json_converter.with_tuple_union().to_json_object(mce)
|
||||
+def convert_mce_to_dict(mce: MetadataChangeEvent, ctx):
|
||||
+tuple_encoding = mce.to_obj(tuples=True)
|
||||
return tuple_encoding
|
||||
avro_serializer = AvroSerializer(SCHEMA_JSON_STR, schema_registry_client, to_dict=convert_mce_to_dict)
|
||||
|
||||
-producer_conf = {
|
||||
+producer_config = {
|
||||
"bootstrap.servers": self.config.connection.bootstrap,
|
||||
'key.serializer': StringSerializer('utf_8'),
|
||||
'value.serializer': avro_serializer,
|
||||
**self.config.connection.producer_config,
|
||||
}
|
||||
|
||||
-self.producer = SerializingProducer(producer_conf)
|
||||
+self.producer = SerializingProducer(producer_config)
|
||||
|
||||
@classmethod
|
||||
def create(cls, config_dict, ctx: PipelineContext):
|
||||
|
@@ -9,7 +9,6 @@ from requests.exceptions import HTTPError
|
||||
from gometa.ingestion.api.sink import Sink, WriteCallback, SinkReport
|
||||
from gometa.ingestion.api.common import RecordEnvelope, WorkUnit
|
||||
import json
|
||||
-from gometa.metadata import json_converter
|
||||
from gometa.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
||||
from gometa.metadata import (
|
||||
ChartSnapshotClass,
|
||||
@@ -96,7 +95,7 @@ class DatahubRestSink(Sink):
|
||||
mce = record_envelope.record
|
||||
url = self.get_ingest_endpoint(mce)
|
||||
|
||||
-raw_mce_obj = json_converter.to_json_object(mce.proposedSnapshot)
|
||||
+raw_mce_obj = mce.proposedSnapshot.to_obj()
|
||||
|
||||
mce_obj = _rest_li_ify(raw_mce_obj)
|
||||
snapshot = {'snapshot': mce_obj}
|
||||
|
@@ -5,7 +5,6 @@ import os
|
||||
import pathlib
|
||||
import logging
|
||||
import json
|
||||
-from gometa.metadata import json_converter
|
||||
from gometa.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -44,7 +43,7 @@ class FileSink(Sink):
|
||||
|
||||
def write_record_async(self, record_envelope: RecordEnvelope[MetadataChangeEvent], write_callback: WriteCallback):
|
||||
mce = record_envelope.record
|
||||
-obj = json_converter.to_json_object(mce, MetadataChangeEvent.RECORD_SCHEMA)
|
||||
+obj = mce.to_obj()
|
||||
|
||||
if self.wrote_something:
|
||||
self.file.write(',\n')
|
||||
|
@@ -4,7 +4,6 @@ from pydantic import BaseModel
|
||||
from typing import Optional, Iterable
|
||||
from gometa.ingestion.api.source import Source, SourceReport
|
||||
from gometa.ingestion.source.metadata_common import MetadataWorkUnit
|
||||
-from gometa.metadata import json_converter
|
||||
from gometa.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
||||
|
||||
class MetadataFileSourceConfig(BaseModel):
|
||||
@@ -27,7 +26,7 @@ class MetadataFileSource(Source):
|
||||
mce_obj_list = [mce_obj_list]
|
||||
|
||||
for i, obj in enumerate(mce_obj_list):
|
||||
-mce = json_converter.from_json_object(obj, MetadataChangeEvent.RECORD_SCHEMA)
|
||||
+mce: MetadataChangeEvent = MetadataChangeEvent.from_obj(obj)
|
||||
wu = MetadataWorkUnit(f"file://{self.config.filename}:{i}", mce)
|
||||
self.report.report_workunit(wu)
|
||||
yield wu
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user