mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-11-04 04:39:10 +00:00 
			
		
		
		
	support for multiple mce-s in a file
This commit is contained in:
		
							parent
							
								
									58554725aa
								
							
						
					
					
						commit
						7b12fc9827
					
				
							
								
								
									
										67
									
								
								metadata-ingestion/recipes/mce_list.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								metadata-ingestion/recipes/mce_list.json
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,67 @@
 | 
			
		||||
[
 | 
			
		||||
{
 | 
			
		||||
    "auditHeader": null,
 | 
			
		||||
    "proposedSnapshot": {
 | 
			
		||||
        "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": {
 | 
			
		||||
            "urn": "urn:li:corpuser:harshal",
 | 
			
		||||
            "aspects": [
 | 
			
		||||
                {
 | 
			
		||||
                    "com.linkedin.pegasus2avro.identity.CorpUserInfo": {
 | 
			
		||||
                        "active": true,
 | 
			
		||||
                        "displayName": {
 | 
			
		||||
                            "string": "Harshal Sheth"
 | 
			
		||||
                        },
 | 
			
		||||
                        "email": "harshal@sheth.io",
 | 
			
		||||
                        "title": {
 | 
			
		||||
                            "string": "who knows?"
 | 
			
		||||
                        },
 | 
			
		||||
                        "managerUrn": null,
 | 
			
		||||
                        "departmentId": null,
 | 
			
		||||
                        "departmentName": null,
 | 
			
		||||
                        "firstName": null,
 | 
			
		||||
                        "lastName": null,
 | 
			
		||||
                        "fullName": {
 | 
			
		||||
                            "string": "Harshal Sheth"
 | 
			
		||||
                        },
 | 
			
		||||
                        "countryCode": null
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            ]
 | 
			
		||||
        }
 | 
			
		||||
    },
 | 
			
		||||
    "proposedDelta": null
 | 
			
		||||
}
 | 
			
		||||
,
 | 
			
		||||
{
 | 
			
		||||
    "auditHeader": null,
 | 
			
		||||
    "proposedSnapshot": {
 | 
			
		||||
        "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": {
 | 
			
		||||
            "urn": "urn:li:corpuser:harshal2",
 | 
			
		||||
            "aspects": [
 | 
			
		||||
                {
 | 
			
		||||
                    "com.linkedin.pegasus2avro.identity.CorpUserInfo": {
 | 
			
		||||
                        "active": true,
 | 
			
		||||
                        "displayName": {
 | 
			
		||||
                            "string": "Harshal2 Sheth2"
 | 
			
		||||
                        },
 | 
			
		||||
                        "email": "harshal@sheth.io",
 | 
			
		||||
                        "title": {
 | 
			
		||||
                            "string": "who knows?"
 | 
			
		||||
                        },
 | 
			
		||||
                        "managerUrn": null,
 | 
			
		||||
                        "departmentId": null,
 | 
			
		||||
                        "departmentName": null,
 | 
			
		||||
                        "firstName": null,
 | 
			
		||||
                        "lastName": null,
 | 
			
		||||
                        "fullName": {
 | 
			
		||||
                            "string": "Harshal2 Sheth2"
 | 
			
		||||
                        },
 | 
			
		||||
                        "countryCode": null
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            ]
 | 
			
		||||
        }
 | 
			
		||||
    },
 | 
			
		||||
    "proposedDelta": null
 | 
			
		||||
}
 | 
			
		||||
]
 | 
			
		||||
@ -56,6 +56,7 @@ class Pipeline:
 | 
			
		||||
            logger.exception(f'Did not find a registered source class for {source_type}')
 | 
			
		||||
            raise ValueError("Failed to configure source")
 | 
			
		||||
        self.source: Source = source_class.create(self.config.source.dict().get(source_type, {}), self.ctx)
 | 
			
		||||
        logger.info(f"Source type:{source_type},{source_class} configured")
 | 
			
		||||
        sink_type = self.config.sink.type
 | 
			
		||||
        try:
 | 
			
		||||
            self.sink_class = sink_class_mapping[sink_type]
 | 
			
		||||
@ -72,6 +73,7 @@ class Pipeline:
 | 
			
		||||
        extractor = self.extractor_class()
 | 
			
		||||
        SinkClass: Type[Sink] = self.sink_class
 | 
			
		||||
        sink = SinkClass.create(self.sink_config, self.ctx)
 | 
			
		||||
        logger.info(f"Sink type:{self.config.sink.type},{self.sink_class} configured")
 | 
			
		||||
        for wu in self.source.get_workunits():
 | 
			
		||||
            # TODO: change extractor interface
 | 
			
		||||
            extractor.configure({}, self.ctx)
 | 
			
		||||
@ -82,19 +84,3 @@ class Pipeline:
 | 
			
		||||
            extractor.close()
 | 
			
		||||
            sink.handle_work_unit_end(wu)
 | 
			
		||||
        sink.close()
 | 
			
		||||
 | 
			
		||||
        # # TODO: remove this
 | 
			
		||||
        # source = Source(...)
 | 
			
		||||
        # work_stream = source.get_workunits()
 | 
			
		||||
 | 
			
		||||
        # extractor = Extractor(...)
 | 
			
		||||
        # extracted_stream: Iterable[Tuple[WorkUnit, Iterable[RecordEnvelope]]] = extractor.get_records(work) for work in work_stream
 | 
			
		||||
 | 
			
		||||
        # sink = Sink(...)
 | 
			
		||||
        # for workunit, record_stream in extracted_stream:
 | 
			
		||||
        #     associated_sink = sink.with_work_unit(workunit)
 | 
			
		||||
        #         for record_envelope in record_stream:
 | 
			
		||||
        #             associated_sink.write_record_async(record_envelope)
 | 
			
		||||
        #     associated_sink.close()
 | 
			
		||||
        # sink.close()
 | 
			
		||||
        pass
 | 
			
		||||
 | 
			
		||||
@ -25,10 +25,10 @@ class MetadataFileSource(Source):
 | 
			
		||||
        if not isinstance(mce_obj_list, list):
 | 
			
		||||
            mce_obj_list = [mce_obj_list]
 | 
			
		||||
        
 | 
			
		||||
        for obj in mce_obj_list:
 | 
			
		||||
        for i, obj in enumerate(mce_obj_list):
 | 
			
		||||
            mce = json_converter.from_json_object(obj, MetadataChangeEvent.RECORD_SCHEMA)
 | 
			
		||||
            # TODO: autogenerate workunit IDs
 | 
			
		||||
            wu = MetadataWorkUnit('fake mce', mce)
 | 
			
		||||
            wu = MetadataWorkUnit(f"file://{self.config.filename}:{i}", mce)
 | 
			
		||||
            yield wu
 | 
			
		||||
        
 | 
			
		||||
    def close(self):
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user