{ "urn:li:dataFlow:(spark,PythonHdfsIn2HdfsOut2,spark_spark-master_7077)": { "value": { "com.linkedin.metadata.snapshot.DataFlowSnapshot": { "urn": "urn:li:dataFlow:(spark,PythonHdfsIn2HdfsOut2,spark_spark-master_7077)", "aspects": [ { "com.linkedin.metadata.key.DataFlowKey": { "orchestrator": "spark", "cluster": "spark_spark-master_7077", "flowId": "PythonHdfsIn2HdfsOut2" } }, { "com.linkedin.datajob.DataFlowInfo": { "name": "PythonHdfsIn2HdfsOut2", "customProperties": { "sparkUser": "root", "appName": "PythonHdfsIn2HdfsOut2" } } }, { "com.linkedin.common.DataPlatformInstance": { "platform": "urn:li:dataPlatform:spark" } }, { "com.linkedin.common.BrowsePaths": { "paths": [ "/spark/spark_spark-master_7077" ] } } ] } } }, "urn:li:dataJob:(urn:li:dataFlow:(spark,PythonHdfsIn2HdfsOut2,spark_spark-master_7077),QueryExecId_4)": { "value": { "com.linkedin.metadata.snapshot.DataJobSnapshot": { "urn": "urn:li:dataJob:(urn:li:dataFlow:(spark,PythonHdfsIn2HdfsOut2,spark_spark-master_7077),QueryExecId_4)", "aspects": [ { "com.linkedin.metadata.key.DataJobKey": { "jobId": "QueryExecId_4", "flow": "urn:li:dataFlow:(spark,PythonHdfsIn2HdfsOut2,spark_spark-master_7077)" } }, { "com.linkedin.datajob.DataJobInputOutput": { "inputDatasets": [ "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in1.csv,PROD)", "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in2.csv,PROD)" ], "outputDatasets": [ "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/PythonHdfsIn2HdfsOut2/out1.csv,PROD)" ] } }, { "com.linkedin.common.DataPlatformInstance": { "platform": "urn:li:dataPlatform:spark" } }, { "com.linkedin.datajob.DataJobInfo": { "name": "csv at NativeMethodAccessorImpl.java:0", "type": { "string": "sparkJob" }, "customProperties": { "SQLQueryId": "4", "appName": "PythonHdfsIn2HdfsOut2", "description": "csv at NativeMethodAccessorImpl.java:0", "queryPlan": "InsertIntoHadoopFsRelationCommand file:/opt/workspace/resources/data/PythonHdfsIn2HdfsOut2/out1.csv, false, CSV, Map(path -> ../resources/data/PythonHdfsIn2HdfsOut2/out1.csv), Overwrite, [a, b, c, d]\n+- Project [c1#11 AS a#32, c2#12 AS b#33, c1#27 AS c#34, c2#28 AS d#35]\n +- Join Inner, (id#10 = id#26)\n :- Filter isnotnull(id#10)\n : +- Relation[id#10,c1#11,c2#12] csv\n +- Filter isnotnull(id#26)\n +- Relation[id#26,c1#27,c2#28] csv\n" } } }, { "com.linkedin.common.BrowsePaths": { "paths": [ "/spark/spark_spark-master_7077" ] } } ] } } }, "urn:li:dataJob:(urn:li:dataFlow:(spark,PythonHdfsIn2HdfsOut2,spark_spark-master_7077),QueryExecId_5)": { "value": { "com.linkedin.metadata.snapshot.DataJobSnapshot": { "urn": "urn:li:dataJob:(urn:li:dataFlow:(spark,PythonHdfsIn2HdfsOut2,spark_spark-master_7077),QueryExecId_5)", "aspects": [ { "com.linkedin.metadata.key.DataJobKey": { "jobId": "QueryExecId_5", "flow": "urn:li:dataFlow:(spark,PythonHdfsIn2HdfsOut2,spark_spark-master_7077)" } }, { "com.linkedin.datajob.DataJobInputOutput": { "inputDatasets": [ "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in1.csv,PROD)", "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in2.csv,PROD)" ], "outputDatasets": [ "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/PythonHdfsIn2HdfsOut2/out2.csv,PROD)" ] } }, { "com.linkedin.common.DataPlatformInstance": { "platform": "urn:li:dataPlatform:spark" } }, { "com.linkedin.common.BrowsePaths": { "paths": [ "/spark/spark_spark-master_7077" ] } }, { "com.linkedin.datajob.DataJobInfo": { "name": "csv at NativeMethodAccessorImpl.java:0", "type": { "string": "sparkJob" }, "customProperties": { "SQLQueryId": "5", "appName": "PythonHdfsIn2HdfsOut2", "description": "csv at NativeMethodAccessorImpl.java:0", "queryPlan": "InsertIntoHadoopFsRelationCommand file:/opt/workspace/resources/data/PythonHdfsIn2HdfsOut2/out2.csv, false, CSV, Map(path -> ../resources/data/PythonHdfsIn2HdfsOut2/out2.csv), Overwrite, [a1, b1, c1, d1]\n+- Project [c1#11 AS a1#50, c2#12 AS b1#51, c1#27, c2#28 AS d1#53]\n +- Join Inner, (id#10 = id#26)\n :- Filter isnotnull(id#10)\n : +- Relation[id#10,c1#11,c2#12] csv\n +- Filter isnotnull(id#26)\n +- Relation[id#26,c1#27,c2#28] csv\n" } } } ] } } } }