feat(build): adding support for python codegen for all aspects, not just the snapshot ones (#3299)

This commit is contained in:
Swaroop Jagadish 2021-09-26 17:22:58 -07:00 committed by GitHub
parent 2142157590
commit 9dd7303bad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 384 additions and 2 deletions

View File

@ -6,7 +6,24 @@ OUTDIR=./src/datahub/metadata
# Note: this assumes that datahub has already been built with `./gradlew build`.
# All schema paths below are resolved relative to DATAHUB_ROOT (the parent directory).
DATAHUB_ROOT=..
# Directory that the .avsc schema paths below point into.
SCHEMAS_ROOT="$DATAHUB_ROOT/metadata-events/mxe-schemas/src/renamed/avro/com/linkedin"
# NOTE(review): this assignment is immediately overwritten by the next line, so only the
# three files listed there form the initial, space-separated value of FILES.
FILES="$SCHEMAS_ROOT/mxe/MetadataChangeEvent.avsc $SCHEMAS_ROOT/mxe/MetadataChangeProposal.avsc $SCHEMAS_ROOT/usage/UsageAggregation.avsc $SCHEMAS_ROOT/dataset/DatasetProfile.avsc $SCHEMAS_ROOT/dataset/DatasetUsageStatistics.avsc"
FILES="$SCHEMAS_ROOT/mxe/MetadataChangeEvent.avsc $SCHEMAS_ROOT/mxe/MetadataChangeProposal.avsc $SCHEMAS_ROOT/usage/UsageAggregation.avsc"
# Since we depend on jq, check if jq is installed.
# `command -v` is the POSIX way to probe for a command; its output is discarded so
# the resolved path does not end up on stdout.
if ! command -v jq > /dev/null 2>&1; then
    # Report the missing dependency on stderr and abort with a non-zero status.
    echo "jq is not installed. Please install jq and rerun (https://stedolan.github.io/jq/)" >&2
    exit 1
fi
# Add all other files that are aspects (top-level "Aspect" key) but not included in the above.
# The loop runs in the current shell rather than on the receiving end of a pipe, so the
# additions to FILES stay visible after the loop and no scratch file in /tmp is needed.
# NOTE: the file list is split on whitespace, the same way $FILES is expanded further down.
for file in $(find "$SCHEMAS_ROOT" -name "*.avsc" | sort); do
    # `jq -e` exits non-zero when `.Aspect` evaluates to null or false (key absent),
    # and also when the file cannot be parsed; either way the file is not added.
    if jq -e '.Aspect' "$file" > /dev/null; then
        FILES="${FILES} ${file}"
    fi
done
# Remove any previous contents of $OUTDIR; `|| true` keeps a failing rm from failing the script.
rm -r $OUTDIR || true
# Run the Avro codegen script, passing the collected schema files followed by $OUTDIR.
python scripts/avro_codegen.py $FILES $OUTDIR

View File

@ -6044,6 +6044,27 @@
],
"doc": "Usage data for a given resource, rolled up into a bucket."
},
"com.linkedin.pegasus2avro.chart.ChartInfo",
"com.linkedin.pegasus2avro.chart.ChartQuery",
"com.linkedin.pegasus2avro.chart.EditableChartProperties",
"com.linkedin.pegasus2avro.common.BrowsePaths",
"com.linkedin.pegasus2avro.common.Cost",
"com.linkedin.pegasus2avro.common.Deprecation",
"com.linkedin.pegasus2avro.common.GlobalTags",
"com.linkedin.pegasus2avro.common.GlossaryTerms",
"com.linkedin.pegasus2avro.common.InstitutionalMemory",
"com.linkedin.pegasus2avro.common.Ownership",
"com.linkedin.pegasus2avro.common.Status",
"com.linkedin.pegasus2avro.dashboard.DashboardInfo",
"com.linkedin.pegasus2avro.dashboard.EditableDashboardProperties",
"com.linkedin.pegasus2avro.datajob.DataFlowInfo",
"com.linkedin.pegasus2avro.datajob.DataJobInfo",
"com.linkedin.pegasus2avro.datajob.DataJobInputOutput",
"com.linkedin.pegasus2avro.datajob.EditableDataFlowProperties",
"com.linkedin.pegasus2avro.datajob.EditableDataJobProperties",
"com.linkedin.pegasus2avro.dataplatform.DataPlatformInfo",
"com.linkedin.pegasus2avro.dataprocess.DataProcessInfo",
"com.linkedin.pegasus2avro.dataset.DatasetDeprecation",
{
"type": "record",
"Aspect": {
@ -6347,6 +6368,8 @@
],
"doc": "Stats corresponding to datasets"
},
"com.linkedin.pegasus2avro.dataset.DatasetProperties",
"com.linkedin.pegasus2avro.dataset.DatasetUpstreamLineage",
{
"type": "record",
"Aspect": {
@ -6493,5 +6516,55 @@
}
],
"doc": "Stats corresponding to dataset's usage."
}
},
"com.linkedin.pegasus2avro.dataset.EditableDatasetProperties",
"com.linkedin.pegasus2avro.dataset.UpstreamLineage",
"com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo",
"com.linkedin.pegasus2avro.glossary.GlossaryRelatedTerms",
"com.linkedin.pegasus2avro.glossary.GlossaryTermInfo",
"com.linkedin.pegasus2avro.identity.CorpGroupInfo",
"com.linkedin.pegasus2avro.identity.CorpUserEditableInfo",
"com.linkedin.pegasus2avro.identity.CorpUserInfo",
"com.linkedin.pegasus2avro.identity.GroupMembership",
"com.linkedin.pegasus2avro.metadata.key.ChartKey",
"com.linkedin.pegasus2avro.metadata.key.CorpGroupKey",
"com.linkedin.pegasus2avro.metadata.key.CorpUserKey",
"com.linkedin.pegasus2avro.metadata.key.DashboardKey",
"com.linkedin.pegasus2avro.metadata.key.DataFlowKey",
"com.linkedin.pegasus2avro.metadata.key.DataHubPolicyKey",
"com.linkedin.pegasus2avro.metadata.key.DataJobKey",
"com.linkedin.pegasus2avro.metadata.key.DataPlatformKey",
"com.linkedin.pegasus2avro.metadata.key.DataProcessKey",
"com.linkedin.pegasus2avro.metadata.key.DatasetKey",
"com.linkedin.pegasus2avro.metadata.key.GlossaryNodeKey",
"com.linkedin.pegasus2avro.metadata.key.GlossaryTermKey",
"com.linkedin.pegasus2avro.metadata.key.MLFeatureKey",
"com.linkedin.pegasus2avro.metadata.key.MLFeatureTableKey",
"com.linkedin.pegasus2avro.metadata.key.MLModelDeploymentKey",
"com.linkedin.pegasus2avro.metadata.key.MLModelGroupKey",
"com.linkedin.pegasus2avro.metadata.key.MLModelKey",
"com.linkedin.pegasus2avro.metadata.key.MLPrimaryKeyKey",
"com.linkedin.pegasus2avro.metadata.key.SchemaFieldKey",
"com.linkedin.pegasus2avro.metadata.key.TagKey",
"com.linkedin.pegasus2avro.ml.metadata.CaveatsAndRecommendations",
"com.linkedin.pegasus2avro.ml.metadata.EthicalConsiderations",
"com.linkedin.pegasus2avro.ml.metadata.EvaluationData",
"com.linkedin.pegasus2avro.ml.metadata.IntendedUse",
"com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties",
"com.linkedin.pegasus2avro.ml.metadata.MLFeatureTableProperties",
"com.linkedin.pegasus2avro.ml.metadata.MLHyperParam",
"com.linkedin.pegasus2avro.ml.metadata.MLMetric",
"com.linkedin.pegasus2avro.ml.metadata.MLModelDeploymentProperties",
"com.linkedin.pegasus2avro.ml.metadata.MLModelFactorPrompts",
"com.linkedin.pegasus2avro.ml.metadata.MLModelGroupProperties",
"com.linkedin.pegasus2avro.ml.metadata.MLModelProperties",
"com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties",
"com.linkedin.pegasus2avro.ml.metadata.Metrics",
"com.linkedin.pegasus2avro.ml.metadata.QuantitativeAnalyses",
"com.linkedin.pegasus2avro.ml.metadata.SourceCode",
"com.linkedin.pegasus2avro.ml.metadata.TrainingData",
"com.linkedin.pegasus2avro.policy.DataHubPolicyInfo",
"com.linkedin.pegasus2avro.schema.EditableSchemaMetadata",
"com.linkedin.pegasus2avro.schema.SchemaMetadata",
"com.linkedin.pegasus2avro.tag.TagProperties"
]

View File

@ -23,11 +23,303 @@ def getMetadataChangeProposalSchema() -> str:
def getUsageAggregationSchema() -> str:
return _load_schema("UsageAggregation")
@functools.lru_cache(maxsize=None)
def getChartInfoSchema() -> str:
    """Return the "ChartInfo" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("ChartInfo")
    return schema
@functools.lru_cache(maxsize=None)
def getChartQuerySchema() -> str:
    """Return the "ChartQuery" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("ChartQuery")
    return schema
@functools.lru_cache(maxsize=None)
def getEditableChartPropertiesSchema() -> str:
    """Return the "EditableChartProperties" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("EditableChartProperties")
    return schema
@functools.lru_cache(maxsize=None)
def getBrowsePathsSchema() -> str:
    """Return the "BrowsePaths" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("BrowsePaths")
    return schema
@functools.lru_cache(maxsize=None)
def getCostSchema() -> str:
    """Return the "Cost" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("Cost")
    return schema
@functools.lru_cache(maxsize=None)
def getDeprecationSchema() -> str:
    """Return the "Deprecation" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("Deprecation")
    return schema
@functools.lru_cache(maxsize=None)
def getGlobalTagsSchema() -> str:
    """Return the "GlobalTags" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("GlobalTags")
    return schema
@functools.lru_cache(maxsize=None)
def getGlossaryTermsSchema() -> str:
    """Return the "GlossaryTerms" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("GlossaryTerms")
    return schema
@functools.lru_cache(maxsize=None)
def getInstitutionalMemorySchema() -> str:
    """Return the "InstitutionalMemory" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("InstitutionalMemory")
    return schema
@functools.lru_cache(maxsize=None)
def getOwnershipSchema() -> str:
    """Return the "Ownership" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("Ownership")
    return schema
@functools.lru_cache(maxsize=None)
def getStatusSchema() -> str:
    """Return the "Status" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("Status")
    return schema
@functools.lru_cache(maxsize=None)
def getDashboardInfoSchema() -> str:
    """Return the "DashboardInfo" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("DashboardInfo")
    return schema
@functools.lru_cache(maxsize=None)
def getEditableDashboardPropertiesSchema() -> str:
    """Return the "EditableDashboardProperties" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("EditableDashboardProperties")
    return schema
@functools.lru_cache(maxsize=None)
def getDataFlowInfoSchema() -> str:
    """Return the "DataFlowInfo" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("DataFlowInfo")
    return schema
@functools.lru_cache(maxsize=None)
def getDataJobInfoSchema() -> str:
    """Return the "DataJobInfo" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("DataJobInfo")
    return schema
@functools.lru_cache(maxsize=None)
def getDataJobInputOutputSchema() -> str:
    """Return the "DataJobInputOutput" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("DataJobInputOutput")
    return schema
@functools.lru_cache(maxsize=None)
def getEditableDataFlowPropertiesSchema() -> str:
    """Return the "EditableDataFlowProperties" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("EditableDataFlowProperties")
    return schema
@functools.lru_cache(maxsize=None)
def getEditableDataJobPropertiesSchema() -> str:
    """Return the "EditableDataJobProperties" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("EditableDataJobProperties")
    return schema
@functools.lru_cache(maxsize=None)
def getDataPlatformInfoSchema() -> str:
    """Return the "DataPlatformInfo" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("DataPlatformInfo")
    return schema
@functools.lru_cache(maxsize=None)
def getDataProcessInfoSchema() -> str:
    """Return the "DataProcessInfo" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("DataProcessInfo")
    return schema
@functools.lru_cache(maxsize=None)
def getDatasetDeprecationSchema() -> str:
    """Return the "DatasetDeprecation" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("DatasetDeprecation")
    return schema
@functools.lru_cache(maxsize=None)
def getDatasetProfileSchema() -> str:
    """Return the "DatasetProfile" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("DatasetProfile")
    return schema
@functools.lru_cache(maxsize=None)
def getDatasetPropertiesSchema() -> str:
    """Return the "DatasetProperties" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("DatasetProperties")
    return schema
@functools.lru_cache(maxsize=None)
def getDatasetUpstreamLineageSchema() -> str:
    """Return the "DatasetUpstreamLineage" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("DatasetUpstreamLineage")
    return schema
@functools.lru_cache(maxsize=None)
def getDatasetUsageStatisticsSchema() -> str:
    """Return the "DatasetUsageStatistics" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("DatasetUsageStatistics")
    return schema
@functools.lru_cache(maxsize=None)
def getEditableDatasetPropertiesSchema() -> str:
    """Return the "EditableDatasetProperties" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("EditableDatasetProperties")
    return schema
@functools.lru_cache(maxsize=None)
def getUpstreamLineageSchema() -> str:
    """Return the "UpstreamLineage" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("UpstreamLineage")
    return schema
@functools.lru_cache(maxsize=None)
def getGlossaryNodeInfoSchema() -> str:
    """Return the "GlossaryNodeInfo" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("GlossaryNodeInfo")
    return schema
@functools.lru_cache(maxsize=None)
def getGlossaryRelatedTermsSchema() -> str:
    """Return the "GlossaryRelatedTerms" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("GlossaryRelatedTerms")
    return schema
@functools.lru_cache(maxsize=None)
def getGlossaryTermInfoSchema() -> str:
    """Return the "GlossaryTermInfo" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("GlossaryTermInfo")
    return schema
@functools.lru_cache(maxsize=None)
def getCorpGroupInfoSchema() -> str:
    """Return the "CorpGroupInfo" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("CorpGroupInfo")
    return schema
@functools.lru_cache(maxsize=None)
def getCorpUserEditableInfoSchema() -> str:
    """Return the "CorpUserEditableInfo" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("CorpUserEditableInfo")
    return schema
@functools.lru_cache(maxsize=None)
def getCorpUserInfoSchema() -> str:
    """Return the "CorpUserInfo" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("CorpUserInfo")
    return schema
@functools.lru_cache(maxsize=None)
def getGroupMembershipSchema() -> str:
    """Return the "GroupMembership" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("GroupMembership")
    return schema
@functools.lru_cache(maxsize=None)
def getChartKeySchema() -> str:
    """Return the "ChartKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("ChartKey")
    return schema
@functools.lru_cache(maxsize=None)
def getCorpGroupKeySchema() -> str:
    """Return the "CorpGroupKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("CorpGroupKey")
    return schema
@functools.lru_cache(maxsize=None)
def getCorpUserKeySchema() -> str:
    """Return the "CorpUserKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("CorpUserKey")
    return schema
@functools.lru_cache(maxsize=None)
def getDashboardKeySchema() -> str:
    """Return the "DashboardKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("DashboardKey")
    return schema
@functools.lru_cache(maxsize=None)
def getDataFlowKeySchema() -> str:
    """Return the "DataFlowKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("DataFlowKey")
    return schema
@functools.lru_cache(maxsize=None)
def getDataHubPolicyKeySchema() -> str:
    """Return the "DataHubPolicyKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("DataHubPolicyKey")
    return schema
@functools.lru_cache(maxsize=None)
def getDataJobKeySchema() -> str:
    """Return the "DataJobKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("DataJobKey")
    return schema
@functools.lru_cache(maxsize=None)
def getDataPlatformKeySchema() -> str:
    """Return the "DataPlatformKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("DataPlatformKey")
    return schema
@functools.lru_cache(maxsize=None)
def getDataProcessKeySchema() -> str:
    """Return the "DataProcessKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("DataProcessKey")
    return schema
@functools.lru_cache(maxsize=None)
def getDatasetKeySchema() -> str:
    """Return the "DatasetKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("DatasetKey")
    return schema
@functools.lru_cache(maxsize=None)
def getGlossaryNodeKeySchema() -> str:
    """Return the "GlossaryNodeKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("GlossaryNodeKey")
    return schema
@functools.lru_cache(maxsize=None)
def getGlossaryTermKeySchema() -> str:
    """Return the "GlossaryTermKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("GlossaryTermKey")
    return schema
@functools.lru_cache(maxsize=None)
def getMLFeatureKeySchema() -> str:
    """Return the "MLFeatureKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("MLFeatureKey")
    return schema
@functools.lru_cache(maxsize=None)
def getMLFeatureTableKeySchema() -> str:
    """Return the "MLFeatureTableKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("MLFeatureTableKey")
    return schema
@functools.lru_cache(maxsize=None)
def getMLModelDeploymentKeySchema() -> str:
    """Return the "MLModelDeploymentKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("MLModelDeploymentKey")
    return schema
@functools.lru_cache(maxsize=None)
def getMLModelGroupKeySchema() -> str:
    """Return the "MLModelGroupKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("MLModelGroupKey")
    return schema
@functools.lru_cache(maxsize=None)
def getMLModelKeySchema() -> str:
    """Return the "MLModelKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("MLModelKey")
    return schema
@functools.lru_cache(maxsize=None)
def getMLPrimaryKeyKeySchema() -> str:
    """Return the "MLPrimaryKeyKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("MLPrimaryKeyKey")
    return schema
@functools.lru_cache(maxsize=None)
def getSchemaFieldKeySchema() -> str:
    """Return the "SchemaFieldKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("SchemaFieldKey")
    return schema
@functools.lru_cache(maxsize=None)
def getTagKeySchema() -> str:
    """Return the "TagKey" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("TagKey")
    return schema
@functools.lru_cache(maxsize=None)
def getCaveatsAndRecommendationsSchema() -> str:
    """Return the "CaveatsAndRecommendations" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("CaveatsAndRecommendations")
    return schema
@functools.lru_cache(maxsize=None)
def getEthicalConsiderationsSchema() -> str:
    """Return the "EthicalConsiderations" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("EthicalConsiderations")
    return schema
@functools.lru_cache(maxsize=None)
def getEvaluationDataSchema() -> str:
    """Return the "EvaluationData" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("EvaluationData")
    return schema
@functools.lru_cache(maxsize=None)
def getIntendedUseSchema() -> str:
    """Return the "IntendedUse" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("IntendedUse")
    return schema
@functools.lru_cache(maxsize=None)
def getMLFeaturePropertiesSchema() -> str:
    """Return the "MLFeatureProperties" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("MLFeatureProperties")
    return schema
@functools.lru_cache(maxsize=None)
def getMLFeatureTablePropertiesSchema() -> str:
    """Return the "MLFeatureTableProperties" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("MLFeatureTableProperties")
    return schema
@functools.lru_cache(maxsize=None)
def getMLHyperParamSchema() -> str:
    """Return the "MLHyperParam" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("MLHyperParam")
    return schema
@functools.lru_cache(maxsize=None)
def getMLMetricSchema() -> str:
    """Return the "MLMetric" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("MLMetric")
    return schema
@functools.lru_cache(maxsize=None)
def getMLModelDeploymentPropertiesSchema() -> str:
    """Return the "MLModelDeploymentProperties" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("MLModelDeploymentProperties")
    return schema
@functools.lru_cache(maxsize=None)
def getMLModelFactorPromptsSchema() -> str:
    """Return the "MLModelFactorPrompts" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("MLModelFactorPrompts")
    return schema
@functools.lru_cache(maxsize=None)
def getMLModelGroupPropertiesSchema() -> str:
    """Return the "MLModelGroupProperties" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("MLModelGroupProperties")
    return schema
@functools.lru_cache(maxsize=None)
def getMLModelPropertiesSchema() -> str:
    """Return the "MLModelProperties" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("MLModelProperties")
    return schema
@functools.lru_cache(maxsize=None)
def getMLPrimaryKeyPropertiesSchema() -> str:
    """Return the "MLPrimaryKeyProperties" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("MLPrimaryKeyProperties")
    return schema
@functools.lru_cache(maxsize=None)
def getMetricsSchema() -> str:
    """Return the "Metrics" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("Metrics")
    return schema
@functools.lru_cache(maxsize=None)
def getQuantitativeAnalysesSchema() -> str:
    """Return the "QuantitativeAnalyses" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("QuantitativeAnalyses")
    return schema
@functools.lru_cache(maxsize=None)
def getSourceCodeSchema() -> str:
    """Return the "SourceCode" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("SourceCode")
    return schema
@functools.lru_cache(maxsize=None)
def getTrainingDataSchema() -> str:
    """Return the "TrainingData" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("TrainingData")
    return schema
@functools.lru_cache(maxsize=None)
def getDataHubPolicyInfoSchema() -> str:
    """Return the "DataHubPolicyInfo" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("DataHubPolicyInfo")
    return schema
@functools.lru_cache(maxsize=None)
def getEditableSchemaMetadataSchema() -> str:
    """Return the "EditableSchemaMetadata" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("EditableSchemaMetadata")
    return schema
@functools.lru_cache(maxsize=None)
def getSchemaMetadataSchema() -> str:
    """Return the "SchemaMetadata" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("SchemaMetadata")
    return schema
@functools.lru_cache(maxsize=None)
def getTagPropertiesSchema() -> str:
    """Return the "TagProperties" schema from ``_load_schema``; cached after the first call."""
    schema = _load_schema("TagProperties")
    return schema
# fmt: on