Review notes (text before the first "diff --git" header is ignored by git-apply/patch):
  * Workflow fix: job `metadata-ingestion-docgen` now declares `needs: setup`.
    Without it the `needs` context is empty for that job, so the condition
    `needs.setup.outputs.publish == 'true'` could never be true and the upload
    step was always skipped. The new-file hunk length is updated accordingly;
    the blob hash on its `index` line predates this change.
  * The line structure of this patch was restored from a whitespace-collapsed
    copy. Leading indentation inside the modeldocgen.py / modeldocgen.sh hunks
    (including the whitespace-only removed line in the -497 hunk) is
    reconstructed from syntax -- TODO confirm against the target files.

diff --git a/.github/workflows/metadata-model.yml b/.github/workflows/metadata-model.yml
new file mode 100644
index 0000000000..6224b8c9dc
--- /dev/null
+++ b/.github/workflows/metadata-model.yml
@@ -0,0 +1,48 @@
+name: metadata model generate
+# Builds the metadata models on pushes to master and on releases, and uploads
+# the result to DataHub only when the DataHubToken secret is configured.
+on:
+  push:
+    branches:
+      - master
+    paths-ignore:
+      - "docs/**"
+      - "**.md"
+  release:
+    types: [published, edited]
+
+jobs:
+  setup:
+    runs-on: ubuntu-latest
+    outputs:
+      publish: ${{ steps.publish.outputs.publish }}
+    steps:
+      - name: Check whether upload to datahub is enabled
+        id: publish
+        env:
+          ENABLE_PUBLISH: ${{ secrets.DataHubToken }}
+        run: |
+          echo "Enable publish: ${{ env.ENABLE_PUBLISH != '' }}"
+          echo "::set-output name=publish::${{ env.ENABLE_PUBLISH != '' }}"
+  metadata-ingestion-docgen:
+    runs-on: ubuntu-latest
+    # Required so that `needs.setup.outputs.publish` is populated below; without
+    # it the `needs` context is empty and the upload step is always skipped.
+    needs: setup
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+        with:
+          python-version: "3.9.9"
+      - name: Install dependencies
+        run: ./metadata-ingestion/scripts/install_deps.sh
+      - name: Run model generation
+        run: ./gradlew :metadata-models:build
+      - name: Upload metadata to DataHub
+        if: ${{ needs.setup.outputs.publish == 'true' }}
+        env:
+          DATAHUB_SERVER: ${{ secrets.DataHubServer }}
+          DATAHUB_TOKEN: ${{ secrets.DataHubToken }}
+        run: ./gradlew :metadata-ingestion:modelDocUpload
+
+
diff --git a/docs/imgs/datahub-metadata-model.png b/docs/imgs/datahub-metadata-model.png
index c377fc9347..59449cd0d4 100644
Binary files a/docs/imgs/datahub-metadata-model.png and b/docs/imgs/datahub-metadata-model.png differ
diff --git a/metadata-ingestion/scripts/modeldocgen.py b/metadata-ingestion/scripts/modeldocgen.py
index c8c55b47eb..3a0c8f2f02 100644
--- a/metadata-ingestion/scripts/modeldocgen.py
+++ b/metadata-ingestion/scripts/modeldocgen.py
@@ -231,7 +231,9 @@ def make_entity_docs(entity_display_name: str, graph: RelationshipGraph) -> str:
         if adjacency.self_loop:
             relationships_section += f"\n### Self\nThese are the relationships to itself, stored in this entity's aspects"
         for relnship in adjacency.self_loop:
-            relationships_section += f"\n- {relnship.name} ({relnship.doc[1:] if relnship.doc else ''})"
+            relationships_section += (
+                f"\n- {relnship.name} ({relnship.doc[1:] if relnship.doc else ''})"
+            )
 
         if adjacency.outgoing:
             relationships_section += f"\n### Outgoing\nThese are the relationships stored in this entity's aspects"
@@ -274,9 +276,7 @@ def generate_stitched_record(relnships_graph: RelationshipGraph) -> List[Any]:
             if aspect_name not in aspect_registry:
                 print(f"Did not find aspect name: {aspect_name} in aspect_registry")
                 continue
-            import pdb
 
-            # breakpoint()
             # all aspects should have a schema
             aspect_schema = aspect_registry[aspect_name].schema
             assert aspect_schema
@@ -292,7 +292,7 @@ def generate_stitched_record(relnships_graph: RelationshipGraph) -> List[Any]:
             field_objects = []
             for f in entity_fields:
                 field = avro.schema.Field(
-                    type_=f["type"],
+                    type=f["type"],
                     name=f["name"],
                     has_default=False,
                 )
@@ -327,7 +327,8 @@ def generate_stitched_record(relnships_graph: RelationshipGraph) -> List[Any]:
             for f_field in schema_fields:
                 if f_field.jsonProps:
                     import pdb
-                    #breakpoint()
+
+                    # breakpoint()
                     json_dict = json.loads(f_field.jsonProps)
                     if "Aspect" in json_dict:
                         aspect_info = json_dict["Aspect"]
@@ -483,7 +484,7 @@ def generate(
     server: Optional[str],
     file: Optional[str],
     dot: Optional[str],
-    png: Optional[str]
+    png: Optional[str],
 ) -> None:
     logger.info(f"server = {server}")
     logger.info(f"file = {file}")
@@ -497,7 +498,7 @@ def generate(
         else:
             # schema file
             load_schema_file(schema_file)
-    
+
     relationship_graph = RelationshipGraph()
     events = generate_stitched_record(relationship_graph)
 
@@ -573,9 +574,12 @@ def generate(
             try:
                 graph.write_png(png)
             except Exception as e:
-                logger.error("Failed to create png file. Do you have graphviz installed?")
+                logger.error(
+                    "Failed to create png file. Do you have graphviz installed?"
+                )
                 raise e
 
+
 if __name__ == "__main__":
     logger.setLevel("INFO")
     generate()
diff --git a/metadata-ingestion/scripts/modeldocgen.sh b/metadata-ingestion/scripts/modeldocgen.sh
index c55be8bbda..77924c0c1e 100755
--- a/metadata-ingestion/scripts/modeldocgen.sh
+++ b/metadata-ingestion/scripts/modeldocgen.sh
@@ -6,8 +6,8 @@ OUTDIR=./generated/docs
 # Note: this assumes that datahub has already been built with `./gradlew build`.
 DATAHUB_ROOT=..
 REGISTRY_ROOT="$DATAHUB_ROOT/metadata-models/src/main/resources"
-SCHEMAS_ROOT="$DATAHUB_ROOT/metadata-models/src/mainGeneratedAvroSchema/avro/com/linkedin"
-FILES="$REGISTRY_ROOT/entity-registry.yml $SCHEMAS_ROOT/mxe/MetadataChangeEvent.avsc"
+SCHEMAS_ROOT="$DATAHUB_ROOT/metadata-events/mxe-schemas/src/mainGeneratedAvroSchema/avro"
+FILES="$REGISTRY_ROOT/entity-registry.yml $SCHEMAS_ROOT/com/linkedin/mxe/MetadataChangeEvent.avsc"
 # Since we depend on jq, check if jq is installed
 if ! which jq > /dev/null; then
   echo "jq is not installed. Please install jq and rerun (https://stedolan.github.io/jq/)"