From 33fe3b5ee596a9b744c64ac0a635d3b868bccedb Mon Sep 17 00:00:00 2001 From: Tom Bushell <75358882+TomBushell@users.noreply.github.com> Date: Mon, 30 Oct 2023 18:23:47 +1100 Subject: [PATCH] Fixes #7858: Moved ingestion related make commands into Makefile in ingestion directory (#13677) * Moved more recipes into ingestion/Makefile * Removed some recipes into ingestion/Makefile and added import statement * Modified file paths so that 'make generate' works from the ingestion directory * Modified checks for current directory * Fixed function names to be in snake case * Reverted function names back to camel case * Reverted changes to js_antlr and py_antlr and moved generate command back into root directory Makefile * Updated run_ometa_integration_tests recipe in ingestion/Makefile --------- Co-authored-by: Pere Miquel Brull --- Makefile | 110 ++++---------------------------- ingestion/Makefile | 101 +++++++++++++++++++++++++++++ scripts/datamodel_generation.py | 26 +++++--- 3 files changed, 128 insertions(+), 109 deletions(-) create mode 100644 ingestion/Makefile diff --git a/Makefile b/Makefile index b7c1a74fe6d..78d7de3bfdf 100644 --- a/Makefile +++ b/Makefile @@ -1,118 +1,20 @@ .DEFAULT_GOAL := help PY_SOURCE ?= ingestion/src +include ingestion/Makefile .PHONY: help help: @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[35m%-30s\033[0m %s\n", $$1, $$2}' -.PHONY: install -install: ## Install the ingestion module to the current environment - python -m pip install ingestion/ - -.PHONY: install_apis -install_apis: ## Install the REST APIs module to the current environment - python -m pip install openmetadata-airflow-apis/ - -.PHONY: install_test -install_test: ## Install the ingestion module with test dependencies - python -m pip install "ingestion[test]/" - .PHONY: install_e2e_tests install_e2e_tests: ## Install the ingestion module with e2e test dependencies (playwright) python -m pip install "ingestion[e2e_test]/" 
playwright install --with-deps -.PHONY: install_dev -install_dev: ## Install the ingestion module with dev dependencies - python -m pip install "ingestion[dev]/" - -.PHONY: install_all -install_all: ## Install the ingestion module with all dependencies - python -m pip install "ingestion[all]/" - -.PHONY: precommit_install -precommit_install: ## Install the project's precommit hooks from .pre-commit-config.yaml - @echo "Installing pre-commit hooks" - @echo "Make sure to first run install_test first" - pre-commit install - -.PHONY: lint -lint: ## Run pylint on the Python sources to analyze the codebase - PYTHONPATH="${PYTHONPATH}:./ingestion/plugins" find $(PY_SOURCE) -path $(PY_SOURCE)/metadata/generated -prune -false -o -type f -name "*.py" | xargs pylint - -.PHONY: py_format -py_format: ## Run black and isort to format the Python codebase - pycln ingestion/ openmetadata-airflow-apis/ --extend-exclude $(PY_SOURCE)/metadata/generated --all - isort ingestion/ openmetadata-airflow-apis/ --skip $(PY_SOURCE)/metadata/generated --skip ingestion/env --skip ingestion/build --skip openmetadata-airflow-apis/build --profile black --multi-line 3 - black ingestion/ openmetadata-airflow-apis/ --extend-exclude $(PY_SOURCE)/metadata/generated - -.PHONY: py_format_check -py_format_check: ## Check if Python sources are correctly formatted - pycln ingestion/ openmetadata-airflow-apis/ --diff --extend-exclude $(PY_SOURCE)/metadata/generated --all - isort --check-only ingestion/ openmetadata-airflow-apis/ --skip $(PY_SOURCE)/metadata/generated --skip ingestion/build --skip openmetadata-airflow-apis/build --profile black --multi-line 3 - black --check --diff ingestion/ openmetadata-airflow-apis/ --extend-exclude $(PY_SOURCE)/metadata/generated - PYTHONPATH="${PYTHONPATH}:./ingestion/plugins" pylint --fail-under=10 $(PY_SOURCE)/metadata --ignore-paths $(PY_SOURCE)/metadata/generated || (echo "PyLint error code $$?"; exit 1) - -## Ingestion models generation -.PHONY: generate -generate: 
## Generate the pydantic models from the JSON Schemas to the ingestion module - @echo "Running Datamodel Code Generator" - @echo "Make sure to first run the install_dev recipe" - rm -rf ingestion/src/metadata/generated - mkdir -p ingestion/src/metadata/generated - python scripts/datamodel_generation.py - $(MAKE) py_antlr js_antlr - $(MAKE) install - -## Ingestion tests & QA -.PHONY: run_ometa_integration_tests -run_ometa_integration_tests: ## Run Python integration tests - coverage run --rcfile ingestion/.coveragerc -a --branch -m pytest -c ingestion/setup.cfg --junitxml=ingestion/junit/test-results-integration.xml ingestion/tests/integration/ometa ingestion/tests/integration/orm_profiler ingestion/tests/integration/test_suite ingestion/tests/integration/data_insight ingestion/tests/integration/lineage - -.PHONY: unit_ingestion -unit_ingestion: ## Run Python unit tests - coverage run --rcfile ingestion/.coveragerc -a --branch -m pytest -c ingestion/setup.cfg --junitxml=ingestion/junit/test-results-unit.xml --ignore=ingestion/tests/unit/source ingestion/tests/unit - .PHONY: run_e2e_tests run_e2e_tests: ## Run e2e tests pytest --screenshot=only-on-failure --output="ingestion/tests/e2e/artifacts" $(ARGS) --slowmo 5 --junitxml=ingestion/junit/test-results-e2e.xml ingestion/tests/e2e -.PHONY: run_python_tests -run_python_tests: ## Run all Python tests with coverage - coverage erase - $(MAKE) unit_ingestion - $(MAKE) run_ometa_integration_tests - coverage report --rcfile ingestion/.coveragerc || true - -.PHONY: coverage -coverage: ## Run all Python tests and generate the coverage XML report - $(MAKE) run_python_tests - coverage xml --rcfile ingestion/.coveragerc -o ingestion/coverage.xml || true - sed -e "s/$(shell python -c "import site; import os; from pathlib import Path; print(os.path.relpath(site.getsitepackages()[0], str(Path.cwd())).replace('/','\/'))")/src/g" ingestion/coverage.xml >> ingestion/ci-coverage.xml - -.PHONY: sonar_ingestion -sonar_ingestion: ## Run 
the Sonar analysis based on the tests results and push it to SonarCloud - docker run \ - --rm \ - -e SONAR_HOST_URL="https://sonarcloud.io" \ - -e SONAR_SCANNER_OPTS="-Xmx1g" \ - -e SONAR_LOGIN=$(token) \ - -v ${PWD}/ingestion:/usr/src \ - sonarsource/sonar-scanner-cli \ - -Dproject.settings=sonar-project.properties - -.PHONY: run_apis_tests -run_apis_tests: ## Run the openmetadata airflow apis tests - coverage erase - coverage run --rcfile openmetadata-airflow-apis/.coveragerc -a --branch -m pytest --junitxml=openmetadata-airflow-apis/junit/test-results.xml openmetadata-airflow-apis/tests - coverage report --rcfile openmetadata-airflow-apis/.coveragerc - -.PHONY: coverage_apis -coverage_apis: ## Run the python tests on openmetadata-airflow-apis - $(MAKE) run_apis_tests - coverage xml --rcfile openmetadata-airflow-apis/.coveragerc -o openmetadata-airflow-apis/coverage.xml - sed -e "s/$(shell python -c "import site; import os; from pathlib import Path; print(os.path.relpath(site.getsitepackages()[0], str(Path.cwd())).replace('/','\/'))")\///g" openmetadata-airflow-apis/coverage.xml >> openmetadata-airflow-apis/ci-coverage.xml - ## Yarn .PHONY: yarn_install_cache yarn_install_cache: ## Use Yarn to install UI dependencies @@ -164,6 +66,16 @@ py_antlr: ## Generate the Python code for parsing FQNs js_antlr: ## Generate the Python code for parsing FQNs antlr4 -Dlanguage=JavaScript -o openmetadata-ui/src/main/resources/ui/src/generated/antlr ${PWD}/openmetadata-spec/src/main/antlr4/org/openmetadata/schema/*.g4 +## Ingestion models generation +.PHONY: generate +generate: ## Generate the pydantic models from the JSON Schemas to the ingestion module + @echo "Running Datamodel Code Generator" + @echo "Make sure to first run the install_dev recipe" + rm -rf ingestion/src/metadata/generated + mkdir -p ingestion/src/metadata/generated + python scripts/datamodel_generation.py + $(MAKE) py_antlr js_antlr + $(MAKE) install .PHONY: install_antlr_cli install_antlr_cli: ## Install 
antlr CLI locally diff --git a/ingestion/Makefile b/ingestion/Makefile new file mode 100644 index 00000000000..80d88dac1ce --- /dev/null +++ b/ingestion/Makefile @@ -0,0 +1,101 @@ +DIRECTORY_NAME := $(notdir $(CURDIR)) +PY_SOURCE ?= ./src + +ifeq (ingestion,$(DIRECTORY_NAME)) + INGESTION_DIR := . + ROOT_DIR := .. +else + INGESTION_DIR := ingestion + ROOT_DIR := . +endif + +.PHONY: install +install: ## Install the ingestion module to the current environment + python -m pip install $(INGESTION_DIR)/ + +.PHONY: install_dev +install_dev: ## Install the ingestion module with dev dependencies + python -m pip install "$(INGESTION_DIR)[dev]/" + +.PHONY: install_test +install_test: ## Install the ingestion module with test dependencies + python -m pip install "$(INGESTION_DIR)[test]/" + +.PHONY: install_all +install_all: ## Install the ingestion module with all dependencies + python -m pip install "$(INGESTION_DIR)[all]/" + +.PHONY: install_apis +install_apis: ## Install the REST APIs module to the current environment + python -m pip install $(ROOT_DIR)openmetadata-airflow-apis/ + +.PHONY: lint +lint: ## Run pylint on the Python sources to analyze the codebase + PYTHONPATH="${PYTHONPATH}:$(INGESTION_DIR)/plugins" find $(PY_SOURCE) -path $(PY_SOURCE)/metadata/generated -prune -false -o -type f -name "*.py" | xargs pylint + +.PHONY: precommit_install +precommit_install: ## Install the project's precommit hooks from .pre-commit-config.yaml + @echo "Installing pre-commit hooks" + @echo "Make sure to first run install_test first" + pre-commit install + +.PHONY: py_format +py_format: ## Run black and isort to format the Python codebase + pycln $(INGESTION_DIR)/ $(ROOT_DIR)/openmetadata-airflow-apis/ --extend-exclude $(PY_SOURCE)/metadata/generated --all + isort $(INGESTION_DIR)/ $(ROOT_DIR)/openmetadata-airflow-apis/ --skip $(PY_SOURCE)/metadata/generated --skip $(INGESTION_DIR)/env --skip $(INGESTION_DIR)/build --skip $(ROOT_DIR)/openmetadata-airflow-apis/build --profile black 
--multi-line 3 + black $(INGESTION_DIR)/ $(ROOT_DIR)/openmetadata-airflow-apis/ --extend-exclude $(PY_SOURCE)/metadata/generated + +.PHONY: py_format_check +py_format_check: ## Check if Python sources are correctly formatted + pycln $(INGESTION_DIR)/ $(ROOT_DIR)/openmetadata-airflow-apis/ --diff --extend-exclude $(PY_SOURCE)/metadata/generated --all + isort --check-only $(INGESTION_DIR)/ $(ROOT_DIR)/openmetadata-airflow-apis/ --skip $(PY_SOURCE)/metadata/generated --skip $(INGESTION_DIR)/build --skip $(ROOT_DIR)/openmetadata-airflow-apis/build --profile black --multi-line 3 + black --check --diff $(INGESTION_DIR)/ $(ROOT_DIR)/openmetadata-airflow-apis/ --extend-exclude $(PY_SOURCE)/metadata/generated + PYTHONPATH="${PYTHONPATH}:$(INGESTION_DIR)/plugins" pylint --fail-under=10 $(PY_SOURCE)/metadata --ignore-paths $(PY_SOURCE)/metadata/generated || (echo "PyLint error code $$?"; exit 1) + +.PHONY: unit_ingestion +unit_ingestion: ## Run Python unit tests + coverage run --rcfile $(INGESTION_DIR)/.coveragerc -a --branch -m pytest -c $(INGESTION_DIR)/setup.cfg --junitxml=$(INGESTION_DIR)/junit/test-results-unit.xml --ignore=$(INGESTION_DIR)/tests/unit/source $(INGESTION_DIR)/tests/unit + +## Ingestion tests & QA +.PHONY: run_ometa_integration_tests +run_ometa_integration_tests: ## Run Python integration tests + coverage run --rcfile $(INGESTION_DIR)/.coveragerc -a --branch -m pytest -c $(INGESTION_DIR)/setup.cfg --junitxml=$(INGESTION_DIR)/junit/test-results-integration.xml $(INGESTION_DIR)/tests/integration/ometa $(INGESTION_DIR)/tests/integration/orm_profiler $(INGESTION_DIR)/tests/integration/test_suite $(INGESTION_DIR)/tests/integration/data_insight $(INGESTION_DIR)/tests/integration/lineage + +.PHONY: run_python_tests +run_python_tests: ## Run all Python tests with coverage + coverage erase + $(MAKE) unit_ingestion + $(MAKE) run_ometa_integration_tests + coverage report --rcfile $(INGESTION_DIR)/.coveragerc || true + +.PHONY: sonar_ingestion +sonar_ingestion: ## Run 
the Sonar analysis based on the tests results and push it to SonarCloud + docker run \ + --rm \ + -e SONAR_HOST_URL="https://sonarcloud.io" \ + -e SONAR_SCANNER_OPTS="-Xmx1g" \ + -e SONAR_LOGIN=$(token) \ + -v ${PWD}/$(INGESTION_DIR):/usr/src \ + sonarsource/sonar-scanner-cli \ + -Dproject.settings=sonar-project.properties + +.PHONY: run_apis_tests +run_apis_tests: ## Run the openmetadata airflow apis tests + coverage erase + coverage run --rcfile $(ROOT_DIR)/openmetadata-airflow-apis/.coveragerc -a --branch -m pytest --junitxml=$(ROOT_DIR)/openmetadata-airflow-apis/junit/test-results.xml $(ROOT_DIR)/openmetadata-airflow-apis/tests + coverage report --rcfile $(ROOT_DIR)/openmetadata-airflow-apis/.coveragerc + + +.PHONY: coverage_apis +coverage_apis: ## Run the python tests on openmetadata-airflow-apis + $(MAKE) run_apis_tests + coverage xml --rcfile $(ROOT_DIR)/openmetadata-airflow-apis/.coveragerc -o $(ROOT_DIR)/openmetadata-airflow-apis/coverage.xml + sed -e "s/$(shell python -c "import site; import os; from pathlib import Path; print(os.path.relpath(site.getsitepackages()[0], str(Path.cwd())).replace('/','\/'))")\///g" $(INGESTION_DIR)/openmetadata-airflow-apis/coverage.xml >> $(INGESTION_DIR)/openmetadata-airflow-apis/ci-coverage.xml + + + +.PHONY: coverage +coverage: ## Run all Python tests and generate the coverage XML report + $(MAKE) run_python_tests + coverage xml --rcfile $(INGESTION_DIR)/.coveragerc -o $(INGESTION_DIR)/coverage.xml || true + sed -e "s/$(shell python -c "import site; import os; from pathlib import Path; print(os.path.relpath(site.getsitepackages()[0], str(Path.cwd())).replace('/','\/'))")/src/g" $(INGESTION_DIR)/coverage.xml >> $(INGESTION_DIR)/ci-coverage.xml diff --git a/scripts/datamodel_generation.py b/scripts/datamodel_generation.py index a58ddc796dc..f97315036ab 100644 --- a/scripts/datamodel_generation.py +++ b/scripts/datamodel_generation.py @@ -17,14 +17,9 @@ from a configured secrets' manager. 
import datamodel_code_generator.model.pydantic from datamodel_code_generator.imports import Import +import os + -UNICODE_REGEX_REPLACEMENT_FILE_PATHS = [ - "ingestion/src/metadata/generated/schema/entity/classification/tag.py", - "ingestion/src/metadata/generated/schema/entity/events/webhook.py", - "ingestion/src/metadata/generated/schema/entity/teams/user.py", - "ingestion/src/metadata/generated/schema/entity/type.py", - "ingestion/src/metadata/generated/schema/type/basic.py", -] datamodel_code_generator.model.pydantic.types.IMPORT_SECRET_STR = Import.from_full_path( "metadata.ingestion.models.custom_pydantic.CustomSecretStr" @@ -32,9 +27,20 @@ datamodel_code_generator.model.pydantic.types.IMPORT_SECRET_STR = Import.from_fu from datamodel_code_generator.__main__ import main -args = "--input openmetadata-spec/src/main/resources/json/schema --input-file-type jsonschema --output ingestion/src/metadata/generated/schema --set-default-enum-member".split( - " " -) +current_directory = os.getcwd() +ingestion_path = "./" if current_directory.endswith("/ingestion") else "ingestion/" +directory_root = "../" if current_directory.endswith("/ingestion") else "./" + +UNICODE_REGEX_REPLACEMENT_FILE_PATHS = [ + f"{ingestion_path}src/metadata/generated/schema/entity/classification/tag.py", + f"{ingestion_path}src/metadata/generated/schema/entity/events/webhook.py", + f"{ingestion_path}src/metadata/generated/schema/entity/teams/user.py", + f"{ingestion_path}src/metadata/generated/schema/entity/type.py", + f"{ingestion_path}src/metadata/generated/schema/type/basic.py", +] + +args = f"--input {directory_root}openmetadata-spec/src/main/resources/json/schema --input-file-type jsonschema --output {ingestion_path}src/metadata/generated/schema --set-default-enum-member".split(" ") + main(args) for file_path in UNICODE_REGEX_REPLACEMENT_FILE_PATHS: