From b39b698eb7eed7629d747fdb97b3b36e6069d660 Mon Sep 17 00:00:00 2001 From: Pere Miquel Brull Date: Sun, 23 Jan 2022 18:47:06 +0100 Subject: [PATCH] Fix #2345 - Make bump & unify (#2359) * Add installing instructions * Unify makefile and automate bump version setup * Update submake call * Mark targets as phony * Add help to recipes * Update core publish * Do not trigger install from echo * Run make from root * Prepare py_format from isort and black * Run py_format * Check black and isort formatting * Update comment msg --- .github/workflows/java-checkstyle.yml | 5 +- ...etadata-ingestion-core-version-comment.yml | 4 +- .github/workflows/py-checkstyle.yml | 7 +- .../workflows/py-ingestion-core-publish.yml | 12 +- Makefile | 135 +++++++++++++----- ingestion-core/Makefile | 21 --- .../src/metadata/ingestion/source/amundsen.py | 3 +- .../src/metadata/ingestion/source/bigquery.py | 15 +- .../ingestion/source/redshift_usage.py | 1 - .../metadata/ingestion/source/sql_source.py | 7 +- .../src/metadata/utils/column_type_parser.py | 3 +- 11 files changed, 130 insertions(+), 83 deletions(-) delete mode 100644 ingestion-core/Makefile diff --git a/.github/workflows/java-checkstyle.yml b/.github/workflows/java-checkstyle.yml index 61b3f3e6487..d3cb179cfc9 100644 --- a/.github/workflows/java-checkstyle.yml +++ b/.github/workflows/java-checkstyle.yml @@ -54,7 +54,10 @@ jobs: body: | **The Java checkstyle failed.** - Please run `mvn googleformatter:format@reformat-sources` in the root of your repository and commit the changes to this PR. You can also use [pre-commit](https://pre-commit.com/) to automate the Java code formatting. + Please run `mvn googleformatter:format@reformat-sources` in the root of your repository and commit the changes to this PR. + You can also use [pre-commit](https://pre-commit.com/) to automate the Java code formatting. + + You can install the pre-commit hooks with `make install_test precommit_install`. - name: Java checkstyle failed, check the comment in the PR if: steps.git.outcome != 'success' diff --git a/.github/workflows/openmetadata-ingestion-core-version-comment.yml b/.github/workflows/openmetadata-ingestion-core-version-comment.yml index 314b4e21ae1..d17d12b3cf3 100644 --- a/.github/workflows/openmetadata-ingestion-core-version-comment.yml +++ b/.github/workflows/openmetadata-ingestion-core-version-comment.yml @@ -35,6 +35,6 @@ jobs: with: issue-number: ${{ github.event.pull_request.number }} body: | - **Schema Change Detected. Needs py-ingestion-core version bump** + **Schema Change Detected. Needs ingestion-core version bump** - Please run `make bump-version-dev` in the ingestion-core folder of your repository and commit the changes to _version.py in this PR. Please ignore if this has been handled already. + Please run `make core_bump_version_dev` in the project's root and commit the changes to _version.py in this PR. Please ignore if this has been handled already. diff --git a/.github/workflows/py-checkstyle.yml b/.github/workflows/py-checkstyle.yml index 4596558a545..48aebf44692 100644 --- a/.github/workflows/py-checkstyle.yml +++ b/.github/workflows/py-checkstyle.yml @@ -54,7 +54,7 @@ jobs: run: | source env/bin/activate make generate - make black_check + make py_format_check - name: Create a comment in the PR with the instructions if: steps.style.outcome != 'success' @@ -64,7 +64,10 @@ jobs: body: | **The Python checkstyle failed.** - Please run `make black` in the root of your repository and commit the changes to this PR. You can also use [pre-commit](https://pre-commit.com/) to automate the Python code formatting. + Please run `make py_format` in the root of your repository and commit the changes to this PR. + You can also use [pre-commit](https://pre-commit.com/) to automate the Python code formatting. + + You can install the pre-commit hooks with `make install_test precommit_install`. - name: Python checkstyle failed, check the comment in the PR if: steps.style.outcome != 'success' diff --git a/.github/workflows/py-ingestion-core-publish.yml b/.github/workflows/py-ingestion-core-publish.yml index da905e1566c..29b1c8ab1e7 100644 --- a/.github/workflows/py-ingestion-core-publish.yml +++ b/.github/workflows/py-ingestion-core-publish.yml @@ -36,17 +36,9 @@ jobs: - name: Install Ubuntu related dependencies run: | sudo apt-get install -y libsasl2-dev unixodbc-dev python3-venv - - name: Generate models - working-directory: ingestion-core - run: | - python3 -m venv env - source env/bin/activate - make generate - - name: Publish Test PyPi packages - working-directory: ingestion-core + - name: Install, Generate and Publish Test PyPi packages env: TWINE_USERNAME: ${{ secrets.TWINE_USERNAME_TEST }} TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD_TEST }} run: | - source env/bin/activate - make publish + make core_publish diff --git a/Makefile b/Makefile index b71c68b5c52..e1dcfb048a3 100644 --- a/Makefile +++ b/Makefile @@ -1,57 +1,77 @@ -.PHONY: env38 +.DEFAULT_GOAL := help PY_SOURCE ?= ingestion/src +.PHONY: help +help: + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[35m%-30s\033[0m %s\n", $$1, $$2}' + +.PHONY: env38 env38: -# virtualenv -p python3.8 env38 - python3.8 -m venv env38 + python3.8 -m venv env38 + +.PHONY: clean_env37 clean_env37: rm -rf env38 -install: - python3 -m pip install ingestion/ +.PHONY: install +install: ## Install the ingestion module to the current environment + python -m pip install ingestion/ -install_test: - python3 -m pip install "ingestion[test]/" +.PHONY: install_test +install_test: ## Install the ingestion module with test dependencies + python -m pip install "ingestion[test]/" -install_dev: - python3 -m pip install "ingestion[dev]/" +.PHONY: install_dev +install_dev: ## Install the ingestion module with dev dependencies + python -m pip install "ingestion[dev]/" -precommit_install: +.PHONY: precommit_install +precommit_install: ## Install the project's precommit hooks from .pre-commit-config.yaml @echo "Installing pre-commit hooks" - @echo "Make sure to first run `make install_test`" + @echo "Make sure to first run install_test first" pre-commit install -isort: - isort $(PY_SOURCE) --skip $(PY_SOURCE)/metadata/generated --profile black --multi-line 3 - -lint: +## Python Checkstyle +.PHONY: lint +lint: ## Run pylint on the Python sources to analyze the codebase find $(PY_SOURCE) -path $(PY_SOURCE)/metadata/generated -prune -false -o -type f -name "*.py" | xargs pylint -black: +.PHONY: py_format +py_format: ## Run black and isort to format the Python codebase + isort $(PY_SOURCE) --skip $(PY_SOURCE)/metadata/generated --profile black --multi-line 3 black $(PY_SOURCE) --exclude $(PY_SOURCE)/metadata/generated -black_check: +.PHONY: py_format_check +py_format_check: ## Check if Python sources are correctly formatted black --check --diff $(PY_SOURCE) --exclude $(PY_SOURCE)/metadata/generated + isort --check-only $(PY_SOURCE) --skip $(PY_SOURCE)/metadata/generated --profile black --multi-line 3 -generate: +## Ingestion models generation +.PHONY: generate +generate: ## Generate the pydantic models from the JSON Schemas to the ingestion module @echo "Running Datamodel Code Generator" @echo "Make sure to first run the install_dev recipe" - datamodel-codegen --input catalog-rest-service/src/main/resources/json --input-file-type jsonschema --output ingestion/src/metadata/generated - make install + datamodel-codegen --input catalog-rest-service/src/main/resources/json --input-file-type jsonschema --output ingestion/src/metadata/generated + $(MAKE) install -run_ometa_integration_tests: +## Ingestion tests & QA +.PHONY: run_ometa_integration_tests +run_ometa_integration_tests: ## Run Python integration tests coverage run -m pytest -c ingestion/setup.cfg --doctest-modules --junitxml=ingestion/junit/test-results-integration.xml --override-ini=testpaths="ingestion/tests/integration/ometa ingestion/tests/integration/stage" -unit_ingestion: +.PHONY: unit_ingestion +unit_ingestion: ## Run Python unit tests coverage run -m pytest -c ingestion/setup.cfg -s --doctest-modules --junitxml=ingestion/junit/test-results-unit.xml --override-ini=testpaths="ingestion/tests/unit" -coverage: +.PHONY: coverage +coverage: ## Run all Python tests and generate the coverage report coverage erase - make unit_ingestion - make run_ometa_integration_tests + $(MAKE) unit_ingestion + $(MAKE) run_ometa_integration_tests coverage xml -i -o ingestion/coverage.xml -sonar_ingestion: +.PHONY: sonar_ingestion +sonar_ingestion: ## Run the Sonar analysis based on the tests results and push it to SonarCloud docker run \ --rm \ -e SONAR_HOST_URL="https://sonarcloud.io" \ @@ -60,27 +80,74 @@ sonar_ingestion: sonarsource/sonar-scanner-cli \ -Dproject.settings=ingestion/sonar-project.properties -publish: - make install_dev generate +## Ingestion publish +.PHONY: publish +publish: ## Publish the ingestion module to PyPI + $(MAKE) install_dev generate cd ingestion; \ python setup.py install sdist bdist_wheel; \ twine check dist/*; \ twine upload dist/* -build_docker_base: - make install_dev generate +## Docker operators +.PHONY: build_docker_base +build_docker_base: ## Build the base Docker image for the Ingestion Framework Sources + $(MAKE) install_dev generate docker build -f ingestion/connectors/Dockerfile-base ingestion/ -t openmetadata/ingestion-connector-base -build_docker_connectors: +.PHONY: build_docker_connectors +build_docker_connectors: ## Build all Ingestion Framework Sources Images to be used as Docker Operators in Airflow @echo "Building Docker connectors. Make sure to run build_docker_base first" python ingestion/connectors/docker-cli.py build -push_docker_connectors: +.PHONY: push_docker_connectors +push_docker_connectors: ## Push all Sources Docker Images to DockerHub @echo "Pushing Docker connectors. Make sure to run build_docker_connectors first" python ingestion/connectors/docker-cli.py push -yarn_install_cache: +## Yarn +.PHONY: yarn_install_cache +yarn_install_cache: ## Use Yarn to install UI dependencies cd openmetadata-ui/src/main/resources/ui && yarn install --frozen-lockfile -yarn_start_dev_ui: +.PHONY: yarn_start_dev_ui +yarn_start_dev_ui: ## Run the UI locally with Yarn cd openmetadata-ui/src/main/resources/ui && yarn start + +## Ingestion Core +.PHONY: core_install_dev +core_install_dev: ## Prepare a virtualenv for the ingestion-core module + cd ingestion-core; \ + rm -rf venv; \ + python -m virtualenv venv; \ + . venv/bin/activate; \ + python -m pip install ".[dev]" + +.PHONY: core_clean +core_clean: ## Clean the ingestion-core generated files + rm -rf ingestion-core/src/metadata/generated + rm -rf ingestion-core/build + rm -rf ingestion-core/dist + +.PHONY: core_generate +core_generate: ## Generate the pydantic models from the JSON Schemas to the ingestion-core module + $(MAKE) core_install_dev + mkdir -p ingestion-core/src/metadata/generated + . ingestion-core/venv/bin/activate + datamodel-codegen --input catalog-rest-service/src/main/resources/json --input-file-type jsonschema --output ingestion-core/src/metadata/generated + +.PHONY: core_bump_version_dev +core_bump_version_dev: ## Bump a `dev` version to the ingestion-core module. To be used when schemas are updated + $(MAKE) core_install_dev + cd ingestion-core; \ + . venv/bin/activate; \ + python -m incremental.update metadata --dev + +.PHONY: core_publish +core_publish: ## Install, generate and publish the ingestion-core module to Test PyPI + $(MAKE) core_clean core_generate + cd ingestion-core; \ + . venv/bin/activate; \ + python setup.py install sdist bdist_wheel; \ + twine check dist/*; \ + twine upload -r testpypi dist/* diff --git a/ingestion-core/Makefile b/ingestion-core/Makefile deleted file mode 100644 index 20bbbd1e1bf..00000000000 --- a/ingestion-core/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -install_dev: - python -m pip install ".[dev]" - -generate: - mkdir -p src/metadata/generated - python3 -m pip install --upgrade pip setuptools - make install_dev - datamodel-codegen --input ../catalog-rest-service/src/main/resources/json --input-file-type jsonschema --output src/metadata/generated - -clean: - rm -rf src/metadata/generated - rm -rf build - rm -rf dist - -bump-version-dev: - python -m incremental.update metadata --dev - -publish: clean generate - python setup.py install sdist bdist_wheel - twine check dist/* - twine upload -r testpypi dist/* diff --git a/ingestion/src/metadata/ingestion/source/amundsen.py b/ingestion/src/metadata/ingestion/source/amundsen.py index 24dc8635658..2abce047869 100644 --- a/ingestion/src/metadata/ingestion/source/amundsen.py +++ b/ingestion/src/metadata/ingestion/source/amundsen.py @@ -15,6 +15,8 @@ import uuid from dataclasses import dataclass, field from typing import Iterable, List, Optional +from pydantic import SecretStr + from metadata.config.common import ConfigModel from metadata.generated.schema.api.services.createDatabaseService import ( CreateDatabaseServiceEntityRequest, @@ -41,7 +43,6 @@ from metadata.utils.sql_queries import ( NEO4J_AMUNDSEN_TABLE_QUERY, NEO4J_AMUNDSEN_USER_QUERY, ) -from pydantic import SecretStr logger: logging.Logger = logging.getLogger(__name__) diff --git a/ingestion/src/metadata/ingestion/source/bigquery.py b/ingestion/src/metadata/ingestion/source/bigquery.py index ec7dd74883d..b6014187d96 100644 --- a/ingestion/src/metadata/ingestion/source/bigquery.py +++ b/ingestion/src/metadata/ingestion/source/bigquery.py @@ -9,14 +9,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json +import logging import os -from typing import Optional, Tuple, Any -import json, tempfile, logging +import tempfile +from typing import Any, Optional, Tuple -from metadata.ingestion.ometa.openmetadata_rest import MetadataServerConfig -from metadata.ingestion.source.sql_source import SQLSource -from metadata.ingestion.source.sql_source_common import SQLConnectionConfig -from metadata.utils.column_type_parser import create_sqlalchemy_type from sqlalchemy_bigquery import _types from sqlalchemy_bigquery._struct import STRUCT from sqlalchemy_bigquery._types import ( @@ -24,6 +22,11 @@ from sqlalchemy_bigquery._types import ( _get_transitive_schema_fields, ) +from metadata.ingestion.ometa.openmetadata_rest import MetadataServerConfig +from metadata.ingestion.source.sql_source import SQLSource +from metadata.ingestion.source.sql_source_common import SQLConnectionConfig +from metadata.utils.column_type_parser import create_sqlalchemy_type + GEOGRAPHY = create_sqlalchemy_type("GEOGRAPHY") _types._type_map["GEOGRAPHY"] = GEOGRAPHY diff --git a/ingestion/src/metadata/ingestion/source/redshift_usage.py b/ingestion/src/metadata/ingestion/source/redshift_usage.py index c2c2126be39..380b4cb5dc9 100644 --- a/ingestion/src/metadata/ingestion/source/redshift_usage.py +++ b/ingestion/src/metadata/ingestion/source/redshift_usage.py @@ -24,7 +24,6 @@ from metadata.ingestion.source.sql_alchemy_helper import ( from metadata.utils.helpers import get_start_and_end from metadata.utils.sql_queries import REDSHIFT_SQL_STATEMENT - logger = logging.getLogger(__name__) diff --git a/ingestion/src/metadata/ingestion/source/sql_source.py b/ingestion/src/metadata/ingestion/source/sql_source.py index 2e74f684aba..db34af411bd 100644 --- a/ingestion/src/metadata/ingestion/source/sql_source.py +++ b/ingestion/src/metadata/ingestion/source/sql_source.py @@ -18,6 +18,10 @@ import traceback import uuid from typing import Dict, Iterable, List, Optional, Tuple +from sqlalchemy import create_engine +from sqlalchemy.engine.reflection import Inspector +from sqlalchemy.inspection import inspect + from metadata.generated.schema.entity.data.database import Database from metadata.generated.schema.entity.data.table import ( Column, @@ -41,9 +45,6 @@ from metadata.ingestion.source.sql_source_common import ( ) from metadata.utils.column_type_parser import ColumnTypeParser from metadata.utils.helpers import get_database_service_or_create -from sqlalchemy import create_engine -from sqlalchemy.engine.reflection import Inspector -from sqlalchemy.inspection import inspect logger: logging.Logger = logging.getLogger(__name__) diff --git a/ingestion/src/metadata/utils/column_type_parser.py b/ingestion/src/metadata/utils/column_type_parser.py index 743a8868bc8..bc0288d0ce9 100644 --- a/ingestion/src/metadata/utils/column_type_parser.py +++ b/ingestion/src/metadata/utils/column_type_parser.py @@ -1,6 +1,5 @@ import re -from typing import Any, Dict, Optional, Type -from typing import List, Union +from typing import Any, Dict, List, Optional, Type, Union from sqlalchemy.sql import sqltypes as types from sqlalchemy.types import TypeEngine