From b39b698eb7eed7629d747fdb97b3b36e6069d660 Mon Sep 17 00:00:00 2001
From: Pere Miquel Brull
Date: Sun, 23 Jan 2022 18:47:06 +0100
Subject: [PATCH] Fix #2345 - Make bump & unify (#2359)
* Add installing instructions
* Unify makefile and automate bump version setup
* Update submake call
* Mark targets as phony
* Add help to recipes
* Update core publish
* Do not trigger install from echo
* Run make from root
* Prepare py_format from isort and black
* Run py_format
* Check black and isort formatting
* Update comment msg
---
.github/workflows/java-checkstyle.yml | 5 +-
...etadata-ingestion-core-version-comment.yml | 4 +-
.github/workflows/py-checkstyle.yml | 7 +-
.../workflows/py-ingestion-core-publish.yml | 12 +-
Makefile | 135 +++++++++++++-----
ingestion-core/Makefile | 21 ---
.../src/metadata/ingestion/source/amundsen.py | 3 +-
.../src/metadata/ingestion/source/bigquery.py | 15 +-
.../ingestion/source/redshift_usage.py | 1 -
.../metadata/ingestion/source/sql_source.py | 7 +-
.../src/metadata/utils/column_type_parser.py | 3 +-
11 files changed, 130 insertions(+), 83 deletions(-)
delete mode 100644 ingestion-core/Makefile
diff --git a/.github/workflows/java-checkstyle.yml b/.github/workflows/java-checkstyle.yml
index 61b3f3e6487..d3cb179cfc9 100644
--- a/.github/workflows/java-checkstyle.yml
+++ b/.github/workflows/java-checkstyle.yml
@@ -54,7 +54,10 @@ jobs:
body: |
**The Java checkstyle failed.**
- Please run `mvn googleformatter:format@reformat-sources` in the root of your repository and commit the changes to this PR. You can also use [pre-commit](https://pre-commit.com/) to automate the Java code formatting.
+ Please run `mvn googleformatter:format@reformat-sources` in the root of your repository and commit the changes to this PR.
+ You can also use [pre-commit](https://pre-commit.com/) to automate the Java code formatting.
+
+ You can install the pre-commit hooks with `make install_test precommit_install`.
- name: Java checkstyle failed, check the comment in the PR
if: steps.git.outcome != 'success'
diff --git a/.github/workflows/openmetadata-ingestion-core-version-comment.yml b/.github/workflows/openmetadata-ingestion-core-version-comment.yml
index 314b4e21ae1..d17d12b3cf3 100644
--- a/.github/workflows/openmetadata-ingestion-core-version-comment.yml
+++ b/.github/workflows/openmetadata-ingestion-core-version-comment.yml
@@ -35,6 +35,6 @@ jobs:
with:
issue-number: ${{ github.event.pull_request.number }}
body: |
- **Schema Change Detected. Needs py-ingestion-core version bump**
+ **Schema Change Detected. Needs ingestion-core version bump**
- Please run `make bump-version-dev` in the ingestion-core folder of your repository and commit the changes to _version.py in this PR. Please ignore if this has been handled already.
+ Please run `make core_bump_version_dev` in the project's root and commit the changes to _version.py in this PR. Please ignore if this has been handled already.
diff --git a/.github/workflows/py-checkstyle.yml b/.github/workflows/py-checkstyle.yml
index 4596558a545..48aebf44692 100644
--- a/.github/workflows/py-checkstyle.yml
+++ b/.github/workflows/py-checkstyle.yml
@@ -54,7 +54,7 @@ jobs:
run: |
source env/bin/activate
make generate
- make black_check
+ make py_format_check
- name: Create a comment in the PR with the instructions
if: steps.style.outcome != 'success'
@@ -64,7 +64,10 @@ jobs:
body: |
**The Python checkstyle failed.**
- Please run `make black` in the root of your repository and commit the changes to this PR. You can also use [pre-commit](https://pre-commit.com/) to automate the Python code formatting.
+ Please run `make py_format` in the root of your repository and commit the changes to this PR.
+ You can also use [pre-commit](https://pre-commit.com/) to automate the Python code formatting.
+
+ You can install the pre-commit hooks with `make install_test precommit_install`.
- name: Python checkstyle failed, check the comment in the PR
if: steps.style.outcome != 'success'
diff --git a/.github/workflows/py-ingestion-core-publish.yml b/.github/workflows/py-ingestion-core-publish.yml
index da905e1566c..29b1c8ab1e7 100644
--- a/.github/workflows/py-ingestion-core-publish.yml
+++ b/.github/workflows/py-ingestion-core-publish.yml
@@ -36,17 +36,9 @@ jobs:
- name: Install Ubuntu related dependencies
run: |
sudo apt-get install -y libsasl2-dev unixodbc-dev python3-venv
- - name: Generate models
- working-directory: ingestion-core
- run: |
- python3 -m venv env
- source env/bin/activate
- make generate
- - name: Publish Test PyPi packages
- working-directory: ingestion-core
+ - name: Install, Generate and Publish Test PyPi packages
env:
TWINE_USERNAME: ${{ secrets.TWINE_USERNAME_TEST }}
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD_TEST }}
run: |
- source env/bin/activate
- make publish
+ make core_publish
diff --git a/Makefile b/Makefile
index b71c68b5c52..e1dcfb048a3 100644
--- a/Makefile
+++ b/Makefile
@@ -1,57 +1,77 @@
-.PHONY: env38
+.DEFAULT_GOAL := help
PY_SOURCE ?= ingestion/src
+.PHONY: help
+help:
+ @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[35m%-30s\033[0m %s\n", $$1, $$2}'
+
+.PHONY: env38
env38:
-# virtualenv -p python3.8 env38
- python3.8 -m venv env38
+ python3.8 -m venv env38
+
+.PHONY: clean_env37
clean_env37:
rm -rf env38
-install:
- python3 -m pip install ingestion/
+.PHONY: install
+install: ## Install the ingestion module to the current environment
+ python -m pip install ingestion/
-install_test:
- python3 -m pip install "ingestion[test]/"
+.PHONY: install_test
+install_test: ## Install the ingestion module with test dependencies
+ python -m pip install "ingestion[test]/"
-install_dev:
- python3 -m pip install "ingestion[dev]/"
+.PHONY: install_dev
+install_dev: ## Install the ingestion module with dev dependencies
+ python -m pip install "ingestion[dev]/"
-precommit_install:
+.PHONY: precommit_install
+precommit_install: ## Install the project's precommit hooks from .pre-commit-config.yaml
@echo "Installing pre-commit hooks"
- @echo "Make sure to first run `make install_test`"
+ @echo "Make sure to run install_test first"
pre-commit install
-isort:
- isort $(PY_SOURCE) --skip $(PY_SOURCE)/metadata/generated --profile black --multi-line 3
-
-lint:
+## Python Checkstyle
+.PHONY: lint
+lint: ## Run pylint on the Python sources to analyze the codebase
find $(PY_SOURCE) -path $(PY_SOURCE)/metadata/generated -prune -false -o -type f -name "*.py" | xargs pylint
-black:
+.PHONY: py_format
+py_format: ## Run black and isort to format the Python codebase
+ isort $(PY_SOURCE) --skip $(PY_SOURCE)/metadata/generated --profile black --multi-line 3
black $(PY_SOURCE) --exclude $(PY_SOURCE)/metadata/generated
-black_check:
+.PHONY: py_format_check
+py_format_check: ## Check if Python sources are correctly formatted
black --check --diff $(PY_SOURCE) --exclude $(PY_SOURCE)/metadata/generated
+ isort --check-only $(PY_SOURCE) --skip $(PY_SOURCE)/metadata/generated --profile black --multi-line 3
-generate:
+## Ingestion models generation
+.PHONY: generate
+generate: ## Generate the pydantic models from the JSON Schemas to the ingestion module
@echo "Running Datamodel Code Generator"
@echo "Make sure to first run the install_dev recipe"
- datamodel-codegen --input catalog-rest-service/src/main/resources/json --input-file-type jsonschema --output ingestion/src/metadata/generated
- make install
+ datamodel-codegen --input catalog-rest-service/src/main/resources/json --input-file-type jsonschema --output ingestion/src/metadata/generated
+ $(MAKE) install
-run_ometa_integration_tests:
+## Ingestion tests & QA
+.PHONY: run_ometa_integration_tests
+run_ometa_integration_tests: ## Run Python integration tests
coverage run -m pytest -c ingestion/setup.cfg --doctest-modules --junitxml=ingestion/junit/test-results-integration.xml --override-ini=testpaths="ingestion/tests/integration/ometa ingestion/tests/integration/stage"
-unit_ingestion:
+.PHONY: unit_ingestion
+unit_ingestion: ## Run Python unit tests
coverage run -m pytest -c ingestion/setup.cfg -s --doctest-modules --junitxml=ingestion/junit/test-results-unit.xml --override-ini=testpaths="ingestion/tests/unit"
-coverage:
+.PHONY: coverage
+coverage: ## Run all Python tests and generate the coverage report
coverage erase
- make unit_ingestion
- make run_ometa_integration_tests
+ $(MAKE) unit_ingestion
+ $(MAKE) run_ometa_integration_tests
coverage xml -i -o ingestion/coverage.xml
-sonar_ingestion:
+.PHONY: sonar_ingestion
+sonar_ingestion: ## Run the Sonar analysis based on the tests results and push it to SonarCloud
docker run \
--rm \
-e SONAR_HOST_URL="https://sonarcloud.io" \
@@ -60,27 +80,74 @@ sonar_ingestion:
sonarsource/sonar-scanner-cli \
-Dproject.settings=ingestion/sonar-project.properties
-publish:
- make install_dev generate
+## Ingestion publish
+.PHONY: publish
+publish: ## Publish the ingestion module to PyPI
+ $(MAKE) install_dev generate
cd ingestion; \
python setup.py install sdist bdist_wheel; \
twine check dist/*; \
twine upload dist/*
-build_docker_base:
- make install_dev generate
+## Docker operators
+.PHONY: build_docker_base
+build_docker_base: ## Build the base Docker image for the Ingestion Framework Sources
+ $(MAKE) install_dev generate
docker build -f ingestion/connectors/Dockerfile-base ingestion/ -t openmetadata/ingestion-connector-base
-build_docker_connectors:
+.PHONY: build_docker_connectors
+build_docker_connectors: ## Build all Ingestion Framework Sources Images to be used as Docker Operators in Airflow
@echo "Building Docker connectors. Make sure to run build_docker_base first"
python ingestion/connectors/docker-cli.py build
-push_docker_connectors:
+.PHONY: push_docker_connectors
+push_docker_connectors: ## Push all Sources Docker Images to DockerHub
@echo "Pushing Docker connectors. Make sure to run build_docker_connectors first"
python ingestion/connectors/docker-cli.py push
-yarn_install_cache:
+## Yarn
+.PHONY: yarn_install_cache
+yarn_install_cache: ## Use Yarn to install UI dependencies
cd openmetadata-ui/src/main/resources/ui && yarn install --frozen-lockfile
-yarn_start_dev_ui:
+.PHONY: yarn_start_dev_ui
+yarn_start_dev_ui: ## Run the UI locally with Yarn
cd openmetadata-ui/src/main/resources/ui && yarn start
+
+## Ingestion Core
+.PHONY: core_install_dev
+core_install_dev: ## Prepare a virtualenv for the ingestion-core module
+ cd ingestion-core; \
+ rm -rf venv; \
+ python -m virtualenv venv; \
+ . venv/bin/activate; \
+ python -m pip install ".[dev]"
+
+.PHONY: core_clean
+core_clean: ## Clean the ingestion-core generated files
+ rm -rf ingestion-core/src/metadata/generated
+ rm -rf ingestion-core/build
+ rm -rf ingestion-core/dist
+
+.PHONY: core_generate
+core_generate: ## Generate the pydantic models from the JSON Schemas to the ingestion-core module
+ $(MAKE) core_install_dev
+ mkdir -p ingestion-core/src/metadata/generated
+ . ingestion-core/venv/bin/activate; \
+ datamodel-codegen --input catalog-rest-service/src/main/resources/json --input-file-type jsonschema --output ingestion-core/src/metadata/generated
+
+.PHONY: core_bump_version_dev
+core_bump_version_dev: ## Bump a `dev` version to the ingestion-core module. To be used when schemas are updated
+ $(MAKE) core_install_dev
+ cd ingestion-core; \
+ . venv/bin/activate; \
+ python -m incremental.update metadata --dev
+
+.PHONY: core_publish
+core_publish: ## Install, generate and publish the ingestion-core module to Test PyPI
+ $(MAKE) core_clean core_generate
+ cd ingestion-core; \
+ . venv/bin/activate; \
+ python setup.py install sdist bdist_wheel; \
+ twine check dist/*; \
+ twine upload -r testpypi dist/*
diff --git a/ingestion-core/Makefile b/ingestion-core/Makefile
deleted file mode 100644
index 20bbbd1e1bf..00000000000
--- a/ingestion-core/Makefile
+++ /dev/null
@@ -1,21 +0,0 @@
-install_dev:
- python -m pip install ".[dev]"
-
-generate:
- mkdir -p src/metadata/generated
- python3 -m pip install --upgrade pip setuptools
- make install_dev
- datamodel-codegen --input ../catalog-rest-service/src/main/resources/json --input-file-type jsonschema --output src/metadata/generated
-
-clean:
- rm -rf src/metadata/generated
- rm -rf build
- rm -rf dist
-
-bump-version-dev:
- python -m incremental.update metadata --dev
-
-publish: clean generate
- python setup.py install sdist bdist_wheel
- twine check dist/*
- twine upload -r testpypi dist/*
diff --git a/ingestion/src/metadata/ingestion/source/amundsen.py b/ingestion/src/metadata/ingestion/source/amundsen.py
index 24dc8635658..2abce047869 100644
--- a/ingestion/src/metadata/ingestion/source/amundsen.py
+++ b/ingestion/src/metadata/ingestion/source/amundsen.py
@@ -15,6 +15,8 @@ import uuid
from dataclasses import dataclass, field
from typing import Iterable, List, Optional
+from pydantic import SecretStr
+
from metadata.config.common import ConfigModel
from metadata.generated.schema.api.services.createDatabaseService import (
CreateDatabaseServiceEntityRequest,
@@ -41,7 +43,6 @@ from metadata.utils.sql_queries import (
NEO4J_AMUNDSEN_TABLE_QUERY,
NEO4J_AMUNDSEN_USER_QUERY,
)
-from pydantic import SecretStr
logger: logging.Logger = logging.getLogger(__name__)
diff --git a/ingestion/src/metadata/ingestion/source/bigquery.py b/ingestion/src/metadata/ingestion/source/bigquery.py
index ec7dd74883d..b6014187d96 100644
--- a/ingestion/src/metadata/ingestion/source/bigquery.py
+++ b/ingestion/src/metadata/ingestion/source/bigquery.py
@@ -9,14 +9,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import json
+import logging
import os
-from typing import Optional, Tuple, Any
-import json, tempfile, logging
+import tempfile
+from typing import Any, Optional, Tuple
-from metadata.ingestion.ometa.openmetadata_rest import MetadataServerConfig
-from metadata.ingestion.source.sql_source import SQLSource
-from metadata.ingestion.source.sql_source_common import SQLConnectionConfig
-from metadata.utils.column_type_parser import create_sqlalchemy_type
from sqlalchemy_bigquery import _types
from sqlalchemy_bigquery._struct import STRUCT
from sqlalchemy_bigquery._types import (
@@ -24,6 +22,11 @@ from sqlalchemy_bigquery._types import (
_get_transitive_schema_fields,
)
+from metadata.ingestion.ometa.openmetadata_rest import MetadataServerConfig
+from metadata.ingestion.source.sql_source import SQLSource
+from metadata.ingestion.source.sql_source_common import SQLConnectionConfig
+from metadata.utils.column_type_parser import create_sqlalchemy_type
+
GEOGRAPHY = create_sqlalchemy_type("GEOGRAPHY")
_types._type_map["GEOGRAPHY"] = GEOGRAPHY
diff --git a/ingestion/src/metadata/ingestion/source/redshift_usage.py b/ingestion/src/metadata/ingestion/source/redshift_usage.py
index c2c2126be39..380b4cb5dc9 100644
--- a/ingestion/src/metadata/ingestion/source/redshift_usage.py
+++ b/ingestion/src/metadata/ingestion/source/redshift_usage.py
@@ -24,7 +24,6 @@ from metadata.ingestion.source.sql_alchemy_helper import (
from metadata.utils.helpers import get_start_and_end
from metadata.utils.sql_queries import REDSHIFT_SQL_STATEMENT
-
logger = logging.getLogger(__name__)
diff --git a/ingestion/src/metadata/ingestion/source/sql_source.py b/ingestion/src/metadata/ingestion/source/sql_source.py
index 2e74f684aba..db34af411bd 100644
--- a/ingestion/src/metadata/ingestion/source/sql_source.py
+++ b/ingestion/src/metadata/ingestion/source/sql_source.py
@@ -18,6 +18,10 @@ import traceback
import uuid
from typing import Dict, Iterable, List, Optional, Tuple
+from sqlalchemy import create_engine
+from sqlalchemy.engine.reflection import Inspector
+from sqlalchemy.inspection import inspect
+
from metadata.generated.schema.entity.data.database import Database
from metadata.generated.schema.entity.data.table import (
Column,
@@ -41,9 +45,6 @@ from metadata.ingestion.source.sql_source_common import (
)
from metadata.utils.column_type_parser import ColumnTypeParser
from metadata.utils.helpers import get_database_service_or_create
-from sqlalchemy import create_engine
-from sqlalchemy.engine.reflection import Inspector
-from sqlalchemy.inspection import inspect
logger: logging.Logger = logging.getLogger(__name__)
diff --git a/ingestion/src/metadata/utils/column_type_parser.py b/ingestion/src/metadata/utils/column_type_parser.py
index 743a8868bc8..bc0288d0ce9 100644
--- a/ingestion/src/metadata/utils/column_type_parser.py
+++ b/ingestion/src/metadata/utils/column_type_parser.py
@@ -1,6 +1,5 @@
import re
-from typing import Any, Dict, Optional, Type
-from typing import List, Union
+from typing import Any, Dict, List, Optional, Type, Union
from sqlalchemy.sql import sqltypes as types
from sqlalchemy.types import TypeEngine