# Running `make` without arguments prints the list of documented targets.
.DEFAULT_GOAL := help

# Root of the Python sources used by the lint and format recipes.
# `?=` lets callers override it: `make lint PY_SOURCE=path/to/src`.
PY_SOURCE ?= ingestion/src
# Print every target that carries a trailing `## description`, sorted by name.
# `$$` passes a literal `$` through make to grep/awk. The target pattern also
# accepts digits and optional spaces before the colon.
.PHONY: help
help:
	@grep -E '^[a-zA-Z0-9_-]+ *:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = " *:.*?## "}; {printf "\033[35m%-30s\033[0m %s\n", $$1, $$2}'
# Create a Python 3.8 virtual environment in ./env38.
.PHONY: env38
env38:
	python3.8 -m venv env38

# Remove the virtual environment created by `env38`.
.PHONY: clean_env38
clean_env38:
	rm -rf env38

# Backward-compatible alias: the historical target name says 37 although its
# recipe removes env38.
.PHONY: clean_env37
clean_env37: clean_env38
# pip-install the package located in ./ingestion into the active interpreter.
.PHONY: install
install:  ## Install the ingestion module to the current environment
	python -m pip install ingestion/
# pip-install the package located in ./openmetadata-airflow-apis.
.PHONY: install_apis
install_apis:  ## Install the REST APIs module to the current environment
	python -m pip install openmetadata-airflow-apis/
# Same as `install`, requesting the `test` extra. The argument is quoted so the
# shell does not treat the square brackets as a glob.
.PHONY: install_test
install_test:  ## Install the ingestion module with test dependencies
	python -m pip install "ingestion[test]/"
# Same as `install`, requesting the `dev` extra (quoted to protect the brackets).
.PHONY: install_dev
install_dev:  ## Install the ingestion module with dev dependencies
	python -m pip install "ingestion[dev]/"
# Same as `install`, requesting the `all` extra (quoted to protect the brackets).
.PHONY: install_all
install_all:  ## Install the ingestion module with all dependencies
	python -m pip install "ingestion[all]/"
# Register the git hooks with `pre-commit install`. The echoed reminder points
# at `install_test` as the recipe to run beforehand.
.PHONY: precommit_install
precommit_install:  ## Install the project's precommit hooks from .pre-commit-config.yaml
	@echo "Installing pre-commit hooks"
	@echo "Make sure to first run install_test first"
	pre-commit install
# Collect every *.py file under $(PY_SOURCE), pruning the generated models
# directory, and hand the list to pylint. pylint is additionally told to ignore
# $(PY_SOURCE)/metadata_server/.
.PHONY: lint
lint:  ## Run pylint on the Python sources to analyze the codebase
	find $(PY_SOURCE) -path $(PY_SOURCE)/metadata/generated -prune -false -o -type f -name "*.py" | xargs pylint --ignore-paths=$(PY_SOURCE)/metadata_server/
# Rewrite the sources in place: pycln first, then isort (black profile), then
# black. The generated models under $(PY_SOURCE)/metadata/generated are excluded
# from every tool.
.PHONY: py_format
py_format:  ## Run black and isort to format the Python codebase
	pycln ingestion/ openmetadata-airflow-apis/ --extend-exclude $(PY_SOURCE)/metadata/generated
	isort ingestion/ openmetadata-airflow-apis/ --skip $(PY_SOURCE)/metadata/generated --skip ingestion/env --skip ingestion/build --skip openmetadata-airflow-apis/build --profile black --multi-line 3
	black ingestion/ openmetadata-airflow-apis/ --extend-exclude $(PY_SOURCE)/metadata/generated
# Non-mutating counterpart of `py_format`: each tool runs in its diff/check mode
# so nothing is rewritten on disk.
.PHONY: py_format_check
py_format_check:  ## Check if Python sources are correctly formatted
	pycln ingestion/ openmetadata-airflow-apis/ --diff --extend-exclude $(PY_SOURCE)/metadata/generated
	isort --check-only ingestion/ openmetadata-airflow-apis/ --skip $(PY_SOURCE)/metadata/generated --skip ingestion/build --profile black --multi-line 3
	black --check --diff ingestion/ openmetadata-airflow-apis/ --extend-exclude $(PY_SOURCE)/metadata/generated
## Ingestion models generation
# Steps: create the output directory, run datamodel-codegen over the JSON
# Schemas, generate the ANTLR parser (`py_antlr`), then reinstall the ingestion
# package (`install`). Sub-makes use $(MAKE) so flags such as -n propagate.
.PHONY: generate
generate:  ## Generate the pydantic models from the JSON Schemas to the ingestion module
	@echo "Running Datamodel Code Generator"
	@echo "Make sure to first run the install_dev recipe"
	mkdir -p ingestion/src/metadata/generated
	datamodel-codegen --input catalog-rest-service/src/main/resources/json/schema --input-file-type jsonschema --output ingestion/src/metadata/generated/schema --set-default-enum-member
	$(MAKE) py_antlr
	$(MAKE) install
## Ingestion tests & QA
# Run the ometa, stage and orm_profiler integration suites under coverage.
# `-a` appends to existing coverage data instead of overwriting it; the JUnit
# report is written to ingestion/junit/.
.PHONY: run_ometa_integration_tests
run_ometa_integration_tests:  ## Run Python integration tests
	coverage run --rcfile ingestion/.coveragerc -a --branch -m pytest -c ingestion/setup.cfg --junitxml=ingestion/junit/test-results-integration.xml ingestion/tests/integration/ometa ingestion/tests/integration/stage ingestion/tests/integration/orm_profiler
# Run the unit tests under coverage (appending with `-a`), skipping
# ingestion/tests/unit/source; the JUnit report is written to ingestion/junit/.
.PHONY: unit_ingestion
unit_ingestion:  ## Run Python unit tests
	coverage run --rcfile ingestion/.coveragerc -a --branch -m pytest -c ingestion/setup.cfg --junitxml=ingestion/junit/test-results-unit.xml --ignore=ingestion/tests/unit/source ingestion/tests/unit
# Reset the coverage data, run unit then integration tests (both append to the
# same data), and print the report. `|| true` keeps a non-zero exit of
# `coverage report` from failing the target.
.PHONY: run_python_tests
run_python_tests:  ## Run all Python tests with coverage
	coverage erase
	$(MAKE) unit_ingestion
	$(MAKE) run_ometa_integration_tests
	coverage report --rcfile ingestion/.coveragerc || true
# Run the whole test suite, export coverage as XML, then write a CI copy in
# which the site-packages path (relative to the working directory) is replaced
# by `src`. The $(shell ...) call prints that path with `/` escaped so it can be
# used as a sed pattern.
# The CI copy is written with `>`: appending with `>>` would leave several XML
# documents in one file after repeated runs.
.PHONY: coverage
coverage:  ## Run all Python tests and generate the coverage XML report
	$(MAKE) run_python_tests
	coverage xml --rcfile ingestion/.coveragerc -o ingestion/coverage.xml
	sed -e 's/$(shell python -c "import site; import os; from pathlib import Path; print(os.path.relpath(site.getsitepackages()[0], str(Path.cwd())).replace('/','\/'))")/src/g' ingestion/coverage.xml > ingestion/ci-coverage.xml
# Run the sonar-scanner-cli container with ./ingestion mounted at /usr/src.
# The login token is read from the make variable `token`, e.g.
# `make sonar_ingestion token=<value>`.
.PHONY: sonar_ingestion
sonar_ingestion:  ## Run the Sonar analysis based on the tests results and push it to SonarCloud
	docker run \
		--rm \
		-e SONAR_HOST_URL="https://sonarcloud.io" \
		-e SONAR_LOGIN=$(token) \
		-v ${PWD}/ingestion:/usr/src \
		sonarsource/sonar-scanner-cli \
		-Dproject.settings=sonar-project.properties
# Reset coverage data, run the openmetadata-airflow-apis tests under coverage
# and print the report. Unlike `run_python_tests`, a failing report fails the
# target.
.PHONY: run_apis_tests
run_apis_tests:  ## Run the openmetadata airflow apis tests
	coverage erase
	coverage run --rcfile openmetadata-airflow-apis/.coveragerc -a --branch -m pytest --junitxml=openmetadata-airflow-apis/junit/test-results.xml openmetadata-airflow-apis/tests
	coverage report --rcfile openmetadata-airflow-apis/.coveragerc
# Run the APIs tests, export coverage as XML, then write a CI copy with the
# site-packages prefix (relative to the working directory) and the slash after
# it stripped from the file.
# The CI copy is written with `>`: appending with `>>` would leave several XML
# documents in one file after repeated runs.
.PHONY: coverage_apis
coverage_apis:  ## Run the python tests on openmetadata-airflow-apis
	$(MAKE) run_apis_tests
	coverage xml --rcfile openmetadata-airflow-apis/.coveragerc -o openmetadata-airflow-apis/coverage.xml
	sed -e 's/$(shell python -c "import site; import os; from pathlib import Path; print(os.path.relpath(site.getsitepackages()[0], str(Path.cwd())).replace('/','\/'))")\///g' openmetadata-airflow-apis/coverage.xml > openmetadata-airflow-apis/ci-coverage.xml
## Ingestion publish
# Install dev dependencies and regenerate the models, then build and upload
# from ./ingestion. The steps are chained with `&&` so a failed build or a
# failed `twine check` never reaches `twine upload`.
.PHONY: publish
publish:  ## Publish the ingestion module to PyPI
	$(MAKE) install_dev generate
	cd ingestion && \
		python setup.py install sdist bdist_wheel && \
		twine check dist/* && \
		twine upload dist/*
## Docker operators
# Regenerate the models, then build ingestion/connectors/Dockerfile-base with
# ./ingestion as the build context.
.PHONY: build_docker_base
build_docker_base:  ## Build the base Docker image for the Ingestion Framework Sources
	$(MAKE) install_dev generate
	docker build -f ingestion/connectors/Dockerfile-base ingestion/ -t openmetadata/ingestion-connector-base
# Delegate to the docker-cli.py helper with the `build` subcommand. The base
# image is not built here; the echoed reminder names `build_docker_base`.
.PHONY: build_docker_connectors
build_docker_connectors:  ## Build all Ingestion Framework Sources Images to be used as Docker Operators in Airflow
	@echo "Building Docker connectors. Make sure to run build_docker_base first"
	python ingestion/connectors/docker-cli.py build
# Delegate to the docker-cli.py helper with the `push` subcommand. Images are
# not built here; the echoed reminder names `build_docker_connectors`.
.PHONY: push_docker_connectors
push_docker_connectors:  ## Push all Sources Docker Images to DockerHub
	@echo "Pushing Docker connectors. Make sure to run build_docker_connectors first"
	python ingestion/connectors/docker-cli.py push
## Yarn
# `cd` and `yarn` share one recipe line because each recipe line runs in its
# own shell. `--frozen-lockfile` is passed to `yarn install`.
.PHONY: yarn_install_cache
yarn_install_cache:  ## Use Yarn to install UI dependencies
	cd openmetadata-ui/src/main/resources/ui && yarn install --frozen-lockfile
# Run `yarn start` from the UI directory (single recipe line so the `cd`
# applies to the yarn invocation).
.PHONY: yarn_start_dev_ui
yarn_start_dev_ui:  ## Run the UI locally with Yarn
	cd openmetadata-ui/src/main/resources/ui && yarn start
## Ingestion Core
# Recreate ingestion-core/venv from scratch and install the module with its
# `dev` extra into it. The steps are chained with `&&`: if `cd` fails, the
# `rm -rf venv` must not run in the wrong directory, and pip must not install
# into whatever interpreter is active when the venv could not be created.
.PHONY: core_install_dev
core_install_dev:  ## Prepare a venv for the ingestion-core module
	cd ingestion-core && \
		rm -rf venv && \
		python3 -m venv venv && \
		. venv/bin/activate && \
		python3 -m pip install ".[dev]"
# Delete the generated models and the build/dist directories of ingestion-core.
.PHONY: core_clean
core_clean:  ## Clean the ingestion-core generated files
	rm -rf ingestion-core/src/metadata/generated
	rm -rf ingestion-core/build
	rm -rf ingestion-core/dist
# Rebuild the ingestion-core venv, run datamodel-codegen from inside it, then
# generate the ANTLR parser. The venv activation and the codegen call share one
# shell (continued line) and are chained with `&&` so codegen only runs when
# the activation succeeded.
.PHONY: core_generate
core_generate:  ## Generate the pydantic models from the JSON Schemas to the ingestion-core module
	$(MAKE) core_install_dev
	mkdir -p ingestion-core/src/metadata/generated && \
		. ingestion-core/venv/bin/activate && \
		datamodel-codegen --input catalog-rest-service/src/main/resources/json/schema --input-file-type jsonschema --output ingestion-core/src/metadata/generated/schema
	$(MAKE) core_py_antlr
# Rebuild the ingestion-core venv and run `incremental.update metadata --dev`
# inside it. Chained with `&&` so the update never runs outside the venv.
.PHONY: core_bump_version_dev
core_bump_version_dev:  ## Bump a `dev` version to the ingestion-core module. To be used when schemas are updated
	$(MAKE) core_install_dev
	cd ingestion-core && \
		. venv/bin/activate && \
		python -m incremental.update metadata --dev
# Clean and regenerate ingestion-core, then build and upload it to the
# `testpypi` repository from inside the venv. Chained with `&&` so a failed
# build or a failed `twine check` never reaches `twine upload`.
.PHONY: core_publish
core_publish:  ## Install, generate and publish the ingestion-core module to Test PyPI
	$(MAKE) core_clean core_generate
	cd ingestion-core && \
		. venv/bin/activate && \
		python setup.py install sdist bdist_wheel && \
		twine check dist/* && \
		twine upload -r testpypi dist/*
# Run antlr4 on Fqn.g4 with the Python3 target, writing into the ingestion-core
# generated tree. The grammar is referenced through ${PWD}, i.e. an absolute
# path.
.PHONY: core_py_antlr
core_py_antlr:  ## Generate the Python core code for parsing FQNs under ingestion-core
	antlr4 -Dlanguage=Python3 -o ingestion-core/src/metadata/generated/antlr ${PWD}/catalog-rest-service/src/main/antlr4/org/openmetadata/catalog/Fqn.g4
# Run antlr4 on Fqn.g4 with the Python3 target, writing into the ingestion
# generated tree. The grammar is referenced through ${PWD}, i.e. an absolute
# path.
.PHONY: py_antlr
py_antlr:  ## Generate the Python code for parsing FQNs
	antlr4 -Dlanguage=Python3 -o ingestion/src/metadata/generated/antlr ${PWD}/catalog-rest-service/src/main/antlr4/org/openmetadata/catalog/Fqn.g4
# Build /usr/local/bin/antlr4 as a self-executing jar: a `java -jar` shebang
# line followed by the downloaded antlr 4.9.2 jar, then mark it executable.
# Writes under /usr/local/bin, so it needs write permission there.
.PHONY: install_antlr_cli
install_antlr_cli:  ## Install antlr CLI locally
	echo '#!/usr/bin/java -jar' > /usr/local/bin/antlr4
	curl https://www.antlr.org/download/antlr-4.9.2-complete.jar >> /usr/local/bin/antlr4
	chmod 755 /usr/local/bin/antlr4
# Start the published docs image on port 3000 with the local content, images
# and ingestion directories mounted as volumes.
.PHONY: docker-docs
docker-docs:  ## Runs the OM docs in docker passing openmetadata-docs as volume for content and images
	docker run --name openmetadata-docs -p 3000:3000 -v ${PWD}/openmetadata-docs/content:/docs/content/ -v ${PWD}/openmetadata-docs/images:/docs/public/images -v ${PWD}/openmetadata-docs/ingestion:/docs/public/ingestion openmetadata/docs:latest
# Same volumes as `docker-docs`, but the entrypoint is replaced by /bin/sh so
# the container runs `npm run export` instead of serving the site.
.PHONY: docker-docs-validate
docker-docs-validate:  ## Runs `npm run export` on the OM docs in docker passing openmetadata-docs as volume for content and images
	docker run --entrypoint '/bin/sh' -v ${PWD}/openmetadata-docs/content:/docs/content/ -v ${PWD}/openmetadata-docs/images:/docs/public/images -v ${PWD}/openmetadata-docs/ingestion:/docs/public/ingestion openmetadata/docs:latest -c 'npm run export'
# Same as `docker-docs`, but runs the `openmetadata-docs:local` image instead
# of `openmetadata/docs:latest`.
.PHONY: docker-docs-local
docker-docs-local:  ## Runs the OM docs in docker with a local image
	docker run --name openmetadata-docs -p 3000:3000 -v ${PWD}/openmetadata-docs/content:/docs/content/ -v ${PWD}/openmetadata-docs/images:/docs/public/images -v ${PWD}/openmetadata-docs/ingestion:/docs/public/ingestion openmetadata-docs:local
## SNYK
# Flags shared by the snyk invocations below.
SNYK_ARGS := --severity-threshold=high
# Scan the ingestion container, its frozen pip dependencies and its code.
# Reports go to security-report/, which must already exist (`snyk-report`
# creates it). Each snyk call ends in `|| true` so a non-zero snyk exit does
# not abort the run; the `cd` is still checked so scans never run in the wrong
# directory.
.PHONY: snyk-ingestion-report
snyk-ingestion-report:  ## Uses Snyk CLI to validate the ingestion code and container. Don't stop the execution
	@echo "Validating Ingestion container..."
	docker build -t openmetadata-ingestion:scan -f ingestion/Dockerfile .
	snyk container test openmetadata-ingestion:scan --file=ingestion/Dockerfile $(SNYK_ARGS) --json > security-report/ingestion-docker-scan.json || true
	@echo "Validating ALL ingestion dependencies. Make sure the venv is activated."
	cd ingestion && pip freeze > scan-requirements.txt
	cd ingestion && { snyk test --file=scan-requirements.txt --package-manager=pip --command=python3 $(SNYK_ARGS) --json > ../security-report/ingestion-dep-scan.json || true; }
	cd ingestion && { snyk code test $(SNYK_ARGS) --json > ../security-report/ingestion-code-scan.json || true; }
# Run `snyk code test` inside openmetadata-airflow-apis, writing the JSON
# report to security-report/. `|| true` keeps a non-zero snyk exit from
# failing the target.
.PHONY: snyk-airflow-apis-report
snyk-airflow-apis-report:  ## Uses Snyk CLI to validate the airflow apis code. Don't stop the execution
	@echo "Validating airflow dependencies. Make sure the venv is activated."
	cd openmetadata-airflow-apis && { snyk code test $(SNYK_ARGS) --json > ../security-report/airflow-apis-code-scan.json || true; }
# Scan the server container, the project dependencies and the code.
# The .PHONY line now names this target (it previously declared
# `snyk-catalog-report`, which no rule defines), and the code scan takes its
# threshold from $(SNYK_ARGS) like the other scans.
.PHONY: snyk-server-report
snyk-server-report:  ## Uses Snyk CLI to validate the catalog code and container. Don't stop the execution
	@echo "Validating catalog container... Make sure the code is built and available under openmetadata-dist"
	docker build -t openmetadata-server:scan -f docker/local-metadata/Dockerfile .
	snyk container test openmetadata-server:scan --file=docker/local-metadata/Dockerfile $(SNYK_ARGS) --json > security-report/server-docker-scan.json || true
	snyk test --all-projects $(SNYK_ARGS) --json > security-report/server-dep-scan.json || true
	snyk code test --all-projects $(SNYK_ARGS) --json > security-report/server-code-scan.json || true
# Scan the UI yarn.lock, writing the JSON report to security-report/.
# `|| true` keeps a non-zero snyk exit from failing the target.
.PHONY: snyk-ui-report
snyk-ui-report:  ## Uses Snyk CLI to validate the UI dependencies. Don't stop the execution
	snyk test --file=openmetadata-ui/src/main/resources/ui/yarn.lock $(SNYK_ARGS) --json > security-report/ui-dep-scan.json || true
# Scan the MySQL, Postgres and Elasticsearch images. This target is not part of
# the `snyk-report` sequence. `|| true` keeps a non-zero snyk exit from failing
# the target.
.PHONY: snyk-dependencies-report
snyk-dependencies-report:  ## Uses Snyk CLI to validate the project dependencies: MySQL, Postgres and ES. Only local testing.
	@echo "Validating dependencies images..."
	snyk container test mysql/mysql-server:latest $(SNYK_ARGS) --json > security-report/mysql-scan.json || true
	snyk container test postgres:latest $(SNYK_ARGS) --json > security-report/postgres-scan.json || true
	snyk container test docker.elastic.co/elasticsearch/elasticsearch:7.10.2 $(SNYK_ARGS) --json > security-report/es-scan.json || true
# Recreate security-report/ from scratch, run the ingestion, airflow-apis,
# server and UI scans in that order, then convert the JSON results to HTML.
.PHONY: snyk-report
snyk-report:  ## Uses Snyk CLI to run a security scan of the different pieces of the code
	@echo "To run this locally, make sure to install and authenticate using the Snyk CLI: https://docs.snyk.io/snyk-cli/install-the-snyk-cli"
	rm -rf security-report
	mkdir -p security-report
	$(MAKE) snyk-ingestion-report
	$(MAKE) snyk-airflow-apis-report
	$(MAKE) snyk-server-report
	$(MAKE) snyk-ui-report
	$(MAKE) export-snyk-html-report
# Install snyk-to-html globally, then convert every file listed in
# security-report/ to `<file>.html` next to it. `%` is the xargs placeholder
# for the current file name.
.PHONY: export-snyk-html-report
export-snyk-html-report:  ## export json file from security-report/ to HTML
	@echo "Reading all results"
	npm install snyk-to-html -g
	ls security-report | xargs -I % snyk-to-html -i security-report/% -o security-report/%.html