.DEFAULT_GOAL := help
PY_SOURCE ?= ingestion/src
.PHONY: help
help:
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[35m%-30s\033[0m %s\n", $$1, $$2}'
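# Illustrative example (not exhaustive): `make help` lists every target that carries a `## `
# description via the grep/awk pipeline above, e.g.
#   $ make help
#   install                        Install the ingestion module to the current environment
#   lint                           Run pylint on the Python sources to analyze the codebase
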
.PHONY: env38
env38:  ## Create a Python 3.8 virtual environment under env38
	python3.8 -m venv env38

.PHONY: clean_env38
clean_env38:  ## Remove the env38 virtual environment
	rm -rf env38

.PHONY: install
install:  ## Install the ingestion module to the current environment
	python -m pip install ingestion/

.PHONY: install_apis
install_apis:  ## Install the REST APIs module to the current environment
	python -m pip install openmetadata-airflow-apis/

.PHONY: install_test
install_test:  ## Install the ingestion module with test dependencies
	python -m pip install "ingestion[test]/"

.PHONY: install_dev
install_dev:  ## Install the ingestion module with dev dependencies
	python -m pip install "ingestion[dev]/"

.PHONY: install_all
install_all:  ## Install the ingestion module with all dependencies
	python -m pip install "ingestion[all]/"

.PHONY: precommit_install
precommit_install:  ## Install the project's precommit hooks from .pre-commit-config.yaml
	@echo "Installing pre-commit hooks"
	@echo "Make sure to run install_test first"
	pre-commit install

.PHONY: lint
lint:  ## Run pylint on the Python sources to analyze the codebase
	PYTHONPATH="${PYTHONPATH}:./ingestion/plugins" find $(PY_SOURCE) -path $(PY_SOURCE)/metadata/generated -prune -false -o -type f -name "*.py" | xargs pylint
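# Note (descriptive, not exhaustive): the find expression prunes $(PY_SOURCE)/metadata/generated
# (auto-generated code) and pipes the remaining *.py files to pylint; PYTHONPATH is extended with
# ./ingestion/plugins so that any local plugins under that directory can be resolved.
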
.PHONY: py_format
py_format:  ## Run pycln, isort and black to format the Python codebase
	pycln ingestion/ openmetadata-airflow-apis/ --extend-exclude $(PY_SOURCE)/metadata/generated
	isort ingestion/ openmetadata-airflow-apis/ --skip $(PY_SOURCE)/metadata/generated --skip ingestion/env --skip ingestion/build --skip openmetadata-airflow-apis/build --profile black --multi-line 3
	black ingestion/ openmetadata-airflow-apis/ --extend-exclude $(PY_SOURCE)/metadata/generated

.PHONY: py_format_check
py_format_check:  ## Check if Python sources are correctly formatted
	pycln ingestion/ openmetadata-airflow-apis/ --diff --extend-exclude $(PY_SOURCE)/metadata/generated
	isort --check-only ingestion/ openmetadata-airflow-apis/ --skip $(PY_SOURCE)/metadata/generated --skip ingestion/build --profile black --multi-line 3
	black --check --diff ingestion/ openmetadata-airflow-apis/ --extend-exclude $(PY_SOURCE)/metadata/generated
	PYTHONPATH="${PYTHONPATH}:./ingestion/plugins" pylint --fail-under=10 $(PY_SOURCE)/metadata --ignore-paths $(PY_SOURCE)/metadata/generated || (echo "PyLint error code $$?"; exit 1)
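# Note: --fail-under=10 makes pylint exit non-zero for any score below 10/10, so this check only
# passes on a fully clean codebase (generated code is excluded via --ignore-paths).
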
## Ingestion models generation
.PHONY: generate
generate:  ## Generate the pydantic models from the JSON Schemas to the ingestion module
	@echo "Running Datamodel Code Generator"
	@echo "Make sure to first run the install_dev recipe"
	rm -rf ingestion/src/metadata/generated
	mkdir -p ingestion/src/metadata/generated
	python scripts/datamodel_generation.py
	$(MAKE) py_antlr js_antlr
	$(MAKE) install
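# Note: `make generate` is the target to re-run whenever the JSON Schemas change: it rebuilds the
# pydantic models, regenerates the ANTLR parsers (py_antlr / js_antlr) and reinstalls the ingestion
# module so that the freshly generated code is importable.
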
## Ingestion tests & QA
.PHONY: run_ometa_integration_tests
run_ometa_integration_tests:  ## Run Python integration tests
	coverage run --rcfile ingestion/.coveragerc -a --branch -m pytest -c ingestion/setup.cfg --junitxml=ingestion/junit/test-results-integration.xml ingestion/tests/integration/ometa ingestion/tests/integration/orm_profiler ingestion/tests/integration/test_suite ingestion/tests/integration/data_insight ingestion/tests/integration/lineage

.PHONY: unit_ingestion
unit_ingestion:  ## Run Python unit tests
	coverage run --rcfile ingestion/.coveragerc -a --branch -m pytest -c ingestion/setup.cfg --junitxml=ingestion/junit/test-results-unit.xml --ignore=ingestion/tests/unit/source ingestion/tests/unit

.PHONY: run_python_tests
run_python_tests:  ## Run all Python tests with coverage
	coverage erase
	$(MAKE) unit_ingestion
	$(MAKE) run_ometa_integration_tests
	coverage report --rcfile ingestion/.coveragerc || true

.PHONY: coverage
coverage:  ## Run all Python tests and generate the coverage XML report
	$(MAKE) run_python_tests
	coverage xml --rcfile ingestion/.coveragerc -o ingestion/coverage.xml || true
	sed -e "s/$(shell python -c "import site; import os; from pathlib import Path; print(os.path.relpath(site.getsitepackages()[0], str(Path.cwd())).replace('/','\/'))")/src/g" ingestion/coverage.xml >> ingestion/ci-coverage.xml
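# Note: the sed call rewrites the site-packages prefix recorded in coverage.xml back to src so that
# CI tooling (for example the Sonar scanner below) can map the coverage data onto the repository
# sources; the rewritten report is appended to ingestion/ci-coverage.xml.
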
.PHONY: sonar_ingestion
sonar_ingestion:  ## Run the Sonar analysis based on the tests results and push it to SonarCloud
	docker run \
		--rm \
		-e SONAR_HOST_URL="https://sonarcloud.io" \
		-e SONAR_SCANNER_OPTS="-Xmx1g" \
		-e SONAR_LOGIN=$(token) \
		-v ${PWD}/ingestion:/usr/src \
		sonarsource/sonar-scanner-cli \
		-Dproject.settings=sonar-project.properties

.PHONY: run_apis_tests
run_apis_tests:  ## Run the openmetadata airflow apis tests
	coverage erase
	coverage run --rcfile openmetadata-airflow-apis/.coveragerc -a --branch -m pytest --junitxml=openmetadata-airflow-apis/junit/test-results.xml openmetadata-airflow-apis/tests
	coverage report --rcfile openmetadata-airflow-apis/.coveragerc

.PHONY: coverage_apis
coverage_apis:  ## Run the python tests on openmetadata-airflow-apis
	$(MAKE) run_apis_tests
	coverage xml --rcfile openmetadata-airflow-apis/.coveragerc -o openmetadata-airflow-apis/coverage.xml
	sed -e "s/$(shell python -c "import site; import os; from pathlib import Path; print(os.path.relpath(site.getsitepackages()[0], str(Path.cwd())).replace('/','\/'))")\///g" openmetadata-airflow-apis/coverage.xml >> openmetadata-airflow-apis/ci-coverage.xml

## Ingestion publish
.PHONY: publish
publish:  ## Publish the ingestion module to PyPI
	$(MAKE) install_dev generate
	cd ingestion; \
		python setup.py install sdist bdist_wheel; \
		twine check dist/*; \
		twine upload dist/*

## Yarn
.PHONY: yarn_install_cache
yarn_install_cache:  ## Use Yarn to install UI dependencies
	cd openmetadata-ui/src/main/resources/ui && yarn install --frozen-lockfile

.PHONY: yarn_start_dev_ui
yarn_start_dev_ui:  ## Run the UI locally with Yarn
	cd openmetadata-ui/src/main/resources/ui && yarn start

## Ingestion Core
.PHONY: core_install_dev
core_install_dev:  ## Prepare a venv for the ingestion-core module
	cd ingestion-core; \
		rm -rf venv; \
		python3 -m venv venv; \
		. venv/bin/activate; \
		python3 -m pip install ".[dev]"

.PHONY: core_clean
core_clean:  ## Clean the ingestion-core generated files
	rm -rf ingestion-core/src/metadata/generated
	rm -rf ingestion-core/build
	rm -rf ingestion-core/dist

.PHONY: core_generate
core_generate:  ## Generate the pydantic models from the JSON Schemas to the ingestion-core module
	$(MAKE) core_install_dev
	mkdir -p ingestion-core/src/metadata/generated; \
		. ingestion-core/venv/bin/activate; \
		datamodel-codegen --input openmetadata-spec/src/main/resources/json/schema --input-file-type jsonschema --output ingestion-core/src/metadata/generated/schema
	$(MAKE) core_py_antlr

.PHONY: core_bump_version_dev
core_bump_version_dev:  ## Bump a `dev` version to the ingestion-core module. To be used when schemas are updated
	$(MAKE) core_install_dev
	cd ingestion-core; \
		. venv/bin/activate; \
		python -m incremental.update metadata --dev

.PHONY: core_publish
core_publish:  ## Install, generate and publish the ingestion-core module to Test PyPI
	$(MAKE) core_clean core_generate
	cd ingestion-core; \
		. venv/bin/activate; \
		python setup.py install sdist bdist_wheel; \
		twine check dist/*; \
		twine upload -r testpypi dist/*

.PHONY: core_py_antlr
core_py_antlr:  ## Generate the Python core code for parsing FQNs under ingestion-core
	antlr4 -Dlanguage=Python3 -o ingestion-core/src/metadata/generated/antlr ${PWD}/openmetadata-spec/src/main/antlr4/org/openmetadata/schema/*.g4

.PHONY: py_antlr
py_antlr:  ## Generate the Python code for parsing FQNs
	antlr4 -Dlanguage=Python3 -o ingestion/src/metadata/generated/antlr ${PWD}/openmetadata-spec/src/main/antlr4/org/openmetadata/schema/*.g4

.PHONY: js_antlr
js_antlr:  ## Generate the JavaScript code for parsing FQNs
	antlr4 -Dlanguage=JavaScript -o openmetadata-ui/src/main/resources/ui/src/generated/antlr ${PWD}/openmetadata-spec/src/main/antlr4/org/openmetadata/schema/*.g4
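# Note: each antlr4 invocation above compiles the *.g4 grammars under openmetadata-spec into a
# lexer/parser in the corresponding generated/antlr package (Python3 for the ingestion modules,
# JavaScript for the UI).
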
.PHONY: install_antlr_cli
install_antlr_cli:  ## Install antlr CLI locally
	echo '#!/usr/bin/java -jar' > /usr/local/bin/antlr4
	curl https://www.antlr.org/download/antlr-4.9.2-complete.jar >> /usr/local/bin/antlr4
	chmod 755 /usr/local/bin/antlr4
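# Note: this builds a self-executing jar: a '#!/usr/bin/java -jar' shebang is written first and the
# ANTLR jar is appended, so /usr/local/bin/antlr4 can be run directly. It assumes java is available
# at /usr/bin/java and that the current user can write to /usr/local/bin (you may need sudo).
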
.PHONY: docker-docs
docker-docs:  ## Runs the OM docs in docker passing openmetadata-docs as volume for content and images
	docker pull openmetadata/docs:latest
	docker run --name openmetadata-docs -p 3000:3000 -v ${PWD}/openmetadata-docs/content:/docs/content/ -v ${PWD}/openmetadata-docs/images:/docs/public/images openmetadata/docs:latest

.PHONY: docker-docs-validate
docker-docs-validate:  ## Validates the OM docs build in docker by running `npm run export` over the mounted content and images
	docker pull openmetadata/docs:latest
	docker run --entrypoint '/bin/sh' -v ${PWD}/openmetadata-docs/content:/docs/content/ -v ${PWD}/openmetadata-docs/images:/docs/public/images openmetadata/docs:latest -c 'npm run export'

.PHONY: docker-docs-local
docker-docs-local:  ## Runs the OM docs in docker with a local image
	docker run --name openmetadata-docs -p 3000:3000 -v ${PWD}/openmetadata-docs/content:/docs/content/ -v ${PWD}/openmetadata-docs/images:/docs/public/images openmetadata-docs:local

## SNYK
SNYK_ARGS := --severity-threshold=high

.PHONY: snyk-ingestion-report
snyk-ingestion-report:  ## Uses Snyk CLI to validate the ingestion code and container. Does not stop on errors
	@echo "Validating Ingestion container..."
	docker build -t openmetadata-ingestion:scan -f ingestion/Dockerfile .
	snyk container test openmetadata-ingestion:scan --file=ingestion/Dockerfile $(SNYK_ARGS) --json > security-report/ingestion-docker-scan.json | true;
	@echo "Validating ALL ingestion dependencies. Make sure the venv is activated."
	cd ingestion; \
		pip freeze > scan-requirements.txt; \
		snyk test --file=scan-requirements.txt --package-manager=pip --command=python3 $(SNYK_ARGS) --json > ../security-report/ingestion-dep-scan.json | true; \
		snyk code test $(SNYK_ARGS) --json > ../security-report/ingestion-code-scan.json | true;

.PHONY: snyk-airflow-apis-report
snyk-airflow-apis-report:  ## Uses Snyk CLI to validate the airflow apis code. Does not stop on errors
	@echo "Validating airflow dependencies. Make sure the venv is activated."
	cd openmetadata-airflow-apis; \
		snyk code test $(SNYK_ARGS) --json > ../security-report/airflow-apis-code-scan.json | true;

.PHONY: snyk-server-report
snyk-server-report:  ## Uses Snyk CLI to validate the server code and container. Does not stop on errors
	@echo "Validating server container... Make sure the code is built and available under openmetadata-dist"
	docker build -t openmetadata-server:scan -f docker/development/Dockerfile .
	snyk container test openmetadata-server:scan --file=docker/development/Dockerfile $(SNYK_ARGS) --json > security-report/server-docker-scan.json | true;
	snyk test --all-projects $(SNYK_ARGS) --json > security-report/server-dep-scan.json | true;
	snyk code test --all-projects --severity-threshold=high --json > security-report/server-code-scan.json | true;

.PHONY: snyk-ui-report
snyk-ui-report:  ## Uses Snyk CLI to validate the UI dependencies. Does not stop on errors
	snyk test --file=openmetadata-ui/src/main/resources/ui/yarn.lock $(SNYK_ARGS) --json > security-report/ui-dep-scan.json | true;

.PHONY: snyk-dependencies-report
snyk-dependencies-report:  ## Uses Snyk CLI to validate the project dependencies: MySQL, Postgres and ES. Only local testing.
	@echo "Validating dependencies images..."
	snyk container test mysql/mysql-server:latest $(SNYK_ARGS) --json > security-report/mysql-scan.json | true;
	snyk container test postgres:latest $(SNYK_ARGS) --json > security-report/postgres-scan.json | true;
	snyk container test docker.elastic.co/elasticsearch/elasticsearch:7.10.2 $(SNYK_ARGS) --json > security-report/es-scan.json | true;

.PHONY: snyk-report
snyk-report:  ## Uses Snyk CLI to run a security scan of the different pieces of the code
	@echo "To run this locally, make sure to install and authenticate using the Snyk CLI: https://docs.snyk.io/snyk-cli/install-the-snyk-cli"
	rm -rf security-report
	mkdir -p security-report
	$(MAKE) snyk-ingestion-report
	$(MAKE) snyk-airflow-apis-report
	$(MAKE) snyk-server-report
	$(MAKE) snyk-ui-report
	$(MAKE) export-snyk-pdf-report

.PHONY: export-snyk-pdf-report
export-snyk-pdf-report:  ## Export the JSON reports in security-report/ to HTML and PDF
	@echo "Reading all results"
	npm install snyk-to-html -g
	ls security-report | xargs -I % snyk-to-html -i security-report/% -o security-report/%.html
	pip install pdfkit
	pip install PyPDF2
	python scripts/html_to_pdf.py

## Ingestion Operators
.PHONY: build-ingestion-base-local
build-ingestion-base-local:  ## Builds the ingestion DEV docker operator with the local ingestion files
	$(MAKE) install_dev generate
	docker build -f ingestion/operators/docker/Dockerfile-dev . -t openmetadata/ingestion-base:local