// Gradle build script that drives a Python project through a local virtualenv.
// Almost every task is an Exec task that runs a bash one-liner inside the venv.
plugins {
  id 'base'
}

// Applied script; get_coverage_args(...) used below is not defined in this file.
// NOTE(review): presumably it is provided by this coverage script - confirm.
apply from: "../gradle/coverage/python-coverage.gradle"

ext {
  python_executable = 'python3'
  venv_name = 'venv'
}

// Shell prefix shared by the tasks below: shell tracing is switched off while
// the venv activation script is sourced and switched back on afterwards.
ext.venv_activate_command = "set +x && source ${venv_name}/bin/activate && set -x && "

// Optional -Pextra_pip_requirements=... is appended to several install commands;
// default it to an empty string so the interpolations below stay valid.
if (!project.hasProperty("extra_pip_requirements")) {
  ext.extra_pip_requirements = ""
}

// Fails the build when the configured interpreter is older than Python 3.9.
// The `assert` keyword is required: without it the comparison and the message
// merely form a tuple expression that is evaluated and discarded, so the
// interpreter would exit 0 on every version and the check could never fail.
task checkPythonVersion(type: Exec) {
  commandLine python_executable, '-c',
    'import sys; assert sys.version_info >= (3, 9), f"Python version {sys.version_info[:2]} not allowed"'
}

// Creates the venv when the directory or its bin/python is missing, then
// installs or upgrades uv inside it. setup.py is the declared input and the
// sentinel file (touched on success) is the declared output.
task environmentSetup(type: Exec, dependsOn: checkPythonVersion) {
  def sentinel_file = "${venv_name}/.venv_environment_sentinel"
  inputs.file file('setup.py')
  outputs.file(sentinel_file)
  commandLine 'bash', '-c',
    "if [ ! -d ${venv_name} ] || [ ! -f ${venv_name}/bin/python ]; then ${python_executable} -m venv ${venv_name}; fi && " +
    "set -x && " +
    // If we already have uv available, use it to upgrade uv. Otherwise, install it with pip.
    "if [ ! -f ${venv_name}/bin/uv ]; then ${venv_name}/bin/python -m pip install --upgrade uv; else ${venv_name}/bin/python -m uv pip install --upgrade uv; fi && " +
    "touch ${sentinel_file}"
}

// Editable install of the package itself, without extra_pip_requirements.
task installPackageOnly(type: Exec, dependsOn: environmentSetup) {
  def sentinel_file = "${venv_name}/.build_install_package_only_sentinel"
  inputs.file file('setup.py')
  outputs.file(sentinel_file)
  commandLine 'bash', '-c',
    venv_activate_command +
    "uv pip install -e . &&" +
    "touch ${sentinel_file}"
}

// Editable install of the package plus any extra_pip_requirements.
task installPackage(type: Exec, dependsOn: installPackageOnly) {
  def sentinel_file = "${venv_name}/.build_install_package_sentinel"
  inputs.file file('setup.py')
  outputs.file(sentinel_file)
  commandLine 'bash', '-c',
    venv_activate_command +
    "uv pip install -e . ${extra_pip_requirements} && " +
    "touch ${sentinel_file}"
}

// Runs scripts/codegen.sh; declared inputs are the .avsc schemas and the
// scripts directory, declared output is src/datahub/metadata.
task codegen(type: Exec, dependsOn: [environmentSetup, installPackage, ':metadata-events:mxe-schemas:build']) {
  inputs.files(
    project.fileTree(dir: "../metadata-events/mxe-schemas/src/", include: "**/*.avsc"),
    project.fileTree(dir: "scripts"),
  )
  outputs.dir('src/datahub/metadata')
  commandLine 'bash', '-c', "${venv_activate_command} ./scripts/codegen.sh"
}

// Runs scripts/custom_package_codegen.sh with -Ppackage_name / -Ppackage_version.
// NOTE(review): findProperty returns null when a property is not supplied, so
// the script would receive the literal text 'null' - confirm that is acceptable.
task customPackageGenerate(type: Exec, dependsOn: [environmentSetup, installPackage, ':metadata-events:mxe-schemas:build']) {
  def package_name = project.findProperty('package_name')
  def package_version = project.findProperty('package_version')
  commandLine 'bash', '-c',
    venv_activate_command +
    "uv pip install build && " +
    "./scripts/custom_package_codegen.sh '${package_name}' '${package_version}'"
}

// Aggregate task: package installed and generated code present.
task install(dependsOn: [installPackage, codegen])

// Editable install with the [dev] extra.
task installDev(type: Exec, dependsOn: [install]) {
  def sentinel_file = "${venv_name}/.build_install_dev_sentinel"
  inputs.file file('setup.py')
  outputs.file(sentinel_file)
  commandLine 'bash', '-c',
    venv_activate_command +
    "uv pip install -e .[dev] ${extra_pip_requirements} && " +
    "touch ${sentinel_file}"
}

// Editable install with the [all] extra.
task installAll(type: Exec, dependsOn: [install]) {
  def sentinel_file = "${venv_name}/.build_install_all_sentinel"
  inputs.file file('setup.py')
  outputs.file(sentinel_file)
  commandLine 'bash', '-c',
    venv_activate_command +
    "uv pip install -e .[all] ${extra_pip_requirements} && " +
    "touch ${sentinel_file}"
}

// Runs scripts/modeldocgen.py to write metamodel docs under ../docs/generated/metamodel.
task modelDocGen(type: Exec, dependsOn: [codegen]) {
  def outdir = './generated/docs'
  def docsOutdir = '../docs/generated/metamodel'
  def datahubRoot = '..'
  def schemasRoot = "${datahubRoot}/metadata-events/mxe-schemas/src/mainGeneratedAvroSchema/avro/"
  def entityRegistry = "${datahubRoot}/metadata-models/src/main/resources/entity-registry.yml"
  def metadataModelDocsRoot = "${datahubRoot}/metadata-models/docs"
  inputs.files(
    file('scripts/modeldocgen.py'),
    project.fileTree(dir: "../metadata-models/docs/entities/", include: "**/*.md"),
    project.fileTree(dir: "examples/", include: "**/*.py"),
    project.fileTree(dir: "../metadata-events/mxe-schemas/src/", include: "**/*.avsc")
  )
  outputs.dir('../docs/generated/metamodel')
  commandLine 'bash', '-c', "${venv_activate_command} python scripts/modeldocgen.py ${schemasRoot} --registry ${entityRegistry} --generated-docs-dir ${docsOutdir} --file ${outdir}/metadata_model_mces.json --extra-docs ${metadataModelDocsRoot}"
}

// Runs the same modeldocgen.py script with --lineage-output; the generated
// docs are directed to /tmp and only the lineage JSON is a declared output.
task lineageGen(type: Exec, dependsOn: [codegen]) {
  def datahubRoot = '..'
  def schemasRoot = "${datahubRoot}/metadata-events/mxe-schemas/src/mainGeneratedAvroSchema/avro/"
  def entityRegistry = "${datahubRoot}/metadata-models/src/main/resources/entity-registry.yml"
  def lineageOutput = "src/datahub/ingestion/autogenerated/lineage.json"
  inputs.files(
    file('scripts/modeldocgen.py'),
    project.fileTree(dir: "../metadata-events/mxe-schemas/src/", include: "**/*.avsc")
  )
  outputs.file(lineageOutput)
  commandLine 'bash', '-c', "${venv_activate_command} python scripts/modeldocgen.py ${schemasRoot} --registry ${entityRegistry} --generated-docs-dir /tmp --lineage-output ${lineageOutput}"
}

// Runs pytest over scripts/tests from inside the scripts directory. setup.cfg
// is copied in before the run and removed after a successful run.
task testScripts(type: Exec, dependsOn: [installDev]) {
  inputs.files(project.fileTree(dir: "scripts/tests/", include: "**/*.py"))
  outputs.dir("${venv_name}")
  commandLine 'bash', '-c',
    "set +x && source ${project.projectDir}/${venv_name}/bin/activate && set -x && cd ${project.projectDir}/scripts && cp ../setup.cfg . && PYTHONPATH=${project.projectDir} pytest -vvv tests/ --cov=. --cov-report=term --cov-report=html:./cov_html && rm setup.cfg"
}

// Runs scripts/modeldocupload.sh after the model docs have been generated.
task modelDocUpload(type: Exec, dependsOn: [modelDocGen]) {
  commandLine 'bash', '-c', "${venv_activate_command} ./scripts/modeldocupload.sh"
}

// Read-only checks: ruff lint, ruff format --check, then mypy.
task lint(type: Exec, dependsOn: installDev) {
  commandLine 'bash', '-c',
    venv_activate_command +
    "ruff check scripts/ scripts/tests/ src/ tests/ examples/ && " +
    "ruff format --check scripts/ scripts/tests/ src/ tests/ examples/ && " +
    "mypy --show-traceback --show-error-codes src/ tests/ examples/"
}

// Applies ruff's automatic fixes and reformats the same directories.
task lintFix(type: Exec, dependsOn: installDev) {
  commandLine 'bash', '-c',
    venv_activate_command +
    "ruff check --fix scripts/ scripts/tests/ src/ tests/ examples/ && " +
    "ruff format scripts/ scripts/tests/ src/ tests/ examples/ "
}

def pytest_default_env = "" // set to "PYTHONDEVMODE=1" to warn on unclosed sockets
def pytest_default_args = "--durations=20 -vv --continue-on-collection-errors"

// Unit tests only (tests/unit, excluding tests marked 'integration').
task testQuick(type: Exec, dependsOn: [installDev, ':metadata-models:generateJsonSchema']) {
  // We can't enforce the coverage requirements if we run a subset of the tests.
  inputs.files(project.fileTree(dir: "src/", include: "**/*.py"))
  inputs.files(project.fileTree(dir: "tests/"))
  outputs.dir("${venv_name}")
  def cvg_arg = get_coverage_args("quick")
  commandLine 'bash', '-c',
    venv_activate_command +
    "${pytest_default_env} pytest ${cvg_arg} tests/unit ${pytest_default_args} --random-order -m 'not integration' --junit-xml=junit.quick.xml"
}

// Editable install with the [dev,integration-tests] extras.
task installDevTest(type: Exec, dependsOn: [install]) {
  def sentinel_file = "${venv_name}/.build_install_dev_test_sentinel"
  inputs.file file('setup.py')
  outputs.dir("${venv_name}")
  outputs.file(sentinel_file)
  commandLine 'bash', '-c',
    venv_activate_command +
    "uv pip install -e .[dev,integration-tests] ${extra_pip_requirements} && " +
    "touch ${sentinel_file}"
}

// -PtestFile=<path> selects the file for testSingle; 'unknown' marks "not set".
def testFile = hasProperty('testFile') ? testFile : 'unknown'

// Runs pytest on one file; fails with a usage hint when -PtestFile is absent.
task testSingle(dependsOn: [installDevTest]) {
  doLast {
    if (testFile != 'unknown') {
      exec {
        commandLine 'bash', '-c',
          "${venv_activate_command} ${pytest_default_env} pytest ${testFile} ${pytest_default_args}"
      }
    } else {
      throw new GradleException("No file provided. Use -PtestFile=")
    }
  }
}

// Create testIntegrationBatch0 through testIntegrationBatch5 tasks
(0..5).each { batchNum ->
  tasks.register("testIntegrationBatch${batchNum}", Exec) {
    dependsOn installDevTest
    def cvg_arg = get_coverage_args("intBatch${batchNum}")
    commandLine 'bash', '-c',
      venv_activate_command +
      "${pytest_default_env} pytest ${cvg_arg} ${pytest_default_args} -m 'integration_batch_${batchNum}' --junit-xml=junit.integrationbatch${batchNum}.xml"
  }
}

// Runs pytest with the default arguments and no marker filter.
task testFull(type: Exec, dependsOn: [installDevTest]) {
  commandLine 'bash', '-c',
    venv_activate_command +
    "${pytest_default_env} pytest ${pytest_default_args} --junit-xml=junit.full.xml"
}

// Runs scripts/specgen.sh inside the venv.
task specGen(type: Exec, dependsOn: [codegen, installDevTest]) {
  commandLine 'bash', '-c', "${venv_activate_command} ./scripts/specgen.sh"
}

// Runs scripts/capability_summary.py, writing into src/datahub/ingestion/autogenerated.
task capabilitySummary(type: Exec, dependsOn: [codegen, installDevTest]) {
  inputs.files(
    file('scripts/capability_summary.py'),
    file('scripts/docgen_types.py'),
    project.fileTree(dir: "src/datahub/ingestion/source", include: "**/*.py")
  )
  commandLine 'bash', '-c',
    "${venv_activate_command} python scripts/capability_summary.py --output-dir ./src/datahub/ingestion/autogenerated"
}

// Regenerates the ingestion docs: the output directory is removed and then
// rebuilt by scripts/docgen.py.
task docGen(type: Exec, dependsOn: [codegen, installDevTest, specGen]) {
  def sourceFile = file('./src/datahub/ingestion/autogenerated/capability_summary.json')
  if (!sourceFile.exists()) {
    // Doing it like this cuts docGen time from 15 seconds to 9 seconds locally
    // This can further reduce if we generate more things in the future
    dependsOn capabilitySummary
  }
  def docsOutDir = "${rootProject.projectDir}/docs/generated/ingestion"
  def extraDocsDir = "${rootProject.projectDir}/metadata-ingestion/docs/sources"
  def capabilitySummaryFile = "${rootProject.projectDir}/metadata-ingestion/src/datahub/ingestion/autogenerated/capability_summary.json"
  inputs.files(
    file('scripts/docgen.py'),
    file(capabilitySummaryFile),
    project.fileTree(dir: extraDocsDir, include: "**/*.md")
  )
  outputs.dir(docsOutDir)
  commandLine 'bash', '-c',
    venv_activate_command +
    "rm -rf ${docsOutDir} && " +
    "python scripts/docgen.py --out-dir ${docsOutDir} --capability-summary ${capabilitySummaryFile} --extra-docs ${extraDocsDir}"
}

// Deletes compiled .pyc/.pyo files, __pycache__ directories and empty
// directories under src and tests.
task cleanPythonCache(type: Exec) {
  commandLine 'bash', '-c',
    "find src tests -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete"
}

// Runs scripts/release.sh with RELEASE_SKIP_INSTALL=1 and RELEASE_SKIP_UPLOAD=1.
// RELEASE_VERSION falls back to 0.0.0.dev1 when not set in the environment; the
// backslash keeps the dollar sign literal so bash, not Groovy, expands it.
task buildWheel(type: Exec, dependsOn: [install, codegen, cleanPythonCache]) {
  commandLine 'bash', '-c',
    venv_activate_command +
    'uv pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_INSTALL=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh'
}

// Wire the Python tasks into the lifecycle tasks of the 'base' plugin.
build.dependsOn install
check.dependsOn lint
check.dependsOn testQuick

// Remove the venv, build artefacts, generated sources/docs and tool caches.
clean {
  delete venv_name
  delete 'build'
  delete 'dist'
  delete 'src/datahub/metadata'
  delete '../docs/generated'
  delete 'generated'
  delete '.ruff_cache'
  delete '.mypy_cache'
  delete '.pytest_cache'
  delete '.preflight_sentinel'
  delete '.coverage'
  delete 'scripts/cov_html'
}
clean.dependsOn cleanPythonCache

// Register src/ and tests/ as source and test-source roots for the IDEA module.
idea {
  module {
    sourceDirs += file('src')
    testSourceDirs += file('tests')
  }
}