mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-01 03:09:12 +00:00
285 lines
11 KiB
Groovy
285 lines
11 KiB
Groovy
// Only the 'base' plugin is applied here; all Python work below is done
// through Exec tasks that shell out to bash.
plugins {
  id 'base'
}

// Shared Python coverage script.
// NOTE(review): presumably this is where get_coverage_args (used by the test tasks) comes from - confirm.
apply from: "../gradle/coverage/python-coverage.gradle"

// Interpreter used to create the virtualenv, and the virtualenv directory name.
ext.python_executable = 'python3'
ext.venv_name = 'venv'

// Shell prefix that activates the virtualenv. Command tracing is switched off
// while the activate script is sourced and switched back on afterwards.
ext.venv_activate_command = "set +x && source ${venv_name}/bin/activate && set -x && "

// Extra arguments appended to the `uv pip install` commands; defaults to nothing
// unless the project already has the property set.
if (!project.hasProperty("extra_pip_requirements")) {
  ext.extra_pip_requirements = ""
}
|
|
|
|
// Fails the build early when the interpreter is older than Python 3.9.
// The check has to be an `assert` statement: a bare `condition, "message"`
// is just a tuple expression that is evaluated and discarded, so the
// interpreter would exit 0 on every version and the gate would never fire.
task checkPythonVersion(type: Exec) {
  commandLine python_executable, '-c',
    'import sys; assert sys.version_info >= (3, 9), f"Python version {sys.version_info[:2]} not allowed"'
}
|
|
|
|
// Creates the virtualenv when it is missing and installs/upgrades `uv` inside it.
// A sentinel file inside the venv is touched on success and declared as the task output.
task environmentSetup(type: Exec, dependsOn: checkPythonVersion) {
  def marker = "${venv_name}/.venv_environment_sentinel"
  def venv_python = "${venv_name}/bin/python"

  inputs.file file('setup.py')
  outputs.file(marker)

  def steps = [
    // Create the venv if the directory or its python binary does not exist.
    "if [ ! -d ${venv_name} ] || [ ! -f ${venv_python} ]; then ${python_executable} -m venv ${venv_name}; fi",
    "set -x",
    // If we already have uv available, use it to upgrade uv. Otherwise, install it with pip.
    "if [ ! -f ${venv_name}/bin/uv ]; then ${venv_python} -m pip install --upgrade uv; else ${venv_python} -m uv pip install --upgrade uv; fi",
    "touch ${marker}"
  ]
  commandLine 'bash', '-c', steps.join(' && ')
}
|
|
|
|
// Editable install of this package without extras or extra pip requirements.
task installPackageOnly(type: Exec) {
  dependsOn environmentSetup

  def marker = "${venv_name}/.build_install_package_only_sentinel"
  inputs.file file('setup.py')
  outputs.file(marker)

  // The two pieces are concatenated without a space in between, exactly as before.
  def installStep = "uv pip install -e . &&"
  def touchStep = "touch ${marker}"
  commandLine 'bash', '-c', venv_activate_command + installStep + touchStep
}
|
|
|
|
// Editable install of this package plus whatever extra_pip_requirements contains.
task installPackage(type: Exec) {
  dependsOn installPackageOnly

  def marker = "${venv_name}/.build_install_package_sentinel"
  inputs.file file('setup.py')
  outputs.file(marker)

  def script = venv_activate_command +
    "uv pip install -e . ${extra_pip_requirements} && touch ${marker}"
  commandLine 'bash', '-c', script
}
|
|
|
|
// Runs scripts/codegen.sh inside the virtualenv.
// Inputs are the .avsc schemas under mxe-schemas plus everything in scripts/;
// the declared output is src/datahub/metadata.
task codegen(type: Exec) {
  dependsOn environmentSetup, installPackage, ':metadata-events:mxe-schemas:build'

  def avroSchemas = project.fileTree(dir: "../metadata-events/mxe-schemas/src/", include: "**/*.avsc")
  def scriptSources = project.fileTree(dir: "scripts")
  inputs.files(avroSchemas, scriptSources)
  outputs.dir('src/datahub/metadata')

  commandLine 'bash', '-c', "${venv_activate_command} ./scripts/codegen.sh"
}
|
|
|
|
// Installs `build` and runs scripts/custom_package_codegen.sh with the values of
// the package_name and package_version project properties as its two arguments.
task customPackageGenerate(type: Exec) {
  dependsOn environmentSetup, installPackage, ':metadata-events:mxe-schemas:build'

  // findProperty returns null when the property was not supplied.
  def pkgName = project.findProperty('package_name')
  def pkgVersion = project.findProperty('package_version')

  def script = venv_activate_command +
    "uv pip install build && " +
    "./scripts/custom_package_codegen.sh '${pkgName}' '${pkgVersion}'"
  commandLine 'bash', '-c', script
}
|
|
|
|
// Aggregate task: package installation followed by code generation.
task install {
  dependsOn installPackage, codegen
}
|
|
|
|
// Editable install with the [dev] extra plus any extra_pip_requirements.
task installDev(type: Exec) {
  dependsOn install

  def marker = "${venv_name}/.build_install_dev_sentinel"
  inputs.file file('setup.py')
  outputs.file(marker)

  def script = venv_activate_command +
    "uv pip install -e .[dev] ${extra_pip_requirements} && touch ${marker}"
  commandLine 'bash', '-c', script
}
|
|
|
|
// Editable install with the [all] extra plus any extra_pip_requirements.
task installAll(type: Exec) {
  dependsOn install

  def marker = "${venv_name}/.build_install_all_sentinel"
  inputs.file file('setup.py')
  outputs.file(marker)

  def script = venv_activate_command +
    "uv pip install -e .[all] ${extra_pip_requirements} && touch ${marker}"
  commandLine 'bash', '-c', script
}
|
|
|
|
// Runs scripts/modeldocgen.py against the generated Avro schemas and the entity
// registry, writing docs to ../docs/generated/metamodel and an MCE file under ./generated/docs.
task modelDocGen(type: Exec) {
  dependsOn codegen

  def datahubRoot = '..'
  def outdir = './generated/docs'
  def docsOutdir = '../docs/generated/metamodel'
  def schemasRoot = "${datahubRoot}/metadata-events/mxe-schemas/src/mainGeneratedAvroSchema/avro/"
  def entityRegistry = "${datahubRoot}/metadata-models/src/main/resources/entity-registry.yml"
  def metadataModelDocsRoot = "${datahubRoot}/metadata-models/docs"

  inputs.files(
    file('scripts/modeldocgen.py'),
    project.fileTree(dir: "../metadata-models/docs/entities/", include: "**/*.md"),
    project.fileTree(dir: "examples/", include: "**/*.py"),
    project.fileTree(dir: "../metadata-events/mxe-schemas/src/", include: "**/*.avsc")
  )
  outputs.dir(docsOutdir)

  // Command pieces are joined with single spaces.
  def argv = [
    "python scripts/modeldocgen.py",
    schemasRoot,
    "--registry ${entityRegistry}",
    "--generated-docs-dir ${docsOutdir}",
    "--file ${outdir}/metadata_model_mces.json",
    "--extra-docs ${metadataModelDocsRoot}"
  ]
  commandLine 'bash', '-c', "${venv_activate_command} ${argv.join(' ')}"
}
|
|
|
|
// Runs scripts/modeldocgen.py with --lineage-output to produce
// src/datahub/ingestion/autogenerated/lineage.json; the generated docs dir is pointed at /tmp.
task lineageGen(type: Exec) {
  dependsOn codegen

  def datahubRoot = '..'
  def schemasRoot = "${datahubRoot}/metadata-events/mxe-schemas/src/mainGeneratedAvroSchema/avro/"
  def entityRegistry = "${datahubRoot}/metadata-models/src/main/resources/entity-registry.yml"
  def lineageOutput = "src/datahub/ingestion/autogenerated/lineage.json"

  inputs.files(
    file('scripts/modeldocgen.py'),
    project.fileTree(dir: "../metadata-events/mxe-schemas/src/", include: "**/*.avsc")
  )
  outputs.file(lineageOutput)

  // Command pieces are joined with single spaces.
  def argv = [
    "python scripts/modeldocgen.py",
    schemasRoot,
    "--registry ${entityRegistry}",
    "--generated-docs-dir /tmp",
    "--lineage-output ${lineageOutput}"
  ]
  commandLine 'bash', '-c', "${venv_activate_command} ${argv.join(' ')}"
}
|
|
|
|
// Runs the pytest suite under scripts/tests with coverage reporting.
// setup.cfg is copied into scripts/ for the duration of the run. The copy is
// now removed whether or not pytest succeeds (previously a failing run left
// it behind), and pytest's exit status is still what the task reports.
task testScripts(type: Exec, dependsOn: [installDev]) {
  inputs.files(project.fileTree(dir: "scripts/tests/", include: "**/*.py"))
  outputs.dir("${venv_name}")

  commandLine 'bash', '-c',
    "set +x && source ${project.projectDir}/${venv_name}/bin/activate && set -x && " +
    "cd ${project.projectDir}/scripts && cp ../setup.cfg . && " +
    // Remember pytest's status, clean up unconditionally, then exit with that status.
    "{ status=0; PYTHONPATH=${project.projectDir} pytest -vvv tests/ --cov=. --cov-report=term --cov-report=html:./cov_html || status=\$?; " +
    "rm -f setup.cfg; exit \$status; }"
}
|
|
|
|
// Runs scripts/modeldocupload.sh inside the virtualenv once modelDocGen has run.
task modelDocUpload(type: Exec) {
  dependsOn modelDocGen
  def script = "${venv_activate_command} ./scripts/modeldocupload.sh"
  commandLine 'bash', '-c', script
}
|
|
|
|
// Static checks: ruff lint, ruff format check, then mypy. Stops at the first failure.
task lint(type: Exec) {
  dependsOn installDev

  // Directories passed to both ruff invocations; mypy gets a smaller set.
  def ruffTargets = "scripts/ scripts/tests/ src/ tests/ examples/"
  def checks = [
    "ruff check ${ruffTargets}",
    "ruff format --check ${ruffTargets}",
    "mypy --show-traceback --show-error-codes src/ tests/ examples/"
  ]
  commandLine 'bash', '-c', venv_activate_command + checks.join(' && ')
}
|
|
|
|
// Applies ruff's automatic fixes and then reformats the same directories.
task lintFix(type: Exec) {
  dependsOn installDev

  def ruffTargets = "scripts/ scripts/tests/ src/ tests/ examples/"
  def script = venv_activate_command +
    "ruff check --fix ${ruffTargets} && " +
    "ruff format ${ruffTargets} "
  commandLine 'bash', '-c', script
}
|
|
|
|
// Environment assignments placed in front of every pytest invocation below.
// Set to "PYTHONDEVMODE=1" to warn on unclosed sockets.
def pytest_default_env = ''
// Arguments shared by every pytest invocation below.
def pytest_default_args = '--durations=20 -vv --continue-on-collection-errors'
|
|
|
|
// Runs tests/unit in random order, excluding tests marked `integration`,
// and writes junit.quick.xml.
// We can't enforce the coverage requirements if we run a subset of the tests.
task testQuick(type: Exec) {
  dependsOn installDev, ':metadata-models:generateJsonSchema'

  inputs.files(project.fileTree(dir: "src/", include: "**/*.py"))
  inputs.files(project.fileTree(dir: "tests/"))
  outputs.dir("${venv_name}")

  def coverageFlags = get_coverage_args("quick")
  def pytestCall = "${pytest_default_env} pytest ${coverageFlags} tests/unit ${pytest_default_args} --random-order -m 'not integration' --junit-xml=junit.quick.xml"
  commandLine 'bash', '-c', venv_activate_command + pytestCall
}
|
|
|
|
// Editable install with the [dev,integration-tests] extras plus any extra_pip_requirements.
// Both the venv directory and the sentinel file are declared as outputs.
task installDevTest(type: Exec) {
  dependsOn install

  def marker = "${venv_name}/.build_install_dev_test_sentinel"
  inputs.file file('setup.py')
  outputs.dir("${venv_name}")
  outputs.file(marker)

  def script = venv_activate_command +
    "uv pip install -e .[dev,integration-tests] ${extra_pip_requirements} && touch ${marker}"
  commandLine 'bash', '-c', script
}
|
|
|
|
// Test path given via -PtestFile=<test_file>; 'unknown' marks "not provided".
def testFile = hasProperty('testFile') ? testFile : 'unknown'

// Runs pytest on the single file named by -PtestFile; fails when none was given.
task testSingle(dependsOn: [installDevTest]) {
  doLast {
    // Guard: refuse to run without an explicit test file.
    if (testFile == 'unknown') {
      throw new GradleException("No file provided. Use -PtestFile=<test_file>")
    }
    exec {
      commandLine 'bash', '-c',
        "${venv_activate_command} ${pytest_default_env} pytest ${testFile} ${pytest_default_args}"
    }
  }
}
|
|
|
|
// Registers testIntegrationBatch0 through testIntegrationBatch5. Each one runs the
// tests carrying the matching integration_batch_<n> marker and writes its own junit file.
(0..5).each { idx ->
  def batchTaskName = "testIntegrationBatch${idx}"
  tasks.register(batchTaskName, Exec) {
    dependsOn installDevTest

    def coverageFlags = get_coverage_args("intBatch${idx}")
    def pytestCall = "${pytest_default_env} pytest ${coverageFlags} ${pytest_default_args} -m 'integration_batch_${idx}' --junit-xml=junit.integrationbatch${idx}.xml"
    commandLine 'bash', '-c', venv_activate_command + pytestCall
  }
}
|
|
|
|
// Runs pytest with the default arguments and no marker filter; writes junit.full.xml.
task testFull(type: Exec) {
  dependsOn installDevTest

  def pytestCall = "${pytest_default_env} pytest ${pytest_default_args} --junit-xml=junit.full.xml"
  commandLine 'bash', '-c', venv_activate_command + pytestCall
}
|
|
|
|
// Runs scripts/specgen.sh inside the virtualenv.
task specGen(type: Exec) {
  dependsOn codegen, installDevTest
  def script = "${venv_activate_command} ./scripts/specgen.sh"
  commandLine 'bash', '-c', script
}
|
|
|
|
// Runs scripts/capability_summary.py, writing into src/datahub/ingestion/autogenerated.
task capabilitySummary(type: Exec) {
  dependsOn codegen, installDevTest

  def ingestionSources = project.fileTree(dir: "src/datahub/ingestion/source", include: "**/*.py")
  inputs.files(
    file('scripts/capability_summary.py'),
    file('scripts/docgen_types.py'),
    ingestionSources
  )

  def script = "${venv_activate_command} python scripts/capability_summary.py --output-dir ./src/datahub/ingestion/autogenerated"
  commandLine 'bash', '-c', script
}
|
|
|
|
// Regenerates the ingestion docs: wipes the output directory, then runs scripts/docgen.py
// with the capability summary and the extra source docs.
task docGen(type: Exec) {
  dependsOn codegen, installDevTest, specGen

  // Only pull in capabilitySummary when its JSON output is not already on disk
  // (checked while the build is being configured).
  // Doing it like this cuts docGen time from 15 seconds to 9 seconds locally.
  // This can further reduce if we generate more things in the future.
  def existingSummary = file('./src/datahub/ingestion/autogenerated/capability_summary.json')
  if (!existingSummary.exists()) {
    dependsOn capabilitySummary
  }

  def ingestionRoot = "${rootProject.projectDir}/metadata-ingestion"
  def docsOutDir = "${rootProject.projectDir}/docs/generated/ingestion"
  def extraDocsDir = "${ingestionRoot}/docs/sources"
  def capabilitySummaryFile = "${ingestionRoot}/src/datahub/ingestion/autogenerated/capability_summary.json"

  inputs.files(
    file('scripts/docgen.py'),
    file(capabilitySummaryFile),
    project.fileTree(dir: extraDocsDir, include: "**/*.md")
  )
  outputs.dir(docsOutDir)

  def steps = [
    "rm -rf ${docsOutDir}",
    "python scripts/docgen.py --out-dir ${docsOutDir} --capability-summary ${capabilitySummaryFile} --extra-docs ${extraDocsDir}"
  ]
  commandLine 'bash', '-c', venv_activate_command + steps.join(' && ')
}
|
|
|
|
// Deletes *.pyc / *.pyo files, __pycache__ directories and empty directories under src and tests.
task cleanPythonCache(type: Exec) {
  def findCommand = "find src tests -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete"
  commandLine 'bash', '-c', findCommand
}
|
|
// Installs `build` and runs scripts/release.sh with install and upload skipped.
// RELEASE_VERSION falls back to 0.0.0.dev1 when it is unset or empty in the environment.
task buildWheel(type: Exec) {
  dependsOn install, codegen, cleanPythonCache

  // Single-quoted so that Groovy leaves the shell's ${...} expansion alone.
  def releaseStep = 'uv pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_INSTALL=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh'
  commandLine 'bash', '-c', venv_activate_command + releaseStep
}
|
|
|
|
// Hook the Python tasks into the standard lifecycle tasks:
// `build` installs the package, `check` lints and runs the quick tests.
build.dependsOn(install)
check.dependsOn(lint, testQuick)
|
|
|
|
// Extends `clean` to remove the virtualenv, build artifacts, generated code/docs
// and tool caches; cleanPythonCache runs as a dependency.
clean {
  dependsOn cleanPythonCache

  delete venv_name,
    'build',
    'dist',
    'src/datahub/metadata',
    '../docs/generated',
    'generated',
    '.ruff_cache',
    '.mypy_cache',
    '.pytest_cache',
    '.preflight_sentinel',
    '.coverage',
    'scripts/cov_html'
}
|
|
|
|
// Adds src/ and tests/ to the IDEA module's source and test source directories.
idea.module {
  sourceDirs += file('src')
  testSourceDirs += file('tests')
}
|