feat(python): split out temp wheel builds (#12157)

This commit is contained in:
Harshal Sheth 2024-12-19 11:02:37 -05:00 committed by GitHub
parent e45f548910
commit 08605a95a7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 304 additions and 49 deletions

View File

@ -27,7 +27,6 @@ jobs:
airflow-plugin:
runs-on: ubuntu-latest
env:
SPARK_VERSION: 3.0.3
DATAHUB_TELEMETRY_ENABLED: false
strategy:
matrix:
@ -69,7 +68,7 @@ jobs:
- name: pip freeze show list installed
if: always()
run: source metadata-ingestion-modules/airflow-plugin/venv/bin/activate && uv pip freeze
- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
if: ${{ always() && matrix.python-version == '3.10' && matrix.extra_pip_requirements == 'apache-airflow>=2.7.0' }}
with:
name: Test Results (Airflow Plugin ${{ matrix.python-version}})
@ -93,7 +92,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Upload
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: Event File
path: ${{ github.event_path }}

View File

@ -27,7 +27,6 @@ jobs:
dagster-plugin:
runs-on: ubuntu-latest
env:
SPARK_VERSION: 3.0.3
DATAHUB_TELEMETRY_ENABLED: false
strategy:
matrix:
@ -44,7 +43,8 @@ jobs:
with:
distribution: "zulu"
java-version: 17
- uses: actions/checkout@v4
- uses: gradle/actions/setup-gradle@v3
- uses: acryldata/sane-checkout-action@v3
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
@ -56,7 +56,7 @@ jobs:
- name: pip freeze show list installed
if: always()
run: source metadata-ingestion-modules/dagster-plugin/venv/bin/activate && uv pip freeze
- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
if: ${{ always() && matrix.python-version == '3.10' && matrix.extraPythonRequirement == 'dagster>=1.3.3' }}
with:
name: Test Results (dagster Plugin ${{ matrix.python-version}})
@ -79,7 +79,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Upload
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: Event File
path: ${{ github.event_path }}

View File

@ -27,7 +27,6 @@ jobs:
gx-plugin:
runs-on: ubuntu-latest
env:
SPARK_VERSION: 3.0.3
DATAHUB_TELEMETRY_ENABLED: false
strategy:
matrix:
@ -48,7 +47,8 @@ jobs:
with:
distribution: "zulu"
java-version: 17
- uses: actions/checkout@v4
- uses: gradle/actions/setup-gradle@v3
- uses: acryldata/sane-checkout-action@v3
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
@ -60,7 +60,7 @@ jobs:
- name: pip freeze show list installed
if: always()
run: source metadata-ingestion-modules/gx-plugin/venv/bin/activate && uv pip freeze
- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
if: ${{ always() && matrix.python-version == '3.11' && matrix.extraPythonRequirement == 'great-expectations~=0.17.0' }}
with:
name: Test Results (GX Plugin ${{ matrix.python-version}})
@ -83,7 +83,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Upload
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: Event File
path: ${{ github.event_path }}

View File

@ -28,7 +28,6 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 40
env:
SPARK_VERSION: 3.3.2
DATAHUB_TELEMETRY_ENABLED: false
# TODO: Enable this once the test is fixed.
# DATAHUB_LOOKML_GIT_TEST_SSH_KEY: ${{ secrets.DATAHUB_LOOKML_GIT_TEST_SSH_KEY }}
@ -84,9 +83,9 @@ jobs:
df -hl
docker image ls
docker system df
- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
with:
name: Test Results (metadata ingestion ${{ matrix.python-version }})
name: Test Results (metadata ingestion ${{ matrix.python-version }} ${{ matrix.command }})
path: |
**/build/reports/tests/test/**
**/build/test-results/test/**
@ -100,14 +99,14 @@ jobs:
directory: ./build/coverage-reports/
fail_ci_if_error: false
flags: pytest-${{ matrix.command }}
name: pytest-${{ matrix.command }}
name: pytest-${{ matrix.python-version }}-${{ matrix.command }}
verbose: true
event-file:
runs-on: ubuntu-latest
steps:
- name: Upload
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: Event File
path: ${{ github.event_path }}

View File

@ -27,25 +27,20 @@ jobs:
prefect-plugin:
runs-on: ubuntu-latest
env:
SPARK_VERSION: 3.0.3
DATAHUB_TELEMETRY_ENABLED: false
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10"]
include:
- python-version: "3.8"
- python-version: "3.9"
- python-version: "3.10"
fail-fast: false
steps:
- name: Set up JDK 17
uses: actions/setup-java@v3
uses: actions/setup-java@v4
with:
distribution: "zulu"
java-version: 17
- uses: gradle/actions/setup-gradle@v3
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
- uses: acryldata/sane-checkout-action@v3
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: "pip"
@ -56,7 +51,7 @@ jobs:
- name: pip freeze show list installed
if: always()
run: source metadata-ingestion-modules/prefect-plugin/venv/bin/activate && uv pip freeze
- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
if: ${{ always() && matrix.python-version == '3.10'}}
with:
name: Test Results (Prefect Plugin ${{ matrix.python-version}})
@ -72,7 +67,7 @@ jobs:
token: ${{ secrets.CODECOV_TOKEN }}
directory: ./build/coverage-reports/
fail_ci_if_error: false
flags: prefect,prefect-${{ matrix.extra_pip_extras }}
flags: prefect,prefect-${{ matrix.python-version }}
name: pytest-prefect-${{ matrix.python-version }}
verbose: true
@ -80,7 +75,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Upload
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: Event File
path: ${{ github.event_path }}

View File

@ -0,0 +1,64 @@
# Builds wheels for the Python packages and publishes them, together with a
# small index page, through the Cloudflare Pages action.
name: Python Build
on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/python-build-pages.yml'
      - 'metadata-ingestion/**'
      - 'metadata-ingestion-modules/**'
      - 'metadata-models/**'
  pull_request:
    branches:
      - '**'
    paths:
      - '.github/workflows/python-build-pages.yml'
      - 'metadata-ingestion/**'
      - 'metadata-ingestion-modules/**'
      - 'metadata-models/**'
# One run per PR (or per ref for pushes); a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  deploy-pages:
    runs-on: ubuntu-latest
    # Skip the whole job unless the Pages project name variable is configured.
    if: ${{ vars.CLOUDFLARE_WHEELS_PROJECT_NAME != '' }}
    name: Python Wheels
    permissions:
      contents: read
      pull-requests: read
      deployments: write
    steps:
      - name: Set up JDK 17
        uses: actions/setup-java@v4
        with:
          distribution: 'zulu'
          java-version: 17
      - uses: gradle/actions/setup-gradle@v3
      - uses: acryldata/sane-checkout-action@v3
      - uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: '3.10'
          cache: 'pip'
      - uses: actions/cache@v4
        with:
          path: |
            ~/.cache/uv
          key: ${{ runner.os }}-uv-${{ hashFiles('**/requirements.txt') }}
      - name: Build Python wheel site
        run: |
          ./gradlew :python-build:buildSite
        env:
          # NOTE(review): presumably consumed by the `gh` CLI calls made while
          # building the site - confirm.
          GITHUB_TOKEN: ${{ github.token }}
      - name: Publish
        uses: cloudflare/pages-action@v1
        with:
          apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }}
          accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
          projectName: ${{ vars.CLOUDFLARE_WHEELS_PROJECT_NAME }}
          workingDirectory: python-build
          directory: site
          gitHubToken: ${{ github.token }}

View File

@ -83,11 +83,7 @@ task yarnInstall(type: YarnTask) {
task yarnGenerate(type: YarnTask, dependsOn: [yarnInstall,
generateGraphQLSchema, generateJsonSchema,
':metadata-ingestion:modelDocGen', ':metadata-ingestion:docGen',
':metadata-ingestion:buildWheel',
':metadata-ingestion-modules:airflow-plugin:buildWheel',
':metadata-ingestion-modules:dagster-plugin:buildWheel',
':metadata-ingestion-modules:prefect-plugin:buildWheel',
':metadata-ingestion-modules:gx-plugin:buildWheel',
':python-build:buildWheels',
]) {
inputs.files(projectMdFiles)
outputs.cacheIf { true }

View File

@ -573,26 +573,20 @@ function write_markdown_file(
function copy_python_wheels(): void {
// Copy the built wheel files to the static directory.
const wheel_dirs = [
"../metadata-ingestion/dist",
"../metadata-ingestion-modules/airflow-plugin/dist",
"../metadata-ingestion-modules/dagster-plugin/dist",
"../metadata-ingestion-modules/prefect-plugin/dist",
"../metadata-ingestion-modules/gx-plugin/dist",
];
// Everything is copied to the python-build directory first, so
// we just need to copy from there.
const wheel_dir = "../python-build/wheels";
const wheel_output_directory = path.join(STATIC_DIRECTORY, "wheels");
fs.mkdirSync(wheel_output_directory, { recursive: true });
for (const wheel_dir of wheel_dirs) {
const wheel_files = fs.readdirSync(wheel_dir);
for (const wheel_file of wheel_files) {
const src = path.join(wheel_dir, wheel_file);
const dest = path.join(wheel_output_directory, wheel_file);
const wheel_files = fs.readdirSync(wheel_dir);
for (const wheel_file of wheel_files) {
const src = path.join(wheel_dir, wheel_file);
const dest = path.join(wheel_output_directory, wheel_file);
// console.log(`Copying artifact ${src} to ${dest}...`);
fs.copyFileSync(src, dest);
}
// console.log(`Copying artifact ${src} to ${dest}...`);
fs.copyFileSync(src, dest);
}
}

View File

@ -23,8 +23,8 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) {
inputs.file file('setup.py')
outputs.file(sentinel_file)
commandLine 'bash', '-c',
"${python_executable} -m venv ${venv_name} && " +
"${venv_name}/bin/python -m pip install --upgrade pip uv wheel 'setuptools>=63.0.0' && " +
"${python_executable} -m venv ${venv_name} && set -x && " +
"${venv_name}/bin/python -m pip install --upgrade uv && " +
"touch ${sentinel_file}"
}

3
python-build/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
/wheels
/site

27
python-build/build.gradle Normal file
View File

@ -0,0 +1,27 @@
// Apply Gradle's built-in `base` plugin.
plugins {
id 'base'
}
// Extra project properties shared by the tasks below.
ext {
// Interpreter name/path passed to every Exec task in this build file.
python_executable = 'python3'
}
// Fails the build early when the interpreter behind `python_executable` is
// older than Python 3.8.
task checkPythonVersion(type: Exec) {
  // The `assert` keyword is essential: without it the one-liner merely builds a
  // (bool, str) tuple and exits 0, so the check could never fail.
  commandLine python_executable, '-c',
    'import sys; assert sys.version_info >= (3, 8), f"Python version {sys.version_info} is too old"'
}
// Builds every Python package's wheel, then runs copy_wheels.py.
task buildWheels(type: Exec) {
  dependsOn checkPythonVersion
  // Keep this list in sync with `wheel_dirs` in copy_wheels.py.
  dependsOn ':metadata-ingestion:buildWheel',
    ':metadata-ingestion-modules:airflow-plugin:buildWheel',
    ':metadata-ingestion-modules:dagster-plugin:buildWheel',
    ':metadata-ingestion-modules:prefect-plugin:buildWheel',
    ':metadata-ingestion-modules:gx-plugin:buildWheel'

  commandLine python_executable, 'copy_wheels.py'
}
// Runs build_site.py once the wheels have been built and collected.
task buildSite(type: Exec) {
  dependsOn buildWheels

  commandLine python_executable, 'build_site.py'
}

150
python-build/build_site.py Normal file
View File

@ -0,0 +1,150 @@
import contextlib
import html
import json
import os
import pathlib
import shutil
import subprocess
from datetime import datetime, timezone
# Input and output locations, all resolved relative to this script's directory.
PYTHON_BUILD_DIR = pathlib.Path(__file__).parent
WHEEL_DIR = PYTHON_BUILD_DIR / "wheels"
SITE_OUTPUT_DIR = PYTHON_BUILD_DIR / "site"
SITE_ARTIFACT_WHEEL_DIR = SITE_OUTPUT_DIR / "artifacts" / "wheels"

# Always start from an empty site directory so stale artifacts never leak in.
shutil.rmtree(SITE_OUTPUT_DIR, ignore_errors=True)
SITE_OUTPUT_DIR.mkdir(parents=True)
SITE_ARTIFACT_WHEEL_DIR.mkdir(parents=True)

# Stage every collected build artifact under the site's download directory.
for built_artifact in WHEEL_DIR.glob("*"):
    shutil.copy(built_artifact, SITE_ARTIFACT_WHEEL_DIR)
def package_name(wheel_file: pathlib.Path) -> str:
    """Return the package name encoded in a wheel's file name.

    The name is the text before the first ``-`` of the file name, with every
    underscore replaced by a dash.
    """
    distribution, _, _ = wheel_file.name.partition("-")
    return distribution.replace("_", "-")
# Collect best-effort metadata about this build. Each lookup below is wrapped
# in `contextlib.suppress(Exception)`: if it fails, its key is simply left out.
ts = datetime.now(timezone.utc).isoformat()
context_info: dict = {"timestamp": ts}

# Branch: use GITHUB_HEAD_REF when it is set and non-empty, else ask git.
with contextlib.suppress(Exception):
    branch_info = os.getenv("GITHUB_HEAD_REF") or subprocess.check_output(
        ["git", "branch", "--show-current"], text=True
    )
    context_info["branch"] = branch_info.strip()

# Commit: first output line is the hash, everything after it is the message.
with contextlib.suppress(Exception):
    git_log_output = subprocess.check_output(
        ["git", "log", "-1", "--pretty=%H%n%B"], text=True
    )
    commit_hash, commit_msg = git_log_output.strip().split("\n", 1)
    context_info["commit"] = {
        "hash": commit_hash,
        "message": commit_msg.strip(),
    }


def _gh_pr_view(*selector: str):
    """Run `gh pr view [selector] --json title,number,url` and parse its JSON output."""
    return json.loads(
        subprocess.check_output(
            ["gh", "pr", "view", *selector, "--json", "title,number,url"],
            text=True,
        )
    )


# Pull request: resolved from GITHUB_REF when set, otherwise left to `gh`.
with contextlib.suppress(Exception):
    pr_info = "unknown"
    github_ref = os.getenv("GITHUB_REF")
    if github_ref:
        # e.g. GITHUB_REF=refs/pull/12157/merge
        ref_parts = github_ref.split("/")
        if ref_parts[1] == "pull":
            pr_info = _gh_pr_view(ref_parts[2])
    else:
        # The `gh` CLI might be able to figure it out.
        pr_info = _gh_pr_view()
    context_info["pr"] = pr_info
# Render the landing page.
#
# The build context holds text that is not under our control (branch name,
# commit message, PR title), so it is HTML-escaped before being embedded;
# otherwise a `<` or `&` in any of those would corrupt the page or inject markup.
context_json_html = html.escape(json.dumps(context_info, indent=2), quote=False)

# One table row per wheel, precomputed so the page template below stays flat.
wheel_rows_html = "\n".join(
    f"""
          <tr>
            <td><code>{html.escape(package_name(wheel_file))}</code></td>
            <td>{wheel_file.stat().st_size / 1024 / 1024:.3f} MB</td>
            <td><code>uv pip install '{html.escape(package_name(wheel_file))} @ <span class="base-url">&lt;base-url&gt;</span>/artifacts/wheels/{html.escape(wheel_file.name)}'</code></td>
          </tr>
"""
    for wheel_file in sorted(WHEEL_DIR.glob("*.whl"))
)

# Literal braces in the inline JavaScript are doubled because this is an f-string.
(SITE_OUTPUT_DIR / "index.html").write_text(
    f"""
<html>
  <head>
    <title>DataHub Python Builds</title>
    <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0">
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/semantic-ui@2.5.0/dist/semantic.min.css" integrity="sha256-cDGQ39yChhpN5vzgHbjIdGEtQ5kXE9tttCsI7VR9TuY=" crossorigin="anonymous">
    <script src="https://cdn.jsdelivr.net/npm/semantic-ui@2.5.0/dist/semantic.min.js" integrity="sha256-fN8vcX2ULyTDspVTHEteK8hd3rQAb5thNiwakjAW75Q=" crossorigin="anonymous"></script>
    <!-- CDN example (jsDelivr) -->
    <script src="https://cdn.jsdelivr.net/npm/dayjs@1.11.13/dayjs.min.js" integrity="sha256-nP25Pzivzy0Har7NZtMr/TODzfGWdlTrwmomYF2vQXM=" crossorigin="anonymous"></script>
    <script src="https://cdn.jsdelivr.net/npm/dayjs@1.11.13/plugin/relativeTime.js" integrity="sha256-muryXOPFkVJcJO1YFmhuKyXYmGDT2TYVxivG0MCgRzg=" crossorigin="anonymous"></script>
    <script>dayjs.extend(window.dayjs_plugin_relativeTime)</script>
  </head>
  <body>
    <div class="ui container">
      <h1 class="ui header" style="padding-top: 1.5em;">DataHub Python Builds</h1>
      <p>
        These prebuilt wheel files can be used to install our Python packages as of a specific commit.
      </p>
      <h2>Build context</h2>
      <p>
        Built <span id="build-timestamp">at {ts}</span>.
      </p>
      <pre id="context-info">{context_json_html}</pre>
      <h2>Usage</h2>
      <p>
        Current base URL: <span class="base-url">unknown</span>
      </p>
      <table class="ui celled table">
        <thead>
          <tr>
            <th>Package</th>
            <th>Size</th>
            <th>Install command</th>
          </tr>
        </thead>
        <tbody>
          {wheel_rows_html}
        </tbody>
      </table>
    </div>
  </body>
  <script>
    const baseUrl = window.location.href.split('/').slice(0, -1).join('/');
    document.querySelectorAll(".base-url").forEach(el => {{
      el.textContent = baseUrl;
    }});
    const buildTimestamp = document.getElementById("build-timestamp");
    const buildTimestampDate = dayjs('{ts}');
    buildTimestamp.textContent = buildTimestampDate.fromNow();
  </script>
</html>
"""
)

print("DataHub Python wheel site built in", SITE_OUTPUT_DIR)

View File

@ -0,0 +1,27 @@
import pathlib
import shutil
# All paths are resolved from this script's location: `ROOT_DIR` is the parent
# of the directory holding this file.
PYTHON_BUILD_DIR = pathlib.Path(__file__).parent
ROOT_DIR = PYTHON_BUILD_DIR.parent
WHEEL_OUTPUT_DIR = PYTHON_BUILD_DIR / "wheels"

# These should line up with the build.gradle file.
_DIST_DIRS = (
    "metadata-ingestion/dist",
    "metadata-ingestion-modules/airflow-plugin/dist",
    "metadata-ingestion-modules/dagster-plugin/dist",
    "metadata-ingestion-modules/prefect-plugin/dist",
    "metadata-ingestion-modules/gx-plugin/dist",
)
wheel_dirs = [ROOT_DIR / relative_dir for relative_dir in _DIST_DIRS]

# Start from an empty output directory on every run.
if WHEEL_OUTPUT_DIR.exists():
    shutil.rmtree(WHEEL_OUTPUT_DIR)
WHEEL_OUTPUT_DIR.mkdir(parents=True)

# Copy every entry found directly inside each dist directory.
for artifact in (
    entry for dist_dir in wheel_dirs for entry in dist_dir.glob("*")
):
    shutil.copy(artifact, WHEEL_OUTPUT_DIR)

print("Copied wheels to", WHEEL_OUTPUT_DIR)

View File

@ -64,6 +64,7 @@ include 'metadata-ingestion-modules:airflow-plugin'
include 'metadata-ingestion-modules:gx-plugin'
include 'metadata-ingestion-modules:dagster-plugin'
include 'metadata-ingestion-modules:prefect-plugin'
include 'python-build'
include 'smoke-test'
include 'metadata-auth:auth-api'
include 'metadata-service:schema-registry-api'