feat(ingest): use uv for python package installs (#9885)

This commit is contained in:
Harshal Sheth 2024-02-26 15:02:47 -08:00 committed by GitHub
parent a1f2216da7
commit 02f41b74b6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 245 additions and 237 deletions

View File

@ -1,7 +1,11 @@
**/node_modules/ **/node_modules/
*/build/ */build/
*/*/build/ */*/build/
*/venv/ **/venv/
**/.tox/
**/.mypy_cache/
**/.pytest_cache/
**/__pycache__/
out out
**/*.class **/*.class
# Have to copy gradle/wrapper/gradle-wrapper.jar, can't exclude ALL jars # Have to copy gradle/wrapper/gradle-wrapper.jar, can't exclude ALL jars

View File

@ -24,7 +24,7 @@ function get_tag_full {
} }
function get_python_docker_release_v { function get_python_docker_release_v {
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},0.0.0+docker.${SHORT_SHA},g" -e 's,refs/tags/v\(.*\),\1+docker,g' -e 's,refs/pull/\([0-9]*\).*,0.0.0+docker.pr\1,g') echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},1!0.0.0+docker.${SHORT_SHA},g" -e 's,refs/tags/v\(.*\),1!\1+docker,g' -e 's,refs/pull/\([0-9]*\).*,1!0.0.0+docker.pr\1,g')
} }
function get_unique_tag { function get_unique_tag {

View File

@ -38,11 +38,17 @@ jobs:
with: with:
python-version: "3.10" python-version: "3.10"
cache: "pip" cache: "pip"
- uses: actions/cache@v4
with:
path: |
~/.cache/uv
key: ${{ runner.os }}-uv-${{ hashFiles('**/requirements.txt') }}
- name: Set up JDK 17 - name: Set up JDK 17
uses: actions/setup-java@v3 uses: actions/setup-java@v3
with: with:
distribution: "zulu" distribution: "zulu"
java-version: 17 java-version: 17
- uses: gradle/gradle-build-action@v2
- name: Ensure packages are correct - name: Ensure packages are correct
run: | run: |
python ./.github/scripts/check_python_package.py python ./.github/scripts/check_python_package.py
@ -978,14 +984,14 @@ jobs:
if: failure() if: failure()
run: | run: |
docker ps -a docker ps -a
docker logs datahub-gms >& gms-${{ matrix.test_strategy }}.log || true docker logs datahub-datahub-gms-1 >& gms-${{ matrix.test_strategy }}.log || true
docker logs datahub-actions >& actions-${{ matrix.test_strategy }}.log || true docker logs datahub-datahub-actions-1 >& actions-${{ matrix.test_strategy }}.log || true
docker logs datahub-mae-consumer >& mae-${{ matrix.test_strategy }}.log || true docker logs datahub-datahub-mae-consumer-1 >& mae-${{ matrix.test_strategy }}.log || true
docker logs datahub-mce-consumer >& mce-${{ matrix.test_strategy }}.log || true docker logs datahub-datahub-mce-consumer-1 >& mce-${{ matrix.test_strategy }}.log || true
docker logs broker >& broker-${{ matrix.test_strategy }}.log || true docker logs datahub-broker-1 >& broker-${{ matrix.test_strategy }}.log || true
docker logs mysql >& mysql-${{ matrix.test_strategy }}.log || true docker logs datahub-mysql-1 >& mysql-${{ matrix.test_strategy }}.log || true
docker logs elasticsearch >& elasticsearch-${{ matrix.test_strategy }}.log || true docker logs datahub-elasticsearch-1 >& elasticsearch-${{ matrix.test_strategy }}.log || true
docker logs datahub-frontend-react >& frontend-${{ matrix.test_strategy }}.log || true docker logs datahub-datahub-frontend-react-1 >& frontend-${{ matrix.test_strategy }}.log || true
- name: Upload logs - name: Upload logs
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v3
if: failure() if: failure()

View File

@ -51,6 +51,15 @@ jobs:
java-version: 17 java-version: 17
- uses: gradle/gradle-build-action@v2 - uses: gradle/gradle-build-action@v2
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache: "pip"
- uses: actions/cache@v4
with:
path: |
~/.cache/uv
key: ${{ runner.os }}-uv-${{ hashFiles('**/requirements.txt') }}
- uses: actions/setup-python@v4 - uses: actions/setup-python@v4
with: with:
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}

View File

@ -50,7 +50,7 @@ RUN apt-get update && apt-get install -y -qq \
ldap-utils \ ldap-utils \
unixodbc \ unixodbc \
libodbc2 \ libodbc2 \
&& python -m pip install --no-cache --upgrade pip wheel setuptools \ && python -m pip install --no-cache --upgrade pip uv>=0.1.10 wheel setuptools \
&& rm -rf /var/lib/apt/lists/* /var/cache/apk/* && rm -rf /var/lib/apt/lists/* /var/cache/apk/*
# compiled against newer golang for security fixes # compiled against newer golang for security fixes
@ -59,16 +59,22 @@ COPY --from=dockerize-binary /go/bin/dockerize /usr/local/bin
COPY ./docker/datahub-ingestion-base/base-requirements.txt requirements.txt COPY ./docker/datahub-ingestion-base/base-requirements.txt requirements.txt
COPY ./docker/datahub-ingestion-base/entrypoint.sh /entrypoint.sh COPY ./docker/datahub-ingestion-base/entrypoint.sh /entrypoint.sh
RUN pip install --no-cache -r requirements.txt && \ RUN addgroup --gid 1000 datahub && \
pip uninstall -y acryl-datahub && \ adduser --disabled-password --uid 1000 --gid 1000 --home /datahub-ingestion datahub && \
chmod +x /entrypoint.sh && \ chmod +x /entrypoint.sh
addgroup --gid 1000 datahub && \
adduser --disabled-password --uid 1000 --gid 1000 --home /datahub-ingestion datahub USER datahub
ENV VIRTUAL_ENV=/datahub-ingestion/.venv
ENV PATH="${VIRTUAL_ENV}/bin:$PATH"
RUN python3 -m venv $VIRTUAL_ENV && \
uv pip install --no-cache -r requirements.txt && \
pip uninstall -y acryl-datahub
ENTRYPOINT [ "/entrypoint.sh" ] ENTRYPOINT [ "/entrypoint.sh" ]
FROM ${BASE_IMAGE} as full-install FROM ${BASE_IMAGE} as full-install
USER 0
RUN apt-get update && apt-get install -y -qq \ RUN apt-get update && apt-get install -y -qq \
default-jre-headless \ default-jre-headless \
&& rm -rf /var/lib/apt/lists/* /var/cache/apk/* && rm -rf /var/lib/apt/lists/* /var/cache/apk/*
@ -91,10 +97,11 @@ RUN if [ $(arch) = "x86_64" ]; then \
ldconfig; \ ldconfig; \
fi; fi;
USER datahub
FROM ${BASE_IMAGE} as slim-install FROM ${BASE_IMAGE} as slim-install
# Do nothing else on top of base # Do nothing else on top of base
FROM ${APP_ENV}-install FROM ${APP_ENV}-install
USER datahub
ENV PATH="/datahub-ingestion/.local/bin:$PATH" ENV PATH="/datahub-ingestion/.local/bin:$PATH"

View File

@ -3,22 +3,20 @@ acryl-datahub-classify==0.0.9
acryl-PyHive==0.6.16 acryl-PyHive==0.6.16
acryl-sqlglot==20.4.1.dev14 acryl-sqlglot==20.4.1.dev14
aenum==3.1.15 aenum==3.1.15
aiohttp==3.9.1 aiohttp==3.9.3
aiosignal==1.3.1 aiosignal==1.3.1
alembic==1.13.1 alembic==1.13.1
altair==4.2.0 altair==4.2.0
annotated-types==0.6.0 anyio==4.3.0
anyio==3.7.1
apache-airflow==2.7.3 apache-airflow==2.7.3
apache-airflow-providers-common-sql==1.9.0 apache-airflow-providers-common-sql==1.11.0
apache-airflow-providers-ftp==3.7.0 apache-airflow-providers-ftp==3.7.0
apache-airflow-providers-http==4.8.0 apache-airflow-providers-http==4.9.1
apache-airflow-providers-imap==3.5.0 apache-airflow-providers-imap==3.5.0
apache-airflow-providers-sqlite==3.6.0 apache-airflow-providers-sqlite==3.7.1
apispec==6.3.1 apispec==6.4.0
appdirs==1.4.4 appnope==0.1.4
appnope==0.1.3 argcomplete==3.2.2
argcomplete==3.2.1
argon2-cffi==23.1.0 argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0 argon2-cffi-bindings==21.2.0
asgiref==3.7.2 asgiref==3.7.2
@ -26,25 +24,24 @@ asn1crypto==1.5.1
asttokens==2.4.1 asttokens==2.4.1
async-timeout==4.0.3 async-timeout==4.0.3
asynch==0.2.3 asynch==0.2.3
attrs==23.1.0 attrs==23.2.0
avro==1.11.3 avro==1.11.3
avro-gen3==0.7.11 avro-gen3==0.7.11
Babel==2.14.0 Babel==2.14.0
backoff==2.2.1 backoff==2.2.1
beautifulsoup4==4.12.2 beautifulsoup4==4.12.3
bleach==6.1.0 bleach==6.1.0
blinker==1.7.0 blinker==1.7.0
blis==0.7.11 blis==0.7.11
boto3==1.34.8 boto3==1.34.49
botocore==1.34.8 botocore==1.34.49
bowler==0.9.0
bracex==2.4 bracex==2.4
cached-property==1.5.2 cached-property==1.5.2
cachelib==0.9.0 cachelib==0.9.0
cachetools==5.3.2 cachetools==5.3.2
catalogue==2.0.10 catalogue==2.0.10
cattrs==23.2.3 cattrs==23.2.3
certifi==2023.11.17 certifi==2024.2.2
cffi==1.16.0 cffi==1.16.0
chardet==5.2.0 chardet==5.2.0
charset-normalizer==3.3.2 charset-normalizer==3.3.2
@ -53,33 +50,31 @@ click==8.1.7
click-default-group==1.2.4 click-default-group==1.2.4
click-spinner==0.1.10 click-spinner==0.1.10
clickclick==20.10.2 clickclick==20.10.2
clickhouse-driver==0.2.6 clickhouse-driver==0.2.7
clickhouse-sqlalchemy==0.2.4 clickhouse-sqlalchemy==0.2.4
cloudpickle==3.0.0 cloudpickle==3.0.0
colorama==0.4.6 colorama==0.4.6
colorlog==4.8.0 colorlog==4.8.0
comm==0.2.0 comm==0.2.1
confection==0.1.4 confection==0.1.4
ConfigUpdater==3.2 ConfigUpdater==3.2
confluent-kafka==2.3.0 confluent-kafka==2.3.0
connexion==2.14.2 connexion==2.14.1
cron-descriptor==1.4.0 cron-descriptor==1.4.3
croniter==2.0.1 croniter==2.0.1
cryptography==41.0.7 cryptography==42.0.4
cx-Oracle==8.3.0 cx_Oracle==8.3.0
cymem==2.0.8 cymem==2.0.8
dask==2023.12.1
databricks-cli==0.18.0
databricks-dbapi==0.6.0 databricks-dbapi==0.6.0
databricks-sdk==0.15.0 databricks-sdk==0.20.0
databricks-sql-connector==2.9.3 databricks-sql-connector==2.9.4
debugpy==1.8.0 debugpy==1.8.1
decorator==5.1.1 decorator==5.1.1
defusedxml==0.7.1 defusedxml==0.7.1
deltalake==0.14.0 deltalake==0.15.3
Deprecated==1.2.14 Deprecated==1.2.14
dill==0.3.7 dill==0.3.8
dnspython==2.4.2 dnspython==2.6.1
docker==7.0.0 docker==7.0.0
docutils==0.20.1 docutils==0.20.1
ecdsa==0.18.0 ecdsa==0.18.0
@ -90,25 +85,22 @@ et-xmlfile==1.1.0
exceptiongroup==1.2.0 exceptiongroup==1.2.0
executing==2.0.1 executing==2.0.1
expandvars==0.12.0 expandvars==0.12.0
fastapi==0.109.1 fastavro==1.9.4
fastavro==1.9.2 fastjsonschema==2.19.1
fastjsonschema==2.19.0
feast==0.31.1
filelock==3.13.1 filelock==3.13.1
fissix==21.11.13
Flask==2.2.5 Flask==2.2.5
flatdict==4.0.1 flatdict==4.0.1
frozenlist==1.4.1 frozenlist==1.4.1
fsspec==2023.12.2 fsspec==2023.12.2
future==0.18.3 future==1.0.0
GeoAlchemy2==0.14.3 GeoAlchemy2==0.14.4
gitdb==4.0.11 gitdb==4.0.11
GitPython==3.1.41 GitPython==3.1.42
google-api-core==2.15.0 google-api-core==2.17.1
google-auth==2.25.2 google-auth==2.28.1
google-cloud-appengine-logging==1.4.0 google-cloud-appengine-logging==1.4.2
google-cloud-audit-log==0.2.5 google-cloud-audit-log==0.2.5
google-cloud-bigquery==3.14.1 google-cloud-bigquery==3.17.2
google-cloud-core==2.4.1 google-cloud-core==2.4.1
google-cloud-datacatalog-lineage==0.2.2 google-cloud-datacatalog-lineage==0.2.2
google-cloud-logging==3.5.0 google-cloud-logging==3.5.0
@ -116,98 +108,92 @@ google-crc32c==1.5.0
google-re2==1.1 google-re2==1.1
google-resumable-media==2.7.0 google-resumable-media==2.7.0
googleapis-common-protos==1.62.0 googleapis-common-protos==1.62.0
gql==3.4.1 gql==3.5.0
graphql-core==3.2.3 graphql-core==3.2.3
graphviz==0.20.1 graphviz==0.20.1
great-expectations==0.15.50 great-expectations==0.15.50
greenlet==3.0.3 greenlet==3.0.3
grpc-google-iam-v1==0.13.0 grpc-google-iam-v1==0.13.0
grpcio==1.60.0 grpcio==1.62.0
grpcio-reflection==1.60.0 grpcio-status==1.62.0
grpcio-status==1.60.0 grpcio-tools==1.62.0
grpcio-tools==1.60.0
gssapi==1.8.3 gssapi==1.8.3
gunicorn==21.2.0 gunicorn==21.2.0
h11==0.14.0 h11==0.14.0
hdbcli==2.19.20 hdbcli==2.19.21
httpcore==1.0.2 httpcore==1.0.4
httptools==0.6.1 httpx==0.27.0
httpx==0.26.0
humanfriendly==10.0 humanfriendly==10.0
idna==3.6 idna==3.6
ijson==3.2.3 ijson==3.2.3
importlib-metadata==6.11.0 importlib-metadata==7.0.1
importlib-resources==6.1.1 importlib-resources==6.1.1
inflection==0.5.1 inflection==0.5.1
ipaddress==1.0.23 ipaddress==1.0.23
ipykernel==6.17.1 ipykernel==6.17.1
ipython==8.19.0 ipython==8.21.0
ipython-genutils==0.2.0 ipython-genutils==0.2.0
ipywidgets==8.1.1 ipywidgets==8.1.2
iso3166==2.1.1 iso3166==2.1.1
isodate==0.6.1 isodate==0.6.1
itsdangerous==2.1.2 itsdangerous==2.1.2
jedi==0.19.1 jedi==0.19.1
Jinja2==3.1.2 Jinja2==3.1.3
jmespath==1.0.1 jmespath==1.0.1
JPype1==1.5.0 JPype1==1.5.0
jsonlines==4.0.0 jsonlines==4.0.0
jsonpatch==1.33 jsonpatch==1.33
jsonpointer==2.4 jsonpointer==2.4
jsonref==1.1.0 jsonref==1.1.0
jsonschema==4.20.0 jsonschema==4.21.1
jsonschema-specifications==2023.12.1 jsonschema-specifications==2023.12.1
jupyter-server==1.24.0 jupyter-server==1.16.0
jupyter_client==7.4.9 jupyter_client==7.4.9
jupyter_core==4.12.0 jupyter_core==5.0.0
jupyterlab-widgets==3.0.9
jupyterlab_pygments==0.3.0 jupyterlab_pygments==0.3.0
jupyterlab_widgets==3.0.10
langcodes==3.3.0 langcodes==3.3.0
lark==1.1.4 lark==1.1.4
lazy-object-proxy==1.10.0 lazy-object-proxy==1.10.0
leb128==1.0.5 leb128==1.0.5
limits==3.7.0 limits==3.9.0
linear-tsv==1.1.0 linear-tsv==1.1.0
linkify-it-py==2.0.2 linkify-it-py==2.0.3
lkml==1.3.3 lkml==1.3.4
locket==1.0.0
lockfile==0.12.2 lockfile==0.12.2
looker-sdk==23.0.0 looker-sdk==23.0.0
lxml==4.9.4 lxml==5.1.0
lz4==4.3.2 lz4==4.3.3
makefun==1.15.2 makefun==1.15.2
Mako==1.3.0 Mako==1.3.2
Markdown==3.5.1 Markdown==3.5.2
markdown-it-py==3.0.0 markdown-it-py==3.0.0
MarkupSafe==2.1.3 MarkupSafe==2.1.5
marshmallow==3.20.1 marshmallow==3.20.2
marshmallow-oneofschema==3.0.1 marshmallow-oneofschema==3.1.1
marshmallow-sqlalchemy==0.26.1 marshmallow-sqlalchemy==0.26.1
matplotlib-inline==0.1.6 matplotlib-inline==0.1.6
mdit-py-plugins==0.4.0 mdit-py-plugins==0.4.0
mdurl==0.1.2 mdurl==0.1.2
mistune==3.0.2 mistune==3.0.2
mixpanel==4.10.0 mixpanel==4.10.0
mlflow-skinny==2.9.2 mlflow-skinny==2.10.2
mmh3==4.0.1
mmhash3==3.0.1 mmhash3==3.0.1
more-itertools==10.1.0 more-itertools==10.2.0
moreorless==0.4.0 moto==4.2.14
moto==4.2.12
msal==1.22.0 msal==1.22.0
multidict==6.0.4 multidict==6.0.5
murmurhash==1.0.10 murmurhash==1.0.10
mypy==1.8.0
mypy-extensions==1.0.0 mypy-extensions==1.0.0
nbclassic==1.0.0 nbclassic==1.0.0
nbclient==0.6.3 nbclient==0.6.3
nbconvert==7.13.1 nbconvert==7.16.1
nbformat==5.9.1 nbformat==5.9.2
nest-asyncio==1.5.8 nest-asyncio==1.6.0
networkx==3.2.1 networkx==3.2.1
notebook==6.5.6 notebook==6.5.6
notebook_shim==0.2.3 notebook_shim==0.2.4
numpy==1.26.2 numpy==1.26.4
oauthlib==3.2.2 oauthlib==3.2.2
okta==1.7.0 okta==1.7.0
openlineage-airflow==1.2.0 openlineage-airflow==1.2.0
@ -215,133 +201,127 @@ openlineage-integration-common==1.2.0
openlineage-python==1.2.0 openlineage-python==1.2.0
openlineage_sql==1.2.0 openlineage_sql==1.2.0
openpyxl==3.1.2 openpyxl==3.1.2
opentelemetry-api==1.22.0 opentelemetry-api==1.16.0
opentelemetry-exporter-otlp==1.22.0 opentelemetry-exporter-otlp==1.16.0
opentelemetry-exporter-otlp-proto-common==1.22.0 opentelemetry-exporter-otlp-proto-grpc==1.16.0
opentelemetry-exporter-otlp-proto-grpc==1.22.0 opentelemetry-exporter-otlp-proto-http==1.16.0
opentelemetry-exporter-otlp-proto-http==1.22.0 opentelemetry-proto==1.16.0
opentelemetry-proto==1.22.0 opentelemetry-sdk==1.16.0
opentelemetry-sdk==1.22.0 opentelemetry-semantic-conventions==0.37b0
opentelemetry-semantic-conventions==0.43b0
ordered-set==4.1.0 ordered-set==4.1.0
packaging==23.2 packaging==23.2
pandas==1.5.3 pandas==2.2.1
pandavro==1.5.2 pandocfilters==1.5.1
pandocfilters==1.5.0 parse==1.20.1
parse==1.20.0
parso==0.8.3 parso==0.8.3
partd==1.4.1 pathlib_abc==0.1.1
pathspec==0.12.1 pathspec==0.12.1
pathy==0.10.3 pathy==0.11.0
pendulum==2.1.2 pendulum==2.1.2
pexpect==4.9.0 pexpect==4.9.0
phonenumbers==8.13.0 phonenumbers==8.13.0
platformdirs==3.11.0 platformdirs==3.11.0
pluggy==1.3.0 pluggy==1.4.0
preshed==3.0.9 preshed==3.0.9
prison==0.2.1 prison==0.2.1
progressbar2==4.3.2 progressbar2==4.3.2
prometheus-client==0.19.0 prometheus_client==0.20.0
prompt-toolkit==3.0.43 prompt-toolkit==3.0.43
proto-plus==1.23.0 proto-plus==1.23.0
protobuf==4.25.1 protobuf==4.25.3
psutil==5.9.7 psutil==5.9.8
psycopg2-binary==2.9.9 psycopg2-binary==2.9.9
ptyprocess==0.7.0 ptyprocess==0.7.0
pure-eval==0.2.2 pure-eval==0.2.2
pure-sasl==0.6.2 pure-sasl==0.6.2
py-partiql-parser==0.5.0 py-partiql-parser==0.5.0
pyarrow==11.0.0 pyarrow==12.0.1
pyarrow-hotfix==0.6
pyasn1==0.5.1 pyasn1==0.5.1
pyasn1-modules==0.3.0 pyasn1-modules==0.3.0
pyathena==2.25.2 pyathena==2.25.2
pycountry==23.12.11 pycountry==23.12.11
pycparser==2.21 pycparser==2.21
pycryptodome==3.19.0 pycryptodome==3.20.0
pydantic==1.10.13 pydantic==1.10.14
pydantic_core==2.14.6 pydash==7.0.7
pydash==7.0.6
pydruid==0.6.6 pydruid==0.6.6
Pygments==2.17.2 Pygments==2.17.2
pyiceberg==0.4.0 pyiceberg==0.4.0
pymongo==4.6.1 pymongo==4.6.2
PyMySQL==1.1.0 PyMySQL==1.1.0
pyOpenSSL==23.3.0 pyOpenSSL==24.0.0
pyparsing==3.0.9 pyparsing==3.0.9
pyspnego==0.10.2 pyspnego==0.10.2
python-daemon==3.0.1 python-daemon==3.0.1
python-dateutil==2.8.2 python-dateutil==2.8.2
python-dotenv==1.0.0
python-jose==3.3.0 python-jose==3.3.0
python-ldap==3.4.4 python-ldap==3.4.4
python-nvd3==0.15.0 python-nvd3==0.15.0
python-slugify==8.0.1 python-slugify==8.0.4
python-stdnum==1.19 python-stdnum==1.19
python-tds==1.14.0 python-tds==1.15.0
python-utils==3.8.1 python-utils==3.8.2
python3-openid==3.2.0 pytz==2023.4
pytz==2023.3.post1
pytzdata==2020.1 pytzdata==2020.1
PyYAML==6.0.1 PyYAML==6.0.1
pyzmq==24.0.1 pyzmq==24.0.1
redash-toolbelt==0.1.9 redash-toolbelt==0.1.9
redshift-connector==2.0.918 redshift-connector==2.1.0
referencing==0.32.0 referencing==0.33.0
regex==2023.12.25 regex==2023.12.25
requests==2.31.0 requests==2.31.0
requests-file==1.5.1 requests-file==2.0.0
requests-gssapi==1.2.3 requests-gssapi==1.3.0
requests-ntlm==1.2.0 requests-ntlm==1.2.0
requests-toolbelt==0.10.1 requests-toolbelt==1.0.0
responses==0.24.1 responses==0.25.0
rfc3339-validator==0.1.4 rfc3339-validator==0.1.4
rfc3986==2.0.0 rfc3986==2.0.0
rich==13.7.0 rich==13.7.0
rich-argparse==1.4.0 rich-argparse==1.4.0
rpds-py==0.15.2 rpds-py==0.18.0
rsa==4.9 rsa==4.9
ruamel.yaml==0.17.17 ruamel.yaml==0.17.17
ruamel.yaml.clib==0.2.8
s3transfer==0.10.0 s3transfer==0.10.0
schwifty==2023.11.2 schwifty==2024.1.1.post0
scipy==1.11.4 scipy==1.12.0
scramp==1.4.4 scramp==1.4.4
Send2Trash==1.8.2 Send2Trash==1.8.2
sentry-sdk==1.39.1 sentry-sdk==1.40.5
setproctitle==1.3.3 setproctitle==1.3.3
simple-salesforce==1.12.5 simple-salesforce==1.12.5
six==1.16.0 six==1.16.0
slack-sdk==3.18.1
smart-open==6.4.0 smart-open==6.4.0
smmap==5.0.1 smmap==5.0.1
sniffio==1.3.0 sniffio==1.3.0
snowflake-connector-python==3.6.0 snowflake-connector-python==3.7.1
snowflake-sqlalchemy==1.5.1 snowflake-sqlalchemy==1.5.1
sortedcontainers==2.4.0 sortedcontainers==2.4.0
soupsieve==2.5 soupsieve==2.5
spacy==3.4.3 spacy==3.5.0
spacy-legacy==3.0.12 spacy-legacy==3.0.12
spacy-loggers==1.0.5 spacy-loggers==1.0.5
sql-metadata==2.2.2 sql-metadata==2.2.2
SQLAlchemy==1.4.44 SQLAlchemy==1.4.44
sqlalchemy-bigquery==1.9.0 sqlalchemy-bigquery==1.9.0
sqlalchemy-hana==1.1.1 sqlalchemy-hana==1.3.0
SQLAlchemy-JSONField==1.0.2 SQLAlchemy-JSONField==1.0.2
sqlalchemy-pytds==0.3.5 sqlalchemy-pytds==0.3.5
sqlalchemy-redshift==0.8.14 sqlalchemy-redshift==0.8.14
SQLAlchemy-Utils==0.41.1 SQLAlchemy-Utils==0.41.1
sqlalchemy2-stubs==0.0.2a37
sqllineage==1.3.8 sqllineage==1.3.8
sqlparse==0.4.4 sqlparse==0.4.4
srsly==2.4.8 srsly==2.4.8
stack-data==0.6.3 stack-data==0.6.3
starlette==0.32.0.post1
strictyaml==1.7.3 strictyaml==1.7.3
tableauserverclient==0.25 tableauserverclient==0.25
tableschema==1.20.2 tableschema==1.20.2
tabulate==0.9.0 tabulate==0.9.0
tabulator==1.53.5 tabulator==1.53.5
tenacity==8.2.3 tenacity==8.2.3
teradatasql==20.0.0.2 teradatasql==20.0.0.7
teradatasqlalchemy==17.20.0.0 teradatasqlalchemy==17.20.0.0
termcolor==2.4.0 termcolor==2.4.0
terminado==0.18.0 terminado==0.18.0
@ -351,38 +331,31 @@ thrift==0.16.0
thrift-sasl==0.4.3 thrift-sasl==0.4.3
tinycss2==1.2.1 tinycss2==1.2.1
toml==0.10.2 toml==0.10.2
tomli==2.0.1
tomlkit==0.12.3 tomlkit==0.12.3
toolz==0.12.0 toolz==0.12.1
tornado==6.4 tornado==6.4
tqdm==4.66.1 tqdm==4.66.2
traitlets==5.2.1.post0 traitlets==5.2.1.post0
trino==0.327.0 trino==0.328.0
typeguard==2.13.3
typer==0.7.0 typer==0.7.0
typing-inspect==0.9.0 typing-inspect==0.9.0
typing_extensions==4.9.0 typing_extensions==4.9.0
tzdata==2024.1
tzlocal==5.2 tzlocal==5.2
uc-micro-py==1.0.2 uc-micro-py==1.0.3
ujson==5.9.0 ujson==5.9.0
unicodecsv==0.14.1 unicodecsv==0.14.1
universal-pathlib==0.1.4
urllib3==1.26.18 urllib3==1.26.18
uvicorn==0.25.0
uvloop==0.19.0
vertica-python==1.3.8 vertica-python==1.3.8
vertica-sqlalchemy-dialect==0.0.8.1 vertica-sqlalchemy-dialect==0.0.8.1
vininfo==1.7.0 vininfo==1.8.0
volatile==2.1.0 wasabi==1.1.2
wasabi==0.10.1 wcmatch==8.5.1
watchfiles==0.21.0 wcwidth==0.2.13
wcmatch==8.5
wcwidth==0.2.12
webencodings==0.5.1 webencodings==0.5.1
websocket-client==1.7.0 websocket-client==1.7.0
websockets==12.0 Werkzeug==2.3.8
Werkzeug==2.2.3 widgetsnbextension==4.0.10
widgetsnbextension==4.0.9
wrapt==1.16.0 wrapt==1.16.0
WTForms==3.0.1 WTForms==3.0.1
xlrd==2.0.1 xlrd==2.0.1

View File

@ -1,10 +1,10 @@
#!/usr/bin/bash #!/usr/bin/bash
if [ ! -z "$ACTIONS_EXTRA_PACKAGES" ]; then if [ -n "$ACTIONS_EXTRA_PACKAGES" ]; then
pip install --user $ACTIONS_EXTRA_PACKAGES uv pip install $ACTIONS_EXTRA_PACKAGES
fi fi
if [[ ! -z "$ACTIONS_CONFIG" && ! -z "$ACTIONS_EXTRA_PACKAGES" ]]; then if [[ -n "$ACTIONS_CONFIG" && -n "$ACTIONS_EXTRA_PACKAGES" ]]; then
mkdir -p /tmp/datahub/logs mkdir -p /tmp/datahub/logs
curl -q "$ACTIONS_CONFIG" -o config.yaml curl -q "$ACTIONS_CONFIG" -o config.yaml
exec dockerize -wait ${DATAHUB_GMS_PROTOCOL:-http}://$DATAHUB_GMS_HOST:$DATAHUB_GMS_PORT/health -timeout 240s \ exec dockerize -wait ${DATAHUB_GMS_PROTOCOL:-http}://$DATAHUB_GMS_HOST:$DATAHUB_GMS_PORT/health -timeout 240s \

View File

@ -13,14 +13,12 @@ VENV_DIR=$(mktemp -d)
python -c "import sys; assert sys.version_info >= (3, 9), 'Python 3.9 or higher is required.'" python -c "import sys; assert sys.version_info >= (3, 9), 'Python 3.9 or higher is required.'"
python -m venv $VENV_DIR python -m venv $VENV_DIR
source $VENV_DIR/bin/activate source $VENV_DIR/bin/activate
pip install --upgrade pip setuptools wheel pip install --upgrade pip uv setuptools wheel
echo "Using virtualenv at $VENV_DIR" echo "Using virtualenv at $VENV_DIR"
# Install stuff. # Install stuff.
pushd $DATAHUB_DIR/metadata-ingestion pushd $DATAHUB_DIR/metadata-ingestion
pip install -e . uv pip install -e '.[all]' -e '../metadata-ingestion-modules/airflow-plugin/[plugin-v2]'
pip install -e '../metadata-ingestion-modules/airflow-plugin/[plugin-v2]'
pip install -e '.[all]'
popd popd
# Generate the requirements file. # Generate the requirements file.
@ -31,6 +29,7 @@ popd
echo "# Generated requirements file. Run ./$SCRIPT_NAME to regenerate." > base-requirements.txt echo "# Generated requirements file. Run ./$SCRIPT_NAME to regenerate." > base-requirements.txt
pip freeze \ pip freeze \
| grep -v -E "^-e" \ | grep -v -E "^-e" \
| grep -v -E "^uv==" \
| grep -v "Flask-" \ | grep -v "Flask-" \
| grep -v -E "(py4j|PyJWT)==" \ | grep -v -E "(py4j|PyJWT)==" \
| grep -v -E "(pyspark|pydeequ)==" \ | grep -v -E "(pyspark|pydeequ)==" \

View File

@ -6,26 +6,23 @@ ARG PIP_MIRROR_URL=null
ARG DEBIAN_REPO_URL=https://deb.debian.org/debian ARG DEBIAN_REPO_URL=https://deb.debian.org/debian
FROM $BASE_IMAGE:$DOCKER_VERSION as base FROM $BASE_IMAGE:$DOCKER_VERSION as base
USER 0 USER datahub
COPY ./metadata-ingestion /datahub-ingestion COPY --chown=datahub ./metadata-ingestion /datahub-ingestion
COPY ./metadata-ingestion-modules/airflow-plugin /datahub-ingestion/airflow-plugin COPY --chown=datahub ./metadata-ingestion-modules/airflow-plugin /datahub-ingestion/airflow-plugin
ARG RELEASE_VERSION ARG RELEASE_VERSION
WORKDIR /datahub-ingestion WORKDIR /datahub-ingestion
RUN sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/datahub/__init__.py && \ RUN sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/datahub/__init__.py && \
sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" airflow-plugin/src/datahub_airflow_plugin/__init__.py && \ sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" airflow-plugin/src/datahub_airflow_plugin/__init__.py && \
cat src/datahub/__init__.py && \ cat src/datahub/__init__.py | grep __version__ && \
chown -R datahub /datahub-ingestion cat airflow-plugin/src/datahub_airflow_plugin/__init__.py | grep __version__
USER datahub
ENV PATH="/datahub-ingestion/.local/bin:$PATH"
FROM base as slim-install FROM base as slim-install
ARG PIP_MIRROR_URL ARG PIP_MIRROR_URL
RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi
RUN pip install --no-cache --user ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]" RUN uv pip install --no-cache "acryl-datahub[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary] @ ."
FROM base as full-install-build FROM base as full-install-build
ARG PIP_MIRROR_URL ARG PIP_MIRROR_URL
@ -39,14 +36,13 @@ USER datahub
COPY ./docker/datahub-ingestion/pyspark_jars.sh . COPY ./docker/datahub-ingestion/pyspark_jars.sh .
RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi
RUN pip install --no-cache --user ".[base]" && \ RUN uv pip install --no-cache "acryl-datahub[base,all] @ ." "acryl-datahub-airflow-plugin[plugin-v2] @ ./airflow-plugin" && \
pip install --no-cache --user "./airflow-plugin[acryl-datahub-airflow-plugin]" && \ datahub --version
pip install --no-cache --user ".[all]"
RUN ./pyspark_jars.sh RUN ./pyspark_jars.sh
FROM base as full-install FROM base as full-install
COPY --from=full-install-build /datahub-ingestion/.local /datahub-ingestion/.local COPY --from=full-install-build ${VIRTUAL_ENV} ${VIRTUAL_ENV}
FROM base as dev-install FROM base as dev-install
# Dummy stage for development. Assumes code is built on your machine and mounted to this image. # Dummy stage for development. Assumes code is built on your machine and mounted to this image.
@ -55,4 +51,3 @@ FROM base as dev-install
FROM ${APP_ENV}-install as final FROM ${APP_ENV}-install as final
USER datahub USER datahub
ENV PATH="/datahub-ingestion/.local/bin:$PATH"

View File

@ -15,16 +15,15 @@ RUN sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEAS
chown -R datahub /datahub-ingestion chown -R datahub /datahub-ingestion
USER datahub USER datahub
ENV PATH="/datahub-ingestion/.local/bin:$PATH"
FROM base as slim-install FROM base as slim-install
ARG PIP_MIRROR_URL ARG PIP_MIRROR_URL
RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi
RUN pip install --no-cache --user ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]" RUN uv pip install --no-cache "acryl-datahub[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary] @ ." && \
datahub --version
FROM slim-install as final FROM slim-install as final
USER datahub USER datahub
ENV PATH="/datahub-ingestion/.local/bin:$PATH"

View File

@ -2,7 +2,7 @@
set -ex set -ex
PYSPARK_JARS="$(python -m site --user-site)/pyspark/jars" PYSPARK_JARS="$(python -c 'import site; print(site.getsitepackages()[0])')/pyspark/jars"
function replace_jar { function replace_jar {
JAR_PREFIX=$1 JAR_PREFIX=$1

View File

@ -18,7 +18,7 @@ if (extra_pip_extras != "") {
ext.extra_pip_extras = "," + extra_pip_extras ext.extra_pip_extras = "," + extra_pip_extras
} }
def pip_install_command = "${venv_name}/bin/pip install -e ../../metadata-ingestion" def pip_install_command = "VIRTUAL_ENV=${venv_name} ${venv_name}/bin/uv pip install -e ../../metadata-ingestion"
task checkPythonVersion(type: Exec) { task checkPythonVersion(type: Exec) {
commandLine python_executable, '-c', 'import sys; assert sys.version_info >= (3, 7)' commandLine python_executable, '-c', 'import sys; assert sys.version_info >= (3, 7)'
@ -29,8 +29,8 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) {
inputs.file file('setup.py') inputs.file file('setup.py')
outputs.file(sentinel_file) outputs.file(sentinel_file)
commandLine 'bash', '-c', commandLine 'bash', '-c',
"${python_executable} -m venv ${venv_name} &&" + "${python_executable} -m venv ${venv_name} && set -x && " +
"${venv_name}/bin/python -m pip install --upgrade pip wheel 'setuptools>=63.0.0' && " + "${venv_name}/bin/python -m pip install --upgrade pip uv wheel 'setuptools>=63.0.0' && " +
"touch ${sentinel_file}" "touch ${sentinel_file}"
} }
@ -41,8 +41,9 @@ task installPackage(type: Exec, dependsOn: [environmentSetup, ':metadata-ingesti
// Workaround for https://github.com/yaml/pyyaml/issues/601. // Workaround for https://github.com/yaml/pyyaml/issues/601.
// See https://github.com/yaml/pyyaml/issues/601#issuecomment-1638509577. // See https://github.com/yaml/pyyaml/issues/601#issuecomment-1638509577.
// and https://github.com/datahub-project/datahub/pull/8435. // and https://github.com/datahub-project/datahub/pull/8435.
commandLine 'bash', '-x', '-c', commandLine 'bash', '-c',
"${pip_install_command} install 'Cython<3.0' 'PyYAML<6' --no-build-isolation && " + "source ${venv_name}/bin/activate && set -x && " +
"pip install 'Cython<3.0' 'PyYAML<6' --no-build-isolation && " +
"${pip_install_command} -e .[ignore${extra_pip_extras}] ${extra_pip_requirements} &&" + "${pip_install_command} -e .[ignore${extra_pip_extras}] ${extra_pip_requirements} &&" +
"touch ${sentinel_file}" "touch ${sentinel_file}"
} }
@ -53,7 +54,8 @@ task installDev(type: Exec, dependsOn: [install]) {
def sentinel_file = "${venv_name}/.build_install_dev_sentinel" def sentinel_file = "${venv_name}/.build_install_dev_sentinel"
inputs.file file('setup.py') inputs.file file('setup.py')
outputs.file("${sentinel_file}") outputs.file("${sentinel_file}")
commandLine 'bash', '-x', '-c', commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"${pip_install_command} -e .[dev${extra_pip_extras}] ${extra_pip_requirements} && " + "${pip_install_command} -e .[dev${extra_pip_extras}] ${extra_pip_requirements} && " +
"touch ${sentinel_file}" "touch ${sentinel_file}"
} }
@ -72,8 +74,8 @@ task lint(type: Exec, dependsOn: installDev) {
"mypy --show-traceback --show-error-codes src/ tests/" "mypy --show-traceback --show-error-codes src/ tests/"
} }
task lintFix(type: Exec, dependsOn: installDev) { task lintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-x', '-c', commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && " + "source ${venv_name}/bin/activate && set -x && " +
"black src/ tests/ && " + "black src/ tests/ && " +
"isort src/ tests/ && " + "isort src/ tests/ && " +
"flake8 src/ tests/ && " + "flake8 src/ tests/ && " +
@ -85,30 +87,18 @@ task installDevTest(type: Exec, dependsOn: [installDev]) {
inputs.file file('setup.py') inputs.file file('setup.py')
outputs.dir("${venv_name}") outputs.dir("${venv_name}")
outputs.file("${sentinel_file}") outputs.file("${sentinel_file}")
commandLine 'bash', '-x', '-c', commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"${pip_install_command} -e .[dev,integration-tests${extra_pip_extras}] ${extra_pip_requirements} && " + "${pip_install_command} -e .[dev,integration-tests${extra_pip_extras}] ${extra_pip_requirements} && " +
"touch ${sentinel_file}" "touch ${sentinel_file}"
} }
def testFile = hasProperty('testFile') ? testFile : 'unknown'
task testSingle(dependsOn: [installDevTest]) {
doLast {
if (testFile != 'unknown') {
exec {
commandLine 'bash', '-x', '-c',
"source ${venv_name}/bin/activate && pytest ${testFile}"
}
} else {
throw new GradleException("No file provided. Use -PtestFile=<test_file>")
}
}
}
task testQuick(type: Exec, dependsOn: installDevTest) { task testQuick(type: Exec, dependsOn: installDevTest) {
inputs.files(project.fileTree(dir: "src/", include: "**/*.py")) inputs.files(project.fileTree(dir: "src/", include: "**/*.py"))
inputs.files(project.fileTree(dir: "tests/")) inputs.files(project.fileTree(dir: "tests/"))
commandLine 'bash', '-x', '-c', commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && pytest --cov-config=setup.cfg --cov-report xml:coverage_quick.xml -vv --continue-on-collection-errors --junit-xml=junit.quick.xml" "source ${venv_name}/bin/activate && set -x && " +
"pytest --cov-config=setup.cfg --cov-report xml:coverage_quick.xml -vv --continue-on-collection-errors --junit-xml=junit.quick.xml"
} }
@ -117,7 +107,9 @@ task cleanPythonCache(type: Exec) {
"find src -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete" "find src -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete"
} }
task buildWheel(type: Exec, dependsOn: [install, cleanPythonCache]) { task buildWheel(type: Exec, dependsOn: [install, cleanPythonCache]) {
commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh' commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
'pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh'
} }
build.dependsOn install build.dependsOn install

View File

@ -15,7 +15,9 @@ def get_long_description():
_version: str = package_metadata["__version__"] _version: str = package_metadata["__version__"]
_self_pin = f"=={_version}" if not _version.endswith("dev0") else "" _self_pin = (
f"=={_version}" if not (_version.endswith("dev0") or "docker" in _version) else ""
)
base_requirements = { base_requirements = {
@ -163,6 +165,7 @@ setuptools.setup(
# Dependencies. # Dependencies.
install_requires=list(base_requirements), install_requires=list(base_requirements),
extras_require={ extras_require={
"ignore": [], # This is a dummy extra to allow for trailing commas in the list.
**{plugin: list(dependencies) for plugin, dependencies in plugins.items()}, **{plugin: list(dependencies) for plugin, dependencies in plugins.items()},
"dev": list(dev_requirements), "dev": list(dev_requirements),
"integration-tests": list(integration_test_requirements), "integration-tests": list(integration_test_requirements),

View File

@ -26,7 +26,7 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) {
outputs.file(sentinel_file) outputs.file(sentinel_file)
commandLine 'bash', '-c', commandLine 'bash', '-c',
"${python_executable} -m venv ${venv_name} && " + "${python_executable} -m venv ${venv_name} && " +
"${venv_name}/bin/python -m pip install --upgrade pip wheel 'setuptools>=63.0.0' && " + "${venv_name}/bin/python -m pip install --upgrade pip uv wheel 'setuptools>=63.0.0' && " +
"touch ${sentinel_file}" "touch ${sentinel_file}"
} }
@ -43,7 +43,8 @@ task installPackageOnly(type: Exec, dependsOn: runPreFlightScript) {
inputs.file file('setup.py') inputs.file file('setup.py')
outputs.file(sentinel_file) outputs.file(sentinel_file)
commandLine 'bash', '-x', '-c', commandLine 'bash', '-x', '-c',
"${venv_name}/bin/pip install -e . &&" + "source ${venv_name}/bin/activate && " +
"uv pip install -e . &&" +
"touch ${sentinel_file}" "touch ${sentinel_file}"
} }
@ -52,7 +53,8 @@ task installPackage(type: Exec, dependsOn: installPackageOnly) {
inputs.file file('setup.py') inputs.file file('setup.py')
outputs.file(sentinel_file) outputs.file(sentinel_file)
commandLine 'bash', '-x', '-c', commandLine 'bash', '-x', '-c',
"${venv_name}/bin/pip install -e . ${extra_pip_requirements} && " + "source ${venv_name}/bin/activate && " +
"uv pip install -e . ${extra_pip_requirements} && " +
"touch ${sentinel_file}" "touch ${sentinel_file}"
} }
@ -70,7 +72,7 @@ task customPackageGenerate(type: Exec, dependsOn: [environmentSetup, installPack
def package_version = project.findProperty('package_version') def package_version = project.findProperty('package_version')
commandLine 'bash', '-c', commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && " + "source ${venv_name}/bin/activate && " +
"pip install build && " + "uv pip install build && " +
"./scripts/custom_package_codegen.sh '${package_name}' '${package_version}'" "./scripts/custom_package_codegen.sh '${package_name}' '${package_version}'"
} }
@ -82,7 +84,7 @@ task installDev(type: Exec, dependsOn: [install]) {
outputs.file(sentinel_file) outputs.file(sentinel_file)
commandLine 'bash', '-c', commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " + "source ${venv_name}/bin/activate && set -x && " +
"${venv_name}/bin/pip install -e .[dev] ${extra_pip_requirements} && " + "uv pip install -e .[dev] ${extra_pip_requirements} && " +
"touch ${sentinel_file}" "touch ${sentinel_file}"
} }
@ -92,7 +94,7 @@ task installAll(type: Exec, dependsOn: [install]) {
outputs.file(sentinel_file) outputs.file(sentinel_file)
commandLine 'bash', '-c', commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " + "source ${venv_name}/bin/activate && set -x && " +
"${venv_name}/bin/pip install -e .[all] ${extra_pip_requirements} && " + "uv pip install -e .[all] ${extra_pip_requirements} && " +
"touch ${sentinel_file}" "touch ${sentinel_file}"
} }
@ -142,7 +144,8 @@ task testQuick(type: Exec, dependsOn: [installDev, ':metadata-models:generateJso
outputs.dir("${venv_name}") outputs.dir("${venv_name}")
def cvg_arg = get_coverage_arg("quick") def cvg_arg = get_coverage_arg("quick")
commandLine 'bash', '-c', commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && pytest ${cvg_arg} tests/unit --random-order --durations=20 -m 'not integration' -vv --continue-on-collection-errors --junit-xml=junit.quick.xml" "source ${venv_name}/bin/activate && set -x && " +
"pytest ${cvg_arg} tests/unit --random-order --durations=20 -m 'not integration' -vv --continue-on-collection-errors --junit-xml=junit.quick.xml"
} }
task installDevTest(type: Exec, dependsOn: [install]) { task installDevTest(type: Exec, dependsOn: [install]) {
@ -151,7 +154,9 @@ task installDevTest(type: Exec, dependsOn: [install]) {
outputs.dir("${venv_name}") outputs.dir("${venv_name}")
outputs.file(sentinel_file) outputs.file(sentinel_file)
commandLine 'bash', '-c', commandLine 'bash', '-c',
"${venv_name}/bin/pip install -e .[dev,integration-tests] ${extra_pip_requirements} && touch ${sentinel_file}" "source ${venv_name}/bin/activate && set -x && " +
"uv pip install -e .[dev,integration-tests] ${extra_pip_requirements} && " +
"touch ${sentinel_file}"
} }
def testFile = hasProperty('testFile') ? testFile : 'unknown' def testFile = hasProperty('testFile') ? testFile : 'unknown'
@ -171,22 +176,26 @@ task testSingle(dependsOn: [installDevTest]) {
task testIntegrationBatch0(type: Exec, dependsOn: [installDevTest]) { task testIntegrationBatch0(type: Exec, dependsOn: [installDevTest]) {
def cvg_arg = get_coverage_arg("intBatch0") def cvg_arg = get_coverage_arg("intBatch0")
commandLine 'bash', '-c', commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && pytest ${cvg_arg} --durations=50 -m 'integration_batch_0' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch0.xml" "source ${venv_name}/bin/activate && set -x && " +
"pytest ${cvg_arg} --durations=50 -m 'integration_batch_0' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch0.xml"
} }
task testIntegrationBatch1(type: Exec, dependsOn: [installDevTest]) { task testIntegrationBatch1(type: Exec, dependsOn: [installDevTest]) {
def cvg_arg = get_coverage_arg("intBatch1") def cvg_arg = get_coverage_arg("intBatch1")
commandLine 'bash', '-c', commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && pytest ${cvg_arg} --durations=50 -m 'integration_batch_1' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch1.xml" "source ${venv_name}/bin/activate && set -x && " +
"pytest ${cvg_arg} --durations=50 -m 'integration_batch_1' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch1.xml"
} }
task testIntegrationBatch2(type: Exec, dependsOn: [installDevTest]) { task testIntegrationBatch2(type: Exec, dependsOn: [installDevTest]) {
def cvg_arg = get_coverage_arg("intBatch2") def cvg_arg = get_coverage_arg("intBatch2")
commandLine 'bash', '-c', commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && pytest ${cvg_arg} --durations=20 -m 'integration_batch_2' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch2.xml" "source ${venv_name}/bin/activate && set -x && " +
"pytest ${cvg_arg} --durations=20 -m 'integration_batch_2' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch2.xml"
} }
task testFull(type: Exec, dependsOn: [installDevTest]) { task testFull(type: Exec, dependsOn: [installDevTest]) {
commandLine 'bash', '-c', commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && pytest --durations=50 -vv --continue-on-collection-errors --junit-xml=junit.full.xml" "source ${venv_name}/bin/activate && set -x && " +
"pytest --durations=50 -vv --continue-on-collection-errors --junit-xml=junit.full.xml"
} }
task specGen(type: Exec, dependsOn: [codegen, installDevTest]) { task specGen(type: Exec, dependsOn: [codegen, installDevTest]) {
@ -203,7 +212,7 @@ task cleanPythonCache(type: Exec) {
"find src tests -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete" "find src tests -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete"
} }
task buildWheel(type: Exec, dependsOn: [install, codegen, cleanPythonCache]) { task buildWheel(type: Exec, dependsOn: [install, codegen, cleanPythonCache]) {
commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh' commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'uv pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh'
} }
build.dependsOn install build.dependsOn install

View File

@ -6,6 +6,10 @@ package_metadata: dict = {}
with open("./src/datahub/__init__.py") as fp: with open("./src/datahub/__init__.py") as fp:
exec(fp.read(), package_metadata) exec(fp.read(), package_metadata)
_version: str = package_metadata["__version__"]
_self_pin = (
f"=={_version}" if not (_version.endswith("dev0") or "docker" in _version) else ""
)
base_requirements = { base_requirements = {
# Typing extension should be >=3.10.0.2 ideally but we can't restrict due to an Airflow 2.1 dependency conflict. # Typing extension should be >=3.10.0.2 ideally but we can't restrict due to an Airflow 2.1 dependency conflict.
@ -17,7 +21,7 @@ base_requirements = {
# pydantic 1.10.3 is incompatible with typing-extensions 4.1.1 - https://github.com/pydantic/pydantic/issues/4885 # pydantic 1.10.3 is incompatible with typing-extensions 4.1.1 - https://github.com/pydantic/pydantic/issues/4885
"pydantic>=1.10.0,!=1.10.3", "pydantic>=1.10.0,!=1.10.3",
"mixpanel>=4.9.0", "mixpanel>=4.9.0",
"sentry-sdk", "sentry-sdk>=1.40.5",
} }
framework_common = { framework_common = {
@ -272,7 +276,7 @@ plugins: Dict[str, Set[str]] = {
}, },
# Integrations. # Integrations.
"airflow": { "airflow": {
f"acryl-datahub-airflow-plugin == {package_metadata['__version__']}", f"acryl-datahub-airflow-plugin{_self_pin}",
}, },
"circuit-breaker": { "circuit-breaker": {
"gql>=3.3.0", "gql>=3.3.0",
@ -398,12 +402,18 @@ plugins: Dict[str, Set[str]] = {
# This is mainly used to exclude plugins from the Docker image. # This is mainly used to exclude plugins from the Docker image.
all_exclude_plugins: Set[str] = { all_exclude_plugins: Set[str] = {
# The Airflow extra is only retained for compatibility, but new users should
# be using the datahub-airflow-plugin package instead.
"airflow",
# SQL Server ODBC requires additional drivers, and so we don't want to keep # SQL Server ODBC requires additional drivers, and so we don't want to keep
# it included in the default "all" installation. # it included in the default "all" installation.
"mssql-odbc", "mssql-odbc",
# duckdb doesn't have a prebuilt wheel for Linux armv7l or aarch64, so we # duckdb doesn't have a prebuilt wheel for Linux armv7l or aarch64, so we
# simply exclude it. # simply exclude it.
"datahub-lite", "datahub-lite",
# Feast tends to have overly restrictive dependencies and hence doesn't
# play nice with the "all" installation.
"feast",
} }
mypy_stubs = { mypy_stubs = {
@ -678,7 +688,7 @@ entry_points = {
setuptools.setup( setuptools.setup(
# Package metadata. # Package metadata.
name=package_metadata["__package_name__"], name=package_metadata["__package_name__"],
version=package_metadata["__version__"], version=_version,
url="https://datahubproject.io/", url="https://datahubproject.io/",
project_urls={ project_urls={
"Documentation": "https://datahubproject.io/docs/", "Documentation": "https://datahubproject.io/docs/",

View File

@ -49,10 +49,12 @@ task installDev(type: Exec) {
inputs.file file('pyproject.toml') inputs.file file('pyproject.toml')
inputs.file file('requirements.txt') inputs.file file('requirements.txt')
outputs.file("${venv_name}/.build_install_dev_sentinel") outputs.file("${venv_name}/.build_install_dev_sentinel")
commandLine 'bash', '-x', '-c', commandLine 'bash', '-c',
"set -x && " +
"${python_executable} -m venv ${venv_name} && " + "${python_executable} -m venv ${venv_name} && " +
"${venv_name}/bin/pip install --upgrade pip wheel setuptools && " + "${venv_name}/bin/python -m pip install --upgrade pip uv wheel setuptools && " +
"${venv_name}/bin/pip install -r requirements.txt && " + "set +x && source ${venv_name}/bin/activate && set -x && " +
"uv pip install -r requirements.txt && " +
"touch ${venv_name}/.build_install_dev_sentinel" "touch ${venv_name}/.build_install_dev_sentinel"
} }