chore(build): update base-requirements + add script for regeneration (#9524)

This commit is contained in:
Harshal Sheth 2023-12-28 04:06:41 -05:00 committed by GitHub
parent 9f79f44dd6
commit 89c7059ce9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 195 additions and 159 deletions

View File

@ -1,149 +1,147 @@
# Excluded for slim
# pyspark==3.0.3
# pydeequ==1.0.1
# Generated requirements file. Run ./regenerate-base-requirements.sh to regenerate.
acryl-datahub-classify==0.0.8
acryl-PyHive==0.6.14
acryl-sqlglot==18.5.2.dev45
acryl-PyHive==0.6.16
acryl-sqlglot==20.4.1.dev14
aenum==3.1.15
aiohttp==3.8.6
aiohttp==3.9.1
aiosignal==1.3.1
alembic==1.12.0
alembic==1.13.1
altair==4.2.0
annotated-types==0.6.0
anyio==3.7.1
apache-airflow==2.7.2
apache-airflow-providers-common-sql==1.7.2
apache-airflow-providers-ftp==3.5.2
apache-airflow-providers-http==4.5.2
apache-airflow-providers-imap==3.3.2
apache-airflow-providers-sqlite==3.4.3
apispec==6.3.0
apache-airflow==2.7.3
apache-airflow-providers-common-sql==1.9.0
apache-airflow-providers-ftp==3.7.0
apache-airflow-providers-http==4.8.0
apache-airflow-providers-imap==3.5.0
apache-airflow-providers-sqlite==3.6.0
apispec==6.3.1
appdirs==1.4.4
appnope==0.1.3
argcomplete==3.1.2
argcomplete==3.2.1
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
asgiref==3.7.2
asn1crypto==1.5.1
asttokens==2.4.0
asttokens==2.4.1
async-timeout==4.0.3
asynch==0.2.2
asynch==0.2.3
attrs==23.1.0
avro==1.10.2
avro==1.11.3
avro-gen3==0.7.11
Babel==2.13.0
backcall==0.2.0
Babel==2.14.0
backoff==2.2.1
beautifulsoup4==4.12.2
bleach==6.1.0
blinker==1.6.3
blinker==1.7.0
blis==0.7.11
boto3==1.28.62
botocore==1.31.62
boto3==1.34.8
botocore==1.34.8
bowler==0.9.0
bracex==2.4
cached-property==1.5.2
cachelib==0.9.0
cachetools==5.3.1
cachetools==5.3.2
catalogue==2.0.10
cattrs==23.1.2
certifi==2023.7.22
cattrs==23.2.3
certifi==2023.11.17
cffi==1.16.0
chardet==5.2.0
charset-normalizer==3.3.0
ciso8601==2.3.0
charset-normalizer==3.3.2
ciso8601==2.3.1
click==8.1.7
click-default-group==1.2.4
click-spinner==0.1.10
clickclick==20.10.2
clickhouse-cityhash==1.0.2.4
clickhouse-driver==0.2.6
clickhouse-sqlalchemy==0.2.4
cloudpickle==2.2.1
cloudpickle==3.0.0
colorama==0.4.6
colorlog==4.8.0
comm==0.1.4
confection==0.1.3
ConfigUpdater==3.1.1
comm==0.2.0
confection==0.1.4
ConfigUpdater==3.2
confluent-kafka==2.3.0
connexion==2.14.2
cron-descriptor==1.4.0
croniter==2.0.1
cryptography==41.0.4
cryptography==41.0.7
cx-Oracle==8.3.0
cymem==2.0.8
dask==2023.9.3
dask==2023.12.1
databricks-cli==0.18.0
databricks-dbapi==0.6.0
databricks-sdk==0.10.0
databricks-sdk==0.15.0
databricks-sql-connector==2.9.3
debugpy==1.8.0
decorator==5.1.1
defusedxml==0.7.1
deltalake==0.11.0
deltalake==0.14.0
Deprecated==1.2.14
dill==0.3.7
dnspython==2.4.2
docker==6.1.3
docker==7.0.0
docutils==0.20.1
ecdsa==0.18.0
elasticsearch==7.13.4
email-validator==1.3.1
entrypoints==0.4
et-xmlfile==1.1.0
exceptiongroup==1.1.3
executing==2.0.0
expandvars==0.11.0
fastapi==0.103.2
fastavro==1.8.4
fastjsonschema==2.18.1
exceptiongroup==1.2.0
executing==2.0.1
expandvars==0.12.0
fastapi==0.108.0
fastavro==1.9.2
fastjsonschema==2.19.0
feast==0.31.1
filelock==3.12.4
filelock==3.13.1
fissix==21.11.13
Flask==2.2.5
flatdict==4.0.1
frozenlist==1.4.0
fsspec==2023.9.2
frozenlist==1.4.1
fsspec==2023.12.2
future==0.18.3
GeoAlchemy2==0.14.1
gitdb==4.0.10
GitPython==3.1.37
google-api-core==2.12.0
google-auth==2.23.3
google-cloud-appengine-logging==1.3.2
GeoAlchemy2==0.14.3
gitdb==4.0.11
GitPython==3.1.40
google-api-core==2.15.0
google-auth==2.25.2
google-cloud-appengine-logging==1.4.0
google-cloud-audit-log==0.2.5
google-cloud-bigquery==3.12.0
google-cloud-core==2.3.3
google-cloud-bigquery==3.14.1
google-cloud-core==2.4.1
google-cloud-datacatalog-lineage==0.2.2
google-cloud-logging==3.5.0
google-crc32c==1.5.0
google-re2==1.1
google-resumable-media==2.6.0
googleapis-common-protos==1.60.0
google-resumable-media==2.7.0
googleapis-common-protos==1.62.0
gql==3.4.1
graphql-core==3.2.3
graphviz==0.20.1
great-expectations==0.15.50
greenlet==3.0.0
grpc-google-iam-v1==0.12.6
grpcio==1.59.0
grpcio-reflection==1.59.0
grpcio-status==1.59.0
grpcio-tools==1.59.0
greenlet==3.0.3
grpc-google-iam-v1==0.13.0
grpcio==1.60.0
grpcio-reflection==1.60.0
grpcio-status==1.60.0
grpcio-tools==1.60.0
gssapi==1.8.3
gunicorn==21.2.0
h11==0.14.0
httpcore==0.18.0
httptools==0.6.0
httpx==0.25.0
hdbcli==2.19.20
httpcore==1.0.2
httptools==0.6.1
httpx==0.26.0
humanfriendly==10.0
idna==3.4
idna==3.6
ijson==3.2.3
importlib-metadata==6.8.0
importlib-resources==6.1.0
importlib-metadata==6.11.0
importlib-resources==6.1.1
inflection==0.5.1
ipaddress==1.0.23
ipykernel==6.17.1
ipython==8.16.1
ipython==8.19.0
ipython-genutils==0.2.0
ipywidgets==8.1.1
iso3166==2.1.1
@ -152,34 +150,34 @@ itsdangerous==2.1.2
jedi==0.19.1
Jinja2==3.1.2
jmespath==1.0.1
JPype1==1.4.1
JPype1==1.5.0
jsonlines==4.0.0
jsonpatch==1.33
jsonpointer==2.4
jsonref==1.1.0
jsonschema==4.19.1
jsonschema-specifications==2023.7.1
jsonschema==4.20.0
jsonschema-specifications==2023.12.1
jupyter-server==1.24.0
jupyter_client==7.4.9
jupyter_core==4.12.0
jupyterlab-pygments==0.2.2
jupyterlab-widgets==3.0.9
jupyterlab_pygments==0.3.0
langcodes==3.3.0
lark==1.1.4
lazy-object-proxy==1.9.0
lazy-object-proxy==1.10.0
leb128==1.0.5
limits==3.6.0
limits==3.7.0
linear-tsv==1.1.0
linkify-it-py==2.0.2
lkml==1.3.1
lkml==1.3.3
locket==1.0.0
lockfile==0.12.2
looker-sdk==23.0.0
lxml==4.9.3
lxml==4.9.4
lz4==4.3.2
makefun==1.15.1
Mako==1.2.4
Markdown==3.5
makefun==1.15.2
Mako==1.3.0
Markdown==3.5.1
markdown-it-py==3.0.0
MarkupSafe==2.1.3
marshmallow==3.20.1
@ -190,26 +188,26 @@ mdit-py-plugins==0.4.0
mdurl==0.1.2
mistune==3.0.2
mixpanel==4.10.0
mlflow-skinny==2.7.1
mlflow-skinny==2.9.2
mmh3==4.0.1
mmhash3==3.0.1
more-itertools==10.1.0
moreorless==0.4.0
moto==4.2.5
moto==4.2.12
msal==1.22.0
multidict==6.0.4
murmurhash==1.0.10
mypy==1.6.0
mypy==1.8.0
mypy-extensions==1.0.0
nbclassic==1.0.0
nbclient==0.6.3
nbconvert==7.9.2
nbconvert==7.13.1
nbformat==5.9.1
nest-asyncio==1.5.8
networkx==3.1
networkx==3.2.1
notebook==6.5.6
notebook_shim==0.2.3
numpy==1.26.0
numpy==1.26.2
oauthlib==3.2.2
okta==1.7.0
openlineage-airflow==1.2.0
@ -217,110 +215,107 @@ openlineage-integration-common==1.2.0
openlineage-python==1.2.0
openlineage_sql==1.2.0
openpyxl==3.1.2
opentelemetry-api==1.20.0
opentelemetry-exporter-otlp==1.20.0
opentelemetry-exporter-otlp-proto-common==1.20.0
opentelemetry-exporter-otlp-proto-grpc==1.20.0
opentelemetry-exporter-otlp-proto-http==1.20.0
opentelemetry-proto==1.20.0
opentelemetry-sdk==1.20.0
opentelemetry-semantic-conventions==0.41b0
opentelemetry-api==1.22.0
opentelemetry-exporter-otlp==1.22.0
opentelemetry-exporter-otlp-proto-common==1.22.0
opentelemetry-exporter-otlp-proto-grpc==1.22.0
opentelemetry-exporter-otlp-proto-http==1.22.0
opentelemetry-proto==1.22.0
opentelemetry-sdk==1.22.0
opentelemetry-semantic-conventions==0.43b0
ordered-set==4.1.0
oscrypto==1.3.0
packaging==23.2
pandas==1.5.3
pandavro==1.5.2
pandocfilters==1.5.0
parse==1.19.1
parse==1.20.0
parso==0.8.3
partd==1.4.1
pathspec==0.11.2
pathy==0.10.2
pathspec==0.12.1
pathy==0.10.3
pendulum==2.1.2
pexpect==4.8.0
pexpect==4.9.0
phonenumbers==8.13.0
pickleshare==0.7.5
platformdirs==3.11.0
pluggy==1.3.0
preshed==3.0.9
prison==0.2.1
progressbar2==4.2.0
prometheus-client==0.17.1
prompt-toolkit==3.0.39
proto-plus==1.22.3
protobuf==4.24.4
psutil==5.9.5
progressbar2==4.3.2
prometheus-client==0.19.0
prompt-toolkit==3.0.43
proto-plus==1.23.0
protobuf==4.25.1
psutil==5.9.7
psycopg2-binary==2.9.9
ptyprocess==0.7.0
pure-eval==0.2.2
pure-sasl==0.6.2
py-partiql-parser==0.3.7
py-partiql-parser==0.5.0
pyarrow==11.0.0
pyasn1==0.5.0
pyasn1==0.5.1
pyasn1-modules==0.3.0
pyathena==2.4.1
pycountry==22.3.5
pyathena==2.25.2
pycountry==23.12.11
pycparser==2.21
pycryptodome==3.19.0
pycryptodomex==3.19.0
pydantic==1.10.13
pydantic_core==2.14.6
pydash==7.0.6
pydruid==0.6.5
Pygments==2.16.1
pydruid==0.6.6
Pygments==2.17.2
pyiceberg==0.4.0
pymongo==4.5.0
pymongo==4.6.1
PyMySQL==1.1.0
pyOpenSSL==23.2.0
pyOpenSSL==23.3.0
pyparsing==3.0.9
pyspnego==0.10.2
python-daemon==3.0.1
python-dateutil==2.8.2
python-dotenv==1.0.0
python-jose==3.3.0
python-ldap==3.4.3
python-ldap==3.4.4
python-nvd3==0.15.0
python-slugify==8.0.1
python-stdnum==1.19
python-tds==1.13.0
python-tds==1.14.0
python-utils==3.8.1
python3-openid==3.2.0
pytz==2023.3.post1
pytzdata==2020.1
PyYAML==6.0.1
pyzmq==24.0.1
ratelimiter==1.2.0.post0
redash-toolbelt==0.1.9
redshift-connector==2.0.914
referencing==0.30.2
regex==2023.10.3
redshift-connector==2.0.918
referencing==0.32.0
regex==2023.12.25
requests==2.31.0
requests-file==1.5.1
requests-gssapi==1.2.3
requests-ntlm==1.2.0
requests-toolbelt==0.10.1
responses==0.23.3
responses==0.24.1
rfc3339-validator==0.1.4
rfc3986==2.0.0
rich==13.6.0
rich-argparse==1.3.0
rpds-py==0.10.6
rich==13.7.0
rich-argparse==1.4.0
rpds-py==0.15.2
rsa==4.9
ruamel.yaml==0.17.17
ruamel.yaml.clib==0.2.8
s3transfer==0.7.0
schwifty==2023.9.0
scipy==1.11.3
s3transfer==0.10.0
schwifty==2023.11.2
scipy==1.11.4
scramp==1.4.4
Send2Trash==1.8.2
sentry-sdk==1.32.0
sentry-sdk==1.39.1
setproctitle==1.3.3
simple-salesforce==1.12.5
six==1.16.0
smart-open==6.4.0
smmap==5.0.1
sniffio==1.3.0
snowflake-connector-python==3.2.1
snowflake-sqlalchemy==1.5.0
snowflake-connector-python==3.6.0
snowflake-sqlalchemy==1.5.1
sortedcontainers==2.4.0
soupsieve==2.5
spacy==3.4.3
@ -328,67 +323,71 @@ spacy-legacy==3.0.12
spacy-loggers==1.0.5
sql-metadata==2.2.2
SQLAlchemy==1.4.44
sqlalchemy-bigquery==1.8.0
SQLAlchemy-JSONField==1.0.1.post0
sqlalchemy-bigquery==1.9.0
sqlalchemy-hana==1.1.1
SQLAlchemy-JSONField==1.0.2
sqlalchemy-pytds==0.3.5
sqlalchemy-redshift==0.8.14
SQLAlchemy-Utils==0.41.1
sqlalchemy2-stubs==0.0.2a35
sqlalchemy2-stubs==0.0.2a37
sqllineage==1.3.8
sqlparse==0.4.4
srsly==2.4.8
stack-data==0.6.3
starlette==0.27.0
starlette==0.32.0.post1
strictyaml==1.7.3
tableauserverclient==0.25
tableschema==1.20.2
tabulate==0.9.0
tabulator==1.53.5
tenacity==8.2.3
termcolor==2.3.0
terminado==0.17.1
teradatasql==20.0.0.2
teradatasqlalchemy==17.20.0.0
termcolor==2.4.0
terminado==0.18.0
text-unidecode==1.3
thinc==8.1.12
thrift==0.13.0
thrift==0.16.0
thrift-sasl==0.4.3
tinycss2==1.2.1
toml==0.10.2
tomli==2.0.1
tomlkit==0.12.1
tomlkit==0.12.3
toolz==0.12.0
tornado==6.3.3
tornado==6.4
tqdm==4.66.1
traitlets==5.2.1.post0
trino==0.327.0
typeguard==2.13.3
typer==0.7.0
types-PyYAML==6.0.12.12
typing-inspect==0.9.0
typing_extensions==4.8.0
tzlocal==5.1
typing_extensions==4.9.0
tzlocal==5.2
uc-micro-py==1.0.2
ujson==5.8.0
ujson==5.9.0
unicodecsv==0.14.1
urllib3==1.26.17
uvicorn==0.23.2
uvloop==0.17.0
vertica-python==1.3.5
vertica-sqlalchemy-dialect==0.0.8
universal-pathlib==0.1.4
urllib3==1.26.18
uvicorn==0.25.0
uvloop==0.19.0
vertica-python==1.3.8
vertica-sqlalchemy-dialect==0.0.8.1
vininfo==1.7.0
volatile==2.1.0
wasabi==0.10.1
watchfiles==0.20.0
watchfiles==0.21.0
wcmatch==8.5
wcwidth==0.2.8
wcwidth==0.2.12
webencodings==0.5.1
websocket-client==1.6.4
websockets==11.0.3
websocket-client==1.7.0
websockets==12.0
Werkzeug==2.2.3
widgetsnbextension==4.0.9
wrapt==1.15.0
WTForms==3.1.0
wrapt==1.16.0
WTForms==3.0.1
xlrd==2.0.1
xmltodict==0.13.0
yarl==1.9.2
yarl==1.9.4
zeep==4.2.1
zstd==1.5.5.1
zipp==3.17.0
zstd==1.5.5.1

View File

@ -0,0 +1,37 @@
#!/bin/bash
# Regenerates base-requirements.txt (written next to this script).
#
# The script builds a throwaway virtualenv, installs metadata-ingestion and
# the airflow plugin into it in editable mode (with the "all" extra), and then
# records the resulting `pip freeze` output, minus a few filtered packages.
#
# Requirements: a `python` on PATH that is version 3.9 or newer.
# Note: the temporary virtualenv is not removed afterwards; its path is
# printed so that it can be inspected or deleted manually.
set -euxo pipefail

# Work relative to the directory containing this script, regardless of the
# caller's current directory.
cd "$(dirname "${BASH_SOURCE[0]}")"

SCRIPT_NAME=$(basename "$0")
DATAHUB_DIR="$(pwd)/../.."

# Create a virtualenv.
VENV_DIR=$(mktemp -d)
python -c "import sys; assert sys.version_info >= (3, 9), 'Python 3.9 or higher is required.'"
python -m venv "$VENV_DIR"
# shellcheck source=/dev/null
source "$VENV_DIR/bin/activate"
pip install --upgrade pip setuptools wheel
echo "Using virtualenv at $VENV_DIR"

# Install stuff.
# All paths are quoted so that a checkout or TMPDIR containing spaces works.
pushd "$DATAHUB_DIR/metadata-ingestion"
pip install -e .
pip install -e '../metadata-ingestion-modules/airflow-plugin/[plugin-v2]'
pip install -e '.[all]'
popd

# Generate the requirements file.
# Removing Flask deps as per https://github.com/datahub-project/datahub/pull/6867/files
# Removing py4j and PyJWT due to https://github.com/datahub-project/datahub/pull/6868/files
# Removing pyspark and pydeequ because we don't want them in the slim image, so they can be added separately.
# TODO: It's unclear if these removals are still actually needed.
#
# The "^-e" filter drops the editable installs made above, which pip freeze
# would otherwise list as local "-e" entries.
{
  echo "# Generated requirements file. Run ./$SCRIPT_NAME to regenerate."
  pip freeze \
    | grep -v -E "^-e" \
    | grep -v "Flask-" \
    | grep -v -E "(py4j|PyJWT)==" \
    | grep -v -E "(pyspark|pydeequ)=="
} > base-requirements.txt