mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-13 17:59:48 +00:00
fix(ingest): move to acryl-great-expectations (#13398)
This commit is contained in:
parent
b7ef234bc7
commit
e2844b6c95
@ -131,6 +131,22 @@ cachetools_lib = {
|
|||||||
"cachetools",
|
"cachetools",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
great_expectations_lib = {
    # Pinned requirement for great-expectations, shared by every extra that
    # needs it. How this pin came about:
    #
    # 1. Our original dep was this:
    #      "great-expectations>=0.15.12, <=0.15.50",
    #
    # 2. For hive, we had additional restrictions:
    #    Due to https://github.com/great-expectations/great_expectations/issues/6146,
    #    we cannot allow 0.15.{23-26}. This was fixed in 0.15.27 by
    #    https://github.com/great-expectations/great_expectations/pull/6149.
    #      "great-expectations != 0.15.23, != 0.15.24, != 0.15.25, != 0.15.26",
    #
    # 3. Since then, we've ended up forking great-expectations in order to
    #    add pydantic 2.x support. The fork is pretty simple:
    #    https://github.com/great-expectations/great_expectations/compare/0.15.50...hsheth2:great_expectations:0.15.50-pydantic-2-patch?expand=1
    #    This was derived from work done by @jskrzypek in
    #    https://github.com/datahub-project/datahub/issues/8115#issuecomment-2264219783
    "acryl-great-expectations==0.15.50.1",
}
|
||||||
|
|
||||||
sql_common_slim = {
|
sql_common_slim = {
|
||||||
# Required for all SQL sources.
|
# Required for all SQL sources.
|
||||||
# This is a temporary lower bound that we're open to loosening/tightening as requirements show up
|
# This is a temporary lower bound that we're open to loosening/tightening as requirements show up
|
||||||
@ -140,8 +156,8 @@ sql_common = (
|
|||||||
{
|
{
|
||||||
*sql_common_slim,
|
*sql_common_slim,
|
||||||
# Required for SQL profiling.
|
# Required for SQL profiling.
|
||||||
"great-expectations>=0.15.12, <=0.15.50",
|
*great_expectations_lib,
|
||||||
*pydantic_no_v2, # because of great-expectations
|
"pydantic<2", # keeping this for now, but can be removed eventually
|
||||||
# scipy version restricted to reduce backtracking, used by great-expectations,
|
# scipy version restricted to reduce backtracking, used by great-expectations,
|
||||||
"scipy>=1.7.2",
|
"scipy>=1.7.2",
|
||||||
# GE added handling for higher versions of jinja2
|
# GE added handling for higher versions of jinja2
|
||||||
@ -450,10 +466,7 @@ plugins: Dict[str, Set[str]] = {
|
|||||||
| pyhive_common
|
| pyhive_common
|
||||||
| {
|
| {
|
||||||
"databricks-dbapi",
|
"databricks-dbapi",
|
||||||
# Due to https://github.com/great-expectations/great_expectations/issues/6146,
|
*great_expectations_lib,
|
||||||
# we cannot allow 0.15.{23-26}. This was fixed in 0.15.27 by
|
|
||||||
# https://github.com/great-expectations/great_expectations/pull/6149.
|
|
||||||
"great-expectations != 0.15.23, != 0.15.24, != 0.15.25, != 0.15.26",
|
|
||||||
},
|
},
|
||||||
# keep in sync with presto-on-hive until presto-on-hive will be removed
|
# keep in sync with presto-on-hive until presto-on-hive will be removed
|
||||||
"hive-metastore": sql_common
|
"hive-metastore": sql_common
|
||||||
|
|||||||
@ -5,6 +5,7 @@ import concurrent.futures
|
|||||||
import contextlib
|
import contextlib
|
||||||
import dataclasses
|
import dataclasses
|
||||||
import functools
|
import functools
|
||||||
|
import importlib.metadata
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
@ -84,6 +85,30 @@ if TYPE_CHECKING:
|
|||||||
from pyathena.cursor import Cursor
|
from pyathena.cursor import Cursor
|
||||||
|
|
||||||
assert MARKUPSAFE_PATCHED
|
assert MARKUPSAFE_PATCHED
|
||||||
|
|
||||||
|
# We need to ensure that acryl-great-expectations is installed
|
||||||
|
# and great-expectations is not installed.
|
||||||
|
try:
|
||||||
|
acryl_gx_version = bool(importlib.metadata.distribution("acryl-great-expectations"))
|
||||||
|
except importlib.metadata.PackageNotFoundError:
|
||||||
|
acryl_gx_version = False
|
||||||
|
|
||||||
|
try:
|
||||||
|
original_gx_version = bool(importlib.metadata.distribution("great-expectations"))
|
||||||
|
except importlib.metadata.PackageNotFoundError:
|
||||||
|
original_gx_version = False
|
||||||
|
|
||||||
|
if acryl_gx_version and original_gx_version:
|
||||||
|
raise RuntimeError(
|
||||||
|
"acryl-great-expectations and great-expectations cannot both be installed because their files will conflict. "
|
||||||
|
"You will need to (1) uninstall great-expectations and (2) re-install acryl-great-expectations. "
|
||||||
|
"See https://github.com/pypa/pip/issues/4625."
|
||||||
|
)
|
||||||
|
elif original_gx_version:
|
||||||
|
raise RuntimeError(
|
||||||
|
"We expect acryl-great-expectations to be installed, but great-expectations is installed instead."
|
||||||
|
)
|
||||||
|
|
||||||
logger: logging.Logger = logging.getLogger(__name__)
|
logger: logging.Logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
_original_get_column_median = SqlAlchemyDataset.get_column_median
|
_original_get_column_median = SqlAlchemyDataset.get_column_median
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user