fix(ingest): move to acryl-great-expectations (#13398)

This commit is contained in:
Harshal Sheth 2025-05-02 13:04:53 -07:00 committed by GitHub
parent b7ef234bc7
commit e2844b6c95
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 44 additions and 6 deletions

View File

@ -131,6 +131,22 @@ cachetools_lib = {
"cachetools",
}
great_expectations_lib = {
    # Dependency history, oldest first:
    #
    # (a) Originally we depended on upstream directly:
    #       "great-expectations>=0.15.12, <=0.15.50"
    #
    # (b) The hive plugin carried extra exclusions. Because of
    #     https://github.com/great-expectations/great_expectations/issues/6146
    #     versions 0.15.{23-26} could not be allowed; the problem was fixed in
    #     0.15.27 by
    #     https://github.com/great-expectations/great_expectations/pull/6149
    #       "great-expectations != 0.15.23, != 0.15.24, != 0.15.25, != 0.15.26"
    #
    # (c) We have since forked great-expectations to add pydantic 2.x support.
    #     The fork is a small patch on top of 0.15.50:
    #     https://github.com/great-expectations/great_expectations/compare/0.15.50...hsheth2:great_expectations:0.15.50-pydantic-2-patch?expand=1
    #     It was derived from work done by @jskrzypek in
    #     https://github.com/datahub-project/datahub/issues/8115#issuecomment-2264219783
    #
    # The single pinned requirement below replaces both (a) and (b).
    "acryl-great-expectations==0.15.50.1",
}
sql_common_slim = {
# Required for all SQL sources.
# This is a temporary lower bound that we're open to loosening/tightening as requirements show up
@ -140,8 +156,8 @@ sql_common = (
{
*sql_common_slim,
# Required for SQL profiling.
"great-expectations>=0.15.12, <=0.15.50",
*pydantic_no_v2, # because of great-expectations
*great_expectations_lib,
"pydantic<2", # keeping this for now, but can be removed eventually
# scipy version restricted to reduce backtracking, used by great-expectations,
"scipy>=1.7.2",
# GE added handling for higher version of jinja2
@ -450,10 +466,7 @@ plugins: Dict[str, Set[str]] = {
| pyhive_common
| {
"databricks-dbapi",
# Due to https://github.com/great-expectations/great_expectations/issues/6146,
# we cannot allow 0.15.{23-26}. This was fixed in 0.15.27 by
# https://github.com/great-expectations/great_expectations/pull/6149.
"great-expectations != 0.15.23, != 0.15.24, != 0.15.25, != 0.15.26",
*great_expectations_lib,
},
# keep in sync with presto-on-hive until presto-on-hive will be removed
"hive-metastore": sql_common

View File

@ -5,6 +5,7 @@ import concurrent.futures
import contextlib
import dataclasses
import functools
import importlib.metadata
import json
import logging
import re
@ -84,6 +85,30 @@ if TYPE_CHECKING:
from pyathena.cursor import Cursor
assert MARKUPSAFE_PATCHED
def _gx_distribution_present(dist_name: str) -> bool:
    """Return whether installed-package metadata exists for ``dist_name``.

    Args:
        dist_name: Distribution name to look up via ``importlib.metadata``.

    Returns:
        The truthiness of the distribution object when the lookup succeeds,
        ``False`` when ``PackageNotFoundError`` is raised.
    """
    try:
        return bool(importlib.metadata.distribution(dist_name))
    except importlib.metadata.PackageNotFoundError:
        return False


# Import-time sanity check: acryl-great-expectations must be installed and
# great-expectations must not be.
# Despite the "_version" suffix, both names hold booleans (installed or not).
acryl_gx_version = _gx_distribution_present("acryl-great-expectations")
original_gx_version = _gx_distribution_present("great-expectations")

if original_gx_version:
    if acryl_gx_version:
        # Both distributions are present at the same time.
        raise RuntimeError(
            "acryl-great-expectations and great-expectations cannot both be installed because their files will conflict. "
            "You will need to (1) uninstall great-expectations and (2) re-install acryl-great-expectations. "
            "See https://github.com/pypa/pip/issues/4625."
        )
    # Only the upstream distribution is present.
    raise RuntimeError(
        "We expect acryl-great-expectations to be installed, but great-expectations is installed instead."
    )
# Module-level logger, named after this module.
logger: logging.Logger = logging.getLogger(__name__)
# Save a reference to SqlAlchemyDataset.get_column_median as it exists at import time.
# NOTE(review): presumably kept so a replacement can delegate to the original later -- confirm against the rest of the file.
_original_get_column_median = SqlAlchemyDataset.get_column_median