Mirror of https://github.com/datahub-project/datahub.git (synced 2025-11-10 00:11:15 +00:00)

chore(cli): drop support for python 3.7 (#9731)

This commit is contained in: parent f3cc4e068a, commit 1498c36875
.github/workflows/metadata-ingestion.yml (vendored): 4 changes

```diff
@@ -31,7 +31,7 @@ jobs:
       # DATAHUB_LOOKML_GIT_TEST_SSH_KEY: ${{ secrets.DATAHUB_LOOKML_GIT_TEST_SSH_KEY }}
     strategy:
       matrix:
-        python-version: ["3.7", "3.10"]
+        python-version: ["3.8", "3.10"]
         command:
           [
             "testQuick",
@@ -40,7 +40,7 @@ jobs:
             "testIntegrationBatch2",
           ]
         include:
-          - python-version: "3.7"
+          - python-version: "3.8"
           - python-version: "3.10"
       fail-fast: false
     steps:
```

DataHub CLI installation docs:

````diff
@@ -24,7 +24,7 @@ source venv/bin/activate # activate the environment
 Once inside the virtual environment, install `datahub` using the following commands

 ```shell
-# Requires Python 3.7+
+# Requires Python 3.8+
 python3 -m pip install --upgrade pip wheel setuptools
 python3 -m pip install --upgrade acryl-datahub
 # validate that the install was successful
````

Updating DataHub guide (breaking changes):

````diff
@@ -10,8 +10,10 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
 - Neo4j 5.x, may require migration from 4.x
 - Build requires JDK17 (Runtime Java 11)
 - Build requires Docker Compose > 2.20
+- #9731 - The `acryl-datahub` CLI now requires Python 3.8+
 - #9601 - The Unity Catalog(UC) ingestion source config `include_metastore` is now disabled by default. This change will affect the urns of all entities in the workspace.<br/>
   Entity Hierarchy with `include_metastore: true` (Old)
+
   ```
   - UC Metastore
     - Catalog
@@ -20,15 +22,18 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
   ```
+
   Entity Hierarchy with `include_metastore: false` (New)
+
   ```
   - Catalog
     - Schema
       - Table
   ```
+
   We recommend using `platform_instance` for differentiating across metastores.

   If stateful ingestion is enabled, running ingestion with latest cli version will perform all required cleanup. Otherwise, we recommend soft deleting all databricks data via the DataHub CLI:
   `datahub delete --platform databricks --soft` and then reingesting with latest cli version.

 - #9601 - The Unity Catalog(UC) ingestion source config `include_hive_metastore` is now enabled by default. This requires config `warehouse_id` to be set. You can disable `include_hive_metastore` by setting it to `False` to avoid ingesting legacy hive metastore catalog in Databricks.

 ### Potential Downtime
````

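The release note above is the user-facing change. Before upgrading, an environment can be checked against the new floor with a gate like the one the CLI itself ships; a minimal sketch:

```python
import sys

# Mirrors the CLI's own guard: acryl-datahub now assumes CPython 3.8+.
if sys.version_info < (3, 8):
    raise RuntimeError(
        f"Python {sys.version_info[:2]} is too old for acryl-datahub; upgrade to 3.8+"
    )
```
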
Quickstart prerequisites:

```diff
@@ -22,7 +22,7 @@ If you're interested in a managed version, [Acryl Data](https://www.acryldata.io
 | Linux | [Docker for Linux](https://docs.docker.com/desktop/install/linux-install/) and [Docker Compose](https://docs.docker.com/compose/install/linux/) |

 - **Launch the Docker engine** from command line or the desktop app.
-- Ensure you have **Python 3.7+** installed & configured. (Check using `python3 --version`).
+- Ensure you have **Python 3.8+** installed & configured. (Check using `python3 --version`).

 :::note Docker Resource Allocation

```

Airflow plugin setup.py:

```diff
@@ -18,16 +18,10 @@ _version: str = package_metadata["__version__"]
 _self_pin = f"=={_version}" if not _version.endswith("dev0") else ""


-rest_common = {"requests", "requests_file"}
-
 base_requirements = {
-    # Compatibility.
-    "dataclasses>=0.6; python_version < '3.7'",
-    "mypy_extensions>=0.4.3",
+    f"acryl-datahub[datahub-rest]{_self_pin}",
     # Actual dependencies.
-    "pydantic>=1.5.1",
     "apache-airflow >= 2.0.2",
-    *rest_common,
 }

 plugins: Dict[str, Set[str]] = {
@@ -42,9 +36,8 @@ plugins: Dict[str, Set[str]] = {
     },
     "plugin-v1": set(),
     "plugin-v2": {
-        # The v2 plugin requires Python 3.8+.
         f"acryl-datahub[sql-parser]{_self_pin}",
-        "openlineage-airflow==1.2.0; python_version >= '3.8'",
+        "openlineage-airflow==1.2.0",
     },
 }

@@ -144,7 +137,6 @@ setuptools.setup(
         "Programming Language :: Python",
         "Programming Language :: Python :: 3",
         "Programming Language :: Python :: 3 :: Only",
-        "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
@@ -161,7 +153,7 @@ setuptools.setup(
     ],
     # Package info.
     zip_safe=False,
-    python_requires=">=3.7",
+    python_requires=">=3.8",
     package_data={
         "datahub_airflow_plugin": ["py.typed"],
     },
```

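The deleted entries relied on PEP 508 environment markers (the `; python_version < '3.7'` suffixes), which installers evaluate against the running interpreter so a dependency is only pulled in where the condition holds. A small illustration using the `packaging` library (not part of this diff, purely for demonstration):

```python
from packaging.markers import Marker

# pip evaluates markers like this one against the current interpreter;
# with a 3.8 floor, the condition is always true and the marker is dead weight.
marker = Marker("python_version >= '3.8'")
print(marker.evaluate())  # True on any interpreter this package now supports
```
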
Airflow plugin lineage backend test:

```diff
@@ -1,7 +1,6 @@
 import datetime
 import json
 import os
-import sys
 from contextlib import contextmanager
 from typing import Iterator
 from unittest import mock
@@ -318,8 +317,7 @@ def test_lineage_backend(mock_emit, inlets, outlets, capture_executions):
     # Check that the right things were emitted.
     assert mock_emitter.emit.call_count == 17 if capture_executions else 9

-    # Running further checks based on python version because args only exists in python 3.8+
-    if sys.version_info > (3, 8):
+    # TODO: Replace this with a golden file-based comparison.
     assert mock_emitter.method_calls[0].args[0].aspectName == "dataFlowInfo"
     assert (
         mock_emitter.method_calls[0].args[0].entityUrn
@@ -344,9 +342,7 @@ def test_lineage_backend(mock_emit, inlets, outlets, capture_executions):
         == "urn:li:dataJob:(urn:li:dataFlow:(airflow,test_lineage_is_sent_to_backend,prod),task2)"
     )

-    assert (
-        mock_emitter.method_calls[4].args[0].aspectName == "dataJobInputOutput"
-    )
+    assert mock_emitter.method_calls[4].args[0].aspectName == "dataJobInputOutput"
     assert (
         mock_emitter.method_calls[4].args[0].entityUrn
         == "urn:li:dataJob:(urn:li:dataFlow:(airflow,test_lineage_is_sent_to_backend,prod),task2)"
```

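The deleted version gate existed because the `.args` attribute on `unittest.mock` call objects only appeared in Python 3.8; with 3.7 gone it is dead code. A short illustration of the attribute the assertions rely on:

```python
from unittest import mock

emitter = mock.Mock()
emitter.emit("aspect-1", key="value")

call = emitter.method_calls[0]
# .args and .kwargs were added to mock call objects in Python 3.8,
# which is why the old test guarded on sys.version_info.
assert call.args == ("aspect-1",)
assert call.kwargs == {"key": "value"}
```
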
Gradle Python version check:

```diff
@@ -17,7 +17,7 @@ def get_coverage_arg(test_name) {

 task checkPythonVersion(type: Exec) {
   commandLine python_executable, '-c',
-    'import sys; assert (3, 11) > sys.version_info >= (3, 7), f"Python version {sys.version_info[:2]} not allowed"'
+    'import sys; assert (3, 11) > sys.version_info >= (3, 8), f"Python version {sys.version_info[:2]} not allowed"'
 }

 task environmentSetup(type: Exec, dependsOn: checkPythonVersion) {
```

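The one-liner works because `sys.version_info` compares elementwise against plain tuples, so a single chained comparison bounds the interpreter to the half-open range [3.8, 3.11). Roughly:

```python
import sys

# Tuples compare lexicographically: (3, 9, 1, "final", 0) >= (3, 8) is True,
# and < (3, 11) holds for any 3.8, 3.9, or 3.10 interpreter.
assert (3, 11) > sys.version_info >= (3, 8), (
    f"Python version {sys.version_info[:2]} not allowed"
)
```
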
CLI ingestion guide:

````diff
@@ -3,14 +3,16 @@
 ## Installing the CLI

 Make sure you have installed DataHub CLI before following this guide.
+
 ```shell
-# Requires Python 3.7+
+# Requires Python 3.8+
 python3 -m pip install --upgrade pip wheel setuptools
 python3 -m pip install --upgrade acryl-datahub
 # validate that the install was successful
 datahub version
 # If you see "command not found", try running this instead: python3 -m datahub version
 ```
+
 Check out the [CLI Installation Guide](../docs/cli.md#installation) for more installation options and troubleshooting tips.

 After that, install the required plugin for the ingestion.
@@ -18,10 +20,13 @@ After that, install the required plugin for the ingestion.
 ```shell
 pip install 'acryl-datahub[datahub-rest]' # install the required plugin
 ```
+
 Check out the [alternative installation options](../docs/cli.md#alternate-installation-options) for more reference.

 ## Configuring a Recipe
+
 Create a recipe.yml file that defines the source and sink for metadata, as shown below.
+
 ```yaml
 # my_reipe.yml
 source:
@@ -39,6 +44,7 @@ sink:
 For more information and examples on configuring recipes, please refer to [Recipes](recipe_overview.md).

 ## Ingesting Metadata

 You can run ingestion using `datahub ingest` like below.
+
 ```shell
@@ -48,6 +54,7 @@ datahub ingest -c <path_to_recipe_file.yml>
 ## Reference

 Please refer the following pages for advanced guids on CLI ingestion.
+
 - [Reference for `datahub ingest` command](../docs/cli.md#ingest)
 - [UI Ingestion Guide](../docs/ui-ingestion.md)

````

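For readers who prefer driving the same recipe from code, the `Pipeline` class that this commit's tests already import offers a programmatic equivalent of `datahub ingest -c`. A rough sketch; the `demo-data` source type and the local GMS address are illustrative assumptions, not part of this diff:

```python
# Programmatic equivalent of `datahub ingest -c recipe.yml`; the config
# dict mirrors the recipe file's source/sink structure.
from datahub.ingestion.run.pipeline import Pipeline

pipeline = Pipeline.create(
    {
        "source": {"type": "demo-data", "config": {}},
        "sink": {
            "type": "datahub-rest",
            "config": {"server": "http://localhost:8080"},
        },
    }
)
pipeline.run()
pipeline.raise_from_status()
```
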
Metadata ingestion developing guide:

```diff
@@ -9,10 +9,10 @@ Also take a look at the guide to [adding a source](./adding-source.md).

 ### Requirements

-1. Python 3.7+ must be installed in your host environment.
+1. Python 3.8+ must be installed in your host environment.
 2. Java 17 (gradle won't work with newer or older versions)
-4. On Debian/Ubuntu: `sudo apt install python3-dev python3-venv`
-5. On Fedora (if using LDAP source integration): `sudo yum install openldap-devel`
+3. On Debian/Ubuntu: `sudo apt install python3-dev python3-venv`
+4. On Fedora (if using LDAP source integration): `sudo yum install openldap-devel`

 ### Set up your Python environment

```

metadata-ingestion setup.py:

```diff
@@ -1,4 +1,3 @@
-import sys
 from typing import Dict, Set

 import setuptools
@@ -11,7 +10,6 @@ with open("./src/datahub/__init__.py") as fp:
 base_requirements = {
     # Typing extension should be >=3.10.0.2 ideally but we can't restrict due to a Airflow 2.1 dependency conflict.
     "typing_extensions>=3.7.4.3",
-    "mypy_extensions>=0.4.3",
     # Actual dependencies.
     "typing-inspect",
     # pydantic 1.8.2 is incompatible with mypy 0.910.
@@ -48,9 +46,7 @@ framework_common = {
     "click-spinner",
     "requests_file",
     "jsonref",
-    # jsonschema drops python 3.7 support in v4.18.0
-    "jsonschema<=4.17.3; python_version < '3.8'",
-    "jsonschema; python_version >= '3.8'",
+    "jsonschema",
     "ruamel.yaml",
 }

@@ -463,7 +459,7 @@ base_dev_requirements = {
     "black==22.12.0",
     "coverage>=5.1",
     "faker>=18.4.0",
-    "flake8>=3.8.3",  # DEPRECATION: Once we drop Python 3.7, we can pin to 6.x.
+    "flake8>=6.0.0",
     "flake8-tidy-imports>=4.3.0",
     "flake8-bugbear==23.3.12",
     "isort>=5.7.0",
@@ -489,9 +485,9 @@ base_dev_requirements = {
             "delta-lake",
             "druid",
             "elasticsearch",
-            "feast" if sys.version_info >= (3, 8) else None,
-            "iceberg" if sys.version_info >= (3, 8) else None,
-            "mlflow" if sys.version_info >= (3, 8) else None,
+            "feast",
+            "iceberg",
+            "mlflow",
             "json-schema",
             "ldap",
             "looker",
@@ -544,14 +540,14 @@ full_test_dev_requirements = {
             "clickhouse",
             "delta-lake",
             "druid",
-            "feast" if sys.version_info >= (3, 8) else None,
+            "feast",
             "hana",
             "hive",
-            "iceberg" if sys.version_info >= (3, 8) else None,
+            "iceberg",
             "kafka-connect",
             "ldap",
             "mongodb",
-            "mssql" if sys.version_info >= (3, 8) else None,
+            "mssql",
             "mysql",
             "mariadb",
             "redash",
@@ -699,7 +695,6 @@ See the [DataHub docs](https://datahubproject.io/docs/metadata-ingestion).
         "Programming Language :: Python",
         "Programming Language :: Python :: 3",
         "Programming Language :: Python :: 3 :: Only",
-        "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
@@ -716,7 +711,7 @@ See the [DataHub docs](https://datahubproject.io/docs/metadata-ingestion).
     ],
     # Package info.
     zip_safe=False,
-    python_requires=">=3.7",
+    python_requires=">=3.8",
     package_dir={"": "src"},
     packages=setuptools.find_namespace_packages(where="./src"),
     package_data={
```

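The `"feast" if sys.version_info >= (3, 8) else None` entries forced setup.py to build its plugin sets conditionally at build time and filter out the `None` placeholders; with a 3.8 floor, plain literals suffice. An illustrative before/after sketch, not a copy of the real file:

```python
import sys

# Before: entries collapsed to None on 3.7 interpreters, and the Nones had
# to be discarded before the set reached setuptools.
plugins_before = {"feast" if sys.version_info >= (3, 8) else None, "mysql"}
plugins_before.discard(None)

# After: the floor is 3.8 everywhere, so the literals stand on their own.
plugins_after = {"feast", "mysql"}
```
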
datahub package __init__ (runtime version warning):

```diff
@@ -16,16 +16,9 @@ def nice_version_name() -> str:
     return __version__


-if sys.version_info < (3, 7):
+if sys.version_info < (3, 8):
     warnings.warn(
-        "DataHub requires Python 3.7 or newer. "
-        "Please upgrade your Python version to continue using DataHub.",
-        FutureWarning,
-        stacklevel=2,
-    )
-elif sys.version_info < (3, 8):
-    warnings.warn(
-        "DataHub will require Python 3.8 or newer soon. "
+        "DataHub requires Python 3.8 or newer. "
         "Please upgrade your Python version to continue using DataHub.",
         FutureWarning,
         stacklevel=2,
```

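Read as a whole, the old two-tier warning ("requires 3.7" plus "will require 3.8 soon") collapses into a single branch. Reassembled from the `+` side of the hunk, the surviving guard looks roughly like this:

```python
import sys
import warnings

if sys.version_info < (3, 8):
    warnings.warn(
        "DataHub requires Python 3.8 or newer. "
        "Please upgrade your Python version to continue using DataHub.",
        FutureWarning,
        stacklevel=2,
    )
```
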
Ingestion report utilities:

```diff
@@ -2,11 +2,10 @@ import dataclasses
 import json
 import logging
 import pprint
-import sys
 from dataclasses import dataclass
 from datetime import datetime, timedelta
 from enum import Enum
-from typing import Any, Dict, Optional
+from typing import Any, Optional

 import humanfriendly
 import pydantic
@@ -19,12 +18,6 @@ from datahub.utilities.lossy_collections import LossyList
 logger = logging.getLogger(__name__)
 LogLevel = Literal["ERROR", "WARNING", "INFO", "DEBUG"]

-# The sort_dicts option was added in Python 3.8.
-if sys.version_info >= (3, 8):
-    PPRINT_OPTIONS = {"sort_dicts": False}
-else:
-    PPRINT_OPTIONS: Dict = {}
-

 @runtime_checkable
 class SupportsAsObj(Protocol):
@@ -32,14 +25,6 @@ class SupportsAsObj(Protocol):
         ...


-def _stacklevel_if_supported(level: int) -> dict:
-    # The logging module added support for stacklevel in Python 3.8.
-    if sys.version_info >= (3, 8):
-        return {"stacklevel": level}
-    else:
-        return {}
-
-
 @dataclass
 class Report(SupportsAsObj):
     @staticmethod
@@ -95,7 +80,7 @@ class Report(SupportsAsObj):
     }

     def as_string(self) -> str:
-        return pprint.pformat(self.as_obj(), width=150, **PPRINT_OPTIONS)
+        return pprint.pformat(self.as_obj(), width=150, sort_dicts=False)

     def as_json(self) -> str:
         return json.dumps(self.as_obj())
@@ -118,7 +103,7 @@ class ReportAttribute(BaseModel):
         return log_levels[self.severity]

     def log(self, msg: str) -> None:
-        logger.log(level=self.logger_sev, msg=msg, **_stacklevel_if_supported(3))
+        logger.log(level=self.logger_sev, msg=msg, stacklevel=3)


 class EntityFilterReport(ReportAttribute):
```

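Both shims existed only because `pprint`'s `sort_dicts` and `logging`'s `stacklevel` arrived in Python 3.8; on a 3.8 floor they can be passed directly. A compact illustration with made-up report data:

```python
import logging
import pprint

report = {"workunits": 42, "failures": 0}
# sort_dicts (Python 3.8+) preserves insertion order instead of sorting keys.
print(pprint.pformat(report, width=150, sort_dicts=False))

# stacklevel (Python 3.8+) attributes the log record to the caller's frame
# rather than to the helper that emitted it.
logging.getLogger(__name__).warning("ingestion finished with warnings", stacklevel=2)
```
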
Source plugin __init__ modules (Feast, Iceberg, MLflow):

```diff
@@ -1,8 +1,3 @@
-import sys
-
-if sys.version_info < (3, 8):
-    raise ImportError("Feast is only supported on Python 3.8+")
-
 from dataclasses import dataclass
 from typing import Dict, Iterable, List, Optional, Tuple, Union

```

```diff
@@ -1,8 +1,3 @@
-import sys
-
-if sys.version_info < (3, 8):
-    raise ImportError("Iceberg is only supported on Python 3.8+")
-
 import json
 import logging
 import uuid
```

```diff
@@ -1,9 +1,3 @@
-import sys
-
-if sys.version_info < (3, 8):
-    raise ImportError("MLflow is only supported on Python 3.8+")
-
-
 from dataclasses import dataclass
 from typing import Any, Callable, Iterable, Optional, TypeVar, Union

```

Schema utilities (TypedDict import):

```diff
@@ -1,7 +1,7 @@
 from collections import Counter
 from typing import Any, Counter as CounterType, Dict, Sequence, Tuple, Union

-from mypy_extensions import TypedDict
+from typing_extensions import TypedDict


 class BasicSchemaDescription(TypedDict):
```

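`mypy_extensions.TypedDict` was the pre-3.8 home of the type; `typing_extensions` tracks the standard-library version that landed in `typing` in Python 3.8, so it is the better backport shim going forward. A minimal usage sketch (the field below is hypothetical, for illustration only):

```python
from typing_extensions import TypedDict


class SchemaSummary(TypedDict):
    num_fields: int  # hypothetical field, not from the real module


summary: SchemaSummary = {"num_fields": 3}
```
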
Feast ingestion test:

```diff
@@ -1,6 +1,3 @@
-import sys
-
-import pytest
 from freezegun import freeze_time

 from datahub.ingestion.run.pipeline import Pipeline
@@ -8,10 +5,6 @@ from tests.test_helpers import mce_helpers

 FROZEN_TIME = "2020-04-14 07:00:00"

-pytestmark = pytest.mark.skipif(
-    sys.version_info < (3, 8), reason="requires python 3.8 or higher"
-)
-

 @freeze_time(FROZEN_TIME)
 def test_feast_repository_ingest(pytestconfig, tmp_path, mock_time):
```

Stateful ingestion integration test:

```diff
@@ -1,5 +1,4 @@
 import subprocess
-import sys
 from typing import Any, Dict, List
 from unittest.mock import patch

@@ -15,13 +14,7 @@ from tests.test_helpers.state_helpers import (
     validate_all_providers_have_committed_successfully,
 )

-pytestmark = [
-    pytest.mark.integration_batch_1,
-    # Skip tests if not on Python 3.8 or higher.
-    pytest.mark.skipif(
-        sys.version_info < (3, 8), reason="Requires python 3.8 or higher"
-    ),
-]
+pytestmark = pytest.mark.integration_batch_1
 FROZEN_TIME = "2020-04-14 07:00:00"
 GMS_PORT = 8080
 GMS_SERVER = f"http://localhost:{GMS_PORT}"
```

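`pytestmark` accepts either a single mark or a list of marks, so once the `skipif` disappears the list wrapper is unnecessary. Both spellings below apply the same remaining mark:

```python
import pytest

# A list is only needed when several marks apply to the whole module...
pytestmark = [pytest.mark.integration_batch_1]

# ...so with the skipif removed, the single-mark form suffices.
pytestmark = pytest.mark.integration_batch_1
```
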
MLflow ingestion integration test:

```diff
@@ -1,6 +1,3 @@
-import sys
-
-if sys.version_info >= (3, 8):
 from pathlib import Path
 from typing import Any, Dict, TypeVar

@@ -12,14 +9,17 @@ if sys.version_info >= (3, 8):

 T = TypeVar("T")

+
 @pytest.fixture
 def tracking_uri(tmp_path: Path) -> str:
     return str(tmp_path / "mlruns")

+
 @pytest.fixture
 def sink_file_path(tmp_path: Path) -> str:
     return str(tmp_path / "mlflow_source_mcps.json")

+
 @pytest.fixture
 def pipeline_config(tracking_uri: str, sink_file_path: str) -> Dict[str, Any]:
     source_type = "mlflow"
@@ -39,6 +39,7 @@ if sys.version_info >= (3, 8):
         },
     }

+
 @pytest.fixture
 def generate_mlflow_data(tracking_uri: str) -> None:
     client = MlflowClient(tracking_uri=tracking_uri)
@@ -80,6 +81,7 @@ if sys.version_info >= (3, 8):
         stage="Archived",
     )

+
 def test_ingestion(
     pytestconfig,
     mock_time,
```

SQL Server integration test:

```diff
@@ -1,6 +1,5 @@
 import os
 import subprocess
-import sys
 import time

 import pytest
@@ -9,10 +8,6 @@ from tests.test_helpers import mce_helpers
 from tests.test_helpers.click_helpers import run_datahub_cmd
 from tests.test_helpers.docker_helpers import cleanup_image, wait_for_port

-pytestmark = pytest.mark.skipif(
-    sys.version_info < (3, 8), reason="requires python 3.8 or higher"
-)
-

 @pytest.fixture(scope="module")
 def mssql_runner(docker_compose_runner, pytestconfig):
```

Iceberg source unit tests:

```diff
@@ -1,12 +1,9 @@
-import sys
 import uuid
 from decimal import Decimal
 from typing import Any, Optional

 import pytest
 from pydantic import ValidationError
-
-if sys.version_info >= (3, 8):
 from pyiceberg.schema import Schema
 from pyiceberg.types import (
     BinaryType,
@@ -51,9 +48,6 @@ if sys.version_info >= (3, 8):
     TimeTypeClass,
 )

-pytestmark = pytest.mark.skipif(
-    sys.version_info < (3, 8), reason="requires python 3.8 or higher"
-)

 def with_iceberg_source() -> IcebergSource:
     catalog: IcebergCatalogConfig = IcebergCatalogConfig(
@@ -64,12 +58,14 @@ if sys.version_info >= (3, 8):
         config=IcebergSourceConfig(catalog=catalog),
     )

+
 def with_iceberg_profiler() -> IcebergProfiler:
     iceberg_source_instance = with_iceberg_source()
     return IcebergProfiler(
         iceberg_source_instance.report, iceberg_source_instance.config.profiling
     )

+
 def assert_field(
     schema_field: SchemaField,
     expected_description: Optional[str],
@@ -86,6 +82,7 @@ if sys.version_info >= (3, 8):
         schema_field.type.type, expected_type
     ), f"Field type {schema_field.type.type} is different from expected type {expected_type}"

+
 def test_config_no_catalog():
     """
     Test when no Iceberg catalog is provided.
@@ -93,6 +90,7 @@ if sys.version_info >= (3, 8):
     with pytest.raises(ValidationError, match="catalog"):
         IcebergSourceConfig()  # type: ignore

+
 def test_config_catalog_not_configured():
     """
     Test when an Iceberg catalog is provided, but not properly configured.
@@ -106,12 +104,14 @@ if sys.version_info >= (3, 8):
     with pytest.raises(ValidationError, match="type"):
         IcebergCatalogConfig(conf={})  # type: ignore

+
 def test_config_for_tests():
     """
     Test valid iceberg source that will be used in unit tests.
     """
     with_iceberg_source()

+
 @pytest.mark.parametrize(
     "iceberg_type, expected_schema_field_type",
     [
@@ -159,9 +159,7 @@ if sys.version_info >= (3, 8):
         ),
     ]:
         schema = Schema(column)
-        schema_fields = iceberg_source_instance._get_schema_fields_for_schema(
-            schema
-        )
+        schema_fields = iceberg_source_instance._get_schema_fields_for_schema(schema)
         assert (
             len(schema_fields) == 1
         ), f"Expected 1 field, but got {len(schema_fields)}"
@@ -172,6 +170,7 @@ if sys.version_info >= (3, 8):
             expected_schema_field_type,
         )

+
 @pytest.mark.parametrize(
     "iceberg_type, expected_array_nested_type",
     [
@@ -241,9 +240,7 @@ if sys.version_info >= (3, 8):
     ]:
         iceberg_source_instance = with_iceberg_source()
         schema = Schema(list_column)
-        schema_fields = iceberg_source_instance._get_schema_fields_for_schema(
-            schema
-        )
+        schema_fields = iceberg_source_instance._get_schema_fields_for_schema(schema)
         assert (
             len(schema_fields) == 1
         ), f"Expected 1 field, but got {len(schema_fields)}"
@@ -258,6 +255,7 @@ if sys.version_info >= (3, 8):
             expected_array_nested_type
         ], f"List Field nested type {arrayType.nestedType} was expected to be {expected_array_nested_type}"

+
 @pytest.mark.parametrize(
     "iceberg_type, expected_map_type",
     [
@@ -327,9 +325,7 @@ if sys.version_info >= (3, 8):
     ]:
         iceberg_source_instance = with_iceberg_source()
         schema = Schema(map_column)
-        schema_fields = iceberg_source_instance._get_schema_fields_for_schema(
-            schema
-        )
+        schema_fields = iceberg_source_instance._get_schema_fields_for_schema(schema)
         # Converting an Iceberg Map type will be done by creating an array of struct(key, value) records.
         # The first field will be the array.
         assert (
@@ -350,6 +346,7 @@ if sys.version_info >= (3, 8):
             expected_map_type,
         )

+
 @pytest.mark.parametrize(
     "iceberg_type, expected_schema_field_type",
     [
@@ -394,9 +391,7 @@ if sys.version_info >= (3, 8):
     iceberg_source_instance = with_iceberg_source()
     schema = Schema(struct_column)
     schema_fields = iceberg_source_instance._get_schema_fields_for_schema(schema)
-    assert (
-        len(schema_fields) == 2
-    ), f"Expected 2 fields, but got {len(schema_fields)}"
+    assert len(schema_fields) == 2, f"Expected 2 fields, but got {len(schema_fields)}"
     assert_field(
         schema_fields[0], struct_column.doc, struct_column.optional, RecordTypeClass
     )
@@ -404,6 +399,7 @@ if sys.version_info >= (3, 8):
         schema_fields[1], field1.doc, field1.optional, expected_schema_field_type
     )

+
 @pytest.mark.parametrize(
     "value_type, value, expected_value",
     [
@@ -444,6 +440,7 @@ if sys.version_info >= (3, 8):
         == expected_value
     )

+
 def test_avro_decimal_bytes_nullable() -> None:
     """
     The following test exposes a problem with decimal (bytes) not preserving extra attributes like _nullable. Decimal (fixed) and Boolean for example do.
@@ -457,9 +454,7 @@ if sys.version_info >= (3, 8):
     print(
         f"Original avro schema string: {decimal_avro_schema_string}"
     )
-    print(
-        f"After avro parsing, _nullable attribute is missing: {decimal_avro_schema}"
-    )
+    print(f"After avro parsing, _nullable attribute is missing: {decimal_avro_schema}")

     decimal_fixed_avro_schema_string = """{"type": "record", "name": "__struct_", "fields": [{"type": {"type": "fixed", "logicalType": "decimal", "precision": 3, "scale": 2, "native_data_type": "decimal(3, 2)", "_nullable": false, "name": "bogusName", "size": 16}, "name": "required_field", "doc": "required field documentation"}]}"""
     decimal_fixed_avro_schema = avro.schema.parse(decimal_fixed_avro_schema_string)
```

MLflow source unit tests:

```diff
@@ -1,6 +1,3 @@
-import sys
-
-if sys.version_info >= (3, 8):
 import datetime
 from pathlib import Path
 from typing import Any, TypeVar, Union
@@ -16,10 +13,12 @@ if sys.version_info >= (3, 8):

 T = TypeVar("T")

+
 @pytest.fixture
 def tracking_uri(tmp_path: Path) -> str:
     return str(tmp_path / "mlruns")

+
 @pytest.fixture
 def source(tracking_uri: str) -> MLflowSource:
     return MLflowSource(
@@ -27,11 +26,13 @@ if sys.version_info >= (3, 8):
         config=MLflowConfig(tracking_uri=tracking_uri),
     )

+
 @pytest.fixture
 def registered_model(source: MLflowSource) -> RegisteredModel:
     model_name = "abc"
     return RegisteredModel(name=model_name)

+
 @pytest.fixture
 def model_version(
     source: MLflowSource,
@@ -44,6 +45,7 @@ if sys.version_info >= (3, 8):
         creation_timestamp=datetime.datetime.now(),
     )

+
 def dummy_search_func(page_token: Union[None, str], **kwargs: Any) -> PagedList[T]:
     dummy_pages = dict(
         page_1=PagedList(items=["a", "b"], token="page_2"),
@@ -61,6 +63,7 @@ if sys.version_info >= (3, 8):
     )
     return page_to_return

+
 def test_stages(source):
     mlflow_registered_model_stages = {
         "Production",
@@ -76,6 +79,7 @@ if sys.version_info >= (3, 8):
         "mlflow_" + str(stage).lower() for stage in mlflow_registered_model_stages
     }

+
 def test_config_model_name_separator(source, model_version):
     name_version_sep = "+"
     source.config.model_name_separator = name_version_sep
@@ -88,6 +92,7 @@ if sys.version_info >= (3, 8):

     assert urn == expected_urn

+
 def test_model_without_run(source, registered_model, model_version):
     run = source._get_mlflow_run(model_version)
     wu = source._get_ml_model_properties_workunit(
@@ -100,6 +105,7 @@ if sys.version_info >= (3, 8):
     assert aspect.hyperParams is None
     assert aspect.trainingMetrics is None

+
 def test_traverse_mlflow_search_func(source):
     expected_items = ["a", "b", "c", "d", "e"]

@@ -107,15 +113,15 @@ if sys.version_info >= (3, 8):

     assert items == expected_items

+
 def test_traverse_mlflow_search_func_with_kwargs(source):
     expected_items = ["A", "B", "C", "D", "E"]

-    items = list(
-        source._traverse_mlflow_search_func(dummy_search_func, case="upper")
-    )
+    items = list(source._traverse_mlflow_search_func(dummy_search_func, case="upper"))

     assert items == expected_items

+
 def test_make_external_link_local(source, model_version):
     expected_url = None

@@ -123,6 +129,7 @@ if sys.version_info >= (3, 8):

     assert url == expected_url

+
 def test_make_external_link_remote(source, model_version):
     tracking_uri_remote = "https://dummy-mlflow-tracking-server.org"
     source.client = MlflowClient(tracking_uri=tracking_uri_remote)
```
