Mirror of https://github.com/datahub-project/datahub.git (synced 2025-10-18 12:27:15 +00:00)
fix(ingest/delta-lake): skip file count if require_files is false (#11611)
This commit is contained in:
parent 3387110b41
commit b74ba11d93
Changes to the Delta Lake source:

@@ -223,15 +223,14 @@ class DeltaLakeSource(Source):
         )
 
         customProperties = {
-            "number_of_files": str(get_file_count(delta_table)),
             "partition_columns": str(delta_table.metadata().partition_columns),
             "table_creation_time": str(delta_table.metadata().created_time),
             "id": str(delta_table.metadata().id),
             "version": str(delta_table.version()),
             "location": self.source_config.complete_path,
         }
-        if not self.source_config.require_files:
-            del customProperties["number_of_files"]  # always 0
+        if self.source_config.require_files:
+            customProperties["number_of_files"] = str(get_file_count(delta_table))
 
         dataset_properties = DatasetPropertiesClass(
             description=delta_table.metadata().description,
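The net effect of the hunk above: the "number_of_files" property is now only computed and attached when require_files is enabled, instead of being computed unconditionally and then deleted again (the removed comment notes the count is always 0 in that case). Below is a minimal standalone sketch of that gating; get_file_count and the require_files flag are taken from the diff, while FakeDeltaTable, build_custom_properties, and the placeholder location are hypothetical stand-ins for illustration only, not the source's real implementation.

# Minimal sketch of the new gating logic; everything except get_file_count
# and the require_files flag is a simplified stub, not DataHub's real code.
from dataclasses import dataclass
from typing import Dict, List


@dataclass
class FakeDeltaTable:
    data_files: List[str]

    def files(self) -> List[str]:
        return self.data_files


def get_file_count(delta_table: FakeDeltaTable) -> int:
    # Per the removed "# always 0" comment, this count is meaningless when the
    # table is loaded without file tracking, so it is now skipped entirely.
    return len(delta_table.files())


def build_custom_properties(delta_table: FakeDeltaTable, require_files: bool) -> Dict[str, str]:
    custom_properties: Dict[str, str] = {
        "location": "s3://bucket/path",  # placeholder for complete_path
    }
    # New behavior: only compute and attach the count when files are required,
    # rather than computing it unconditionally and deleting it afterwards.
    if require_files:
        custom_properties["number_of_files"] = str(get_file_count(delta_table))
    return custom_properties


table = FakeDeltaTable(data_files=["part-0001.parquet", "part-0002.parquet"])
assert build_custom_properties(table, require_files=True)["number_of_files"] == "2"
assert "number_of_files" not in build_custom_properties(table, require_files=False)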
Changes to the MLflow source unit tests:

@@ -1,6 +1,6 @@
 import datetime
 from pathlib import Path
-from typing import Any, TypeVar, Union
+from typing import Any, Union
 
 import pytest
 from mlflow import MlflowClient
@@ -11,8 +11,6 @@ from mlflow.store.entities import PagedList
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.source.mlflow import MLflowConfig, MLflowSource
 
-T = TypeVar("T")
-
 
 @pytest.fixture
 def tracking_uri(tmp_path: Path) -> str:
@@ -46,7 +44,7 @@ def model_version(
     )
 
 
-def dummy_search_func(page_token: Union[None, str], **kwargs: Any) -> PagedList[T]:
+def dummy_search_func(page_token: Union[None, str], **kwargs: Any) -> PagedList[str]:
     dummy_pages = dict(
         page_1=PagedList(items=["a", "b"], token="page_2"),
         page_2=PagedList(items=["c", "d"], token="page_3"),
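For context, PagedList from mlflow.store.entities is a list subclass that also carries a continuation token, so parameterizing it with the concrete item type str is enough for this fixture and the module-level TypeVar could be dropped. The sketch below shows the kind of token-following loop such a fixture is typically exercised with; it is not part of the commit, the consume_pages driver and the terminating page_3 entry are hypothetical additions for illustration.

# Hypothetical pagination driver, for illustration only: walks the dummy
# search function page by page until a page carries no continuation token.
from typing import Any, List, Optional, Union

from mlflow.store.entities import PagedList


def dummy_search_func(page_token: Union[None, str], **kwargs: Any) -> PagedList[str]:
    # Same shape as the fixture in the diff; "page_3" is added here as a
    # hypothetical final page so the loop below terminates.
    dummy_pages = dict(
        page_1=PagedList(items=["a", "b"], token="page_2"),
        page_2=PagedList(items=["c", "d"], token="page_3"),
        page_3=PagedList(items=["e"], token=None),
    )
    return dummy_pages[page_token or "page_1"]


def consume_pages(search_func) -> List[str]:
    items: List[str] = []
    token: Optional[str] = None
    while True:
        page = search_func(page_token=token)
        items.extend(page)  # PagedList behaves like a list of items
        token = page.token
        if token is None:
            break
    return items


assert consume_pages(dummy_search_func) == ["a", "b", "c", "d", "e"]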