refactor(ingest/tableau): mark the fetch_size configuration as deprecated (#12126)

sid-acryl 2024-12-21 00:36:57 +05:30 committed by GitHub
parent e52a4deba8
commit 98c056d569
2 changed files with 12 additions and 7 deletions
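
For context on the mechanism: pydantic_removed_field registers a pre-validator that pops the removed key from the raw config and warns instead of failing validation. A minimal sketch of that pattern, assuming pydantic v1 (the removed_field helper and SketchConfig model below are illustrative, not DataHub's exact implementation):

```python
import warnings
from typing import Dict

from pydantic import BaseModel, root_validator


def removed_field(field_name: str):
    """Build a pre-validator that drops a removed config key, warning the
    user instead of rejecting their existing config file."""

    def _remove(cls, values: Dict) -> Dict:
        if field_name in values:
            warnings.warn(
                f"The '{field_name}' option has been removed and is now ignored.",
                UserWarning,
                stacklevel=2,
            )
            values.pop(field_name)
        return values

    # allow_reuse lets the same wrapped function back several validators.
    return root_validator(pre=True, allow_reuse=True)(_remove)


class SketchConfig(BaseModel):
    page_size: int = 10

    # Registering the validator under any class attribute name is enough for
    # pydantic v1 to pick it up, mirroring the `_fetch_size = ...` assignment
    # in the diff below.
    _fetch_size = removed_field("fetch_size")


# Loads cleanly, emitting a warning rather than a validation error:
# SketchConfig.parse_obj({"page_size": 20, "fetch_size": 250})
```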


@@ -49,6 +49,7 @@ from datahub.configuration.source_common import (
     DatasetSourceConfigMixin,
 )
 from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
+from datahub.configuration.validate_field_removal import pydantic_removed_field
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.mcp_builder import (
     ContainerKey,
@@ -380,11 +381,6 @@ class TableauConfig(
         description="[advanced] Number of metadata objects (e.g. CustomSQLTable, PublishedDatasource, etc) to query at a time using the Tableau API.",
     )
-    fetch_size: int = Field(
-        default=250,
-        description="Specifies the number of records to retrieve in each batch during a query execution.",
-    )
-
     # We've found that even with a small workbook page size (e.g. 10), the Tableau API often
     # returns warnings like this:
     # {
@@ -499,6 +495,10 @@ class TableauConfig(
         "This can only be used with ingest_tags enabled as it will overwrite tags entered from the UI.",
     )
+    _fetch_size = pydantic_removed_field(
+        "fetch_size",
+    )
+
     # pre=True because we want to make some decisions before pydantic initializes the configuration to default values
     @root_validator(pre=True)
     def projects_backward_compatibility(cls, values: Dict) -> Dict:
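
A note on the pre=True detail called out in the comment above: a pre root validator receives the raw input mapping before pydantic applies defaults or type coercion, so it can tell an omitted key apart from a defaulted one and migrate legacy keys in place. A hedged sketch with hypothetical field names (the real projects_backward_compatibility logic differs in detail):

```python
from typing import Dict, List, Optional

from pydantic import BaseModel, root_validator


class ProjectsSketchConfig(BaseModel):
    # Hypothetical pair: a deprecated flat list and its replacement pattern.
    projects: Optional[List[str]] = None
    project_pattern: Optional[Dict[str, List[str]]] = None

    # pre=True: runs on the raw dict, before defaults are filled in, so the
    # check below sees exactly what the user wrote in their recipe.
    @root_validator(pre=True)
    def projects_backward_compatibility(cls, values: Dict) -> Dict:
        if values.get("projects") and "project_pattern" not in values:
            values["project_pattern"] = {
                "allow": [f"^{name}$" for name in values["projects"]]
            }
        return values


# ProjectsSketchConfig.parse_obj({"projects": ["Finance"]}).project_pattern
# -> {"allow": ["^Finance$"]}
```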
@@ -1147,7 +1147,7 @@ class TableauSiteSource:
         connection_type: str,
         query_filter: str,
         current_cursor: Optional[str],
-        fetch_size: int = 250,
+        fetch_size: int,
         retry_on_auth_error: bool = True,
         retries_remaining: Optional[int] = None,
     ) -> Tuple[dict, Optional[str], int]:
@@ -1344,7 +1344,11 @@ class TableauSiteSource:
            connection_type=connection_type,
            query_filter=filter_,
            current_cursor=current_cursor,
-           fetch_size=self.config.fetch_size,
+           # `filter_page` contains metadata object IDs (e.g., Project IDs, Field IDs, Sheet IDs, etc.).
+           # The number of IDs is always less than or equal to page_size.
+           # If the IDs are primary keys, the number of metadata objects to load matches the number of records to return.
+           # In our case, the IDs are mostly primary keys, so fetch_size is set equal to page_size.
+           fetch_size=page_size,
        )
        yield from connection_objects.get(c.NODES) or []
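
The reasoning in the new comment, restated as a loop: the outer pagination slices object IDs into filter pages of at most page_size IDs, and because each ID matches exactly one object, requesting page_size records per batch is guaranteed to cover a full filter page. A runnable sketch of that shape, with hypothetical helpers standing in for the Tableau API calls:

```python
from typing import Iterable, List, Optional, Tuple


def get_page(
    ids: List[str], cursor: Optional[str], fetch_size: int
) -> Tuple[List[dict], Optional[str]]:
    """Stand-in for the metadata API: returns up to fetch_size records
    for the requested IDs, plus a cursor when more remain."""
    start = int(cursor or 0)
    batch = [{"id": i} for i in ids[start : start + fetch_size]]
    more = start + fetch_size < len(ids)
    return batch, (str(start + fetch_size) if more else None)


def fetch_objects(all_ids: List[str], page_size: int) -> Iterable[dict]:
    # Each filter page carries at most page_size IDs, and each ID matches
    # exactly one object, so fetch_size=page_size covers the page; the
    # inner cursor loop normally finishes in a single round trip.
    for start in range(0, len(all_ids), page_size):
        id_page = all_ids[start : start + page_size]
        cursor: Optional[str] = None
        while True:
            nodes, cursor = get_page(id_page, cursor, fetch_size=page_size)
            yield from nodes
            if cursor is None:
                break


# len(list(fetch_objects([f"obj-{n}" for n in range(25)], page_size=10))) == 25
```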


@@ -1324,6 +1324,7 @@ def test_permission_warning(pytestconfig, tmp_path, mock_datahub_graph):
        query_filter=mock.MagicMock(),
        current_cursor=None,
        retries_remaining=1,
+       fetch_size=10,
    )
    warnings = list(reporter.warnings)