feat(ingest): add subtype capability modifier (#14039)

This commit is contained in:
Aseem Bansal 2025-07-11 14:09:18 +05:30 committed by GitHub
parent 213d98c599
commit c9de0fe36d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 134 additions and 28 deletions

View File

@ -1,5 +1,5 @@
{
"generated_at": "2025-07-07T09:06:40.727786+00:00",
"generated_at": "2025-07-11T05:33:33.512319+00:00",
"generated_by": "metadata-ingestion/scripts/capability_summary.py",
"plugin_details": {
"abs": {
@ -33,7 +33,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Database",
"Schema"
],
"supported": true
},
{
@ -127,7 +130,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Project",
"Dataset"
],
"supported": true
},
{
@ -245,7 +251,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Database",
"Schema"
],
"supported": true
},
{
@ -343,7 +352,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Database",
"Schema"
],
"supported": true
},
{
@ -675,7 +687,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Database",
"Schema"
],
"supported": true
},
{
@ -871,7 +886,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"GCS bucket",
"Folder"
],
"supported": true
},
{
@ -955,7 +973,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Database",
"Schema"
],
"supported": true
},
{
@ -1071,7 +1092,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Database",
"Schema"
],
"supported": true
},
{
@ -1143,7 +1167,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Database",
"Schema"
],
"supported": true
},
{
@ -1497,7 +1524,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Database",
"Schema"
],
"supported": true
},
{
@ -1703,7 +1733,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Database",
"Schema"
],
"supported": true
},
{
@ -1783,7 +1816,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Database",
"Schema"
],
"supported": true
},
{
@ -1935,7 +1971,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Database",
"Schema"
],
"supported": true
},
{
@ -2001,7 +2040,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Database",
"Schema"
],
"supported": true
},
{
@ -2199,7 +2241,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Database",
"Schema"
],
"supported": true
},
{
@ -2271,7 +2316,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Database",
"Schema"
],
"supported": true
},
{
@ -2677,7 +2725,9 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Sigma Workspace"
],
"supported": true
},
{
@ -2753,7 +2803,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Database",
"Schema"
],
"supported": true
},
{
@ -2859,7 +2912,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Database",
"Schema"
],
"supported": true
},
{
@ -3122,7 +3178,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Database",
"Schema"
],
"supported": true
},
{
@ -3194,7 +3253,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Catalog",
"Schema"
],
"supported": true
},
{
@ -3288,7 +3350,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Database",
"Schema"
],
"supported": true
},
{

View File

@ -45,6 +45,7 @@ from datahub.ingestion.source.bigquery_v2.queries_extractor import (
BigQueryQueriesExtractorConfig,
)
from datahub.ingestion.source.bigquery_v2.usage import BigQueryUsageExtractor
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
from datahub.ingestion.source.state.profiling_state_handler import ProfilingHandler
from datahub.ingestion.source.state.redundant_run_skip_handler import (
RedundantLineageRunSkipHandler,
@ -78,7 +79,14 @@ def cleanup(config: BigQueryV2Config) -> None:
supported=False,
)
@capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
@capability(SourceCapability.CONTAINERS, "Enabled by default")
@capability(
SourceCapability.CONTAINERS,
"Enabled by default",
subtype_modifier=[
SourceCapabilityModifier.BIGQUERY_PROJECT,
SourceCapabilityModifier.BIGQUERY_DATASET,
],
)
@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
@capability(
SourceCapability.DATA_PROFILING,

View File

@ -16,6 +16,7 @@ from datahub.ingestion.api.decorators import (
from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceCapability
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.aws.aws_common import AwsConnectionConfig
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
from datahub.ingestion.source.data_lake_common.config import PathSpecsConfigMixin
from datahub.ingestion.source.data_lake_common.data_lake_utils import PLATFORM_GCS
from datahub.ingestion.source.data_lake_common.object_store import (
@ -82,7 +83,14 @@ class GCSSourceReport(DataLakeSourceReport):
@platform_name("Google Cloud Storage", id=PLATFORM_GCS)
@config_class(GCSSourceConfig)
@support_status(SupportStatus.INCUBATING)
@capability(SourceCapability.CONTAINERS, "Enabled by default")
@capability(
SourceCapability.CONTAINERS,
"Enabled by default",
subtype_modifier=[
SourceCapabilityModifier.GCS_BUCKET,
SourceCapabilityModifier.FOLDER,
],
)
@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
@capability(SourceCapability.DATA_PROFILING, "Not supported", supported=False)
class GCSSource(StatefulIngestionSourceBase):

View File

@ -30,6 +30,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.common.subtypes import (
BIContainerSubTypes,
DatasetSubTypes,
SourceCapabilityModifier,
)
from datahub.ingestion.source.sigma.config import (
PlatformDetail,
@ -95,7 +96,11 @@ logger = logging.getLogger(__name__)
@platform_name("Sigma")
@config_class(SigmaSourceConfig)
@support_status(SupportStatus.INCUBATING)
@capability(SourceCapability.CONTAINERS, "Enabled by default")
@capability(
SourceCapability.CONTAINERS,
"Enabled by default",
subtype_modifier=[SourceCapabilityModifier.SIGMA_WORKSPACE],
)
@capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default.")
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")

View File

@ -32,6 +32,7 @@ from datahub.ingestion.api.source import (
)
from datahub.ingestion.api.source_helpers import auto_workunit
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
from datahub.ingestion.source.snowflake.constants import (
GENERIC_PERMISSION_ERROR_KEY,
SnowflakeEdition,
@ -97,7 +98,14 @@ logger: logging.Logger = logging.getLogger(__name__)
@support_status(SupportStatus.CERTIFIED)
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
@capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
@capability(SourceCapability.CONTAINERS, "Enabled by default")
@capability(
SourceCapability.CONTAINERS,
"Enabled by default",
subtype_modifier=[
SourceCapabilityModifier.DATABASE,
SourceCapabilityModifier.SCHEMA,
],
)
@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
@capability(
SourceCapability.DATA_PROFILING,

View File

@ -292,6 +292,10 @@ class ProfileMetadata:
SourceCapability.CONTAINERS,
"Enabled by default",
supported=True,
subtype_modifier=[
SourceCapabilityModifier.DATABASE,
SourceCapabilityModifier.SCHEMA,
],
)
@capability(
SourceCapability.DESCRIPTIONS,

View File

@ -56,6 +56,7 @@ from datahub.ingestion.source.aws.s3_util import (
from datahub.ingestion.source.common.subtypes import (
DatasetContainerSubTypes,
DatasetSubTypes,
SourceCapabilityModifier,
)
from datahub.ingestion.source.state.stale_entity_removal_handler import (
StaleEntityRemovalHandler,
@ -152,7 +153,14 @@ logger: logging.Logger = logging.getLogger(__name__)
@capability(SourceCapability.USAGE_STATS, "Enabled by default")
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
@capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
@capability(SourceCapability.CONTAINERS, "Enabled by default")
@capability(
SourceCapability.CONTAINERS,
"Enabled by default",
subtype_modifier=[
SourceCapabilityModifier.CATALOG,
SourceCapabilityModifier.SCHEMA,
],
)
@capability(SourceCapability.OWNERSHIP, "Supported via the `include_ownership` config")
@capability(
SourceCapability.DATA_PROFILING, "Supported via the `profiling.enabled` config"