fix(ingest): update capability modifiers as per telemetry (#14206)

This commit is contained in:
Aseem Bansal 2025-07-24 19:41:09 +05:30 committed by GitHub
parent 08c587d065
commit 7ec30652b6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 149 additions and 10 deletions

View File

@ -23,7 +23,8 @@
"Bash(yarn test:*)", "Bash(yarn test:*)",
"Bash(yarn generate:*)", "Bash(yarn generate:*)",
"Bash(./gradlew :datahub-web-react:yarnLintFix)", "Bash(./gradlew :datahub-web-react:yarnLintFix)",
"Bash(./gradlew :datahub-web-react:yarnLint)" "Bash(./gradlew :datahub-web-react:yarnLint)",
"Bash(./gradlew :metadata-ingestion:capabilitySummary)"
], ],
"deny": [] "deny": []
} }

View File

@ -1,5 +1,5 @@
{ {
"generated_at": "2025-07-14T09:20:09.632850+00:00", "generated_at": "2025-07-24T13:24:05.751563+00:00",
"generated_by": "metadata-ingestion/scripts/capability_summary.py", "generated_by": "metadata-ingestion/scripts/capability_summary.py",
"plugin_details": { "plugin_details": {
"abs": { "abs": {
@ -49,6 +49,7 @@
"capability": "LINEAGE_FINE", "capability": "LINEAGE_FINE",
"description": "Supported for S3 tables", "description": "Supported for S3 tables",
"subtype_modifier": [ "subtype_modifier": [
"View",
"Table" "Table"
], ],
"supported": true "supported": true
@ -95,6 +96,7 @@
"capability": "LINEAGE_COARSE", "capability": "LINEAGE_COARSE",
"description": "Supported for S3 tables", "description": "Supported for S3 tables",
"subtype_modifier": [ "subtype_modifier": [
"View",
"Table" "Table"
], ],
"supported": true "supported": true
@ -305,7 +307,8 @@
"capability": "LINEAGE_COARSE", "capability": "LINEAGE_COARSE",
"description": "Enabled by default to get lineage for views via `include_view_lineage`", "description": "Enabled by default to get lineage for views via `include_view_lineage`",
"subtype_modifier": [ "subtype_modifier": [
"View" "View",
"Table"
], ],
"supported": true "supported": true
}, },
@ -462,6 +465,12 @@
}, },
"datahub": { "datahub": {
"capabilities": [ "capabilities": [
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"supported": true
},
{ {
"capability": "DELETION_DETECTION", "capability": "DELETION_DETECTION",
"description": "Enabled by default via stateful ingestion", "description": "Enabled by default via stateful ingestion",
@ -595,6 +604,12 @@
}, },
"delta-lake": { "delta-lake": {
"capabilities": [ "capabilities": [
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"supported": true
},
{ {
"capability": "DELETION_DETECTION", "capability": "DELETION_DETECTION",
"description": "Enabled by default via stateful ingestion", "description": "Enabled by default via stateful ingestion",
@ -918,6 +933,14 @@
}, },
"glue": { "glue": {
"capabilities": [ "capabilities": [
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": [
"Database"
],
"supported": true
},
{ {
"capability": "LINEAGE_FINE", "capability": "LINEAGE_FINE",
"description": "Support via the `emit_s3_lineage` config field", "description": "Support via the `emit_s3_lineage` config field",
@ -1057,6 +1080,14 @@
"subtype_modifier": null, "subtype_modifier": null,
"supported": true "supported": true
}, },
{
"capability": "USAGE_STATS",
"description": "Supported by default",
"subtype_modifier": [
"Project"
],
"supported": true
},
{ {
"capability": "DESCRIPTIONS", "capability": "DESCRIPTIONS",
"description": "Supported by default", "description": "Supported by default",
@ -1433,6 +1464,15 @@
}, },
"looker": { "looker": {
"capabilities": [ "capabilities": [
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": [
"LookML Model",
"Folder"
],
"supported": true
},
{ {
"capability": "LINEAGE_FINE", "capability": "LINEAGE_FINE",
"description": "Enabled by default, configured using `extract_column_level_lineage`", "description": "Enabled by default, configured using `extract_column_level_lineage`",
@ -1489,6 +1529,14 @@
}, },
"lookml": { "lookml": {
"capabilities": [ "capabilities": [
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": [
"LookML Project"
],
"supported": true
},
{ {
"capability": "LINEAGE_FINE", "capability": "LINEAGE_FINE",
"description": "Enabled by default, configured using `extract_column_level_lineage`", "description": "Enabled by default, configured using `extract_column_level_lineage`",
@ -1712,6 +1760,14 @@
}, },
"mongodb": { "mongodb": {
"capabilities": [ "capabilities": [
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": [
"Database"
],
"supported": true
},
{ {
"capability": "DELETION_DETECTION", "capability": "DELETION_DETECTION",
"description": "Enabled by default via stateful ingestion", "description": "Enabled by default via stateful ingestion",
@ -2511,7 +2567,9 @@
{ {
"capability": "CONTAINERS", "capability": "CONTAINERS",
"description": "Enabled by default", "description": "Enabled by default",
"subtype_modifier": null, "subtype_modifier": [
"Database"
],
"supported": true "supported": true
}, },
{ {
@ -2591,7 +2649,10 @@
{ {
"capability": "CONTAINERS", "capability": "CONTAINERS",
"description": "Enabled by default", "description": "Enabled by default",
"subtype_modifier": null, "subtype_modifier": [
"Folder",
"S3 bucket"
],
"supported": true "supported": true
}, },
{ {

View File

@ -75,6 +75,7 @@ from datahub.ingestion.source.aws.tag_entities import (
from datahub.ingestion.source.common.subtypes import ( from datahub.ingestion.source.common.subtypes import (
DatasetContainerSubTypes, DatasetContainerSubTypes,
DatasetSubTypes, DatasetSubTypes,
SourceCapabilityModifier,
) )
from datahub.ingestion.source.glue_profiling_config import GlueProfilingConfig from datahub.ingestion.source.glue_profiling_config import GlueProfilingConfig
from datahub.ingestion.source.state.stale_entity_removal_handler import ( from datahub.ingestion.source.state.stale_entity_removal_handler import (
@ -275,6 +276,13 @@ class GlueSourceReport(StaleEntityRemovalSourceReport):
@capability( @capability(
SourceCapability.LINEAGE_FINE, "Support via the `emit_s3_lineage` config field" SourceCapability.LINEAGE_FINE, "Support via the `emit_s3_lineage` config field"
) )
@capability(
SourceCapability.CONTAINERS,
"Enabled by default",
subtype_modifier=[
SourceCapabilityModifier.DATABASE,
],
)
class GlueSource(StatefulIngestionSourceBase): class GlueSource(StatefulIngestionSourceBase):
""" """
Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](../../../../docs/generated/ingestion/sources/s3.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub. Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](../../../../docs/generated/ingestion/sources/s3.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub.

View File

@ -6,7 +6,9 @@ from typing import Dict, Iterable, List, Optional
from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.api.decorators import ( from datahub.ingestion.api.decorators import (
SourceCapability,
SupportStatus, SupportStatus,
capability,
config_class, config_class,
platform_name, platform_name,
support_status, support_status,
@ -37,6 +39,7 @@ logger = logging.getLogger(__name__)
@platform_name("DataHub") @platform_name("DataHub")
@config_class(DataHubSourceConfig) @config_class(DataHubSourceConfig)
@support_status(SupportStatus.TESTING) @support_status(SupportStatus.TESTING)
@capability(SourceCapability.CONTAINERS, "Enabled by default")
class DataHubSource(StatefulIngestionSourceBase): class DataHubSource(StatefulIngestionSourceBase):
platform: str = "datahub" platform: str = "datahub"

View File

@ -85,6 +85,7 @@ OPERATION_STATEMENT_TYPES = {
@config_class(DeltaLakeSourceConfig) @config_class(DeltaLakeSourceConfig)
@support_status(SupportStatus.INCUBATING) @support_status(SupportStatus.INCUBATING)
@capability(SourceCapability.TAGS, "Can extract S3 object/bucket tags if enabled") @capability(SourceCapability.TAGS, "Can extract S3 object/bucket tags if enabled")
@capability(SourceCapability.CONTAINERS, "Enabled by default")
class DeltaLakeSource(StatefulIngestionSourceBase): class DeltaLakeSource(StatefulIngestionSourceBase):
""" """
This plugin extracts: This plugin extracts:

View File

@ -22,6 +22,7 @@ from datahub.ingestion.api.decorators import (
) )
from datahub.ingestion.api.source import MetadataWorkUnitProcessor from datahub.ingestion.api.source import MetadataWorkUnitProcessor
from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
from datahub.ingestion.source.hex.api import HexApi, HexApiReport from datahub.ingestion.source.hex.api import HexApi, HexApiReport
from datahub.ingestion.source.hex.constants import ( from datahub.ingestion.source.hex.constants import (
DATAHUB_API_PAGE_SIZE_DEFAULT, DATAHUB_API_PAGE_SIZE_DEFAULT,
@ -179,6 +180,13 @@ class HexReport(
@capability(SourceCapability.OWNERSHIP, "Supported by default") @capability(SourceCapability.OWNERSHIP, "Supported by default")
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
@capability(SourceCapability.CONTAINERS, "Enabled by default") @capability(SourceCapability.CONTAINERS, "Enabled by default")
@capability(
SourceCapability.USAGE_STATS,
"Supported by default",
subtype_modifier=[
SourceCapabilityModifier.HEX_PROJECT,
],
)
class HexSource(StatefulIngestionSourceBase): class HexSource(StatefulIngestionSourceBase):
def __init__(self, config: HexSourceConfig, ctx: PipelineContext): def __init__(self, config: HexSourceConfig, ctx: PipelineContext):
super().__init__(config, ctx) super().__init__(config, ctx)

View File

@ -51,6 +51,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.common.subtypes import ( from datahub.ingestion.source.common.subtypes import (
BIAssetSubTypes, BIAssetSubTypes,
BIContainerSubTypes, BIContainerSubTypes,
SourceCapabilityModifier,
) )
from datahub.ingestion.source.looker import looker_usage from datahub.ingestion.source.looker import looker_usage
from datahub.ingestion.source.looker.looker_common import ( from datahub.ingestion.source.looker.looker_common import (
@ -127,6 +128,14 @@ logger = logging.getLogger(__name__)
"Enabled by default, configured using `extract_usage_history`", "Enabled by default, configured using `extract_usage_history`",
) )
@capability(SourceCapability.TEST_CONNECTION, "Enabled by default") @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
@capability(
SourceCapability.CONTAINERS,
"Enabled by default",
subtype_modifier=[
SourceCapabilityModifier.LOOKML_MODEL,
SourceCapabilityModifier.LOOKER_FOLDER,
],
)
class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase): class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
""" """
This plugin extracts the following: This plugin extracts the following:

View File

@ -27,6 +27,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.common.subtypes import ( from datahub.ingestion.source.common.subtypes import (
BIContainerSubTypes, BIContainerSubTypes,
DatasetSubTypes, DatasetSubTypes,
SourceCapabilityModifier,
) )
from datahub.ingestion.source.git.git_import import GitClone from datahub.ingestion.source.git.git_import import GitClone
from datahub.ingestion.source.looker.looker_common import ( from datahub.ingestion.source.looker.looker_common import (
@ -273,6 +274,13 @@ class LookerManifest:
SourceCapability.LINEAGE_FINE, SourceCapability.LINEAGE_FINE,
"Enabled by default, configured using `extract_column_level_lineage`", "Enabled by default, configured using `extract_column_level_lineage`",
) )
@capability(
SourceCapability.CONTAINERS,
"Enabled by default",
subtype_modifier=[
SourceCapabilityModifier.LOOKML_PROJECT,
],
)
class LookMLSource(StatefulIngestionSourceBase): class LookMLSource(StatefulIngestionSourceBase):
""" """
This plugin extracts the following: This plugin extracts the following:

View File

@ -36,7 +36,10 @@ from datahub.ingestion.api.decorators import (
) )
from datahub.ingestion.api.source import MetadataWorkUnitProcessor from datahub.ingestion.api.source import MetadataWorkUnitProcessor
from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.common.subtypes import DatasetContainerSubTypes from datahub.ingestion.source.common.subtypes import (
DatasetContainerSubTypes,
SourceCapabilityModifier,
)
from datahub.ingestion.source.schema_inference.object import ( from datahub.ingestion.source.schema_inference.object import (
SchemaDescription, SchemaDescription,
construct_schema, construct_schema,
@ -249,6 +252,13 @@ def construct_schema_pymongo(
@support_status(SupportStatus.CERTIFIED) @support_status(SupportStatus.CERTIFIED)
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") @capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
@capability(
SourceCapability.CONTAINERS,
"Enabled by default",
subtype_modifier=[
SourceCapabilityModifier.DATABASE,
],
)
@dataclass @dataclass
class MongoDBSource(StatefulIngestionSourceBase): class MongoDBSource(StatefulIngestionSourceBase):
""" """

View File

@ -47,6 +47,7 @@ from datahub.ingestion.source.common.data_reader import DataReader
from datahub.ingestion.source.common.subtypes import ( from datahub.ingestion.source.common.subtypes import (
DatasetContainerSubTypes, DatasetContainerSubTypes,
DatasetSubTypes, DatasetSubTypes,
SourceCapabilityModifier,
) )
from datahub.ingestion.source.redshift.config import RedshiftConfig from datahub.ingestion.source.redshift.config import RedshiftConfig
from datahub.ingestion.source.redshift.datashares import RedshiftDatasharesHelper from datahub.ingestion.source.redshift.datashares import RedshiftDatasharesHelper
@ -126,7 +127,13 @@ logger: logging.Logger = logging.getLogger(__name__)
@platform_name("Redshift") @platform_name("Redshift")
@config_class(RedshiftConfig) @config_class(RedshiftConfig)
@support_status(SupportStatus.CERTIFIED) @support_status(SupportStatus.CERTIFIED)
@capability(SourceCapability.CONTAINERS, "Enabled by default") @capability(
SourceCapability.CONTAINERS,
"Enabled by default",
subtype_modifier=[
SourceCapabilityModifier.DATABASE,
],
)
@capability(SourceCapability.DOMAINS, "Supported via the `domain` config field") @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
@capability(SourceCapability.DESCRIPTIONS, "Enabled by default") @capability(SourceCapability.DESCRIPTIONS, "Enabled by default")

View File

@ -41,6 +41,7 @@ from datahub.ingestion.source.aws.s3_util import (
get_key_prefix, get_key_prefix,
strip_s3_prefix, strip_s3_prefix,
) )
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
from datahub.ingestion.source.data_lake_common.data_lake_utils import ( from datahub.ingestion.source.data_lake_common.data_lake_utils import (
ContainerWUCreator, ContainerWUCreator,
add_partition_columns_to_schema, add_partition_columns_to_schema,
@ -196,7 +197,14 @@ class TableData:
@platform_name("S3 / Local Files", id="s3") @platform_name("S3 / Local Files", id="s3")
@config_class(DataLakeSourceConfig) @config_class(DataLakeSourceConfig)
@support_status(SupportStatus.INCUBATING) @support_status(SupportStatus.INCUBATING)
@capability(SourceCapability.CONTAINERS, "Enabled by default") @capability(
SourceCapability.CONTAINERS,
"Enabled by default",
subtype_modifier=[
SourceCapabilityModifier.FOLDER,
SourceCapabilityModifier.S3_BUCKET,
],
)
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
@capability( @capability(
SourceCapability.SCHEMA_METADATA, "Can infer schema from supported file types" SourceCapability.SCHEMA_METADATA, "Can infer schema from supported file types"

View File

@ -347,12 +347,18 @@ class Partitionitem:
@capability( @capability(
SourceCapability.LINEAGE_COARSE, SourceCapability.LINEAGE_COARSE,
"Supported for S3 tables", "Supported for S3 tables",
subtype_modifier=[SourceCapabilityModifier.TABLE], subtype_modifier=[
SourceCapabilityModifier.VIEW,
SourceCapabilityModifier.TABLE,
],
) )
@capability( @capability(
SourceCapability.LINEAGE_FINE, SourceCapability.LINEAGE_FINE,
"Supported for S3 tables", "Supported for S3 tables",
subtype_modifier=[SourceCapabilityModifier.TABLE], subtype_modifier=[
SourceCapabilityModifier.VIEW,
SourceCapabilityModifier.TABLE,
],
) )
@capability(SourceCapability.DESCRIPTIONS, "Enabled by default") @capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
class AthenaSource(SQLAlchemySource): class AthenaSource(SQLAlchemySource):

View File

@ -32,6 +32,7 @@ from datahub.ingestion.api.decorators import (
support_status, support_status,
) )
from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
from datahub.ingestion.source.sql.sql_common import ( from datahub.ingestion.source.sql.sql_common import (
SqlWorkUnit, SqlWorkUnit,
logger, logger,
@ -383,6 +384,14 @@ clickhouse_datetime_format = "%Y-%m-%d %H:%M:%S"
SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion" SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
) )
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
@capability(
SourceCapability.LINEAGE_COARSE,
"Enabled by default to get lineage for views via `include_view_lineage`",
subtype_modifier=[
SourceCapabilityModifier.VIEW,
SourceCapabilityModifier.TABLE,
],
)
class ClickHouseSource(TwoTierSQLAlchemySource): class ClickHouseSource(TwoTierSQLAlchemySource):
""" """
This plugin extracts the following: This plugin extracts the following: