fix(ingest): update capability modifiers as per telemetry (#14206)

This commit is contained in:
Aseem Bansal 2025-07-24 19:41:09 +05:30 committed by GitHub
parent 08c587d065
commit 7ec30652b6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 149 additions and 10 deletions

View File

@@ -23,7 +23,8 @@
"Bash(yarn test:*)",
"Bash(yarn generate:*)",
"Bash(./gradlew :datahub-web-react:yarnLintFix)",
"Bash(./gradlew :datahub-web-react:yarnLint)"
"Bash(./gradlew :datahub-web-react:yarnLint)",
"Bash(./gradlew :metadata-ingestion:capabilitySummary)"
],
"deny": []
}

View File

@@ -1,5 +1,5 @@
{
"generated_at": "2025-07-14T09:20:09.632850+00:00",
"generated_at": "2025-07-24T13:24:05.751563+00:00",
"generated_by": "metadata-ingestion/scripts/capability_summary.py",
"plugin_details": {
"abs": {
@@ -49,6 +49,7 @@
"capability": "LINEAGE_FINE",
"description": "Supported for S3 tables",
"subtype_modifier": [
"View",
"Table"
],
"supported": true
@@ -95,6 +96,7 @@
"capability": "LINEAGE_COARSE",
"description": "Supported for S3 tables",
"subtype_modifier": [
"View",
"Table"
],
"supported": true
@@ -305,7 +307,8 @@
"capability": "LINEAGE_COARSE",
"description": "Enabled by default to get lineage for views via `include_view_lineage`",
"subtype_modifier": [
"View"
"View",
"Table"
],
"supported": true
},
@@ -462,6 +465,12 @@
},
"datahub": {
"capabilities": [
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"supported": true
},
{
"capability": "DELETION_DETECTION",
"description": "Enabled by default via stateful ingestion",
@@ -595,6 +604,12 @@
},
"delta-lake": {
"capabilities": [
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"supported": true
},
{
"capability": "DELETION_DETECTION",
"description": "Enabled by default via stateful ingestion",
@@ -918,6 +933,14 @@
},
"glue": {
"capabilities": [
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": [
"Database"
],
"supported": true
},
{
"capability": "LINEAGE_FINE",
"description": "Support via the `emit_s3_lineage` config field",
@@ -1057,6 +1080,14 @@
"subtype_modifier": null,
"supported": true
},
{
"capability": "USAGE_STATS",
"description": "Supported by default",
"subtype_modifier": [
"Project"
],
"supported": true
},
{
"capability": "DESCRIPTIONS",
"description": "Supported by default",
@@ -1433,6 +1464,15 @@
},
"looker": {
"capabilities": [
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": [
"LookML Model",
"Folder"
],
"supported": true
},
{
"capability": "LINEAGE_FINE",
"description": "Enabled by default, configured using `extract_column_level_lineage`",
@@ -1489,6 +1529,14 @@
},
"lookml": {
"capabilities": [
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": [
"LookML Project"
],
"supported": true
},
{
"capability": "LINEAGE_FINE",
"description": "Enabled by default, configured using `extract_column_level_lineage`",
@@ -1712,6 +1760,14 @@
},
"mongodb": {
"capabilities": [
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": [
"Database"
],
"supported": true
},
{
"capability": "DELETION_DETECTION",
"description": "Enabled by default via stateful ingestion",
@@ -2511,7 +2567,9 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Database"
],
"supported": true
},
{
@@ -2591,7 +2649,10 @@
{
"capability": "CONTAINERS",
"description": "Enabled by default",
"subtype_modifier": null,
"subtype_modifier": [
"Folder",
"S3 bucket"
],
"supported": true
},
{

View File

@@ -75,6 +75,7 @@ from datahub.ingestion.source.aws.tag_entities import (
from datahub.ingestion.source.common.subtypes import (
DatasetContainerSubTypes,
DatasetSubTypes,
SourceCapabilityModifier,
)
from datahub.ingestion.source.glue_profiling_config import GlueProfilingConfig
from datahub.ingestion.source.state.stale_entity_removal_handler import (
@@ -275,6 +276,13 @@ class GlueSourceReport(StaleEntityRemovalSourceReport):
@capability(
SourceCapability.LINEAGE_FINE, "Support via the `emit_s3_lineage` config field"
)
@capability(
SourceCapability.CONTAINERS,
"Enabled by default",
subtype_modifier=[
SourceCapabilityModifier.DATABASE,
],
)
class GlueSource(StatefulIngestionSourceBase):
"""
Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](../../../../docs/generated/ingestion/sources/s3.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub.

View File

@@ -6,7 +6,9 @@ from typing import Dict, Iterable, List, Optional
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.api.decorators import (
SourceCapability,
SupportStatus,
capability,
config_class,
platform_name,
support_status,
@@ -37,6 +39,7 @@ logger = logging.getLogger(__name__)
@platform_name("DataHub")
@config_class(DataHubSourceConfig)
@support_status(SupportStatus.TESTING)
@capability(SourceCapability.CONTAINERS, "Enabled by default")
class DataHubSource(StatefulIngestionSourceBase):
platform: str = "datahub"

View File

@@ -85,6 +85,7 @@ OPERATION_STATEMENT_TYPES = {
@config_class(DeltaLakeSourceConfig)
@support_status(SupportStatus.INCUBATING)
@capability(SourceCapability.TAGS, "Can extract S3 object/bucket tags if enabled")
@capability(SourceCapability.CONTAINERS, "Enabled by default")
class DeltaLakeSource(StatefulIngestionSourceBase):
"""
This plugin extracts:

View File

@@ -22,6 +22,7 @@ from datahub.ingestion.api.decorators import (
)
from datahub.ingestion.api.source import MetadataWorkUnitProcessor
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
from datahub.ingestion.source.hex.api import HexApi, HexApiReport
from datahub.ingestion.source.hex.constants import (
DATAHUB_API_PAGE_SIZE_DEFAULT,
@@ -179,6 +180,13 @@ class HexReport(
@capability(SourceCapability.OWNERSHIP, "Supported by default")
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
@capability(SourceCapability.CONTAINERS, "Enabled by default")
@capability(
SourceCapability.USAGE_STATS,
"Supported by default",
subtype_modifier=[
SourceCapabilityModifier.HEX_PROJECT,
],
)
class HexSource(StatefulIngestionSourceBase):
def __init__(self, config: HexSourceConfig, ctx: PipelineContext):
super().__init__(config, ctx)

View File

@@ -51,6 +51,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.common.subtypes import (
BIAssetSubTypes,
BIContainerSubTypes,
SourceCapabilityModifier,
)
from datahub.ingestion.source.looker import looker_usage
from datahub.ingestion.source.looker.looker_common import (
@@ -127,6 +128,14 @@ logger = logging.getLogger(__name__)
"Enabled by default, configured using `extract_usage_history`",
)
@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
@capability(
SourceCapability.CONTAINERS,
"Enabled by default",
subtype_modifier=[
SourceCapabilityModifier.LOOKML_MODEL,
SourceCapabilityModifier.LOOKER_FOLDER,
],
)
class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
"""
This plugin extracts the following:

View File

@@ -27,6 +27,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.common.subtypes import (
BIContainerSubTypes,
DatasetSubTypes,
SourceCapabilityModifier,
)
from datahub.ingestion.source.git.git_import import GitClone
from datahub.ingestion.source.looker.looker_common import (
@@ -273,6 +274,13 @@ class LookerManifest:
SourceCapability.LINEAGE_FINE,
"Enabled by default, configured using `extract_column_level_lineage`",
)
@capability(
SourceCapability.CONTAINERS,
"Enabled by default",
subtype_modifier=[
SourceCapabilityModifier.LOOKML_PROJECT,
],
)
class LookMLSource(StatefulIngestionSourceBase):
"""
This plugin extracts the following:

View File

@@ -36,7 +36,10 @@ from datahub.ingestion.api.decorators import (
)
from datahub.ingestion.api.source import MetadataWorkUnitProcessor
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.common.subtypes import DatasetContainerSubTypes
from datahub.ingestion.source.common.subtypes import (
DatasetContainerSubTypes,
SourceCapabilityModifier,
)
from datahub.ingestion.source.schema_inference.object import (
SchemaDescription,
construct_schema,
@@ -249,6 +252,13 @@ def construct_schema_pymongo(
@support_status(SupportStatus.CERTIFIED)
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
@capability(
SourceCapability.CONTAINERS,
"Enabled by default",
subtype_modifier=[
SourceCapabilityModifier.DATABASE,
],
)
@dataclass
class MongoDBSource(StatefulIngestionSourceBase):
"""

View File

@@ -47,6 +47,7 @@ from datahub.ingestion.source.common.data_reader import DataReader
from datahub.ingestion.source.common.subtypes import (
DatasetContainerSubTypes,
DatasetSubTypes,
SourceCapabilityModifier,
)
from datahub.ingestion.source.redshift.config import RedshiftConfig
from datahub.ingestion.source.redshift.datashares import RedshiftDatasharesHelper
@@ -126,7 +127,13 @@ logger: logging.Logger = logging.getLogger(__name__)
@platform_name("Redshift")
@config_class(RedshiftConfig)
@support_status(SupportStatus.CERTIFIED)
@capability(SourceCapability.CONTAINERS, "Enabled by default")
@capability(
SourceCapability.CONTAINERS,
"Enabled by default",
subtype_modifier=[
SourceCapabilityModifier.DATABASE,
],
)
@capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
@capability(SourceCapability.DESCRIPTIONS, "Enabled by default")

View File

@@ -41,6 +41,7 @@ from datahub.ingestion.source.aws.s3_util import (
get_key_prefix,
strip_s3_prefix,
)
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
from datahub.ingestion.source.data_lake_common.data_lake_utils import (
ContainerWUCreator,
add_partition_columns_to_schema,
@@ -196,7 +197,14 @@ class TableData:
@platform_name("S3 / Local Files", id="s3")
@config_class(DataLakeSourceConfig)
@support_status(SupportStatus.INCUBATING)
@capability(SourceCapability.CONTAINERS, "Enabled by default")
@capability(
SourceCapability.CONTAINERS,
"Enabled by default",
subtype_modifier=[
SourceCapabilityModifier.FOLDER,
SourceCapabilityModifier.S3_BUCKET,
],
)
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
@capability(
SourceCapability.SCHEMA_METADATA, "Can infer schema from supported file types"

View File

@@ -347,12 +347,18 @@ class Partitionitem:
@capability(
SourceCapability.LINEAGE_COARSE,
"Supported for S3 tables",
subtype_modifier=[SourceCapabilityModifier.TABLE],
subtype_modifier=[
SourceCapabilityModifier.VIEW,
SourceCapabilityModifier.TABLE,
],
)
@capability(
SourceCapability.LINEAGE_FINE,
"Supported for S3 tables",
subtype_modifier=[SourceCapabilityModifier.TABLE],
subtype_modifier=[
SourceCapabilityModifier.VIEW,
SourceCapabilityModifier.TABLE,
],
)
@capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
class AthenaSource(SQLAlchemySource):

View File

@@ -32,6 +32,7 @@ from datahub.ingestion.api.decorators import (
support_status,
)
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
from datahub.ingestion.source.sql.sql_common import (
SqlWorkUnit,
logger,
@@ -383,6 +384,14 @@ clickhouse_datetime_format = "%Y-%m-%d %H:%M:%S"
SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
)
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
@capability(
SourceCapability.LINEAGE_COARSE,
"Enabled by default to get lineage for views via `include_view_lineage`",
subtype_modifier=[
SourceCapabilityModifier.VIEW,
SourceCapabilityModifier.TABLE,
],
)
class ClickHouseSource(TwoTierSQLAlchemySource):
"""
This plugin extracts the following: