mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-03 04:10:43 +00:00
fix(ingest): update capability modifiers as per telemetry (#14206)
This commit is contained in:
parent
08c587d065
commit
7ec30652b6
@ -23,7 +23,8 @@
|
||||
"Bash(yarn test:*)",
|
||||
"Bash(yarn generate:*)",
|
||||
"Bash(./gradlew :datahub-web-react:yarnLintFix)",
|
||||
"Bash(./gradlew :datahub-web-react:yarnLint)"
|
||||
"Bash(./gradlew :datahub-web-react:yarnLint)",
|
||||
"Bash(./gradlew :metadata-ingestion:capabilitySummary)"
|
||||
],
|
||||
"deny": []
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
{
|
||||
"generated_at": "2025-07-14T09:20:09.632850+00:00",
|
||||
"generated_at": "2025-07-24T13:24:05.751563+00:00",
|
||||
"generated_by": "metadata-ingestion/scripts/capability_summary.py",
|
||||
"plugin_details": {
|
||||
"abs": {
|
||||
@ -49,6 +49,7 @@
|
||||
"capability": "LINEAGE_FINE",
|
||||
"description": "Supported for S3 tables",
|
||||
"subtype_modifier": [
|
||||
"View",
|
||||
"Table"
|
||||
],
|
||||
"supported": true
|
||||
@ -95,6 +96,7 @@
|
||||
"capability": "LINEAGE_COARSE",
|
||||
"description": "Supported for S3 tables",
|
||||
"subtype_modifier": [
|
||||
"View",
|
||||
"Table"
|
||||
],
|
||||
"supported": true
|
||||
@ -305,7 +307,8 @@
|
||||
"capability": "LINEAGE_COARSE",
|
||||
"description": "Enabled by default to get lineage for views via `include_view_lineage`",
|
||||
"subtype_modifier": [
|
||||
"View"
|
||||
"View",
|
||||
"Table"
|
||||
],
|
||||
"supported": true
|
||||
},
|
||||
@ -462,6 +465,12 @@
|
||||
},
|
||||
"datahub": {
|
||||
"capabilities": [
|
||||
{
|
||||
"capability": "CONTAINERS",
|
||||
"description": "Enabled by default",
|
||||
"subtype_modifier": null,
|
||||
"supported": true
|
||||
},
|
||||
{
|
||||
"capability": "DELETION_DETECTION",
|
||||
"description": "Enabled by default via stateful ingestion",
|
||||
@ -595,6 +604,12 @@
|
||||
},
|
||||
"delta-lake": {
|
||||
"capabilities": [
|
||||
{
|
||||
"capability": "CONTAINERS",
|
||||
"description": "Enabled by default",
|
||||
"subtype_modifier": null,
|
||||
"supported": true
|
||||
},
|
||||
{
|
||||
"capability": "DELETION_DETECTION",
|
||||
"description": "Enabled by default via stateful ingestion",
|
||||
@ -918,6 +933,14 @@
|
||||
},
|
||||
"glue": {
|
||||
"capabilities": [
|
||||
{
|
||||
"capability": "CONTAINERS",
|
||||
"description": "Enabled by default",
|
||||
"subtype_modifier": [
|
||||
"Database"
|
||||
],
|
||||
"supported": true
|
||||
},
|
||||
{
|
||||
"capability": "LINEAGE_FINE",
|
||||
"description": "Support via the `emit_s3_lineage` config field",
|
||||
@ -1057,6 +1080,14 @@
|
||||
"subtype_modifier": null,
|
||||
"supported": true
|
||||
},
|
||||
{
|
||||
"capability": "USAGE_STATS",
|
||||
"description": "Supported by default",
|
||||
"subtype_modifier": [
|
||||
"Project"
|
||||
],
|
||||
"supported": true
|
||||
},
|
||||
{
|
||||
"capability": "DESCRIPTIONS",
|
||||
"description": "Supported by default",
|
||||
@ -1433,6 +1464,15 @@
|
||||
},
|
||||
"looker": {
|
||||
"capabilities": [
|
||||
{
|
||||
"capability": "CONTAINERS",
|
||||
"description": "Enabled by default",
|
||||
"subtype_modifier": [
|
||||
"LookML Model",
|
||||
"Folder"
|
||||
],
|
||||
"supported": true
|
||||
},
|
||||
{
|
||||
"capability": "LINEAGE_FINE",
|
||||
"description": "Enabled by default, configured using `extract_column_level_lineage`",
|
||||
@ -1489,6 +1529,14 @@
|
||||
},
|
||||
"lookml": {
|
||||
"capabilities": [
|
||||
{
|
||||
"capability": "CONTAINERS",
|
||||
"description": "Enabled by default",
|
||||
"subtype_modifier": [
|
||||
"LookML Project"
|
||||
],
|
||||
"supported": true
|
||||
},
|
||||
{
|
||||
"capability": "LINEAGE_FINE",
|
||||
"description": "Enabled by default, configured using `extract_column_level_lineage`",
|
||||
@ -1712,6 +1760,14 @@
|
||||
},
|
||||
"mongodb": {
|
||||
"capabilities": [
|
||||
{
|
||||
"capability": "CONTAINERS",
|
||||
"description": "Enabled by default",
|
||||
"subtype_modifier": [
|
||||
"Database"
|
||||
],
|
||||
"supported": true
|
||||
},
|
||||
{
|
||||
"capability": "DELETION_DETECTION",
|
||||
"description": "Enabled by default via stateful ingestion",
|
||||
@ -2511,7 +2567,9 @@
|
||||
{
|
||||
"capability": "CONTAINERS",
|
||||
"description": "Enabled by default",
|
||||
"subtype_modifier": null,
|
||||
"subtype_modifier": [
|
||||
"Database"
|
||||
],
|
||||
"supported": true
|
||||
},
|
||||
{
|
||||
@ -2591,7 +2649,10 @@
|
||||
{
|
||||
"capability": "CONTAINERS",
|
||||
"description": "Enabled by default",
|
||||
"subtype_modifier": null,
|
||||
"subtype_modifier": [
|
||||
"Folder",
|
||||
"S3 bucket"
|
||||
],
|
||||
"supported": true
|
||||
},
|
||||
{
|
||||
|
||||
@ -75,6 +75,7 @@ from datahub.ingestion.source.aws.tag_entities import (
|
||||
from datahub.ingestion.source.common.subtypes import (
|
||||
DatasetContainerSubTypes,
|
||||
DatasetSubTypes,
|
||||
SourceCapabilityModifier,
|
||||
)
|
||||
from datahub.ingestion.source.glue_profiling_config import GlueProfilingConfig
|
||||
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
||||
@ -275,6 +276,13 @@ class GlueSourceReport(StaleEntityRemovalSourceReport):
|
||||
@capability(
|
||||
SourceCapability.LINEAGE_FINE, "Support via the `emit_s3_lineage` config field"
|
||||
)
|
||||
@capability(
|
||||
SourceCapability.CONTAINERS,
|
||||
"Enabled by default",
|
||||
subtype_modifier=[
|
||||
SourceCapabilityModifier.DATABASE,
|
||||
],
|
||||
)
|
||||
class GlueSource(StatefulIngestionSourceBase):
|
||||
"""
|
||||
Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](../../../../docs/generated/ingestion/sources/s3.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub.
|
||||
|
||||
@ -6,7 +6,9 @@ from typing import Dict, Iterable, List, Optional
|
||||
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
||||
from datahub.ingestion.api.common import PipelineContext
|
||||
from datahub.ingestion.api.decorators import (
|
||||
SourceCapability,
|
||||
SupportStatus,
|
||||
capability,
|
||||
config_class,
|
||||
platform_name,
|
||||
support_status,
|
||||
@ -37,6 +39,7 @@ logger = logging.getLogger(__name__)
|
||||
@platform_name("DataHub")
|
||||
@config_class(DataHubSourceConfig)
|
||||
@support_status(SupportStatus.TESTING)
|
||||
@capability(SourceCapability.CONTAINERS, "Enabled by default")
|
||||
class DataHubSource(StatefulIngestionSourceBase):
|
||||
platform: str = "datahub"
|
||||
|
||||
|
||||
@ -85,6 +85,7 @@ OPERATION_STATEMENT_TYPES = {
|
||||
@config_class(DeltaLakeSourceConfig)
|
||||
@support_status(SupportStatus.INCUBATING)
|
||||
@capability(SourceCapability.TAGS, "Can extract S3 object/bucket tags if enabled")
|
||||
@capability(SourceCapability.CONTAINERS, "Enabled by default")
|
||||
class DeltaLakeSource(StatefulIngestionSourceBase):
|
||||
"""
|
||||
This plugin extracts:
|
||||
|
||||
@ -22,6 +22,7 @@ from datahub.ingestion.api.decorators import (
|
||||
)
|
||||
from datahub.ingestion.api.source import MetadataWorkUnitProcessor
|
||||
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
||||
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
|
||||
from datahub.ingestion.source.hex.api import HexApi, HexApiReport
|
||||
from datahub.ingestion.source.hex.constants import (
|
||||
DATAHUB_API_PAGE_SIZE_DEFAULT,
|
||||
@ -179,6 +180,13 @@ class HexReport(
|
||||
@capability(SourceCapability.OWNERSHIP, "Supported by default")
|
||||
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
|
||||
@capability(SourceCapability.CONTAINERS, "Enabled by default")
|
||||
@capability(
|
||||
SourceCapability.USAGE_STATS,
|
||||
"Supported by default",
|
||||
subtype_modifier=[
|
||||
SourceCapabilityModifier.HEX_PROJECT,
|
||||
],
|
||||
)
|
||||
class HexSource(StatefulIngestionSourceBase):
|
||||
def __init__(self, config: HexSourceConfig, ctx: PipelineContext):
|
||||
super().__init__(config, ctx)
|
||||
|
||||
@ -51,6 +51,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
|
||||
from datahub.ingestion.source.common.subtypes import (
|
||||
BIAssetSubTypes,
|
||||
BIContainerSubTypes,
|
||||
SourceCapabilityModifier,
|
||||
)
|
||||
from datahub.ingestion.source.looker import looker_usage
|
||||
from datahub.ingestion.source.looker.looker_common import (
|
||||
@ -127,6 +128,14 @@ logger = logging.getLogger(__name__)
|
||||
"Enabled by default, configured using `extract_usage_history`",
|
||||
)
|
||||
@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
|
||||
@capability(
|
||||
SourceCapability.CONTAINERS,
|
||||
"Enabled by default",
|
||||
subtype_modifier=[
|
||||
SourceCapabilityModifier.LOOKML_MODEL,
|
||||
SourceCapabilityModifier.LOOKER_FOLDER,
|
||||
],
|
||||
)
|
||||
class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
||||
"""
|
||||
This plugin extracts the following:
|
||||
|
||||
@ -27,6 +27,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
|
||||
from datahub.ingestion.source.common.subtypes import (
|
||||
BIContainerSubTypes,
|
||||
DatasetSubTypes,
|
||||
SourceCapabilityModifier,
|
||||
)
|
||||
from datahub.ingestion.source.git.git_import import GitClone
|
||||
from datahub.ingestion.source.looker.looker_common import (
|
||||
@ -273,6 +274,13 @@ class LookerManifest:
|
||||
SourceCapability.LINEAGE_FINE,
|
||||
"Enabled by default, configured using `extract_column_level_lineage`",
|
||||
)
|
||||
@capability(
|
||||
SourceCapability.CONTAINERS,
|
||||
"Enabled by default",
|
||||
subtype_modifier=[
|
||||
SourceCapabilityModifier.LOOKML_PROJECT,
|
||||
],
|
||||
)
|
||||
class LookMLSource(StatefulIngestionSourceBase):
|
||||
"""
|
||||
This plugin extracts the following:
|
||||
|
||||
@ -36,7 +36,10 @@ from datahub.ingestion.api.decorators import (
|
||||
)
|
||||
from datahub.ingestion.api.source import MetadataWorkUnitProcessor
|
||||
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
||||
from datahub.ingestion.source.common.subtypes import DatasetContainerSubTypes
|
||||
from datahub.ingestion.source.common.subtypes import (
|
||||
DatasetContainerSubTypes,
|
||||
SourceCapabilityModifier,
|
||||
)
|
||||
from datahub.ingestion.source.schema_inference.object import (
|
||||
SchemaDescription,
|
||||
construct_schema,
|
||||
@ -249,6 +252,13 @@ def construct_schema_pymongo(
|
||||
@support_status(SupportStatus.CERTIFIED)
|
||||
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
|
||||
@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
|
||||
@capability(
|
||||
SourceCapability.CONTAINERS,
|
||||
"Enabled by default",
|
||||
subtype_modifier=[
|
||||
SourceCapabilityModifier.DATABASE,
|
||||
],
|
||||
)
|
||||
@dataclass
|
||||
class MongoDBSource(StatefulIngestionSourceBase):
|
||||
"""
|
||||
|
||||
@ -47,6 +47,7 @@ from datahub.ingestion.source.common.data_reader import DataReader
|
||||
from datahub.ingestion.source.common.subtypes import (
|
||||
DatasetContainerSubTypes,
|
||||
DatasetSubTypes,
|
||||
SourceCapabilityModifier,
|
||||
)
|
||||
from datahub.ingestion.source.redshift.config import RedshiftConfig
|
||||
from datahub.ingestion.source.redshift.datashares import RedshiftDatasharesHelper
|
||||
@ -126,7 +127,13 @@ logger: logging.Logger = logging.getLogger(__name__)
|
||||
@platform_name("Redshift")
|
||||
@config_class(RedshiftConfig)
|
||||
@support_status(SupportStatus.CERTIFIED)
|
||||
@capability(SourceCapability.CONTAINERS, "Enabled by default")
|
||||
@capability(
|
||||
SourceCapability.CONTAINERS,
|
||||
"Enabled by default",
|
||||
subtype_modifier=[
|
||||
SourceCapabilityModifier.DATABASE,
|
||||
],
|
||||
)
|
||||
@capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
|
||||
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
|
||||
@capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
|
||||
|
||||
@ -41,6 +41,7 @@ from datahub.ingestion.source.aws.s3_util import (
|
||||
get_key_prefix,
|
||||
strip_s3_prefix,
|
||||
)
|
||||
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
|
||||
from datahub.ingestion.source.data_lake_common.data_lake_utils import (
|
||||
ContainerWUCreator,
|
||||
add_partition_columns_to_schema,
|
||||
@ -196,7 +197,14 @@ class TableData:
|
||||
@platform_name("S3 / Local Files", id="s3")
|
||||
@config_class(DataLakeSourceConfig)
|
||||
@support_status(SupportStatus.INCUBATING)
|
||||
@capability(SourceCapability.CONTAINERS, "Enabled by default")
|
||||
@capability(
|
||||
SourceCapability.CONTAINERS,
|
||||
"Enabled by default",
|
||||
subtype_modifier=[
|
||||
SourceCapabilityModifier.FOLDER,
|
||||
SourceCapabilityModifier.S3_BUCKET,
|
||||
],
|
||||
)
|
||||
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
|
||||
@capability(
|
||||
SourceCapability.SCHEMA_METADATA, "Can infer schema from supported file types"
|
||||
|
||||
@ -347,12 +347,18 @@ class Partitionitem:
|
||||
@capability(
|
||||
SourceCapability.LINEAGE_COARSE,
|
||||
"Supported for S3 tables",
|
||||
subtype_modifier=[SourceCapabilityModifier.TABLE],
|
||||
subtype_modifier=[
|
||||
SourceCapabilityModifier.VIEW,
|
||||
SourceCapabilityModifier.TABLE,
|
||||
],
|
||||
)
|
||||
@capability(
|
||||
SourceCapability.LINEAGE_FINE,
|
||||
"Supported for S3 tables",
|
||||
subtype_modifier=[SourceCapabilityModifier.TABLE],
|
||||
subtype_modifier=[
|
||||
SourceCapabilityModifier.VIEW,
|
||||
SourceCapabilityModifier.TABLE,
|
||||
],
|
||||
)
|
||||
@capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
|
||||
class AthenaSource(SQLAlchemySource):
|
||||
|
||||
@ -32,6 +32,7 @@ from datahub.ingestion.api.decorators import (
|
||||
support_status,
|
||||
)
|
||||
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
||||
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
|
||||
from datahub.ingestion.source.sql.sql_common import (
|
||||
SqlWorkUnit,
|
||||
logger,
|
||||
@ -383,6 +384,14 @@ clickhouse_datetime_format = "%Y-%m-%d %H:%M:%S"
|
||||
SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
|
||||
)
|
||||
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
|
||||
@capability(
|
||||
SourceCapability.LINEAGE_COARSE,
|
||||
"Enabled by default to get lineage for views via `include_view_lineage`",
|
||||
subtype_modifier=[
|
||||
SourceCapabilityModifier.VIEW,
|
||||
SourceCapabilityModifier.TABLE,
|
||||
],
|
||||
)
|
||||
class ClickHouseSource(TwoTierSQLAlchemySource):
|
||||
"""
|
||||
This plugin extracts the following:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user