diff --git a/.github/scripts/pre-commit-override.yaml b/.github/scripts/pre-commit-override.yaml index 30f5992762..d5808d2a8f 100644 --- a/.github/scripts/pre-commit-override.yaml +++ b/.github/scripts/pre-commit-override.yaml @@ -7,3 +7,9 @@ repos: language: system files: ^smoke-test/tests/cypress/.*\.tsx$ pass_filenames: false + - id: update-capability-summary + name: update-capability-summary + entry: ./gradlew :metadata-ingestion:capabilitySummary + language: system + files: ^metadata-ingestion/src/datahub/ingestion/source/.*\.py$ + pass_filenames: false diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index 9b3baa07d8..776be44886 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -69,9 +69,21 @@ jobs: run: ./metadata-ingestion/scripts/install_deps.sh - name: Install package run: ./gradlew :metadata-ingestion:installPackageOnly - - name: Run lint alongwith testQuick + - name: Check lint and capability_summary.json being up-to-date if: ${{ matrix.command == 'testQuick' }} - run: ./gradlew :metadata-ingestion:lint + run: | + ./gradlew :metadata-ingestion:lint + echo "Lint passed. Checking if capability_summary.json is up-to-date." + ./gradlew :metadata-ingestion:capabilitySummary + # Check if capability summary file has changed + if git diff --quiet metadata-ingestion/src/datahub/ingestion/autogenerated/capability_summary.json; then + echo "✅ Capability summary file is unchanged" + else + echo "❌ Capability summary file has changed. Please commit the updated file." + echo "Changed lines:" + git diff metadata-ingestion/src/datahub/ingestion/autogenerated/capability_summary.json + exit 1 + fi - name: Run metadata-ingestion tests run: ./gradlew :metadata-ingestion:${{ matrix.command }} - name: Debug info diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 02da599fb5..f445a1ae4b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -# Auto-generated by .github/scripts/generate_pre_commit.py at 2025-04-21 19:41:02 UTC +# Auto-generated by .github/scripts/generate_pre_commit.py at 2025-06-27 12:14:33 UTC # Do not edit this file directly. Run the script to regenerate. # Add additional hooks in .github/scripts/pre-commit-override.yaml repos: @@ -493,3 +493,10 @@ repos: language: system files: ^smoke-test/tests/cypress/.*\.tsx$ pass_filenames: false + + - id: update-capability-summary + name: update-capability-summary + entry: ./gradlew :metadata-ingestion:capabilitySummary + language: system + files: ^metadata-ingestion/src/datahub/ingestion/source/.*\.py$ + pass_filenames: false diff --git a/datahub-web-react/.gitignore b/datahub-web-react/.gitignore index a0747696b1..5ff5ce4fac 100644 --- a/datahub-web-react/.gitignore +++ b/datahub-web-react/.gitignore @@ -30,4 +30,5 @@ yarn-error.log* /.vscode .yarn-test-sentinel -.yarn-lint-sentinel \ No newline at end of file +.yarn-lint-sentinel +public/assets/ingestion/** \ No newline at end of file diff --git a/datahub-web-react/build.gradle b/datahub-web-react/build.gradle index d512ff6d3c..096037fb4b 100644 --- a/datahub-web-react/build.gradle +++ b/datahub-web-react/build.gradle @@ -146,6 +146,24 @@ task yarnBuild(type: YarnTask, dependsOn: [yarnInstall, yarnGenerate]) { outputs.dir('dist') } +task copyCapabilitySummary(type: Copy) { + def sourceFile = file('../metadata-ingestion/src/datahub/ingestion/autogenerated/capability_summary.json') + + if (!sourceFile.exists()) { + // We don't want frontend devs to have to run this task + // But still keeping it here to make sure the dependency is there properly in gradle + dependsOn ':metadata-ingestion:capabilitySummary' + } + + from sourceFile + into 'public/assets/ingestion' + + inputs.file(sourceFile) + outputs.file('public/assets/ingestion/capability_summary.json') +} + +yarnBuild.dependsOn copyCapabilitySummary + // Define a list of configurations for prettier tasks def externalPrettierConfigs = [ [ @@ -201,6 +219,7 @@ clean { delete 'tmp' delete 'just' delete fileTree(dir: 'src', include: '*.generated.ts') + delete 'public/assets/capability_summary.json' } configurations { diff --git a/datahub-web-react/src/app/ingestV2/ManageIngestionPage.tsx b/datahub-web-react/src/app/ingestV2/ManageIngestionPage.tsx index 3ea7174e7a..19d5ddd3e1 100644 --- a/datahub-web-react/src/app/ingestV2/ManageIngestionPage.tsx +++ b/datahub-web-react/src/app/ingestV2/ManageIngestionPage.tsx @@ -18,6 +18,27 @@ import { import { useAppConfig } from '@app/useAppConfig'; import { useShowNavBarRedesign } from '@app/useShowNavBarRedesign'; +interface Capability { + capability: string; + description: string; + supported: boolean; +} + +interface PluginDetails { + capabilities: Capability[]; + classname: string; + platform_id: string; + platform_name: string; + support_status: string; +} + +// this type is based off of the type in metadata-ingestion/src/datahub/ingestion/autogenerated/capability_summary.json +interface CapabilitySummary { + generated_at: string; + generated_by: string; + plugin_details: Record; +} + const PageContainer = styled.div<{ $isShowNavBarRedesign?: boolean }>` padding-top: 20px; background-color: white; @@ -78,9 +99,63 @@ export const ManageIngestionPage = () => { const [showCreateSecretModal, setShowCreateSecretModal] = useState(false); const [hideSystemSources, setHideSystemSources] = useState(true); + const [capabilitySummary, setCapabilitySummary] = useState(null); + const [isCapabilitySummaryLoading, setIsCapabilitySummaryLoading] = useState(true); + const [isCapabilitySummaryError, setIsCapabilitySummaryError] = useState(null); + const history = useHistory(); const shouldPreserveParams = useRef(false); + useEffect(() => { + const fetchCapabilitySummary = async () => { + setIsCapabilitySummaryLoading(true); + try { + const response = await fetch('/assets/ingestion/capability_summary.json'); + if (!response.ok) { + throw new Error(`Failed to fetch capability summary: ${response.status} ${response.statusText}`); + } + const data = await response.json(); + setCapabilitySummary(data); + } catch (error) { + console.error('Error fetching capability summary:', error); + setIsCapabilitySummaryError( + error instanceof Error ? error.message : 'Failed to fetch capability summary', + ); + } finally { + setIsCapabilitySummaryLoading(false); + } + }; + + fetchCapabilitySummary(); + }, []); + + const getPluginCapabilities = (platformId: string): PluginDetails | null => { + if (!capabilitySummary?.plugin_details?.[platformId]) { + return null; + } + return capabilitySummary.plugin_details[platformId]; + }; + const isCapabilitySupported = (platformId: string, capabilityName: string): boolean => { + const capabilities = getPluginCapabilities(platformId)?.capabilities; + if (!capabilities) { + return false; + } + return capabilities?.some((capability) => capability.capability === capabilityName && capability.supported); + }; + + const isProfilingSupported = (platformId: string): boolean => isCapabilitySupported(platformId, 'DATA_PROFILING'); + // const isLineageSupported = (platformId: string): boolean => isCapabilitySupported(platformId, 'LINEAGE_COARSE'); + // const isFineGrainedLineageSupported = (platformId: string): boolean => + // isCapabilitySupported(platformId, 'LINEAGE_FINE'); + // const isUsageStatsSupported = (platformId: string): boolean => isCapabilitySupported(platformId, 'USAGE_STATS'); + + if (!isCapabilitySummaryLoading && !isCapabilitySummaryError) { + console.log( + 'Example to be removed when is actually used for something is profiling support for bigquery', + isProfilingSupported('bigquery'), + ); + } + // defaultTab might not be calculated correctly on mount, if `config` or `me` haven't been loaded yet useEffect(() => { if (loaded && me.loaded && !showIngestionTab && selectedTab === TabType.Sources) { diff --git a/metadata-ingestion/build.gradle b/metadata-ingestion/build.gradle index 034201260f..b096ea1cd1 100644 --- a/metadata-ingestion/build.gradle +++ b/metadata-ingestion/build.gradle @@ -112,16 +112,16 @@ task modelDocUpload(type: Exec, dependsOn: [modelDocGen]) { task lint(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', venv_activate_command + - "ruff check src/ tests/ examples/ && " + - "ruff format --check src/ tests/ examples/ && " + + "ruff check scripts/capability_summary.py src/ tests/ examples/ && " + + "ruff format --check scripts/capability_summary.py src/ tests/ examples/ && " + "mypy --show-traceback --show-error-codes src/ tests/ examples/" } task lintFix(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', venv_activate_command + - "ruff check --fix src/ tests/ examples/ && " + - "ruff format src/ tests/ examples/ " + "ruff check --fix scripts/capability_summary.py src/ tests/ examples/ && " + + "ruff format scripts/capability_summary.py src/ tests/ examples/ " } def pytest_default_env = "PYTHONDEVMODE=1" @@ -191,7 +191,23 @@ task testFull(type: Exec, dependsOn: [installDevTest]) { task specGen(type: Exec, dependsOn: [codegen, installDevTest]) { commandLine 'bash', '-c', "${venv_activate_command} ./scripts/specgen.sh" } + +task capabilitySummary(type: Exec, dependsOn: [codegen, installDevTest]) { + inputs.files( + file('scripts/capability_summary.py'), + file('scripts/docgen_types.py'), + project.fileTree(dir: "src/datahub/ingestion/source", include: "**/*.py") + ) + commandLine 'bash', '-c', "${venv_activate_command} python scripts/capability_summary.py --output-dir ./src/datahub/ingestion/autogenerated" +} + task docGen(type: Exec, dependsOn: [codegen, installDevTest, specGen]) { + def sourceFile = file('./src/datahub/ingestion/autogenerated/capability_summary.json') + if (!sourceFile.exists()) { + // Doing it like this cuts docGen time from 15 seconds to 9 seconds locally + // This can further reduce if we generate more things in the future + dependsOn capabilitySummary + } commandLine 'bash', '-c', "${venv_activate_command} ./scripts/docgen.sh" } diff --git a/metadata-ingestion/scripts/capability_summary.py b/metadata-ingestion/scripts/capability_summary.py new file mode 100644 index 0000000000..8c34c6e4b1 --- /dev/null +++ b/metadata-ingestion/scripts/capability_summary.py @@ -0,0 +1,234 @@ +import dataclasses +import json +import logging +import pathlib +from datetime import datetime, timezone +from typing import Dict, Optional + +import click +from docgen_types import Plugin + +from datahub.ingestion.api.decorators import SupportStatus +from datahub.ingestion.source.source_registry import source_registry + +logger = logging.getLogger(__name__) + + +DENY_LIST = { + "snowflake-summary", + "snowflake-queries", + "bigquery-queries", +} + + +def load_plugin_capabilities(plugin_name: str) -> Optional[Plugin]: + """Load plugin capabilities without generating full documentation.""" + logger.debug(f"Loading capabilities for {plugin_name}") + + try: + class_or_exception = source_registry._ensure_not_lazy(plugin_name) + if isinstance(class_or_exception, Exception): + # Log the specific error but don't re-raise it + logger.warning(f"Plugin {plugin_name} failed to load: {class_or_exception}") + return None + source_type = source_registry.get(plugin_name) + logger.debug(f"Source class is {source_type}") + + if hasattr(source_type, "get_platform_name"): + platform_name = source_type.get_platform_name() + else: + platform_name = plugin_name.title() + + platform_id = None + if hasattr(source_type, "get_platform_id"): + platform_id = source_type.get_platform_id() + if platform_id is None: + logger.warning(f"Platform ID not found for {plugin_name}") + return None + + plugin = Plugin( + name=plugin_name, + platform_id=platform_id, + platform_name=platform_name, + classname=".".join([source_type.__module__, source_type.__name__]), + ) + + if hasattr(source_type, "get_support_status"): + plugin.support_status = source_type.get_support_status() + + if hasattr(source_type, "get_capabilities"): + capabilities = list(source_type.get_capabilities()) + if capabilities: + capabilities.sort(key=lambda x: x.capability.value) + plugin.capabilities = capabilities + else: + logger.debug(f"No capabilities defined for {plugin_name}") + plugin.capabilities = [] + else: + logger.debug(f"No get_capabilities method for {plugin_name}") + plugin.capabilities = [] + + return plugin + + except Exception as e: + logger.warning(f"Failed to load capabilities for {plugin_name}: {e}") + return None + + +@dataclasses.dataclass +class CapabilitySummary: + """Summary of capabilities across all plugins.""" + + plugin_details: Dict[str, Dict] # plugin_name -> detailed info + + +def generate_capability_summary() -> CapabilitySummary: + """Generate a comprehensive summary of capabilities across all plugins.""" + + plugin_details: Dict[str, Dict] = {} + + for plugin_name in sorted(source_registry.mapping.keys()): + if plugin_name in DENY_LIST: + logger.info(f"Skipping {plugin_name} as it is on the deny list") + continue + + plugin = load_plugin_capabilities(plugin_name) + + if plugin is None: + continue + + plugin_details[plugin_name] = { + "platform_id": plugin.platform_id, + "platform_name": plugin.platform_name, + "classname": plugin.classname, + "support_status": plugin.support_status.name + if plugin.support_status != SupportStatus.UNKNOWN + else None, + "capabilities": [], + } + + if plugin.capabilities: + for cap_setting in plugin.capabilities: + capability_name = cap_setting.capability.name + + plugin_details[plugin_name]["capabilities"].append( + { + "capability": capability_name, + "supported": cap_setting.supported, + "description": cap_setting.description, + } + ) + + return CapabilitySummary( + plugin_details=plugin_details, + ) + + +def save_capability_report(summary: CapabilitySummary, output_dir: str) -> None: + """Save the capability summary as JSON files, but only write if contents have changed.""" + + output_path = pathlib.Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + + existing_capabilities = {} + existing_summary_file = pathlib.Path( + "./src/datahub/ingestion/autogenerated/capability_summary.json" + ) + if existing_summary_file.exists(): + try: + with open(existing_summary_file, "r") as f: + existing_data = json.load(f) + existing_capabilities = existing_data.get("plugin_details", {}) + logger.info( + f"Loaded existing capability data for {len(existing_capabilities)} plugins" + ) + except Exception as e: + logger.warning(f"Failed to load existing capability data: {e}") + + missing_plugins = set(existing_capabilities.keys()) - set( + summary.plugin_details.keys() + ) + for plugin_name in missing_plugins: + logger.warning( + f"Plugin {plugin_name} failed to load, using existing capability data as fallback. Manually remove from capability_summary.json if you want to remove it from the report." + ) + summary.plugin_details[plugin_name] = existing_capabilities[plugin_name] + + summary_dict = dataclasses.asdict(summary) + summary_dict["generated_by"] = "metadata-ingestion/scripts/capability_summary.py" + summary_dict["generated_at"] = datetime.now(timezone.utc).isoformat() + summary_json = json.dumps(summary_dict, indent=2, sort_keys=True) + + summary_file = output_path / "capability_summary.json" + write_file = True + if summary_file.exists(): + try: + with open(summary_file, "r") as f: + existing_data = json.load(f) + + # Create copies without generated_at for comparison + existing_for_comparison = existing_data.copy() + new_for_comparison = summary_dict.copy() + existing_for_comparison.pop("generated_at", None) + new_for_comparison.pop("generated_at", None) + + if json.dumps( + existing_for_comparison, indent=2, sort_keys=True + ) == json.dumps(new_for_comparison, indent=2, sort_keys=True): + logger.info(f"No changes detected in {summary_file}, skipping write.") + write_file = False + except Exception as e: + logger.warning(f"Could not read existing summary file: {e}") + if write_file: + with open(summary_file, "w") as f: + f.write(summary_json) + logger.info(f"Capability summary saved to {summary_file}") + + +@click.command() +@click.option( + "--output-dir", + type=str, + default="./autogenerated", + help="Output directory for capability reports", +) +@click.option( + "--source", + type=str, + required=False, + help="Generate report for specific source only", +) +def generate_capability_report(output_dir: str, source: Optional[str] = None) -> None: + """Generate a comprehensive capability report for all ingestion sources.""" + + logger.info("Starting capability report generation...") + + if source: + if source not in source_registry.mapping: + logger.error(f"Source '{source}' not found in registry") + return + original_mapping = source_registry.mapping.copy() + source_registry.mapping = {source: original_mapping[source]} + + try: + summary = generate_capability_summary() + save_capability_report(summary, output_dir) + + print("Capability Report Generation Complete") + print("=====================================") + print(f"Total plugins processed: {len(summary.plugin_details)}") + print(f"Plugins with capabilities: {len(summary.plugin_details)}") + print(f"Output directory: {output_dir}") + + finally: + if source: + source_registry.mapping = original_mapping + + +if __name__ == "__main__": + logging.basicConfig( + level=logging.INFO, + format="[%(asctime)s %(levelname)-8s {%(name)s:%(lineno)d}] - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S %Z", + ) + generate_capability_report() diff --git a/metadata-ingestion/scripts/docgen.py b/metadata-ingestion/scripts/docgen.py index 7cc49281e4..b1df404735 100644 --- a/metadata-ingestion/scripts/docgen.py +++ b/metadata-ingestion/scripts/docgen.py @@ -15,7 +15,7 @@ from docgen_types import Platform, Plugin from docs_config_table import gen_md_table_from_json_schema from datahub.configuration.common import ConfigModel -from datahub.ingestion.api.decorators import SourceCapability, SupportStatus +from datahub.ingestion.api.decorators import SourceCapability, SupportStatus, CapabilitySetting from datahub.ingestion.source.source_registry import source_registry logger = logging.getLogger(__name__) @@ -68,6 +68,20 @@ def get_capability_text(src_capability: SourceCapability) -> str: ) +def map_capability_name_to_enum(capability_name: str) -> SourceCapability: + """ + Maps capability names from the JSON file to SourceCapability enum values. + The JSON file uses enum names (e.g., "DATA_PROFILING") but the enum expects values (e.g., "Data Profiling"). + """ + try: + return SourceCapability[capability_name] + except KeyError: + try: + return SourceCapability(capability_name) + except ValueError: + raise ValueError(f"Unknown capability name: {capability_name}") + + def does_extra_exist(extra_name: str) -> bool: for key, value in metadata("acryl-datahub").items(): if key == "Provides-Extra" and value == extra_name: @@ -129,84 +143,102 @@ def rewrite_markdown(file_contents: str, path: str, relocated_path: str) -> str: return new_content -def load_plugin(plugin_name: str, out_dir: str) -> Plugin: - logger.debug(f"Loading {plugin_name}") - class_or_exception = source_registry._ensure_not_lazy(plugin_name) - if isinstance(class_or_exception, Exception): - raise class_or_exception - source_type = source_registry.get(plugin_name) - logger.debug(f"Source class is {source_type}") +def load_capability_data(capability_summary_path: str) -> Dict: + """Load capability data from the capability summary JSON file.""" + try: + with open(capability_summary_path, 'r') as f: + return json.load(f) + except FileNotFoundError: + logger.error(f"Capability summary file not found: {capability_summary_path}") + raise + except json.JSONDecodeError as e: + logger.error(f"Failed to parse capability summary JSON: {e}") + raise - if hasattr(source_type, "get_platform_name"): - platform_name = source_type.get_platform_name() - else: - platform_name = ( - plugin_name.title() - ) # we like platform names to be human readable - - platform_id = None - if hasattr(source_type, "get_platform_id"): - platform_id = source_type.get_platform_id() - if platform_id is None: - raise ValueError(f"Platform ID not found for {plugin_name}") +def create_plugin_from_capability_data(plugin_name: str, plugin_data: Dict, out_dir: str) -> Plugin: + """Create a Plugin object from capability data.""" plugin = Plugin( name=plugin_name, - platform_id=platform_id, - platform_name=platform_name, - classname=".".join([source_type.__module__, source_type.__name__]), + platform_id=plugin_data["platform_id"], + platform_name=plugin_data["platform_name"], + classname=plugin_data["classname"], ) - - if hasattr(source_type, "get_platform_doc_order"): - platform_doc_order = source_type.get_platform_doc_order() - plugin.doc_order = platform_doc_order - - plugin_file_name = "src/" + "/".join(source_type.__module__.split(".")) - if os.path.exists(plugin_file_name) and os.path.isdir(plugin_file_name): - plugin_file_name = plugin_file_name + "/__init__.py" - else: - plugin_file_name = plugin_file_name + ".py" - if os.path.exists(plugin_file_name): - plugin.filename = plugin_file_name - else: - logger.info( - f"Failed to locate filename for {plugin_name}. Guessed {plugin_file_name}, but that doesn't exist" - ) - - if hasattr(source_type, "__doc__"): - plugin.source_docstring = textwrap.dedent(source_type.__doc__ or "") - - if hasattr(source_type, "get_support_status"): - plugin.support_status = source_type.get_support_status() - - if hasattr(source_type, "get_capabilities"): - capabilities = list(source_type.get_capabilities()) - capabilities.sort(key=lambda x: x.capability.value) + + # Set support status + if plugin_data.get("support_status"): + plugin.support_status = SupportStatus[plugin_data["support_status"]] + + # Set capabilities + if plugin_data.get("capabilities"): + capabilities = [] + for cap_data in plugin_data["capabilities"]: + capability = map_capability_name_to_enum(cap_data["capability"]) + capabilities.append(CapabilitySetting( + capability=capability, + supported=cap_data["supported"], + description=cap_data["description"] + )) plugin.capabilities = capabilities - + + # Load additional plugin information that's not in capability summary try: - extra_plugin = plugin_name if does_extra_exist(plugin_name) else None - plugin.extra_deps = ( - get_additional_deps_for_extra(extra_plugin) if extra_plugin else [] - ) + # Load source class to get additional metadata + class_or_exception = source_registry._ensure_not_lazy(plugin_name) + if isinstance(class_or_exception, Exception): + raise class_or_exception + source_type = source_registry.get(plugin_name) + + # Get doc order + if hasattr(source_type, "get_platform_doc_order"): + platform_doc_order = source_type.get_platform_doc_order() + plugin.doc_order = platform_doc_order + + # Get filename + plugin_file_name = "src/" + "/".join(source_type.__module__.split(".")) + if os.path.exists(plugin_file_name) and os.path.isdir(plugin_file_name): + plugin_file_name = plugin_file_name + "/__init__.py" + else: + plugin_file_name = plugin_file_name + ".py" + if os.path.exists(plugin_file_name): + plugin.filename = plugin_file_name + else: + logger.info( + f"Failed to locate filename for {plugin_name}. Guessed {plugin_file_name}, but that doesn't exist" + ) + + # Get docstring + if hasattr(source_type, "__doc__"): + plugin.source_docstring = textwrap.dedent(source_type.__doc__ or "") + + # Get extra dependencies + try: + extra_plugin = plugin_name if does_extra_exist(plugin_name) else None + plugin.extra_deps = ( + get_additional_deps_for_extra(extra_plugin) if extra_plugin else [] + ) + except Exception as e: + logger.info( + f"Failed to load extras for {plugin_name} due to exception {e}", exc_info=e + ) + + # Get config class + if hasattr(source_type, "get_config_class"): + source_config_class: ConfigModel = source_type.get_config_class() + + plugin.config_json_schema = source_config_class.schema_json(indent=2) + plugin.config_md = gen_md_table_from_json_schema(source_config_class.schema(), current_source=plugin_name) + + # Write the config json schema to the out_dir. + config_dir = pathlib.Path(out_dir) / "config_schemas" + config_dir.mkdir(parents=True, exist_ok=True) + (config_dir / f"{plugin_name}_config.json").write_text( + plugin.config_json_schema + ) + except Exception as e: - logger.info( - f"Failed to load extras for {plugin_name} due to exception {e}", exc_info=e - ) - - if hasattr(source_type, "get_config_class"): - source_config_class: ConfigModel = source_type.get_config_class() - - plugin.config_json_schema = source_config_class.schema_json(indent=2) - plugin.config_md = gen_md_table_from_json_schema(source_config_class.schema(), current_source=plugin_name) - - # Write the config json schema to the out_dir. - config_dir = pathlib.Path(out_dir) / "config_schemas" - config_dir.mkdir(parents=True, exist_ok=True) - (config_dir / f"{plugin_name}_config.json").write_text( - plugin.config_json_schema - ) - + logger.warning(f"Failed to load additional metadata for {plugin_name}: {e}") + return plugin @@ -227,15 +259,25 @@ class PlatformMetrics: @click.command() @click.option("--out-dir", type=str, required=True) +@click.option("--capability-summary", type=str, required=True, help="Path to capability summary JSON file") @click.option("--extra-docs", type=str, required=False) @click.option("--source", type=str, required=False) def generate( - out_dir: str, extra_docs: Optional[str] = None, source: Optional[str] = None + out_dir: str, capability_summary: str, extra_docs: Optional[str] = None, source: Optional[str] = None ) -> None: # noqa: C901 plugin_metrics = PluginMetrics() platform_metrics = PlatformMetrics() platforms: Dict[str, Platform] = {} + + # Load capability data + try: + capability_data = load_capability_data(capability_summary) + logger.info(f"Loaded capability data from {capability_summary}") + except Exception as e: + logger.error(f"Failed to load capability data: {e}") + sys.exit(1) + for plugin_name in sorted(source_registry.mapping.keys()): if source and source != plugin_name: continue @@ -250,7 +292,14 @@ def generate( plugin_metrics.discovered += 1 try: - plugin = load_plugin(plugin_name, out_dir=out_dir) + if plugin_name in capability_data.get("plugin_details", {}): + # Use capability data + plugin_data = capability_data["plugin_details"][plugin_name] + plugin = create_plugin_from_capability_data(plugin_name, plugin_data, out_dir=out_dir) + else: + logger.error(f"Plugin {plugin_name} not found in capability data") + plugin_metrics.failed += 1 + continue except Exception as e: logger.error( f"Failed to load {plugin_name} due to exception {e}", exc_info=e @@ -531,7 +580,7 @@ By default, the UI shows the latest version of the lineage. The time picker can In this example, data flows from Airflow/BigQuery to Snowflake tables, then to the Hive dataset, and ultimately to the features of Machine Learning Models. -:::tip The Lineage Tab is greyed out - why can’t I click on it? +:::tip The Lineage Tab is greyed out - why can't I click on it? This means you have not yet ingested lineage metadata for that entity. Please ingest lineage to proceed. ::: @@ -666,7 +715,7 @@ This is a summary of automatic lineage extraction support in our data source. Pl ### SQL Parser Lineage Extraction -If you’re using a different database system for which we don’t support column-level lineage out of the box, but you do have a database query log available, +If you're using a different database system for which we don't support column-level lineage out of the box, but you do have a database query log available, we have a SQL queries connector that generates column-level lineage and detailed table usage statistics from the query log. If these does not suit your needs, you can use the new `DataHubGraph.parse_sql_lineage()` method in our SDK. (See the source code [here](https://docs.datahub.com/docs/python-sdk/clients/graph-client)) diff --git a/metadata-ingestion/scripts/docgen.sh b/metadata-ingestion/scripts/docgen.sh index affb87f2e7..ab6e028b69 100755 --- a/metadata-ingestion/scripts/docgen.sh +++ b/metadata-ingestion/scripts/docgen.sh @@ -5,6 +5,7 @@ set -euo pipefail DATAHUB_ROOT=.. DOCS_OUT_DIR=$DATAHUB_ROOT/docs/generated/ingestion EXTRA_DOCS_DIR=$DATAHUB_ROOT/metadata-ingestion/docs/sources +CAPABILITY_SUMMARY_FILE=./src/datahub/ingestion/autogenerated/capability_summary.json rm -r $DOCS_OUT_DIR || true -python scripts/docgen.py --out-dir ${DOCS_OUT_DIR} --extra-docs ${EXTRA_DOCS_DIR} $@ +python scripts/docgen.py --out-dir ${DOCS_OUT_DIR} --capability-summary ${CAPABILITY_SUMMARY_FILE} --extra-docs ${EXTRA_DOCS_DIR} $@ diff --git a/metadata-ingestion/src/datahub/ingestion/api/decorators.py b/metadata-ingestion/src/datahub/ingestion/api/decorators.py index a94cbbde15..dbdf4e4b7a 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/decorators.py +++ b/metadata-ingestion/src/datahub/ingestion/api/decorators.py @@ -104,6 +104,7 @@ def capability( for base in cls.__bases__ ): cls.__capabilities = {} + cls.get_capabilities = lambda: cls.__capabilities.values() # If the superclasses have capability annotations, copy those over. diff --git a/metadata-ingestion/src/datahub/ingestion/autogenerated/__init__.py b/metadata-ingestion/src/datahub/ingestion/autogenerated/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/metadata-ingestion/src/datahub/ingestion/autogenerated/capability_summary.json b/metadata-ingestion/src/datahub/ingestion/autogenerated/capability_summary.json new file mode 100644 index 0000000000..f0fadf6d9f --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/autogenerated/capability_summary.json @@ -0,0 +1,2850 @@ +{ + "generated_at": "2025-06-27T16:37:01.425637+00:00", + "generated_by": "metadata-ingestion/scripts/capability_summary.py", + "plugin_details": { + "abs": { + "capabilities": [ + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "TAGS", + "description": "Can extract ABS object/container tags if enabled", + "supported": true + } + ], + "classname": "datahub.ingestion.source.abs.source.ABSSource", + "platform_id": "abs", + "platform_name": "ABS Data Lake", + "support_status": "INCUBATING" + }, + "athena": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Supported for S3 tables", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration. Profiling uses sql queries on whole table which can be expensive operation.", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Supported via the `domain` config field", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Supported for S3 tables", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sql.athena.AthenaSource", + "platform_id": "athena", + "platform_name": "Athena", + "support_status": "CERTIFIED" + }, + "azure-ad": { + "capabilities": [ + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + } + ], + "classname": "datahub.ingestion.source.identity.azure_ad.AzureADSource", + "platform_id": "azure-ad", + "platform_name": "Azure AD", + "support_status": "CERTIFIED" + }, + "bigquery": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "USAGE_STATS", + "description": "Enabled by default, can be disabled via configuration `include_usage_statistics`", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Supported via the `domain` config field", + "supported": true + }, + { + "capability": "PARTITION_SUPPORT", + "description": "Enabled by default, partition keys and clustering keys are supported.", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Platform instance is pre-set to the BigQuery project id", + "supported": false + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.bigquery_v2.bigquery.BigqueryV2Source", + "platform_id": "bigquery", + "platform_name": "BigQuery", + "support_status": "CERTIFIED" + }, + "cassandra": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.cassandra.cassandra.CassandraSource", + "platform_id": "cassandra", + "platform_name": "Cassandra", + "support_status": "INCUBATING" + }, + "clickhouse": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default to get lineage for views via `include_view_column_lineage`", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default to get lineage for views via `include_view_lineage`", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sql.clickhouse.ClickHouseSource", + "platform_id": "clickhouse", + "platform_name": "ClickHouse", + "support_status": "CERTIFIED" + }, + "clickhouse-usage": { + "capabilities": [ + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "USAGE_STATS", + "description": "Enabled by default to get usage stats", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + } + ], + "classname": "datahub.ingestion.source.usage.clickhouse_usage.ClickHouseUsageSource", + "platform_id": "clickhouse", + "platform_name": "ClickHouse", + "support_status": "CERTIFIED" + }, + "cockroachdb": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default to get lineage for views via `include_view_column_lineage`", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Supported via the `domain` config field", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default to get lineage for views via `include_view_lineage`", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sql.cockroachdb.CockroachDBSource", + "platform_id": "cockroachdb", + "platform_name": "CockroachDB", + "support_status": "TESTING" + }, + "csv-enricher": { + "capabilities": [ + { + "capability": "DESCRIPTIONS", + "description": "Supported by default", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Supported by default", + "supported": true + }, + { + "capability": "OWNERSHIP", + "description": "Supported by default", + "supported": true + }, + { + "capability": "TAGS", + "description": "Supported by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.csv_enricher.CSVEnricherSource", + "platform_id": "csv-enricher", + "platform_name": "CSV Enricher", + "support_status": "INCUBATING" + }, + "datahub": { + "capabilities": [ + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + } + ], + "classname": "datahub.ingestion.source.datahub.datahub_source.DataHubSource", + "platform_id": "datahub", + "platform_name": "DataHub", + "support_status": "TESTING" + }, + "datahub-apply": { + "capabilities": [], + "classname": "datahub.ingestion.source.apply.datahub_apply.DataHubApplySource", + "platform_id": "datahubapply", + "platform_name": "DataHubApply", + "support_status": "TESTING" + }, + "datahub-business-glossary": { + "capabilities": [], + "classname": "datahub.ingestion.source.metadata.business_glossary.BusinessGlossaryFileSource", + "platform_id": "business-glossary", + "platform_name": "Business Glossary", + "support_status": "CERTIFIED" + }, + "datahub-debug": { + "capabilities": [], + "classname": "datahub.ingestion.source.debug.datahub_debug.DataHubDebugSource", + "platform_id": "datahubdebug", + "platform_name": "DataHubDebug", + "support_status": "TESTING" + }, + "datahub-gc": { + "capabilities": [], + "classname": "datahub.ingestion.source.gc.datahub_gc.DataHubGcSource", + "platform_id": "datahubgc", + "platform_name": "DataHubGc", + "support_status": "TESTING" + }, + "datahub-lineage-file": { + "capabilities": [ + { + "capability": "LINEAGE_FINE", + "description": "Specified in the lineage file.", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Specified in the lineage file.", + "supported": true + } + ], + "classname": "datahub.ingestion.source.metadata.lineage.LineageFileSource", + "platform_id": "file-based-lineage", + "platform_name": "File Based Lineage", + "support_status": "CERTIFIED" + }, + "datahub-mock-data": { + "capabilities": [], + "classname": "datahub.ingestion.source.mock_data.datahub_mock_data.DataHubMockDataSource", + "platform_id": "datahubmockdata", + "platform_name": "DataHubMockData", + "support_status": "TESTING" + }, + "dbt": { + "capabilities": [ + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default, configure using `include_column_lineage`", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.dbt.dbt_core.DBTCoreSource", + "platform_id": "dbt", + "platform_name": "dbt", + "support_status": "CERTIFIED" + }, + "dbt-cloud": { + "capabilities": [ + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default, configure using `include_column_lineage`", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.dbt.dbt_cloud.DBTCloudSource", + "platform_id": "dbt", + "platform_name": "dbt", + "support_status": "CERTIFIED" + }, + "delta-lake": { + "capabilities": [ + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "TAGS", + "description": "Can extract S3 object/bucket tags if enabled", + "supported": true + } + ], + "classname": "datahub.ingestion.source.delta_lake.source.DeltaLakeSource", + "platform_id": "delta-lake", + "platform_name": "Delta Lake", + "support_status": "INCUBATING" + }, + "demo-data": { + "capabilities": [], + "classname": "datahub.ingestion.source.demo_data.DemoDataSource", + "platform_id": "demo-data", + "platform_name": "Demo Data", + "support_status": null + }, + "dremio": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "USAGE_STATS", + "description": "Enabled by default to get usage stats", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Supported via the `domain` config field", + "supported": true + }, + { + "capability": "OWNERSHIP", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.dremio.dremio_source.DremioSource", + "platform_id": "dremio", + "platform_name": "Dremio", + "support_status": "CERTIFIED" + }, + "druid": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default to get lineage for views via `include_view_column_lineage`", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default to get lineage for views via `include_view_lineage`", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sql.druid.DruidSource", + "platform_id": "druid", + "platform_name": "Druid", + "support_status": "INCUBATING" + }, + "dynamodb": { + "capabilities": [ + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "By default, platform_instance will use the AWS account id", + "supported": true + } + ], + "classname": "datahub.ingestion.source.dynamodb.dynamodb.DynamoDBSource", + "platform_id": "dynamodb", + "platform_name": "DynamoDB", + "support_status": "TESTING" + }, + "elasticsearch": { + "capabilities": [ + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.elastic_search.ElasticsearchSource", + "platform_id": "elasticsearch", + "platform_name": "Elasticsearch", + "support_status": "CERTIFIED" + }, + "feast": { + "capabilities": [ + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.feast.FeastRepositorySource", + "platform_id": "feast", + "platform_name": "Feast", + "support_status": "CERTIFIED" + }, + "file": { + "capabilities": [ + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.file.GenericFileSource", + "platform_id": "metadata-file", + "platform_name": "Metadata File", + "support_status": "CERTIFIED" + }, + "fivetran": { + "capabilities": [ + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default, can be disabled via configuration `include_column_lineage`", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.fivetran.fivetran.FivetranSource", + "platform_id": "fivetran", + "platform_name": "Fivetran", + "support_status": "INCUBATING" + }, + "gcs": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Not supported", + "supported": false + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.gcs.gcs_source.GCSSource", + "platform_id": "gcs", + "platform_name": "Google Cloud Storage", + "support_status": "INCUBATING" + }, + "glue": { + "capabilities": [ + { + "capability": "LINEAGE_FINE", + "description": "Support via the `emit_s3_lineage` config field", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default when stateful ingestion is turned on.", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Supported via the `domain` config field", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.aws.glue.GlueSource", + "platform_id": "glue", + "platform_name": "Glue", + "support_status": "CERTIFIED" + }, + "grafana": { + "capabilities": [ + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + } + ], + "classname": "datahub.ingestion.source.grafana.grafana_source.GrafanaSource", + "platform_id": "grafana", + "platform_name": "Grafana", + "support_status": "TESTING" + }, + "hana": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default to get lineage for views via `include_view_column_lineage`", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Supported via the `domain` config field", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default to get lineage for views via `include_view_lineage`", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sql.hana.HanaSource", + "platform_id": "hana", + "platform_name": "SAP HANA", + "support_status": "TESTING" + }, + "hex": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Supported by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "OWNERSHIP", + "description": "Supported by default", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.hex.hex.HexSource", + "platform_id": "hex", + "platform_name": "Hex", + "support_status": "TESTING" + }, + "hive": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default to get lineage for views via `include_view_column_lineage`", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Supported via the `domain` config field", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default to get lineage for views via `include_view_lineage`", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sql.hive.HiveSource", + "platform_id": "hive", + "platform_name": "Hive", + "support_status": "CERTIFIED" + }, + "hive-metastore": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Not Supported", + "supported": false + }, + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default to get lineage for views via `include_view_column_lineage`", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Not Supported", + "supported": false + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "View lineage is not supported", + "supported": false + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sql.hive_metastore.HiveMetastoreSource", + "platform_id": "hive-metastore", + "platform_name": "Hive Metastore", + "support_status": "CERTIFIED" + }, + "iceberg": { + "capabilities": [ + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration.", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default.", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Currently not supported.", + "supported": false + }, + { + "capability": "OWNERSHIP", + "description": "Automatically ingests ownership information from table properties based on `user_ownership_property` and `group_ownership_property`", + "supported": true + }, + { + "capability": "PARTITION_SUPPORT", + "description": "Currently not supported.", + "supported": false + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Optionally enabled via configuration, an Iceberg instance represents the catalog name where the table is stored.", + "supported": true + } + ], + "classname": "datahub.ingestion.source.iceberg.iceberg.IcebergSource", + "platform_id": "iceberg", + "platform_name": "Iceberg", + "support_status": "TESTING" + }, + "json-schema": { + "capabilities": [ + { + "capability": "DESCRIPTIONS", + "description": "Extracts descriptions at top level and field level", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "With stateful ingestion enabled, will remove entities from DataHub if they are no longer present in the source", + "supported": true + }, + { + "capability": "OWNERSHIP", + "description": "Does not currently support extracting ownership", + "supported": false + }, + { + "capability": "TAGS", + "description": "Does not currently support extracting tags", + "supported": false + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Supports platform instance via config", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Extracts schemas, following references", + "supported": true + } + ], + "classname": "datahub.ingestion.source.schema.json_schema.JsonSchemaSource", + "platform_id": "json-schema", + "platform_name": "JSON Schemas", + "support_status": "INCUBATING" + }, + "kafka": { + "capabilities": [ + { + "capability": "LINEAGE_FINE", + "description": "Not supported", + "supported": false + }, + { + "capability": "DATA_PROFILING", + "description": "Not supported", + "supported": false + }, + { + "capability": "DESCRIPTIONS", + "description": "Set dataset description to top level doc field for Avro schema", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "For multiple Kafka clusters, use the platform_instance configuration", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Schemas associated with each topic are extracted from the schema registry. Avro and Protobuf (certified), JSON (incubating). Schema references are supported.", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Not supported. If you use Kafka Connect, the kafka-connect source can generate lineage.", + "supported": false + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.kafka.kafka.KafkaSource", + "platform_id": "kafka", + "platform_name": "Kafka", + "support_status": "CERTIFIED" + }, + "kafka-connect": { + "capabilities": [ + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.kafka_connect.kafka_connect.KafkaConnectSource", + "platform_id": "kafka-connect", + "platform_name": "Kafka Connect", + "support_status": "CERTIFIED" + }, + "ldap": { + "capabilities": [ + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + } + ], + "classname": "datahub.ingestion.source.ldap.LDAPSource", + "platform_id": "ldap", + "platform_name": "LDAP", + "support_status": "CERTIFIED" + }, + "looker": { + "capabilities": [ + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default, configured using `extract_column_level_lineage`", + "supported": true + }, + { + "capability": "USAGE_STATS", + "description": "Enabled by default, configured using `extract_usage_history`", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "OWNERSHIP", + "description": "Enabled by default, configured using `extract_owners`", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Use the `platform_instance` field", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Supported by default", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.looker.looker_source.LookerDashboardSource", + "platform_id": "looker", + "platform_name": "Looker", + "support_status": "CERTIFIED" + }, + "lookml": { + "capabilities": [ + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default, configured using `extract_column_level_lineage`", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Use the `platform_instance` and `connection_to_platform_map` fields", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Supported by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.looker.lookml_source.LookMLSource", + "platform_id": "looker", + "platform_name": "Looker", + "support_status": "CERTIFIED" + }, + "mariadb": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default to get lineage for views via `include_view_column_lineage`", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Supported via the `domain` config field", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default to get lineage for views via `include_view_lineage`", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sql.mariadb.MariaDBSource", + "platform_id": "mariadb", + "platform_name": "MariaDB", + "support_status": "CERTIFIED" + }, + "metabase": { + "capabilities": [ + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Supported by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.metabase.MetabaseSource", + "platform_id": "metabase", + "platform_name": "Metabase", + "support_status": "CERTIFIED" + }, + "mlflow": { + "capabilities": [ + { + "capability": "DESCRIPTIONS", + "description": "Extract descriptions for MLflow Registered Models and Model Versions", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "TAGS", + "description": "Extract tags for MLflow Registered Model Stages", + "supported": true + } + ], + "classname": "datahub.ingestion.source.mlflow.MLflowSource", + "platform_id": "mlflow", + "platform_name": "MLflow", + "support_status": "TESTING" + }, + "mode": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Supported by default", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "OWNERSHIP", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Supported by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.mode.ModeSource", + "platform_id": "mode", + "platform_name": "Mode", + "support_status": "CERTIFIED" + }, + "mongodb": { + "capabilities": [ + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.mongodb.MongoDBSource", + "platform_id": "mongodb", + "platform_name": "MongoDB", + "support_status": "CERTIFIED" + }, + "mssql": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default to get lineage for stored procedures via `include_lineage` and for views via `include_view_column_lineage`", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Supported via the `domain` config field", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default to get lineage for stored procedures via `include_lineage` and for views via `include_view_lineage`", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sql.mssql.source.SQLServerSource", + "platform_id": "mssql", + "platform_name": "Microsoft SQL Server", + "support_status": "CERTIFIED" + }, + "mysql": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default to get lineage for views via `include_view_column_lineage`", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Supported via the `domain` config field", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default to get lineage for views via `include_view_lineage`", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sql.mysql.MySQLSource", + "platform_id": "mysql", + "platform_name": "MySQL", + "support_status": "CERTIFIED" + }, + "neo4j": { + "capabilities": [ + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Supported via the `platform_instance` config", + "supported": true + } + ], + "classname": "datahub.ingestion.source.neo4j.neo4j_source.Neo4jSource", + "platform_id": "neo4j", + "platform_name": "Neo4j", + "support_status": "CERTIFIED" + }, + "nifi": { + "capabilities": [ + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Supported. See docs for limitations", + "supported": true + } + ], + "classname": "datahub.ingestion.source.nifi.NifiSource", + "platform_id": "nifi", + "platform_name": "NiFi", + "support_status": "CERTIFIED" + }, + "okta": { + "capabilities": [ + { + "capability": "DESCRIPTIONS", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + } + ], + "classname": "datahub.ingestion.source.identity.okta.OktaSource", + "platform_id": "okta", + "platform_name": "Okta", + "support_status": "CERTIFIED" + }, + "openapi": { + "capabilities": [ + { + "capability": "PLATFORM_INSTANCE", + "description": "", + "supported": false + } + ], + "classname": "datahub.ingestion.source.openapi.OpenApiSource", + "platform_id": "openapi", + "platform_name": "OpenAPI", + "support_status": "INCUBATING" + }, + "oracle": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default to get lineage for views via `include_view_column_lineage`", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default to get lineage for views via `include_view_lineage`", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sql.oracle.OracleSource", + "platform_id": "oracle", + "platform_name": "Oracle", + "support_status": "INCUBATING" + }, + "postgres": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default to get lineage for views via `include_view_column_lineage`", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default to get lineage for views via `include_view_lineage`", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sql.postgres.PostgresSource", + "platform_id": "postgres", + "platform_name": "Postgres", + "support_status": "CERTIFIED" + }, + "powerbi": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Disabled by default, configured using `extract_column_level_lineage`. ", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration profiling.enabled", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "OWNERSHIP", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "TAGS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default, configured using `extract_lineage`.", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.powerbi.powerbi.PowerBiDashboardSource", + "platform_id": "powerbi", + "platform_name": "PowerBI", + "support_status": "CERTIFIED" + }, + "powerbi-report-server": { + "capabilities": [ + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "OWNERSHIP", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.powerbi_report_server.report_server.PowerBiReportServerDashboardSource", + "platform_id": "powerbi-report-server", + "platform_name": "PowerBI Report Server", + "support_status": "INCUBATING" + }, + "preset": { + "capabilities": [ + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Enabled by `domain` config to assign domain_key", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Supported by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.preset.PresetSource", + "platform_id": "preset", + "platform_name": "Preset", + "support_status": "CERTIFIED" + }, + "presto": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default to get lineage for views via `include_view_column_lineage`", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Supported via the `domain` config field", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default to get lineage for views via `include_view_lineage`", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sql.presto.PrestoSource", + "platform_id": "presto", + "platform_name": "Presto", + "support_status": "CERTIFIED" + }, + "presto-on-hive": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Not Supported", + "supported": false + }, + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default to get lineage for views via `include_view_column_lineage`", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Not Supported", + "supported": false + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "View lineage is not supported", + "supported": false + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sql.hive_metastore.HiveMetastoreSource", + "platform_id": "hive-metastore", + "platform_name": "Hive Metastore", + "support_status": "CERTIFIED" + }, + "pulsar": { + "capabilities": [ + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Supported via the `domain` config field", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.pulsar.PulsarSource", + "platform_id": "pulsar", + "platform_name": "Pulsar", + "support_status": "INCUBATING" + }, + "qlik-sense": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Disabled by default. ", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "OWNERSHIP", + "description": "Enabled by default, configured using `ingest_owner`", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default.", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.qlik_sense.qlik_sense.QlikSenseSource", + "platform_id": "qlik-sense", + "platform_name": "Qlik Sense", + "support_status": "INCUBATING" + }, + "redash": { + "capabilities": [ + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.redash.RedashSource", + "platform_id": "redash", + "platform_name": "Redash", + "support_status": "INCUBATING" + }, + "redshift": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Optionally enabled via configuration (`mixed` or `sql_based` lineage needs to be enabled)", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "USAGE_STATS", + "description": "Enabled by default, can be disabled via configuration `include_usage_statistics`", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Supported via the `domain` config field", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.redshift.redshift.RedshiftSource", + "platform_id": "redshift", + "platform_name": "Redshift", + "support_status": "CERTIFIED" + }, + "s3": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "TAGS", + "description": "Can extract S3 object/bucket tags if enabled", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Can infer schema from supported file types", + "supported": true + } + ], + "classname": "datahub.ingestion.source.s3.source.S3Source", + "platform_id": "s3", + "platform_name": "S3 / Local Files", + "support_status": "INCUBATING" + }, + "sac": { + "capabilities": [ + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default (only for Import Data Models)", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default (only for Live Data Models)", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sac.sac.SACSource", + "platform_id": "sac", + "platform_name": "SAP Analytics Cloud", + "support_status": "TESTING" + }, + "sagemaker": { + "capabilities": [ + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.aws.sagemaker.SagemakerSource", + "platform_id": "sagemaker", + "platform_name": "SageMaker", + "support_status": "CERTIFIED" + }, + "salesforce": { + "capabilities": [ + { + "capability": "DATA_PROFILING", + "description": "Only table level profiling is supported via `profiling.enabled` config field", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Not supported yet", + "supported": false + }, + { + "capability": "DOMAINS", + "description": "Supported via the `domain` config field", + "supported": true + }, + { + "capability": "TAGS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Can be equivalent to Salesforce organization", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.salesforce.SalesforceSource", + "platform_id": "salesforce", + "platform_name": "Salesforce", + "support_status": "INCUBATING" + }, + "sigma": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "OWNERSHIP", + "description": "Enabled by default, configured using `ingest_owner`", + "supported": true + }, + { + "capability": "TAGS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default.", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sigma.sigma.SigmaSource", + "platform_id": "sigma", + "platform_name": "Sigma", + "support_status": "INCUBATING" + }, + "slack": { + "capabilities": [ + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + } + ], + "classname": "datahub.ingestion.source.slack.slack.SlackSource", + "platform_id": "slack", + "platform_name": "Slack", + "support_status": "TESTING" + }, + "snowflake": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default, can be disabled via configuration `include_column_lineage`", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration `profiling.enabled`", + "supported": true + }, + { + "capability": "USAGE_STATS", + "description": "Enabled by default, can be disabled via configuration `include_usage_stats`", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Supported via the `domain` config field", + "supported": true + }, + { + "capability": "TAGS", + "description": "Optionally enabled via `extract_tags`", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default, can be disabled via configuration `include_table_lineage`", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.snowflake.snowflake_v2.SnowflakeV2Source", + "platform_id": "snowflake", + "platform_name": "Snowflake", + "support_status": "CERTIFIED" + }, + "sql-queries": { + "capabilities": [ + { + "capability": "LINEAGE_FINE", + "description": "Parsed from SQL queries", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Parsed from SQL queries", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sql_queries.SqlQueriesSource", + "platform_id": "sql-queries", + "platform_name": "SQL Queries", + "support_status": "INCUBATING" + }, + "sqlalchemy": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default to get lineage for views via `include_view_column_lineage`", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Supported via the `domain` config field", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default to get lineage for views via `include_view_lineage`", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sql.sql_generic.SQLAlchemyGenericSource", + "platform_id": "sqlalchemy", + "platform_name": "SQLAlchemy", + "support_status": "INCUBATING" + }, + "starburst-trino-usage": { + "capabilities": [ + { + "capability": "USAGE_STATS", + "description": "Enabled by default to get usage stats", + "supported": true + } + ], + "classname": "datahub.ingestion.source.usage.starburst_trino_usage.TrinoUsageSource", + "platform_id": "trino", + "platform_name": "Trino", + "support_status": "CERTIFIED" + }, + "superset": { + "capabilities": [ + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Enabled by `domain` config to assign domain_key", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Supported by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.superset.SupersetSource", + "platform_id": "superset", + "platform_name": "Superset", + "support_status": "CERTIFIED" + }, + "tableau": { + "capabilities": [ + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default, configure using `extract_column_level_lineage`", + "supported": true + }, + { + "capability": "USAGE_STATS", + "description": "Dashboard/Chart view counts, enabled using extract_usage_stats config", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default when stateful ingestion is turned on.", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Requires transformer", + "supported": false + }, + { + "capability": "OWNERSHIP", + "description": "Requires recipe configuration", + "supported": true + }, + { + "capability": "TAGS", + "description": "Requires recipe configuration", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.tableau.tableau.TableauSource", + "platform_id": "tableau", + "platform_name": "Tableau", + "support_status": "CERTIFIED" + }, + "teradata": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "USAGE_STATS", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sql.teradata.TeradataSource", + "platform_id": "teradata", + "platform_name": "Teradata", + "support_status": "TESTING" + }, + "trino": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default to get lineage for views via `include_view_column_lineage`", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Supported via the `domain` config field", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default to get lineage for views via `include_view_lineage`", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sql.trino.TrinoSource", + "platform_id": "trino", + "platform_name": "Trino", + "support_status": "CERTIFIED" + }, + "unity-catalog": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Supported via the `profiling.enabled` config", + "supported": true + }, + { + "capability": "USAGE_STATS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Supported via the `domain` config field", + "supported": true + }, + { + "capability": "OWNERSHIP", + "description": "Supported via the `include_ownership` config", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.unity.source.UnityCatalogSource", + "platform_id": "databricks", + "platform_name": "Databricks", + "support_status": "INCUBATING" + }, + "vertexai": { + "capabilities": [ + { + "capability": "DESCRIPTIONS", + "description": "Extract descriptions for Vertex AI Registered Models and Model Versions", + "supported": true + } + ], + "classname": "datahub.ingestion.source.vertexai.vertexai.VertexAISource", + "platform_id": "vertexai", + "platform_name": "Vertex AI", + "support_status": "TESTING" + }, + "vertica": { + "capabilities": [ + { + "capability": "CONTAINERS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "CLASSIFICATION", + "description": "Optionally enabled via `classification.enabled`", + "supported": true + }, + { + "capability": "LINEAGE_FINE", + "description": "Enabled by default to get lineage for views via `include_view_column_lineage`", + "supported": true + }, + { + "capability": "DATA_PROFILING", + "description": "Optionally enabled via configuration", + "supported": true + }, + { + "capability": "DESCRIPTIONS", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "DELETION_DETECTION", + "description": "Enabled by default via stateful ingestion", + "supported": true + }, + { + "capability": "DOMAINS", + "description": "Supported via the `domain` config field", + "supported": true + }, + { + "capability": "PLATFORM_INSTANCE", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "SCHEMA_METADATA", + "description": "Enabled by default", + "supported": true + }, + { + "capability": "LINEAGE_COARSE", + "description": "Enabled by default, can be disabled via configuration `include_view_lineage` and `include_projection_lineage`", + "supported": true + }, + { + "capability": "TEST_CONNECTION", + "description": "Enabled by default", + "supported": true + } + ], + "classname": "datahub.ingestion.source.sql.vertica.VerticaSource", + "platform_id": "vertica", + "platform_name": "Vertica", + "support_status": "CERTIFIED" + } + } +} \ No newline at end of file diff --git a/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra.py b/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra.py index bee053dbe1..0df2fa3710 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra.py +++ b/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra.py @@ -80,7 +80,7 @@ class KeyspaceKey(ContainerKey): @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") @capability( SourceCapability.DELETION_DETECTION, - "Optionally enabled via `stateful_ingestion.remove_stale_metadata`", + "Enabled by default via stateful ingestion", supported=True, ) class CassandraSource(StatefulIngestionSourceBase): diff --git a/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py b/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py index 6102e6d61a..76322141a7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py +++ b/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py @@ -167,7 +167,7 @@ class AzureADSourceReport(StaleEntityRemovalSourceReport): @config_class(AzureADConfig) @support_status(SupportStatus.CERTIFIED) @capability( - SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion" + SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion" ) class AzureADSource(StatefulIngestionSourceBase): """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py b/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py index d0c84d90d6..f4b0c784a2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py +++ b/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py @@ -202,7 +202,7 @@ class OktaSourceReport(StaleEntityRemovalSourceReport): @support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.DESCRIPTIONS, "Optionally enabled via configuration") @capability( - SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion" + SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion" ) class OktaSource(StatefulIngestionSourceBase): """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/preset.py b/metadata-ingestion/src/datahub/ingestion/source/preset.py index 145238bb8b..57baa35b13 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/preset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/preset.py @@ -71,7 +71,7 @@ class PresetConfig(SupersetConfig): @config_class(PresetConfig) @support_status(SupportStatus.CERTIFIED) @capability( - SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion" + SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion" ) class PresetSource(SupersetSource): """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index d99c9a037b..8d30010e27 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -118,7 +118,7 @@ logger: logging.Logger = logging.getLogger(__name__) ) @capability( SourceCapability.DELETION_DETECTION, - "Optionally enabled via `stateful_ingestion.remove_stale_metadata`", + "Enabled by default via stateful ingestion", supported=True, ) @capability( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py index 92487d48b9..39e5196478 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py @@ -116,7 +116,7 @@ class VerticaConfig(BasicSQLAlchemyConfig): ) @capability( SourceCapability.DELETION_DETECTION, - "Optionally enabled via `stateful_ingestion.remove_stale_metadata`", + "Enabled by default via stateful ingestion", supported=True, ) class VerticaSource(SQLAlchemySource): diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py b/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py index 4e9e1425a9..994854836a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py @@ -179,7 +179,7 @@ class StatefulIngestionReport(SourceReport): @capability( SourceCapability.DELETION_DETECTION, - "Optionally enabled via `stateful_ingestion.remove_stale_metadata`", + "Enabled by default via stateful ingestion", supported=True, ) class StatefulIngestionSourceBase(Source): diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 94d296d0c4..c3d2e6388f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -272,7 +272,7 @@ def get_filter_name(filter_obj): @config_class(SupersetConfig) @support_status(SupportStatus.CERTIFIED) @capability( - SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion" + SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion" ) @capability(SourceCapability.DOMAINS, "Enabled by `domain` config to assign domain_key") @capability(SourceCapability.LINEAGE_COARSE, "Supported by default") diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index 4fd880e07a..0be66eed4d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -159,7 +159,7 @@ logger: logging.Logger = logging.getLogger(__name__) ) @capability( SourceCapability.DELETION_DETECTION, - "Optionally enabled via `stateful_ingestion.remove_stale_metadata`", + "Enabled by default via stateful ingestion", supported=True, ) @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")