mirror of
https://github.com/datahub-project/datahub.git
synced 2025-06-27 05:03:31 +00:00
feat(ingest): add preset source (#10954)
Co-authored-by: MARK CHENG <hcheng@wealthsimple.com> Co-authored-by: hwmarkcheng <94201005+hwmarkcheng@users.noreply.github.com>
This commit is contained in:
parent
0414443b77
commit
f147b51fc8
@ -722,6 +722,7 @@ entry_points = {
|
||||
"snowflake-summary = datahub.ingestion.source.snowflake.snowflake_summary:SnowflakeSummarySource",
|
||||
"snowflake-queries = datahub.ingestion.source.snowflake.snowflake_queries:SnowflakeQueriesSource",
|
||||
"superset = datahub.ingestion.source.superset:SupersetSource",
|
||||
"preset = datahub.ingestion.source.preset:PresetSource",
|
||||
"tableau = datahub.ingestion.source.tableau.tableau:TableauSource",
|
||||
"openapi = datahub.ingestion.source.openapi:OpenApiSource",
|
||||
"metabase = datahub.ingestion.source.metabase:MetabaseSource",
|
||||
|
114
metadata-ingestion/src/datahub/ingestion/source/preset.py
Normal file
114
metadata-ingestion/src/datahub/ingestion/source/preset.py
Normal file
@ -0,0 +1,114 @@
|
||||
import logging
|
||||
from typing import Dict, Optional
|
||||
|
||||
import requests
|
||||
from pydantic.class_validators import root_validator, validator
|
||||
from pydantic.fields import Field
|
||||
|
||||
from datahub.emitter.mce_builder import DEFAULT_ENV
|
||||
from datahub.ingestion.api.common import PipelineContext
|
||||
from datahub.ingestion.api.decorators import (
|
||||
SourceCapability,
|
||||
SupportStatus,
|
||||
capability,
|
||||
config_class,
|
||||
platform_name,
|
||||
support_status,
|
||||
)
|
||||
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
||||
StaleEntityRemovalSourceReport,
|
||||
StatefulStaleMetadataRemovalConfig,
|
||||
)
|
||||
from datahub.ingestion.source.superset import SupersetConfig, SupersetSource
|
||||
from datahub.utilities import config_clean
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PresetConfig(SupersetConfig):
|
||||
manager_uri: str = Field(
|
||||
default="https://api.app.preset.io", description="Preset.io API URL"
|
||||
)
|
||||
connect_uri: str = Field(default="", description="Preset workspace URL.")
|
||||
display_uri: Optional[str] = Field(
|
||||
default=None,
|
||||
description="optional URL to use in links (if `connect_uri` is only for ingestion)",
|
||||
)
|
||||
api_key: Optional[str] = Field(default=None, description="Preset.io API key.")
|
||||
api_secret: Optional[str] = Field(default=None, description="Preset.io API secret.")
|
||||
|
||||
# Configuration for stateful ingestion
|
||||
stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field(
|
||||
default=None, description="Preset Stateful Ingestion Config."
|
||||
)
|
||||
|
||||
options: Dict = Field(default={}, description="")
|
||||
env: str = Field(
|
||||
default=DEFAULT_ENV,
|
||||
description="Environment to use in namespace when constructing URNs",
|
||||
)
|
||||
database_alias: Dict[str, str] = Field(
|
||||
default={},
|
||||
description="Can be used to change mapping for database names in superset to what you have in datahub",
|
||||
)
|
||||
|
||||
@validator("connect_uri", "display_uri")
|
||||
def remove_trailing_slash(cls, v):
|
||||
return config_clean.remove_trailing_slashes(v)
|
||||
|
||||
@root_validator
|
||||
def default_display_uri_to_connect_uri(cls, values):
|
||||
base = values.get("display_uri")
|
||||
if base is None:
|
||||
values["display_uri"] = values.get("connect_uri")
|
||||
return values
|
||||
|
||||
|
||||
@platform_name("Preset")
|
||||
@config_class(PresetConfig)
|
||||
@support_status(SupportStatus.TESTING)
|
||||
@capability(
|
||||
SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion"
|
||||
)
|
||||
class PresetSource(SupersetSource):
|
||||
"""
|
||||
Variation of the Superset plugin that works with Preset.io (Apache Superset SaaS).
|
||||
"""
|
||||
|
||||
config: PresetConfig
|
||||
report: StaleEntityRemovalSourceReport
|
||||
platform = "preset"
|
||||
|
||||
def __init__(self, ctx: PipelineContext, config: PresetConfig):
|
||||
logger.info(f"ctx is {ctx}")
|
||||
|
||||
super().__init__(ctx, config)
|
||||
self.config = config
|
||||
self.report = StaleEntityRemovalSourceReport()
|
||||
|
||||
def login(self):
|
||||
try:
|
||||
login_response = requests.post(
|
||||
f"{self.config.manager_uri}/v1/auth/",
|
||||
json={"name": self.config.api_key, "secret": self.config.api_secret},
|
||||
)
|
||||
except requests.exceptions.RequestException as e:
|
||||
logger.error(f"Failed to authenticate with Preset: {e}")
|
||||
raise e
|
||||
|
||||
self.access_token = login_response.json()["payload"]["access_token"]
|
||||
logger.debug("Got access token from Preset")
|
||||
|
||||
requests_session = requests.Session()
|
||||
requests_session.headers.update(
|
||||
{
|
||||
"Authorization": f"Bearer {self.access_token}",
|
||||
"Content-Type": "application/json",
|
||||
"Accept": "*/*",
|
||||
}
|
||||
)
|
||||
# Test the connection
|
||||
test_response = requests_session.get(f"{self.config.connect_uri}/version")
|
||||
if not test_response.ok:
|
||||
logger.error("Unable to connect to workspace")
|
||||
return requests_session
|
@ -101,7 +101,11 @@ class SupersetConfig(
|
||||
)
|
||||
username: Optional[str] = Field(default=None, description="Superset username.")
|
||||
password: Optional[str] = Field(default=None, description="Superset password.")
|
||||
|
||||
api_key: Optional[str] = Field(default=None, description="Preset.io API key.")
|
||||
api_secret: Optional[str] = Field(default=None, description="Preset.io API secret.")
|
||||
manager_uri: str = Field(
|
||||
default="https://api.app.preset.io", description="Preset.io API URL"
|
||||
)
|
||||
# Configuration for stateful ingestion
|
||||
stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field(
|
||||
default=None, description="Superset Stateful Ingestion Config."
|
||||
@ -179,7 +183,14 @@ class SupersetSource(StatefulIngestionSourceBase):
|
||||
super().__init__(config, ctx)
|
||||
self.config = config
|
||||
self.report = StaleEntityRemovalSourceReport()
|
||||
if self.config.domain:
|
||||
self.domain_registry = DomainRegistry(
|
||||
cached_domains=[domain_id for domain_id in self.config.domain],
|
||||
graph=self.ctx.graph,
|
||||
)
|
||||
self.session = self.login()
|
||||
|
||||
def login(self) -> requests.Session:
|
||||
login_response = requests.post(
|
||||
f"{self.config.connect_uri}/api/v1/security/login",
|
||||
json={
|
||||
@ -193,8 +204,8 @@ class SupersetSource(StatefulIngestionSourceBase):
|
||||
self.access_token = login_response.json()["access_token"]
|
||||
logger.debug("Got access token from superset")
|
||||
|
||||
self.session = requests.Session()
|
||||
self.session.headers.update(
|
||||
requests_session = requests.Session()
|
||||
requests_session.headers.update(
|
||||
{
|
||||
"Authorization": f"Bearer {self.access_token}",
|
||||
"Content-Type": "application/json",
|
||||
@ -202,17 +213,14 @@ class SupersetSource(StatefulIngestionSourceBase):
|
||||
}
|
||||
)
|
||||
|
||||
if self.config.domain:
|
||||
self.domain_registry = DomainRegistry(
|
||||
cached_domains=[domain_id for domain_id in self.config.domain],
|
||||
graph=self.ctx.graph,
|
||||
)
|
||||
|
||||
# Test the connection
|
||||
test_response = self.session.get(f"{self.config.connect_uri}/api/v1/dashboard/")
|
||||
test_response = requests_session.get(
|
||||
f"{self.config.connect_uri}/api/v1/dashboard/"
|
||||
)
|
||||
if test_response.status_code == 200:
|
||||
pass
|
||||
# TODO(Gabe): how should we message about this error?
|
||||
return requests_session
|
||||
|
||||
@classmethod
|
||||
def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
|
||||
|
@ -0,0 +1,286 @@
|
||||
[
|
||||
{
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": {
|
||||
"urn": "urn:li:dashboard:(preset,1)",
|
||||
"aspects": [
|
||||
{
|
||||
"com.linkedin.pegasus2avro.common.Status": {
|
||||
"removed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.dashboard.DashboardInfo": {
|
||||
"customProperties": {
|
||||
"Status": "published",
|
||||
"IsPublished": "true",
|
||||
"Owners": "test_username_1, test_username_2",
|
||||
"IsCertified": "true",
|
||||
"CertifiedBy": "Certification team",
|
||||
"CertificationDetails": "Approved"
|
||||
},
|
||||
"title": "test_dashboard_title_1",
|
||||
"description": "",
|
||||
"charts": [
|
||||
"urn:li:chart:(preset,10)",
|
||||
"urn:li:chart:(preset,11)"
|
||||
],
|
||||
"datasets": [],
|
||||
"lastModified": {
|
||||
"created": {
|
||||
"time": 0,
|
||||
"actor": "urn:li:corpuser:unknown"
|
||||
},
|
||||
"lastModified": {
|
||||
"time": 1720594800000,
|
||||
"actor": "urn:li:corpuser:test_username_1"
|
||||
}
|
||||
},
|
||||
"dashboardUrl": "mock://mock-domain.preset.io/dashboard/test_dashboard_url_1"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1720594800000,
|
||||
"runId": "preset-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": {
|
||||
"urn": "urn:li:dashboard:(preset,2)",
|
||||
"aspects": [
|
||||
{
|
||||
"com.linkedin.pegasus2avro.common.Status": {
|
||||
"removed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.dashboard.DashboardInfo": {
|
||||
"customProperties": {
|
||||
"Status": "draft",
|
||||
"IsPublished": "false",
|
||||
"Owners": "unknown",
|
||||
"IsCertified": "false"
|
||||
},
|
||||
"title": "test_dashboard_title_2",
|
||||
"description": "",
|
||||
"charts": [
|
||||
"urn:li:chart:(preset,12)",
|
||||
"urn:li:chart:(preset,13)"
|
||||
],
|
||||
"datasets": [],
|
||||
"lastModified": {
|
||||
"created": {
|
||||
"time": 0,
|
||||
"actor": "urn:li:corpuser:unknown"
|
||||
},
|
||||
"lastModified": {
|
||||
"time": 1720594800000,
|
||||
"actor": "urn:li:corpuser:test_username_2"
|
||||
}
|
||||
},
|
||||
"dashboardUrl": "mock://mock-domain.preset.io/dashboard/test_dashboard_url_2"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1720594800000,
|
||||
"runId": "preset-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": {
|
||||
"urn": "urn:li:chart:(preset,10)",
|
||||
"aspects": [
|
||||
{
|
||||
"com.linkedin.pegasus2avro.common.Status": {
|
||||
"removed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.chart.ChartInfo": {
|
||||
"customProperties": {
|
||||
"Metrics": "",
|
||||
"Filters": "",
|
||||
"Dimensions": ""
|
||||
},
|
||||
"title": "test_chart_title_1",
|
||||
"description": "",
|
||||
"lastModified": {
|
||||
"created": {
|
||||
"time": 0,
|
||||
"actor": "urn:li:corpuser:unknown"
|
||||
},
|
||||
"lastModified": {
|
||||
"time": 1720594800000,
|
||||
"actor": "urn:li:corpuser:test_username_1"
|
||||
}
|
||||
},
|
||||
"chartUrl": "mock://mock-domain.preset.io/explore/test_chart_url_10",
|
||||
"inputs": [
|
||||
{
|
||||
"string": "urn:li:dataset:(urn:li:dataPlatform:external,test_database_name.test_schema_name.test_table_name,PROD)"
|
||||
}
|
||||
],
|
||||
"type": "BAR"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1720594800000,
|
||||
"runId": "preset-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": {
|
||||
"urn": "urn:li:chart:(preset,11)",
|
||||
"aspects": [
|
||||
{
|
||||
"com.linkedin.pegasus2avro.common.Status": {
|
||||
"removed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.chart.ChartInfo": {
|
||||
"customProperties": {
|
||||
"Metrics": "",
|
||||
"Filters": "",
|
||||
"Dimensions": ""
|
||||
},
|
||||
"title": "test_chart_title_2",
|
||||
"description": "",
|
||||
"lastModified": {
|
||||
"created": {
|
||||
"time": 0,
|
||||
"actor": "urn:li:corpuser:unknown"
|
||||
},
|
||||
"lastModified": {
|
||||
"time": 1720594800000,
|
||||
"actor": "urn:li:corpuser:test_username_1"
|
||||
}
|
||||
},
|
||||
"chartUrl": "mock://mock-domain.preset.io/explore/test_chart_url_11",
|
||||
"inputs": [
|
||||
{
|
||||
"string": "urn:li:dataset:(urn:li:dataPlatform:external,test_database_name.test_schema_name.test_table_name,PROD)"
|
||||
}
|
||||
],
|
||||
"type": "PIE"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1720594800000,
|
||||
"runId": "preset-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": {
|
||||
"urn": "urn:li:chart:(preset,12)",
|
||||
"aspects": [
|
||||
{
|
||||
"com.linkedin.pegasus2avro.common.Status": {
|
||||
"removed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.chart.ChartInfo": {
|
||||
"customProperties": {
|
||||
"Metrics": "",
|
||||
"Filters": "",
|
||||
"Dimensions": ""
|
||||
},
|
||||
"title": "test_chart_title_3",
|
||||
"description": "",
|
||||
"lastModified": {
|
||||
"created": {
|
||||
"time": 0,
|
||||
"actor": "urn:li:corpuser:unknown"
|
||||
},
|
||||
"lastModified": {
|
||||
"time": 1720594800000,
|
||||
"actor": "urn:li:corpuser:test_username_2"
|
||||
}
|
||||
},
|
||||
"chartUrl": "mock://mock-domain.preset.io/explore/test_chart_url_12",
|
||||
"inputs": [
|
||||
{
|
||||
"string": "urn:li:dataset:(urn:li:dataPlatform:external,test_database_name.test_schema_name.test_table_name,PROD)"
|
||||
}
|
||||
],
|
||||
"type": "AREA"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1720594800000,
|
||||
"runId": "preset-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": {
|
||||
"urn": "urn:li:chart:(preset,13)",
|
||||
"aspects": [
|
||||
{
|
||||
"com.linkedin.pegasus2avro.common.Status": {
|
||||
"removed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.chart.ChartInfo": {
|
||||
"customProperties": {
|
||||
"Metrics": "",
|
||||
"Filters": "",
|
||||
"Dimensions": ""
|
||||
},
|
||||
"title": "test_chart_title_4",
|
||||
"description": "",
|
||||
"lastModified": {
|
||||
"created": {
|
||||
"time": 0,
|
||||
"actor": "urn:li:corpuser:unknown"
|
||||
},
|
||||
"lastModified": {
|
||||
"time": 1720594800000,
|
||||
"actor": "urn:li:corpuser:test_username_2"
|
||||
}
|
||||
},
|
||||
"chartUrl": "mock://mock-domain.preset.io/explore/test_chart_url_13",
|
||||
"inputs": [
|
||||
{
|
||||
"string": "urn:li:dataset:(urn:li:dataPlatform:external,test_database_name.test_schema_name.test_table_name,PROD)"
|
||||
}
|
||||
],
|
||||
"type": "HISTOGRAM"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1720594800000,
|
||||
"runId": "preset-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
}
|
||||
]
|
@ -0,0 +1,261 @@
|
||||
[
|
||||
{
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": {
|
||||
"urn": "urn:li:dashboard:(preset,1)",
|
||||
"aspects": [
|
||||
{
|
||||
"com.linkedin.pegasus2avro.common.Status": {
|
||||
"removed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.dashboard.DashboardInfo": {
|
||||
"customProperties": {
|
||||
"Status": "published",
|
||||
"IsPublished": "true",
|
||||
"Owners": "test_username_1, test_username_2",
|
||||
"IsCertified": "true",
|
||||
"CertifiedBy": "Certification team",
|
||||
"CertificationDetails": "Approved"
|
||||
},
|
||||
"title": "test_dashboard_title_1",
|
||||
"description": "",
|
||||
"charts": [
|
||||
"urn:li:chart:(preset,10)",
|
||||
"urn:li:chart:(preset,11)"
|
||||
],
|
||||
"datasets": [],
|
||||
"lastModified": {
|
||||
"created": {
|
||||
"time": 0,
|
||||
"actor": "urn:li:corpuser:unknown"
|
||||
},
|
||||
"lastModified": {
|
||||
"time": 1720594800000,
|
||||
"actor": "urn:li:corpuser:test_username_1"
|
||||
}
|
||||
},
|
||||
"dashboardUrl": "mock://mock-domain.preset.io/dashboard/test_dashboard_url_1"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1720594800000,
|
||||
"runId": "preset-2024_07_10-07_00_00",
|
||||
"lastRunId": "no-run-id-provided",
|
||||
"pipelineName": "test_pipeline"
|
||||
}
|
||||
},
|
||||
{
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": {
|
||||
"urn": "urn:li:chart:(preset,10)",
|
||||
"aspects": [
|
||||
{
|
||||
"com.linkedin.pegasus2avro.common.Status": {
|
||||
"removed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.chart.ChartInfo": {
|
||||
"customProperties": {
|
||||
"Metrics": "",
|
||||
"Filters": "",
|
||||
"Dimensions": ""
|
||||
},
|
||||
"title": "test_chart_title_1",
|
||||
"description": "",
|
||||
"lastModified": {
|
||||
"created": {
|
||||
"time": 0,
|
||||
"actor": "urn:li:corpuser:unknown"
|
||||
},
|
||||
"lastModified": {
|
||||
"time": 1720594800000,
|
||||
"actor": "urn:li:corpuser:test_username_1"
|
||||
}
|
||||
},
|
||||
"chartUrl": "mock://mock-domain.preset.io/explore/test_chart_url_10",
|
||||
"inputs": [
|
||||
{
|
||||
"string": "urn:li:dataset:(urn:li:dataPlatform:external,test_database_name.test_schema_name.test_table_name,PROD)"
|
||||
}
|
||||
],
|
||||
"type": "BAR"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1720594800000,
|
||||
"runId": "preset-2024_07_10-07_00_00",
|
||||
"lastRunId": "no-run-id-provided",
|
||||
"pipelineName": "test_pipeline"
|
||||
}
|
||||
},
|
||||
{
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": {
|
||||
"urn": "urn:li:chart:(preset,11)",
|
||||
"aspects": [
|
||||
{
|
||||
"com.linkedin.pegasus2avro.common.Status": {
|
||||
"removed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.chart.ChartInfo": {
|
||||
"customProperties": {
|
||||
"Metrics": "",
|
||||
"Filters": "",
|
||||
"Dimensions": ""
|
||||
},
|
||||
"title": "test_chart_title_2",
|
||||
"description": "",
|
||||
"lastModified": {
|
||||
"created": {
|
||||
"time": 0,
|
||||
"actor": "urn:li:corpuser:unknown"
|
||||
},
|
||||
"lastModified": {
|
||||
"time": 1720594800000,
|
||||
"actor": "urn:li:corpuser:test_username_1"
|
||||
}
|
||||
},
|
||||
"chartUrl": "mock://mock-domain.preset.io/explore/test_chart_url_11",
|
||||
"inputs": [
|
||||
{
|
||||
"string": "urn:li:dataset:(urn:li:dataPlatform:external,test_database_name.test_schema_name.test_table_name,PROD)"
|
||||
}
|
||||
],
|
||||
"type": "PIE"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1720594800000,
|
||||
"runId": "preset-2024_07_10-07_00_00",
|
||||
"lastRunId": "no-run-id-provided",
|
||||
"pipelineName": "test_pipeline"
|
||||
}
|
||||
},
|
||||
{
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": {
|
||||
"urn": "urn:li:chart:(preset,12)",
|
||||
"aspects": [
|
||||
{
|
||||
"com.linkedin.pegasus2avro.common.Status": {
|
||||
"removed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.chart.ChartInfo": {
|
||||
"customProperties": {
|
||||
"Metrics": "",
|
||||
"Filters": "",
|
||||
"Dimensions": ""
|
||||
},
|
||||
"title": "test_chart_title_3",
|
||||
"description": "",
|
||||
"lastModified": {
|
||||
"created": {
|
||||
"time": 0,
|
||||
"actor": "urn:li:corpuser:unknown"
|
||||
},
|
||||
"lastModified": {
|
||||
"time": 1720594800000,
|
||||
"actor": "urn:li:corpuser:test_username_2"
|
||||
}
|
||||
},
|
||||
"chartUrl": "mock://mock-domain.preset.io/explore/test_chart_url_12",
|
||||
"inputs": [
|
||||
{
|
||||
"string": "urn:li:dataset:(urn:li:dataPlatform:external,test_database_name.test_schema_name.test_table_name,PROD)"
|
||||
}
|
||||
],
|
||||
"type": "AREA"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1720594800000,
|
||||
"runId": "preset-2024_07_10-07_00_00",
|
||||
"lastRunId": "no-run-id-provided",
|
||||
"pipelineName": "test_pipeline"
|
||||
}
|
||||
},
|
||||
{
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": {
|
||||
"urn": "urn:li:chart:(preset,13)",
|
||||
"aspects": [
|
||||
{
|
||||
"com.linkedin.pegasus2avro.common.Status": {
|
||||
"removed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.chart.ChartInfo": {
|
||||
"customProperties": {
|
||||
"Metrics": "",
|
||||
"Filters": "",
|
||||
"Dimensions": ""
|
||||
},
|
||||
"title": "test_chart_title_4",
|
||||
"description": "",
|
||||
"lastModified": {
|
||||
"created": {
|
||||
"time": 0,
|
||||
"actor": "urn:li:corpuser:unknown"
|
||||
},
|
||||
"lastModified": {
|
||||
"time": 1720594800000,
|
||||
"actor": "urn:li:corpuser:test_username_2"
|
||||
}
|
||||
},
|
||||
"chartUrl": "mock://mock-domain.preset.io/explore/test_chart_url_13",
|
||||
"inputs": [
|
||||
{
|
||||
"string": "urn:li:dataset:(urn:li:dataPlatform:external,test_database_name.test_schema_name.test_table_name,PROD)"
|
||||
}
|
||||
],
|
||||
"type": "HISTOGRAM"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1720594800000,
|
||||
"runId": "preset-2024_07_10-07_00_00",
|
||||
"lastRunId": "no-run-id-provided",
|
||||
"pipelineName": "test_pipeline"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "dashboard",
|
||||
"entityUrn": "urn:li:dashboard:(preset,2)",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "status",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"removed": true
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1720594800000,
|
||||
"runId": "preset-2024_07_10-07_00_00",
|
||||
"lastRunId": "no-run-id-provided",
|
||||
"pipelineName": "test_pipeline"
|
||||
}
|
||||
}
|
||||
]
|
366
metadata-ingestion/tests/integration/preset/test_preset.py
Normal file
366
metadata-ingestion/tests/integration/preset/test_preset.py
Normal file
@ -0,0 +1,366 @@
|
||||
from typing import Any, Dict, Optional
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from freezegun import freeze_time
|
||||
|
||||
from datahub.ingestion.run.pipeline import Pipeline
|
||||
from tests.test_helpers import mce_helpers
|
||||
from tests.test_helpers.state_helpers import (
|
||||
get_current_checkpoint_from_pipeline,
|
||||
run_and_get_pipeline,
|
||||
validate_all_providers_have_committed_successfully,
|
||||
)
|
||||
|
||||
FROZEN_TIME = "2024-07-10 07:00:00"
|
||||
GMS_PORT = 8080
|
||||
GMS_SERVER = f"http://localhost:{GMS_PORT}"
|
||||
|
||||
|
||||
def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) -> None:
|
||||
if override_data is None:
|
||||
override_data = {}
|
||||
|
||||
api_vs_response = {
|
||||
"mock://mock-domain.preset.io/v1/auth/": {
|
||||
"method": "POST",
|
||||
"status_code": 200,
|
||||
"json": {
|
||||
"payload": {
|
||||
"access_token": "test_token",
|
||||
}
|
||||
},
|
||||
},
|
||||
"mock://mock-domain.preset.io/version": {
|
||||
"method": "GET",
|
||||
"status_code": 200,
|
||||
"json": {
|
||||
"ci": {
|
||||
"built_at": "Tue Jul 10 00:00:00 UTC 2024",
|
||||
"build_num": "1",
|
||||
"triggered_by": "Not triggered by a user",
|
||||
},
|
||||
"git": {
|
||||
"branch": "4.0.1.6",
|
||||
"sha": "test_sha",
|
||||
"sha_superset": "test_sha_superset",
|
||||
"release_name": "test_release_name",
|
||||
},
|
||||
"chart_version": "1.16.1",
|
||||
"start_time": "2024-07-10 00:00:00",
|
||||
"mt_deployment": True,
|
||||
},
|
||||
},
|
||||
"mock://mock-domain.preset.io/api/v1/dashboard/": {
|
||||
"method": "GET",
|
||||
"status_code": 200,
|
||||
"json": {
|
||||
"count": 2,
|
||||
"result": [
|
||||
{
|
||||
"id": "1",
|
||||
"changed_by": {
|
||||
"username": "test_username_1",
|
||||
},
|
||||
"changed_on_utc": "2024-07-10T07:00:00.000000+0000",
|
||||
"dashboard_title": "test_dashboard_title_1",
|
||||
"url": "/dashboard/test_dashboard_url_1",
|
||||
"position_json": '{"CHART-test-1": {"meta": { "chartId": "10" }}, "CHART-test-2": {"meta": { "chartId": "11" }}}',
|
||||
"status": "published",
|
||||
"published": True,
|
||||
"owners": [
|
||||
{
|
||||
"username": "test_username_1",
|
||||
},
|
||||
{
|
||||
"username": "test_username_2",
|
||||
},
|
||||
],
|
||||
"certified_by": "Certification team",
|
||||
"certification_details": "Approved",
|
||||
},
|
||||
{
|
||||
"id": "2",
|
||||
"changed_by": {
|
||||
"username": "test_username_2",
|
||||
},
|
||||
"changed_on_utc": "2024-07-10T07:00:00.000000+0000",
|
||||
"dashboard_title": "test_dashboard_title_2",
|
||||
"url": "/dashboard/test_dashboard_url_2",
|
||||
"position_json": '{"CHART-test-3": {"meta": { "chartId": "12" }}, "CHART-test-4": {"meta": { "chartId": "13" }}}',
|
||||
"status": "draft",
|
||||
"published": False,
|
||||
"owners": [
|
||||
{
|
||||
"first_name": "name",
|
||||
},
|
||||
],
|
||||
"certified_by": "",
|
||||
"certification_details": "",
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
"mock://mock-domain.preset.io/api/v1/chart/": {
|
||||
"method": "GET",
|
||||
"status_code": 200,
|
||||
"json": {
|
||||
"count": 4,
|
||||
"result": [
|
||||
{
|
||||
"id": "10",
|
||||
"changed_by": {
|
||||
"username": "test_username_1",
|
||||
},
|
||||
"changed_on_utc": "2024-07-10T07:00:00.000000+0000",
|
||||
"slice_name": "test_chart_title_1",
|
||||
"viz_type": "box_plot",
|
||||
"url": "/explore/test_chart_url_10",
|
||||
"datasource_id": "20",
|
||||
"params": '{"metrics": [], "adhoc_filters": []}',
|
||||
},
|
||||
{
|
||||
"id": "11",
|
||||
"changed_by": {
|
||||
"username": "test_username_1",
|
||||
},
|
||||
"changed_on_utc": "2024-07-10T07:00:00.000000+0000",
|
||||
"slice_name": "test_chart_title_2",
|
||||
"viz_type": "pie",
|
||||
"url": "/explore/test_chart_url_11",
|
||||
"datasource_id": "20",
|
||||
"params": '{"metrics": [], "adhoc_filters": []}',
|
||||
},
|
||||
{
|
||||
"id": "12",
|
||||
"changed_by": {
|
||||
"username": "test_username_2",
|
||||
},
|
||||
"changed_on_utc": "2024-07-10T07:00:00.000000+0000",
|
||||
"slice_name": "test_chart_title_3",
|
||||
"viz_type": "treemap",
|
||||
"url": "/explore/test_chart_url_12",
|
||||
"datasource_id": "20",
|
||||
"params": '{"metrics": [], "adhoc_filters": []}',
|
||||
},
|
||||
{
|
||||
"id": "13",
|
||||
"changed_by": {
|
||||
"username": "test_username_2",
|
||||
},
|
||||
"changed_on_utc": "2024-07-10T07:00:00.000000+0000",
|
||||
"slice_name": "test_chart_title_4",
|
||||
"viz_type": "histogram",
|
||||
"url": "/explore/test_chart_url_13",
|
||||
"datasource_id": "20",
|
||||
"params": '{"metrics": [], "adhoc_filters": []}',
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
"mock://mock-domain.preset.io/api/v1/dataset/20": {
|
||||
"method": "GET",
|
||||
"status_code": 200,
|
||||
"json": {
|
||||
"result": {
|
||||
"schema": "test_schema_name",
|
||||
"table_name": "test_table_name",
|
||||
"database": {
|
||||
"id": "30",
|
||||
"database_name": "test_database_name",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
"mock://mock-domain.preset.io/api/v1/database/30": {
|
||||
"method": "GET",
|
||||
"status_code": 200,
|
||||
"json": {
|
||||
"result": {
|
||||
"sqlalchemy_uri": "test_sqlalchemy_uri",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
api_vs_response.update(override_data)
|
||||
|
||||
for url in api_vs_response:
|
||||
request_mock.register_uri(
|
||||
api_vs_response[url]["method"],
|
||||
url,
|
||||
json=api_vs_response[url]["json"],
|
||||
status_code=api_vs_response[url]["status_code"],
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
@pytest.mark.integration
|
||||
def test_preset_ingest(pytestconfig, tmp_path, mock_time, requests_mock):
|
||||
test_resources_dir = pytestconfig.rootpath / "tests/integration/preset"
|
||||
|
||||
register_mock_api(request_mock=requests_mock)
|
||||
|
||||
pipeline = Pipeline.create(
|
||||
{
|
||||
"run_id": "preset-test",
|
||||
"source": {
|
||||
"type": "preset",
|
||||
"config": {
|
||||
"connect_uri": "mock://mock-domain.preset.io/",
|
||||
"manager_uri": "mock://mock-domain.preset.io",
|
||||
"api_key": "test_key",
|
||||
"api_secret": "test_secret",
|
||||
"provider": "db",
|
||||
},
|
||||
},
|
||||
"sink": {
|
||||
"type": "file",
|
||||
"config": {
|
||||
"filename": f"{tmp_path}/preset_mces.json",
|
||||
},
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
pipeline.run()
|
||||
pipeline.raise_from_status()
|
||||
golden_file = "golden_test_ingest.json"
|
||||
|
||||
mce_helpers.check_golden_file(
|
||||
pytestconfig,
|
||||
output_path=tmp_path / "preset_mces.json",
|
||||
golden_path=f"{test_resources_dir}/{golden_file}",
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
@pytest.mark.integration
|
||||
def test_preset_stateful_ingest(
|
||||
pytestconfig, tmp_path, mock_time, requests_mock, mock_datahub_graph
|
||||
):
|
||||
test_resources_dir = pytestconfig.rootpath / "tests/integration/preset"
|
||||
|
||||
register_mock_api(request_mock=requests_mock)
|
||||
|
||||
pipeline_config_dict: Dict[str, Any] = {
|
||||
"source": {
|
||||
"type": "preset",
|
||||
"config": {
|
||||
"connect_uri": "mock://mock-domain.preset.io/",
|
||||
"manager_uri": "mock://mock-domain.preset.io",
|
||||
"api_key": "test_key",
|
||||
"api_secret": "test_secret",
|
||||
"provider": "db",
|
||||
# enable stateful ingestion
|
||||
"stateful_ingestion": {
|
||||
"enabled": True,
|
||||
"remove_stale_metadata": True,
|
||||
"fail_safe_threshold": 100.0,
|
||||
"state_provider": {
|
||||
"type": "datahub",
|
||||
"config": {"datahub_api": {"server": GMS_SERVER}},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
"sink": {
|
||||
# we are not really interested in the resulting events for this test
|
||||
"type": "console"
|
||||
},
|
||||
"pipeline_name": "test_pipeline",
|
||||
}
|
||||
|
||||
dashboard_endpoint_override = {
|
||||
"mock://mock-domain.preset.io/api/v1/dashboard/": {
|
||||
"method": "GET",
|
||||
"status_code": 200,
|
||||
"json": {
|
||||
"count": 1,
|
||||
"result": [
|
||||
{
|
||||
"id": "1",
|
||||
"changed_by": {
|
||||
"username": "test_username_1",
|
||||
},
|
||||
"changed_on_utc": "2024-07-10T07:00:00.000000+0000",
|
||||
"dashboard_title": "test_dashboard_title_1",
|
||||
"url": "/dashboard/test_dashboard_url_1",
|
||||
"position_json": '{"CHART-test-1": {"meta": { "chartId": "10" }}, "CHART-test-2": {"meta": { "chartId": "11" }}}',
|
||||
"status": "published",
|
||||
"published": True,
|
||||
"owners": [
|
||||
{
|
||||
"username": "test_username_1",
|
||||
},
|
||||
{
|
||||
"username": "test_username_2",
|
||||
},
|
||||
],
|
||||
"certified_by": "Certification team",
|
||||
"certification_details": "Approved",
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
with patch(
|
||||
"datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph",
|
||||
mock_datahub_graph,
|
||||
) as mock_checkpoint:
|
||||
# Both checkpoint and reporting will use the same mocked graph instance.
|
||||
mock_checkpoint.return_value = mock_datahub_graph
|
||||
|
||||
# Do the first run of the pipeline and get the default job's checkpoint.
|
||||
pipeline_run1 = run_and_get_pipeline(pipeline_config_dict)
|
||||
checkpoint1 = get_current_checkpoint_from_pipeline(pipeline_run1)
|
||||
|
||||
assert checkpoint1
|
||||
assert checkpoint1.state
|
||||
|
||||
# Remove one dashboard from the preset config.
|
||||
register_mock_api(
|
||||
request_mock=requests_mock, override_data=dashboard_endpoint_override
|
||||
)
|
||||
|
||||
# Capture MCEs of second run to validate Status(removed=true)
|
||||
deleted_mces_path = f"{tmp_path}/preset_deleted_mces.json"
|
||||
pipeline_config_dict["sink"]["type"] = "file"
|
||||
pipeline_config_dict["sink"]["config"] = {"filename": deleted_mces_path}
|
||||
|
||||
# Do the second run of the pipeline.
|
||||
pipeline_run2 = run_and_get_pipeline(pipeline_config_dict)
|
||||
checkpoint2 = get_current_checkpoint_from_pipeline(pipeline_run2)
|
||||
|
||||
assert checkpoint2
|
||||
assert checkpoint2.state
|
||||
|
||||
# Perform all assertions on the states. The deleted dashboard should not be
|
||||
# part of the second state
|
||||
state1 = checkpoint1.state
|
||||
state2 = checkpoint2.state
|
||||
difference_urns = list(
|
||||
state1.get_urns_not_in(type="dashboard", other_checkpoint_state=state2)
|
||||
)
|
||||
|
||||
assert len(difference_urns) == 1
|
||||
|
||||
urn1 = "urn:li:dashboard:(preset,2)"
|
||||
|
||||
assert urn1 in difference_urns
|
||||
|
||||
# Validate that all providers have committed successfully.
|
||||
validate_all_providers_have_committed_successfully(
|
||||
pipeline=pipeline_run1, expected_providers=1
|
||||
)
|
||||
validate_all_providers_have_committed_successfully(
|
||||
pipeline=pipeline_run2, expected_providers=1
|
||||
)
|
||||
|
||||
# Verify the output.
|
||||
mce_helpers.check_golden_file(
|
||||
pytestconfig,
|
||||
output_path=deleted_mces_path,
|
||||
golden_path=test_resources_dir / "golden_test_stateful_ingest.json",
|
||||
)
|
22
metadata-ingestion/tests/unit/test_preset_source.py
Normal file
22
metadata-ingestion/tests/unit/test_preset_source.py
Normal file
@ -0,0 +1,22 @@
|
||||
from datahub.ingestion.source.preset import PresetConfig
|
||||
|
||||
|
||||
def test_default_values():
|
||||
config = PresetConfig.parse_obj({})
|
||||
|
||||
assert config.connect_uri == ""
|
||||
assert config.manager_uri == "https://api.app.preset.io"
|
||||
assert config.display_uri == ""
|
||||
assert config.env == "PROD"
|
||||
assert config.api_key is None
|
||||
assert config.api_secret is None
|
||||
|
||||
|
||||
def test_set_display_uri():
|
||||
display_uri = "some_host:1234"
|
||||
|
||||
config = PresetConfig.parse_obj({"display_uri": display_uri})
|
||||
|
||||
assert config.connect_uri == ""
|
||||
assert config.manager_uri == "https://api.app.preset.io"
|
||||
assert config.display_uri == display_uri
|
Loading…
x
Reference in New Issue
Block a user