mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-17 03:43:56 +00:00
119 lines
3.9 KiB
Python
119 lines
3.9 KiB
Python
import logging
|
|
from typing import Dict, Optional
|
|
|
|
import requests
|
|
from pydantic.class_validators import root_validator, validator
|
|
from pydantic.fields import Field
|
|
|
|
from datahub.emitter.mce_builder import DEFAULT_ENV
|
|
from datahub.ingestion.api.common import PipelineContext
|
|
from datahub.ingestion.api.decorators import (
|
|
SourceCapability,
|
|
SupportStatus,
|
|
capability,
|
|
config_class,
|
|
platform_name,
|
|
support_status,
|
|
)
|
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
StatefulStaleMetadataRemovalConfig,
|
|
)
|
|
from datahub.ingestion.source.superset import (
|
|
SupersetConfig,
|
|
SupersetSource,
|
|
SupersetSourceReport,
|
|
)
|
|
from datahub.utilities import config_clean
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class PresetConfig(SupersetConfig):
    """Connection settings for ingesting from a Preset workspace.

    Extends ``SupersetConfig`` with the Preset manager API endpoint and the
    API key/secret pair that ``PresetSource.login`` posts to that endpoint.
    """

    # --- Endpoints ---------------------------------------------------------
    manager_uri: str = Field(
        default="https://api.app.preset.io", description="Preset.io API URL"
    )
    connect_uri: str = Field(default="", description="Preset workspace URL.")
    display_uri: Optional[str] = Field(
        default=None,
        description="optional URL to use in links (if `connect_uri` is only for ingestion)",
    )

    # --- Credentials -------------------------------------------------------
    api_key: Optional[str] = Field(default=None, description="Preset.io API key.")
    api_secret: Optional[str] = Field(default=None, description="Preset.io API secret.")

    # --- Stateful ingestion ------------------------------------------------
    stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field(
        default=None, description="Preset Stateful Ingestion Config."
    )

    # --- Miscellaneous -----------------------------------------------------
    options: Dict = Field(default={}, description="")
    env: str = Field(
        default=DEFAULT_ENV,
        description="Environment to use in namespace when constructing URNs",
    )
    database_alias: Dict[str, str] = Field(
        default={},
        description="Can be used to change mapping for database names in superset to what you have in datahub",
    )

    @validator("connect_uri", "display_uri")
    def remove_trailing_slash(cls, v):
        """Pass the URI through ``config_clean.remove_trailing_slashes``."""
        cleaned = config_clean.remove_trailing_slashes(v)
        return cleaned

    @root_validator(skip_on_failure=True)
    def default_display_uri_to_connect_uri(cls, values):
        """Use ``connect_uri`` as ``display_uri`` when the latter is ``None``."""
        if values.get("display_uri") is None:
            values["display_uri"] = values.get("connect_uri")
        return values
|
|
|
|
|
|
@platform_name("Preset")
@config_class(PresetConfig)
@support_status(SupportStatus.CERTIFIED)
@capability(
    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
)
class PresetSource(SupersetSource):
    """
    Variation of the Superset plugin that works with Preset.io (Apache Superset SaaS).
    """

    config: PresetConfig
    report: SupersetSourceReport
    platform = "preset"

    def __init__(self, ctx: PipelineContext, config: PresetConfig):
        """Initialise the source through ``SupersetSource`` and pin Preset-specific state.

        Args:
            ctx: Pipeline context handed to the parent constructor.
            config: Preset configuration (endpoints and API credentials).
        """
        logger.info(f"ctx is {ctx}")

        super().__init__(ctx, config)
        # Re-assign after the parent constructor so the instance carries the
        # Preset config type, a fresh report and the "preset" platform name.
        self.config = config
        self.report = SupersetSourceReport()
        self.platform = "preset"

    def login(self):
        """Authenticate against the Preset manager API and build a session.

        Posts the configured API key/secret to ``{manager_uri}/v1/auth/``,
        stores the returned token on ``self.access_token`` and returns a
        ``requests.Session`` whose default headers carry that token as a
        bearer credential. A ``GET {connect_uri}/version`` probe is then
        issued; a non-OK probe response is only logged, and the session is
        still returned.

        Returns:
            The ``requests.Session`` pre-configured with auth headers.

        Raises:
            requests.exceptions.RequestException: If the auth request fails
                at the transport level or the server answers with an HTTP
                error status.
        """
        try:
            login_response = requests.post(
                f"{self.config.manager_uri}/v1/auth/",
                json={"name": self.config.api_key, "secret": self.config.api_secret},
            )
            # Reject 4xx/5xx here; otherwise bad credentials only show up
            # below as an opaque KeyError / JSON decode error.
            login_response.raise_for_status()
        except requests.exceptions.RequestException as e:
            logger.error(f"Failed to authenticate with Preset: {e}")
            raise

        self.access_token = login_response.json()["payload"]["access_token"]
        logger.debug("Got access token from Preset")

        requests_session = requests.Session()
        requests_session.headers.update(
            {
                "Authorization": f"Bearer {self.access_token}",
                "Content-Type": "application/json",
                "Accept": "*/*",
            }
        )

        # Test the connection (best effort: a failure is logged, not raised).
        test_response = requests_session.get(f"{self.config.connect_uri}/version")
        if not test_response.ok:
            logger.error("Unable to connect to workspace")

        return requests_session
|