119 lines
3.9 KiB
Python

import logging
from typing import Dict, Optional
import requests
from pydantic.class_validators import root_validator, validator
from pydantic.fields import Field
from datahub.emitter.mce_builder import DEFAULT_ENV
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.api.decorators import (
SourceCapability,
SupportStatus,
capability,
config_class,
platform_name,
support_status,
)
from datahub.ingestion.source.state.stale_entity_removal_handler import (
StatefulStaleMetadataRemovalConfig,
)
from datahub.ingestion.source.superset import (
SupersetConfig,
SupersetSource,
SupersetSourceReport,
)
from datahub.utilities import config_clean
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
# Configuration model for the Preset source. Extends SupersetConfig with the
# Preset.io manager API URL and the API key/secret pair used to authenticate.
class PresetConfig(SupersetConfig):
    manager_uri: str = Field(
        default="https://api.app.preset.io", description="Preset.io API URL"
    )
    connect_uri: str = Field(default="", description="Preset workspace URL.")
    display_uri: Optional[str] = Field(
        default=None,
        description="optional URL to use in links (if `connect_uri` is only for ingestion)",
    )
    api_key: Optional[str] = Field(default=None, description="Preset.io API key.")
    api_secret: Optional[str] = Field(default=None, description="Preset.io API secret.")

    # Configuration for stateful ingestion
    stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field(
        default=None, description="Preset Stateful Ingestion Config."
    )

    options: Dict = Field(default={}, description="")
    env: str = Field(
        default=DEFAULT_ENV,
        description="Environment to use in namespace when constructing URNs",
    )
    database_alias: Dict[str, str] = Field(
        default={},
        description="Can be used to change mapping for database names in superset to what you have in datahub",
    )

    @validator("connect_uri", "display_uri")
    def remove_trailing_slash(cls, v):
        """Pass the URI through ``config_clean.remove_trailing_slashes``."""
        cleaned = config_clean.remove_trailing_slashes(v)
        return cleaned

    @root_validator(skip_on_failure=True)
    def default_display_uri_to_connect_uri(cls, values):
        """Use ``connect_uri`` as ``display_uri`` when the latter is ``None``."""
        if values.get("display_uri") is None:
            # No separate display URL configured: fall back to the connect URL.
            values["display_uri"] = values.get("connect_uri")
        return values
@platform_name("Preset")
@config_class(PresetConfig)
@support_status(SupportStatus.CERTIFIED)
@capability(
    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
)
class PresetSource(SupersetSource):
    """
    Variation of the Superset plugin that works with Preset.io (Apache Superset SaaS).
    """

    config: PresetConfig
    report: SupersetSourceReport
    platform = "preset"

    def __init__(self, ctx: PipelineContext, config: PresetConfig):
        """Initialise the source from the pipeline context and Preset config.

        Delegates to ``SupersetSource.__init__`` and then re-assigns
        ``config``, ``report`` and ``platform`` on the instance.
        """
        logger.info(f"ctx is {ctx}")

        super().__init__(ctx, config)
        self.config = config
        self.report = SupersetSourceReport()
        self.platform = "preset"

    def login(self) -> requests.Session:
        """Authenticate with the Preset manager API and build a session.

        Posts the configured API key/secret to ``{manager_uri}/v1/auth/``,
        stores the returned access token on ``self.access_token`` and returns
        a ``requests.Session`` whose default headers carry that token.

        Returns:
            A session pre-configured with the ``Authorization`` bearer header.

        Raises:
            requests.exceptions.RequestException: if the auth request cannot
                be sent or the manager API answers with an HTTP error status.
        """
        try:
            login_response = requests.post(
                f"{self.config.manager_uri}/v1/auth/",
                json={"name": self.config.api_key, "secret": self.config.api_secret},
            )
            # Surface rejected credentials (4xx/5xx) here, as a logged
            # HTTPError, instead of as an opaque KeyError/JSON error on the
            # token lookup below.
            login_response.raise_for_status()
        except requests.exceptions.RequestException as e:
            logger.error(f"Failed to authenticate with Preset: {e}")
            raise

        self.access_token = login_response.json()["payload"]["access_token"]
        logger.debug("Got access token from Preset")

        requests_session = requests.Session()
        requests_session.headers.update(
            {
                "Authorization": f"Bearer {self.access_token}",
                "Content-Type": "application/json",
                "Accept": "*/*",
            }
        )

        # Test the connection. A failed probe is only logged; the session is
        # still returned to the caller.
        test_response = requests_session.get(f"{self.config.connect_uri}/version")
        if not test_response.ok:
            logger.error("Unable to connect to workspace")
        return requests_session