ISSUE #2033-C - Support For DBX Exporter + Minor Fix to Status (#23313)

* feat: added config support for databricks

* fix: allow incrementing the record count directly without storing each element

* Update generated TypeScript types

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Teddy 2025-09-10 12:04:46 +02:00 committed by GitHub
parent 39cb165164
commit f3cb001d2b
5 changed files with 63 additions and 12 deletions


@@ -53,6 +53,7 @@ class Status(BaseModel):
     )
     records: Annotated[List[Any], Field(default_factory=list)]
+    record_count: int = Field(default=0)
     updated_records: Annotated[List[Any], Field(default_factory=list)]
     warnings: Annotated[List[Any], Field(default_factory=list)]
     filtered: Annotated[List[Dict[str, str]], Field(default_factory=list)]

@@ -75,6 +76,9 @@ class Status(BaseModel):
         if log_name := get_log_name(record):
             self.updated_records.append(log_name)
 
+    def increment_record_count(self, increment: int = 1) -> None:
+        self.record_count += increment
+
     def warning(self, key: str, reason: str) -> None:
         self.warnings.append({key: reason})

@@ -108,8 +112,9 @@ class Status(BaseModel):
         self.failures.extend(failures)
 
     def calculate_success(self) -> float:
+        record_count = self.record_count if self.record_count > 0 else len(self.records)
         source_success = max(
-            len(self.records) + len(self.updated_records), 1
+            record_count + len(self.updated_records), 1
         )  # To avoid ZeroDivisionError using minimum value as 1
         source_failed = len(self.failures)
         return round(source_success * 100 / (source_success + source_failed), 2)
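The counter exists so a high-volume exporter can report progress without appending every element to `records`; `calculate_success` prefers `record_count` whenever it is non-zero and only falls back to `len(self.records)` otherwise. A minimal sketch of the resulting arithmetic, assuming `Status` is imported from wherever the class above lives (the module path is not shown in this diff, and the all-defaults constructor is an assumption):

    status = Status()

    # Report 10_000 processed rows without materializing 10_000
    # entries in status.records:
    status.increment_record_count(10_000)

    # Four hypothetical failures recorded elsewhere in the run:
    status.failures.extend(["row rejected"] * 4)

    # source_success = max(10_000 + 0, 1) -> 10_000; source_failed = 4
    # 10_000 * 100 / (10_000 + 4) rounds to 99.96
    assert status.calculate_success() == 99.96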


@@ -77,7 +77,9 @@ class Summary(StepSummary):
         """Compute summary from Step"""
         return Summary(
             name=step.name,
-            records=len(step.status.records),
+            records=step.status.record_count
+            if step.status.record_count > 0
+            else len(step.status.records),
             updated_records=len(step.status.updated_records),
             warnings=len(step.status.warnings),
             errors=len(step.status.failures),


@@ -324,8 +324,15 @@ class BaseWorkflow(ABC, WorkflowStatusMixin):
         """
         try:
             for step in self.workflow_steps():
+                record_count: int = (
+                    step.status.record_count
+                    if step.status.record_count > 0
+                    else len(step.status.records)
+                )
                 logger.info(
-                    f"{step.name}: Processed {len(step.status.records)} records,"
+                    f"{step.name}: Processed {record_count} records,"
                     f" updated {len(step.status.updated_records)} records,"
                     f" filtered {len(step.status.filtered)} records,"
                     f" found {len(step.status.failures)} errors"


@@ -74,6 +74,9 @@
             "oneOf": [
                 {
                     "$ref": "metadataExporterConnectors/snowflakeConnection.json"
+                },
+                {
+                    "$ref": "metadataExporterConnectors/databricksConnection.json"
                 }
             ]
         },
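The contents of databricksConnection.json do not appear in this diff, but the generated TypeScript below implies the shape a Databricks connectionConfig would take. A hypothetical example with every value a placeholder, written as a Python dict to match the ingestion code above:

    databricks_connection_config = {
        "type": "Databricks",              # from the Type enum below
        "scheme": "databricks+connector",  # from the Scheme enum below
        # Placeholder host, path, and token; real values come from
        # your Databricks workspace:
        "hostPort": "adb-1234567890123456.7.azuredatabricks.net:443",
        "httpPath": "/sql/1.0/warehouses/abc123",
        "token": "<personal-access-token>",
        "catalog": "hive_metastore",
        "databaseSchema": "default",
        "connectionTimeout": 120,
    }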


@@ -22,7 +22,7 @@ export interface MetadataExporterAppConfig {
     /**
      * Connection details for the Metadata Exporter Application.
      */
-    connectionConfig: SnowflakeConnection;
+    connectionConfig: Connection;
     /**
      * List of event types to export.
      */

@@ -46,13 +46,15 @@ export interface MetadataExporterAppConfig {
  * Connection details for the Metadata Exporter Application.
  *
  * Snowflake Connection Config
+ *
+ * Databricks Connection Config
  */
-export interface SnowflakeConnection {
+export interface Connection {
     /**
      * If the Snowflake URL is https://xyz1234.us-east-1.gcp.snowflakecomputing.com, then the
      * account is xyz1234.us-east-1.gcp
      */
-    account: string;
+    account?: string;
     /**
      * Optional configuration for ingestion to keep the client session active in case the
      * ingestion process runs for longer durations.

@@ -86,7 +88,7 @@ export interface SnowflakeConnection {
     /**
      * SQLAlchemy driver scheme options.
      */
-    scheme?: SnowflakeScheme;
+    scheme?: Scheme;
     /**
      * Snowflake Passphrase Key used with Private Key
      */

@@ -94,22 +96,53 @@ export interface SnowflakeConnection {
     /**
      * Service Type
      */
-    type?: SnowflakeType;
+    type?: Type;
     /**
      * Username to connect to Snowflake. This user should have privileges to read all the
      * metadata in Snowflake.
      */
-    username: string;
+    username?: string;
     /**
      * Snowflake warehouse.
      */
-    warehouse: string;
+    warehouse?: string;
+    /**
+     * Catalog of the data source(Example: hive_metastore). This is optional parameter, if you
+     * would like to restrict the metadata reading to a single catalog. When left blank,
+     * OpenMetadata Ingestion attempts to scan all the catalog.
+     */
+    catalog?: string;
+    /**
+     * The maximum amount of time (in seconds) to wait for a successful connection to the data
+     * source. If the connection attempt takes longer than this timeout period, an error will be
+     * returned.
+     */
+    connectionTimeout?: number;
+    /**
+     * Database Schema of the data source. This is optional parameter, if you would like to
+     * restrict the metadata reading to a single schema. When left blank, OpenMetadata Ingestion
+     * attempts to scan all the schemas.
+     */
+    databaseSchema?: string;
+    /**
+     * Host and port of the Databricks service.
+     */
+    hostPort?: string;
+    /**
+     * Databricks compute resources URL.
+     */
+    httpPath?: string;
+    /**
+     * Generated Token to connect to Databricks.
+     */
+    token?: string;
 }
 
 /**
  * SQLAlchemy driver scheme options.
  */
-export enum SnowflakeScheme {
+export enum Scheme {
+    DatabricksConnector = "databricks+connector",
     Snowflake = "snowflake",
 }

@@ -118,7 +151,8 @@ export enum SnowflakeScheme {
  *
  * Service type.
  */
-export enum SnowflakeType {
+export enum Type {
+    Databricks = "Databricks",
     Snowflake = "Snowflake",
 }