mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-29 10:57:52 +00:00
fix(ingest): glue - ignore custom connectors (#3805)
This commit is contained in:
parent
9a237e9c92
commit
5b369447f4
@ -38,22 +38,23 @@ sink:
|
||||
|
||||
Note that a `.` is used to denote nested fields in the YAML recipe.
|
||||
|
||||
| Field | Required | Default | Description |
|
||||
| ----------------------------- | -------- | ------------ | ---------------------------------------------------------------------------------- |
|
||||
| `aws_region` | ✅ | | AWS region code. |
|
||||
| `env` | | `"PROD"` | Environment to use in namespace when constructing URNs. |
|
||||
| `aws_access_key_id` | | Autodetected | See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html |
|
||||
| `aws_secret_access_key` | | Autodetected | See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html |
|
||||
| `aws_session_token` | | Autodetected | See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html |
|
||||
| `aws_role` | | Autodetected | See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html |
|
||||
| `extract_transforms` | | `True` | Whether to extract Glue transform jobs. |
|
||||
| `database_pattern.allow` | | | List of regex patterns for databases to include in ingestion. |
|
||||
| `database_pattern.deny` | | | List of regex patterns for databases to exclude from ingestion. |
|
||||
| `database_pattern.ignoreCase` | | `True` | Whether to ignore case sensitivity during pattern matching. |
|
||||
| `table_pattern.allow` | | | List of regex patterns for tables to include in ingestion. |
|
||||
| `table_pattern.deny` | | | List of regex patterns for tables to exclude from ingestion. |
|
||||
| `table_pattern.ignoreCase` | | `True` | Whether to ignore case sensitivity during pattern matching. |
|
||||
| `underlying_platform` | | `glue` | Override for platform name. Allowed values - `glue`, `athena` |
|
||||
| Field | Required | Default | Description |
|
||||
| ------------------------------- | -------- | ------------ | ---------------------------------------------------------------------------------- |
|
||||
| `aws_region` | ✅ | | AWS region code. |
|
||||
| `env` | | `"PROD"` | Environment to use in namespace when constructing URNs. |
|
||||
| `aws_access_key_id` | | Autodetected | See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html |
|
||||
| `aws_secret_access_key` | | Autodetected | See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html |
|
||||
| `aws_session_token` | | Autodetected | See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html |
|
||||
| `aws_role` | | Autodetected | See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html |
|
||||
| `extract_transforms` | | `True` | Whether to extract Glue transform jobs. |
|
||||
| `database_pattern.allow` | | | List of regex patterns for databases to include in ingestion. |
|
||||
| `database_pattern.deny` | | | List of regex patterns for databases to exclude from ingestion. |
|
||||
| `database_pattern.ignoreCase` | | `True` | Whether to ignore case sensitivity during pattern matching. |
|
||||
| `table_pattern.allow` | | | List of regex patterns for tables to include in ingestion. |
|
||||
| `table_pattern.deny` | | | List of regex patterns for tables to exclude from ingestion. |
|
||||
| `table_pattern.ignoreCase` | | `True` | Whether to ignore case sensitivity during pattern matching. |
|
||||
| `underlying_platform` | | `glue` | Override for platform name. Allowed values - `glue`, `athena` |
|
||||
| `ignore_unsupported_connectors` | | `True` | Whether to skip (and log) unsupported connectors. If disabled, a `ValueError` is raised. |
|
||||
|
||||
## Compatibility
|
||||
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
import logging
|
||||
import typing
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
@ -44,11 +45,14 @@ from datahub.metadata.schema_classes import (
|
||||
OwnershipTypeClass,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class GlueSourceConfig(AwsSourceConfig):
|
||||
|
||||
extract_transforms: Optional[bool] = True
|
||||
underlying_platform: Optional[str] = None
|
||||
ignore_unsupported_connectors: Optional[bool] = True
|
||||
|
||||
@property
|
||||
def glue_client(self):
|
||||
@ -262,7 +266,16 @@ class GlueSource(Source):
|
||||
|
||||
else:
|
||||
|
||||
raise ValueError(f"Unrecognized Glue data object type: {node_args}")
|
||||
if self.source_config.ignore_unsupported_connectors:
|
||||
|
||||
# The first positional argument of logger.info is the %-format string.
# Passing flow_urn there with the message as an argument makes record
# formatting fail, so use an explicit format and pass both values lazily.
logger.info(
"%s: Unrecognized Glue data object type: %s. Skipping.",
flow_urn,
node_args,
)
|
||||
|
||||
else:
|
||||
|
||||
raise ValueError(f"Unrecognized Glue data object type: {node_args}")
|
||||
|
||||
# otherwise, a node represents a transformation
|
||||
else:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user