fix(ingest): glue - ignore custom connectors (#3805)

This commit is contained in:
Kevin Hu 2022-01-05 15:12:50 -08:00 committed by GitHub
parent 9a237e9c92
commit 5b369447f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 31 additions and 17 deletions

View File

@ -38,22 +38,23 @@ sink:
Note that a `.` is used to denote nested fields in the YAML recipe.
| Field | Required | Default | Description |
| ----------------------------- | -------- | ------------ | ---------------------------------------------------------------------------------- |
| `aws_region` | ✅ | | AWS region code. |
| `env` | | `"PROD"` | Environment to use in namespace when constructing URNs. |
| `aws_access_key_id` | | Autodetected | See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html |
| `aws_secret_access_key` | | Autodetected | See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html |
| `aws_session_token` | | Autodetected | See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html |
| `aws_role` | | Autodetected | See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html |
| `extract_transforms` | | `True` | Whether to extract Glue transform jobs. |
| `database_pattern.allow` | | | List of regex patterns for databases to include in ingestion. |
| `database_pattern.deny` | | | List of regex patterns for databases to exclude from ingestion. |
| `database_pattern.ignoreCase` | | `True` | Whether to ignore case sensitivity during pattern matching. |
| `table_pattern.allow` | | | List of regex patterns for tables to include in ingestion. |
| `table_pattern.deny` | | | List of regex patterns for tables to exclude from ingestion. |
| `table_pattern.ignoreCase` | | `True` | Whether to ignore case sensitivity during pattern matching. |
| `underlying_platform` | | `glue` | Override for platform name. Allowed values - `glue`, `athena` |
| Field | Required | Default | Description |
| ------------------------------- | -------- | ------------ | ---------------------------------------------------------------------------------- |
| `aws_region` | ✅ | | AWS region code. |
| `env` | | `"PROD"` | Environment to use in namespace when constructing URNs. |
| `aws_access_key_id` | | Autodetected | See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html |
| `aws_secret_access_key` | | Autodetected | See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html |
| `aws_session_token` | | Autodetected | See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html |
| `aws_role` | | Autodetected | See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html |
| `extract_transforms` | | `True` | Whether to extract Glue transform jobs. |
| `database_pattern.allow` | | | List of regex patterns for databases to include in ingestion. |
| `database_pattern.deny` | | | List of regex patterns for databases to exclude from ingestion. |
| `database_pattern.ignoreCase` | | `True` | Whether to ignore case sensitivity during pattern matching. |
| `table_pattern.allow` | | | List of regex patterns for tables to include in ingestion. |
| `table_pattern.deny` | | | List of regex patterns for tables to exclude from ingestion. |
| `table_pattern.ignoreCase` | | `True` | Whether to ignore case sensitivity during pattern matching. |
| `underlying_platform` | | `glue` | Override for platform name. Allowed values - `glue`, `athena` |
| `ignore_unsupported_connectors` | | `True` | Whether to skip Glue job nodes that use unsupported (e.g. custom) connectors, logging a message for each one. If disabled, an error is raised when such a node is encountered. |
## Compatibility

View File

@ -1,3 +1,4 @@
import logging
import typing
from collections import defaultdict
from dataclasses import dataclass
@ -44,11 +45,14 @@ from datahub.metadata.schema_classes import (
OwnershipTypeClass,
)
logger = logging.getLogger(__name__)
class GlueSourceConfig(AwsSourceConfig):
extract_transforms: Optional[bool] = True
underlying_platform: Optional[str] = None
ignore_unsupported_connectors: Optional[bool] = True
@property
def glue_client(self):
@ -262,7 +266,16 @@ class GlueSource(Source):
else:
raise ValueError(f"Unrecognized Glue data object type: {node_args}")
if self.source_config.ignore_unsupported_connectors:
    # Unrecognized (e.g. custom connector) node: log it and carry on
    # instead of aborting. The format string must be the first argument
    # to logger.info; passing flow_urn there with the message as a stray
    # %-argument makes logging report a formatting error rather than
    # emitting the message.
    logger.info(
        "%s: Unrecognized Glue data object type: %s. Skipping.",
        flow_urn,
        node_args,
    )
else:
    # Strict mode: surface the unsupported node to the caller.
    raise ValueError(f"Unrecognized Glue data object type: {node_args}")
# otherwise, a node represents a transformation
else: