mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-11 00:42:29 +00:00
feat(ingest): aws - support extra args to role config (#6031)
This commit is contained in:
parent
27f28019de
commit
3a6d1d2bf1
@ -333,7 +333,7 @@ mypy_stubs = {
|
|||||||
"types-cachetools",
|
"types-cachetools",
|
||||||
# versions 0.1.13 and 0.1.14 seem to have issues
|
# versions 0.1.13 and 0.1.14 seem to have issues
|
||||||
"types-click==0.1.12",
|
"types-click==0.1.12",
|
||||||
"boto3-stubs[s3,glue,sagemaker]",
|
"boto3-stubs[s3,glue,sagemaker,sts]",
|
||||||
"types-tabulate",
|
"types-tabulate",
|
||||||
# avrogen package requires this
|
# avrogen package requires this
|
||||||
"types-pytz",
|
"types-pytz",
|
||||||
|
|||||||
@ -19,6 +19,16 @@ class ConfigModel(BaseModel):
|
|||||||
) # needed to allow cached_property to work. See https://github.com/samuelcolvin/pydantic/issues/1241 for more info.
|
) # needed to allow cached_property to work. See https://github.com/samuelcolvin/pydantic/issues/1241 for more info.
|
||||||
|
|
||||||
|
|
||||||
|
class PermissiveConfigModel(ConfigModel):
|
||||||
|
# A permissive config model that allows extra fields.
|
||||||
|
# This is useful for cases where we want to strongly type certain fields,
|
||||||
|
# but still allow the user to pass in arbitrary fields that we don't care about.
|
||||||
|
# It is usually used for argument bags that are passed through to third-party libraries.
|
||||||
|
|
||||||
|
class Config:
|
||||||
|
extra = Extra.allow
|
||||||
|
|
||||||
|
|
||||||
class TransformerSemantics(ConfigEnum):
|
class TransformerSemantics(ConfigEnum):
|
||||||
"""Describes semantics for aspect changes"""
|
"""Describes semantics for aspect changes"""
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,3 @@
|
|||||||
from functools import reduce
|
|
||||||
from typing import TYPE_CHECKING, Dict, List, Optional, Union
|
from typing import TYPE_CHECKING, Dict, List, Optional, Union
|
||||||
|
|
||||||
import boto3
|
import boto3
|
||||||
@ -7,21 +6,39 @@ from botocore.config import Config
|
|||||||
from botocore.utils import fix_s3_host
|
from botocore.utils import fix_s3_host
|
||||||
from pydantic.fields import Field
|
from pydantic.fields import Field
|
||||||
|
|
||||||
from datahub.configuration.common import AllowDenyPattern, ConfigModel
|
from datahub.configuration.common import (
|
||||||
|
AllowDenyPattern,
|
||||||
|
ConfigModel,
|
||||||
|
PermissiveConfigModel,
|
||||||
|
)
|
||||||
from datahub.configuration.source_common import EnvBasedSourceConfigBase
|
from datahub.configuration.source_common import EnvBasedSourceConfigBase
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
|
||||||
from mypy_boto3_glue import GlueClient
|
from mypy_boto3_glue import GlueClient
|
||||||
from mypy_boto3_s3 import S3Client, S3ServiceResource
|
from mypy_boto3_s3 import S3Client, S3ServiceResource
|
||||||
from mypy_boto3_sagemaker import SageMakerClient
|
from mypy_boto3_sagemaker import SageMakerClient
|
||||||
|
from mypy_boto3_sts import STSClient
|
||||||
|
|
||||||
|
|
||||||
|
class AwsAssumeRoleConfig(PermissiveConfigModel):
|
||||||
|
# Using the PermissiveConfigModel to allow the user to pass additional arguments.
|
||||||
|
|
||||||
|
RoleArn: str = Field(
|
||||||
|
description="ARN of the role to assume.",
|
||||||
|
)
|
||||||
|
ExternalId: Optional[str] = Field(
|
||||||
|
None,
|
||||||
|
description="External ID to use when assuming the role.",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def assume_role(
|
def assume_role(
|
||||||
role_arn: str, aws_region: str, credentials: Optional[dict] = None
|
role: AwsAssumeRoleConfig,
|
||||||
|
aws_region: str,
|
||||||
|
credentials: Optional[dict] = None,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
credentials = credentials or {}
|
credentials = credentials or {}
|
||||||
sts_client = boto3.client(
|
sts_client: "STSClient" = boto3.client(
|
||||||
"sts",
|
"sts",
|
||||||
region_name=aws_region,
|
region_name=aws_region,
|
||||||
aws_access_key_id=credentials.get("AccessKeyId"),
|
aws_access_key_id=credentials.get("AccessKeyId"),
|
||||||
@ -29,10 +46,20 @@ def assume_role(
|
|||||||
aws_session_token=credentials.get("SessionToken"),
|
aws_session_token=credentials.get("SessionToken"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
assume_role_args: dict = {
|
||||||
|
**dict(
|
||||||
|
RoleSessionName="DatahubIngestionSource",
|
||||||
|
),
|
||||||
|
**{k: v for k, v in role.dict().items() if v is not None},
|
||||||
|
}
|
||||||
|
|
||||||
assumed_role_object = sts_client.assume_role(
|
assumed_role_object = sts_client.assume_role(
|
||||||
RoleArn=role_arn, RoleSessionName="DatahubIngestionSource"
|
**assume_role_args,
|
||||||
)
|
)
|
||||||
return assumed_role_object["Credentials"]
|
return dict(assumed_role_object["Credentials"])
|
||||||
|
|
||||||
|
|
||||||
|
AUTODETECT_CREDENTIALS_DOC_LINK = "Can be auto-detected, see https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html for details."
|
||||||
|
|
||||||
|
|
||||||
class AwsConnectionConfig(ConfigModel):
|
class AwsConnectionConfig(ConfigModel):
|
||||||
@ -47,25 +74,27 @@ class AwsConnectionConfig(ConfigModel):
|
|||||||
|
|
||||||
aws_access_key_id: Optional[str] = Field(
|
aws_access_key_id: Optional[str] = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description="Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html",
|
description=f"AWS access key ID. {AUTODETECT_CREDENTIALS_DOC_LINK}",
|
||||||
)
|
)
|
||||||
aws_secret_access_key: Optional[str] = Field(
|
aws_secret_access_key: Optional[str] = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description="Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html",
|
description=f"AWS secret access key. {AUTODETECT_CREDENTIALS_DOC_LINK}",
|
||||||
)
|
)
|
||||||
aws_session_token: Optional[str] = Field(
|
aws_session_token: Optional[str] = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description="Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html",
|
description=f"AWS session token. {AUTODETECT_CREDENTIALS_DOC_LINK}",
|
||||||
)
|
)
|
||||||
aws_role: Optional[Union[str, List[str]]] = Field(
|
aws_role: Optional[Union[str, List[Union[str, AwsAssumeRoleConfig]]]] = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description="Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html",
|
description="AWS roles to assume. If using the string format, the role ARN can be specified directly. "
|
||||||
|
"If using the object format, the role can be specified in the RoleArn field and additional available arguments are documented at https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts.html?highlight=assume_role#STS.Client.assume_role",
|
||||||
)
|
)
|
||||||
aws_profile: Optional[str] = Field(
|
aws_profile: Optional[str] = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description="Named AWS profile to use, if not set the default will be used",
|
description="Named AWS profile to use. Only used if access key / secret are unset. If not set the default will be used",
|
||||||
)
|
)
|
||||||
aws_region: str = Field(description="AWS region code.")
|
aws_region: str = Field(description="AWS region code.")
|
||||||
|
|
||||||
aws_endpoint_url: Optional[str] = Field(
|
aws_endpoint_url: Optional[str] = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description="Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html",
|
description="Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html",
|
||||||
@ -75,43 +104,58 @@ class AwsConnectionConfig(ConfigModel):
|
|||||||
description="Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html",
|
description="Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _normalized_aws_roles(self) -> List[AwsAssumeRoleConfig]:
|
||||||
|
if not self.aws_role:
|
||||||
|
return []
|
||||||
|
elif isinstance(self.aws_role, str):
|
||||||
|
return [AwsAssumeRoleConfig(RoleArn=self.aws_role)]
|
||||||
|
else:
|
||||||
|
assert isinstance(self.aws_role, list)
|
||||||
|
return [
|
||||||
|
AwsAssumeRoleConfig(RoleArn=role) if isinstance(role, str) else role
|
||||||
|
for role in self.aws_role
|
||||||
|
]
|
||||||
|
|
||||||
def get_session(self) -> Session:
|
def get_session(self) -> Session:
|
||||||
if (
|
if self.aws_access_key_id and self.aws_secret_access_key:
|
||||||
self.aws_access_key_id
|
session = Session(
|
||||||
and self.aws_secret_access_key
|
|
||||||
and self.aws_session_token
|
|
||||||
):
|
|
||||||
return Session(
|
|
||||||
aws_access_key_id=self.aws_access_key_id,
|
aws_access_key_id=self.aws_access_key_id,
|
||||||
aws_secret_access_key=self.aws_secret_access_key,
|
aws_secret_access_key=self.aws_secret_access_key,
|
||||||
aws_session_token=self.aws_session_token,
|
aws_session_token=self.aws_session_token,
|
||||||
region_name=self.aws_region,
|
region_name=self.aws_region,
|
||||||
)
|
)
|
||||||
elif self.aws_access_key_id and self.aws_secret_access_key:
|
elif self.aws_profile:
|
||||||
return Session(
|
session = Session(
|
||||||
aws_access_key_id=self.aws_access_key_id,
|
region_name=self.aws_region, profile_name=self.aws_profile
|
||||||
aws_secret_access_key=self.aws_secret_access_key,
|
|
||||||
region_name=self.aws_region,
|
|
||||||
)
|
)
|
||||||
elif self.aws_role:
|
|
||||||
if isinstance(self.aws_role, str):
|
|
||||||
credentials = assume_role(self.aws_role, self.aws_region)
|
|
||||||
else:
|
else:
|
||||||
credentials = reduce(
|
# Use boto3's credential autodetection.
|
||||||
lambda new_credentials, role_arn: assume_role(
|
session = Session(region_name=self.aws_region)
|
||||||
role_arn, self.aws_region, new_credentials
|
|
||||||
),
|
if self._normalized_aws_roles():
|
||||||
self.aws_role,
|
# Use existing session credentials to start the chain of role assumption.
|
||||||
{},
|
current_credentials = session.get_credentials()
|
||||||
|
credentials = {
|
||||||
|
"AccessKeyId": current_credentials.access_key,
|
||||||
|
"SecretAccessKey": current_credentials.secret_key,
|
||||||
|
"SessionToken": current_credentials.token,
|
||||||
|
}
|
||||||
|
|
||||||
|
for role in self._normalized_aws_roles():
|
||||||
|
credentials = assume_role(
|
||||||
|
role,
|
||||||
|
self.aws_region,
|
||||||
|
credentials=credentials,
|
||||||
)
|
)
|
||||||
return Session(
|
|
||||||
|
session = Session(
|
||||||
aws_access_key_id=credentials["AccessKeyId"],
|
aws_access_key_id=credentials["AccessKeyId"],
|
||||||
aws_secret_access_key=credentials["SecretAccessKey"],
|
aws_secret_access_key=credentials["SecretAccessKey"],
|
||||||
aws_session_token=credentials["SessionToken"],
|
aws_session_token=credentials["SessionToken"],
|
||||||
region_name=self.aws_region,
|
region_name=self.aws_region,
|
||||||
)
|
)
|
||||||
else:
|
|
||||||
return Session(region_name=self.aws_region, profile_name=self.aws_profile)
|
return session
|
||||||
|
|
||||||
def get_credentials(self) -> Dict[str, str]:
|
def get_credentials(self) -> Dict[str, str]:
|
||||||
credentials = self.get_session().get_credentials()
|
credentials = self.get_session().get_credentials()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user