Fixes #23416: Option To Opt Out of BigQuery Policy Tags Ingestion (#23532)

* fix: added includePolicyTags flag

* feat: added includePolicyTags
This commit is contained in:
Keshav Mohta 2025-09-29 18:24:10 +05:30 committed by GitHub
parent 5de12a8cef
commit 4528c0c1c4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 111 additions and 37 deletions

View File

@ -5,6 +5,7 @@ source:
config:
type: BigQuery
taxonomyProjectID: [ project-id-where-policy-tags-exist ]
# includePolicyTags: false
credentials:
gcpConfig:
type: service_account

View File

@ -153,6 +153,10 @@ def test_connection(
return policy_tags
def test_tags():
if not service_connection.includePolicyTags:
logger.info("'includePolicyTags' is set to false, so skipping this test.")
return None
taxonomy_project_ids = []
if engine.url.host:
taxonomy_project_ids.append(engine.url.host)

View File

@ -187,34 +187,12 @@ def get_columns(bq_schema):
"max_length": field.max_length,
"system_data_type": get_system_data_type(col_type),
"is_complex": is_complex_type(str(col_type)),
"policy_tags": None,
"policy_tags": field.policy_tags,
}
if getattr(field, "fields", None):
# Nested Columns available
col_obj["children"] = get_columns(field.fields)
try:
if field.policy_tags:
policy_tag_name = field.policy_tags.names[0]
taxonomy_name = (
policy_tag_name.split("/policyTags/")[0] if policy_tag_name else ""
)
if not taxonomy_name:
raise NotImplementedError(
f"Taxonomy Name not present for {field.name}"
)
col_obj["taxonomy"] = (
PolicyTagManagerClient()
.get_taxonomy(name=taxonomy_name)
.display_name
)
col_obj["policy_tags"] = (
PolicyTagManagerClient()
.get_policy_tag(name=policy_tag_name)
.display_name
)
except Exception as exc:
logger.debug(traceback.format_exc())
logger.warning(f"Skipping Policy Tag: {exc}")
col_list.append(col_obj)
return col_list
@ -513,6 +491,13 @@ class BigquerySource(LifeCycleQueryMixin, CommonDbSourceService, MultiDBSource):
metadata=self.metadata,
system_tags=True,
)
if not self.service_connection.includePolicyTags:
logger.info(
"'includePolicyTags' is set to false so skipping policy tag ingestion"
)
return
# Fetching policy tags on the column level
list_project_ids = [self.context.get().database]
if not self.service_connection.taxonomyProjectID:
@ -712,6 +697,32 @@ class BigquerySource(LifeCycleQueryMixin, CommonDbSourceService, MultiDBSource):
table_tag_labels.append(tag_label)
return table_tag_labels
def get_policy_tags_for_column(self, column: dict) -> dict:
    """
    Resolve the raw policy tag reference of a column into display names.

    When ``column["policy_tags"]`` is truthy, the first entry of its
    ``names`` attribute is used as the policy tag resource name
    (presumably a BigQuery ``PolicyTagList`` -- confirm against
    ``get_columns``). The taxonomy resource name is the part of that
    name preceding ``/policyTags/``. Both are looked up through
    ``PolicyTagManagerClient`` and their ``display_name`` values are
    written to ``column["taxonomy"]`` and ``column["policy_tags"]``.

    The column is only updated once BOTH lookups succeeded, so it is never
    left with a resolved taxonomy next to an unresolved policy tag. If
    anything fails, the failure is logged, ``column["policy_tags"]`` is
    reset to ``None`` and no ``taxonomy`` key is added; callers should
    re-check ``column["policy_tags"]`` before reading ``column["taxonomy"]``.

    Args:
        column: column dictionary; mutated in place.

    Returns:
        The same ``column`` dictionary, in every case.
    """
    raw_policy_tags = column.get("policy_tags")
    if not raw_policy_tags:
        # Nothing to resolve; hand the column back untouched.
        return column
    try:
        # Only the first policy tag attached to the column is considered.
        policy_tag_name = raw_policy_tags.names[0]
        taxonomy_name = (
            policy_tag_name.split("/policyTags/")[0] if policy_tag_name else ""
        )
        if not taxonomy_name:
            raise NotImplementedError(
                f"Taxonomy Name not present for {column['name']}"
            )
        # One client serves both lookups.
        client = PolicyTagManagerClient()
        taxonomy_display_name = client.get_taxonomy(name=taxonomy_name).display_name
        policy_tag_display_name = client.get_policy_tag(
            name=policy_tag_name
        ).display_name
        # Assign only after both calls succeeded to keep the update atomic.
        column["taxonomy"] = taxonomy_display_name
        column["policy_tags"] = policy_tag_display_name
    except Exception as exc:
        logger.debug(traceback.format_exc())
        logger.warning(f"Skipping Policy Tag: {exc}")
        # Best effort: treat the column as untagged instead of leaking the
        # unresolved raw policy tag object to downstream consumers.
        column["policy_tags"] = None
    return column
def get_column_tag_labels(
self, table_name: str, column: dict
) -> Optional[List[TagLabel]]:
@ -719,12 +730,14 @@ class BigquerySource(LifeCycleQueryMixin, CommonDbSourceService, MultiDBSource):
This will only get executed if the tags context
is properly informed
"""
if column.get("policy_tags"):
if self.service_connection.includePolicyTags and column.get("policy_tags"):
self.get_policy_tags_for_column(column)
return fetch_tag_labels_om(
metadata=self.metadata,
tags=[column["policy_tags"]],
classification_name=column["taxonomy"],
include_tags=self.source_config.includeTags,
include_tags=self.source_config.includeTags
and self.service_connection.includePolicyTags,
)
return None

View File

@ -53,6 +53,12 @@
"description": "GCP Credentials",
"$ref": "../../../../security/credentials/gcpCredentials.json"
},
"includePolicyTags": {
"title": "Include Policy Tags",
"description": "Option to include policy tags as part of column description.",
"type": "boolean",
"default": true
},
"taxonomyProjectID": {
"title": "Taxonomy Project IDs",
"description": "Project IDs used to fetch policy tags",

View File

@ -72,6 +72,12 @@ $$section
Credentials Type is the type of the account, for a service account the value of this field is `service_account`. To fetch this key, look for the value associated with the `type` key in the service account key file.
$$
$$section
### Include Policy Tags $(id="includePolicyTags")
Enable this to ingest BigQuery policy tags. Policy tags are only ingested when the `Include Tags` option is also enabled in the ingestion agent. If `Include Policy Tags` is disabled, the agent will only ingest labels according to the `Include Tags` setting.
$$
$$section
### Billing Project ID $(id="billingProjectId")

View File

@ -611,7 +611,11 @@ export interface ConfigObject {
*
* Apache Ranger Admin URL.
*/
hostPort?: string;
hostPort?: string;
/**
* Option to include policy tags as part of column description.
*/
includePolicyTags?: boolean;
sampleDataStorageConfig?: SampleDataStorageConfig;
/**
* Regex to only include/exclude schemas that matches the pattern.

View File

@ -235,7 +235,11 @@ export interface ConfigObject {
*
* ServiceNow instance URL (e.g., https://your-instance.service-now.com)
*/
hostPort?: string;
hostPort?: string;
/**
* Option to include policy tags as part of column description.
*/
includePolicyTags?: boolean;
sampleDataStorageConfig?: SampleDataStorageConfig;
/**
* Regex to only include/exclude schemas that matches the pattern.

View File

@ -3132,7 +3132,11 @@ export interface ConfigObject {
/**
* Regex to only include/exclude databases that matches the pattern.
*/
databaseFilterPattern?: FilterPattern;
databaseFilterPattern?: FilterPattern;
/**
* Option to include policy tags as part of column description.
*/
includePolicyTags?: boolean;
sampleDataStorageConfig?: SampleDataStorageConfig;
/**
* Regex to only include/exclude schemas that matches the pattern.

View File

@ -493,7 +493,11 @@ export interface ConfigObject {
*
* Apache Ranger Admin URL.
*/
hostPort?: string;
hostPort?: string;
/**
* Option to include policy tags as part of column description.
*/
includePolicyTags?: boolean;
sampleDataStorageConfig?: SampleDataStorageConfig;
/**
* Regex to only include/exclude schemas that matches the pattern.

View File

@ -1053,7 +1053,11 @@ export interface ConfigObject {
*
* Apache Ranger Admin URL.
*/
hostPort?: string;
hostPort?: string;
/**
* Option to include policy tags as part of column description.
*/
includePolicyTags?: boolean;
sampleDataStorageConfig?: SampleDataStorageConfig;
/**
* Regex to only include/exclude schemas that matches the pattern.

View File

@ -35,7 +35,11 @@ export interface BigQueryConnection {
/**
* BigQuery APIs URL.
*/
hostPort?: string;
hostPort?: string;
/**
* Option to include policy tags as part of column description.
*/
includePolicyTags?: boolean;
sampleDataStorageConfig?: SampleDataStorageConfig;
/**
* Regex to only include/exclude schemas that matches the pattern.

View File

@ -938,7 +938,11 @@ export interface ConfigObject {
/**
* Regex to only include/exclude databases that matches the pattern.
*/
databaseFilterPattern?: FilterPattern;
databaseFilterPattern?: FilterPattern;
/**
* Option to include policy tags as part of column description.
*/
includePolicyTags?: boolean;
sampleDataStorageConfig?: SampleDataStorageConfig;
/**
* Regex to only include/exclude schemas that matches the pattern.

View File

@ -354,7 +354,11 @@ export interface ConfigObject {
*
* ServiceNow instance URL (e.g., https://your-instance.service-now.com)
*/
hostPort?: string;
hostPort?: string;
/**
* Option to include policy tags as part of column description.
*/
includePolicyTags?: boolean;
sampleDataStorageConfig?: SampleDataStorageConfig;
/**
* Regex to only include/exclude schemas that matches the pattern.

View File

@ -3649,7 +3649,11 @@ export interface ConfigObject {
/**
* Regex to only include/exclude databases that matches the pattern.
*/
databaseFilterPattern?: FilterPattern;
databaseFilterPattern?: FilterPattern;
/**
* Option to include policy tags as part of column description.
*/
includePolicyTags?: boolean;
sampleDataStorageConfig?: SampleDataStorageConfig;
/**
* Regex to only include/exclude schemas that matches the pattern.

View File

@ -982,7 +982,11 @@ export interface ConfigObject {
/**
* Regex to only include/exclude databases that matches the pattern.
*/
databaseFilterPattern?: FilterPattern;
databaseFilterPattern?: FilterPattern;
/**
* Option to include policy tags as part of column description.
*/
includePolicyTags?: boolean;
sampleDataStorageConfig?: SampleDataStorageConfig;
/**
* Regex to only include/exclude schemas that matches the pattern.

View File

@ -1018,7 +1018,11 @@ export interface ConfigObject {
/**
* Regex to only include/exclude databases that matches the pattern.
*/
databaseFilterPattern?: FilterPattern;
databaseFilterPattern?: FilterPattern;
/**
* Option to include policy tags as part of column description.
*/
includePolicyTags?: boolean;
sampleDataStorageConfig?: SampleDataStorageConfig;
/**
* Regex to only include/exclude schemas that matches the pattern.