Fixes #15696: Added Powerbi filter query (#19912)

* Powerbi filter query

* error handling

* Added test and minor changes

* Checkstyle

* resolved comments

---------

Co-authored-by: Akash Verma <akashverma@Akashs-MacBook-Pro-2.local>
This commit is contained in:
Akash Verma 2025-03-06 18:24:57 +05:30 committed by GitHub
parent f29bad1c1c
commit af9d7c9fa3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 153 additions and 3 deletions

View File

@ -23,6 +23,7 @@ from pydantic import BaseModel, ConfigDict
from metadata.generated.schema.entity.services.connections.dashboard.powerBIConnection import (
PowerBIConnection,
)
from metadata.generated.schema.type.filterPattern import FilterPattern
from metadata.ingestion.api.steps import InvalidSourceException
from metadata.ingestion.ometa.client import REST, ClientConfig
from metadata.ingestion.source.dashboard.powerbi.file_client import PowerBiFileClient
@ -43,6 +44,7 @@ from metadata.ingestion.source.dashboard.powerbi.models import (
Workspaces,
WorkSpaceScanResponse,
)
from metadata.utils.filters import validate_regex
from metadata.utils.logger import utils_logger
# Module-level logger; the client methods below use it to emit warnings.
logger = utils_logger()
@ -253,8 +255,91 @@ class PowerBiApiClient:
return None
def regex_to_odata_condition(self, regex: str) -> str:
    """
    Convert a regex pattern to an OData filter condition on ``name``.

    Only the anchors ``^`` / ``$`` and the ``.*`` wildcard are interpreted;
    every other character is copied into the OData string literal as-is
    (single quotes are escaped by doubling them).

    Mapping:
        ``^abc$``       -> ``trim(name) eq 'abc'``
        ``^abc.*xyz$``  -> ``(startswith(name, 'abc') and endswith(name, 'xyz'))``
        ``^abc.*$``     -> ``startswith(name, 'abc')``
        ``^.*xyz$``     -> ``endswith(name, 'xyz')``
        ``^abc...``     -> ``startswith(name, 'abc')``
        ``...xyz$``     -> ``endswith(name, 'xyz')``
        anything else   -> ``contains(name, '<first literal segment>')``

    Patterns with several ``.*`` wildcards are approximated by a condition
    that is broader than the regex (middle segments are ignored).
    NOTE(review): a caller that negates such a broadened condition will
    drop more names than the regex itself would -- confirm this is
    acceptable for exclude patterns.

    Args:
        regex: Pattern to translate.

    Returns:
        The OData condition, or an empty string when the pattern is empty,
        imposes no constraint (e.g. ``^.*$``), or could not be converted.
    """
    try:
        # Handle empty pattern
        if not regex:
            return ""

        def quote(text: str) -> str:
            # OData string literals escape a single quote by doubling it.
            return "'" + text.replace("'", "''") + "'"

        anchored_start = regex.startswith("^")
        anchored_end = regex.endswith("$")

        # Strip the anchors so only literal text and wildcards remain.
        body = regex[1:] if anchored_start else regex
        if anchored_end:
            body = body[:-1]

        # Literal runs between wildcards; a single segment means no wildcard.
        segments = body.split(".*")
        head, tail = segments[0], segments[-1]

        if anchored_start and anchored_end:
            # Exact match only when there is no wildcard inside the anchors.
            if len(segments) == 1:
                return f"trim(name) eq {quote(body)}"
            conditions = []
            if head:
                conditions.append(f"startswith(name, {quote(head)})")
            if tail:
                conditions.append(f"endswith(name, {quote(tail)})")
            if len(conditions) > 1:
                # Parenthesised so the pair stays together when the caller
                # combines it with other conditions.
                return "(" + " and ".join(conditions) + ")"
            return conditions[0] if conditions else ""

        # Starts with
        if anchored_start:
            return f"startswith(name, {quote(head)})"

        # Ends with
        if anchored_end:
            return f"endswith(name, {quote(tail)})"

        # Contains: use the first non-empty literal run so that leading or
        # trailing wildcards never leak into the OData literal.
        literal = next((segment for segment in segments if segment), "")
        return f"contains(name, {quote(literal)})"
    except Exception as exc:
        logger.warning(
            f"Error converting regex '{regex}' to OData condition: {exc}"
        )
        return ""
def create_filter_query(self, filter_pattern) -> Optional[str]:
    """
    Create a complete filter query for workspaces from filter_pattern

    Each include pattern becomes one OData condition and the conditions are
    OR-ed together; each exclude pattern becomes a ``not(...)`` condition
    and those are AND-ed. Patterns that convert to an empty condition are
    skipped.

    Args:
        filter_pattern: Object exposing ``includes`` and ``excludes``
            collections of regex strings (either may be empty or None).

    Returns:
        The OData ``$filter`` expression, or None when no condition could
        be produced or when building the query failed (a warning is logged
        in that case).
    """
    try:
        validate_regex(filter_pattern.includes)
        validate_regex(filter_pattern.excludes)

        include_conditions = [
            condition
            for condition in map(
                self.regex_to_odata_condition, filter_pattern.includes or []
            )
            if condition
        ]
        exclude_conditions = [
            f"not({condition})"
            for condition in map(
                self.regex_to_odata_condition, filter_pattern.excludes or []
            )
            if condition
        ]

        clauses = []
        if include_conditions:
            include_clause = " or ".join(include_conditions)
            # OData's `and` binds tighter than `or`: without parentheses
            # "a or b and not(c)" would be read as "a or (b and not(c))",
            # so the excludes would not apply to every include.
            if len(include_conditions) > 1 and exclude_conditions:
                include_clause = f"({include_clause})"
            clauses.append(include_clause)
        clauses.extend(exclude_conditions)

        return " and ".join(clauses) or None
    except Exception as exc:
        logger.warning(
            f"Creating filter query from the project filter pattern failed: {exc}. "
            "The projects will be filtered further inside OpenMetadata."
        )
        return None
# pylint: disable=too-many-branches,too-many-statements
def fetch_all_workspaces(self) -> Optional[List[Group]]:
def fetch_all_workspaces(
self, filter_pattern: Optional[FilterPattern] = None
) -> Optional[List[Group]]:
"""Method to fetch all powerbi workspace details
Returns:
Group
@ -293,6 +378,9 @@ class PowerBiApiClient:
"$top": str(entities_per_page),
"$skip": str(index * entities_per_page),
}
if filter_pattern:
params_data["$filter"] = self.create_filter_query(filter_pattern)
response = self.client.get(api_url, data=params_data)
if (
not response

View File

@ -132,7 +132,8 @@ class PowerbiSource(DashboardServiceSource):
"""
fetch all the group workspace ids
"""
groups = self.client.api_client.fetch_all_workspaces()
filter_pattern = self.source_config.projectFilterPattern
groups = self.client.api_client.fetch_all_workspaces(filter_pattern)
for group in groups:
# add the dashboards to the groups
group.dashboards.extend(
@ -171,7 +172,8 @@ class PowerbiSource(DashboardServiceSource):
fetch all the workspace ids
"""
groups = []
workspaces = self.client.api_client.fetch_all_workspaces()
filter_pattern = self.source_config.projectFilterPattern
workspaces = self.client.api_client.fetch_all_workspaces(filter_pattern)
if workspaces:
workspace_id_list = [workspace.id for workspace in workspaces]

View File

@ -0,0 +1,60 @@
import pytest
from metadata.generated.schema.type.filterPattern import FilterPattern
from metadata.ingestion.source.dashboard.powerbi.client import PowerBiApiClient
# Test cases dictionary: case id -> {"input": FilterPattern, "expected": query}.
# "expected" is the exact string create_filter_query should return for
# "input" (None when no filter query is produced).
test_cases = {
    "exact_match": {
        "input": FilterPattern(includes=["^exact_workspace$"], excludes=[]),
        "expected": "trim(name) eq 'exact_workspace'",
    },
    "starts_with": {
        "input": FilterPattern(includes=["^dev.*"], excludes=[]),
        "expected": "startswith(name, 'dev')",
    },
    "ends_with": {
        "input": FilterPattern(includes=[".*prod$"], excludes=[]),
        "expected": "endswith(name, 'prod')",
    },
    "contains": {
        "input": FilterPattern(includes=[".*test.*"], excludes=[]),
        "expected": "contains(name, 'test')",
    },
    "multiple_includes": {
        "input": FilterPattern(includes=["^dev.*", ".*prod$"], excludes=[]),
        "expected": "startswith(name, 'dev') or endswith(name, 'prod')",
    },
    "multiple_excludes": {
        "input": FilterPattern(includes=[], excludes=["^test.*", ".*temp$"]),
        "expected": "not(startswith(name, 'test')) and not(endswith(name, 'temp'))",
    },
    "includes_and_excludes": {
        "input": FilterPattern(includes=["^prod.*"], excludes=[".*temp$"]),
        "expected": "startswith(name, 'prod') and not(endswith(name, 'temp'))",
    },
    "includes_without_regex": {
        "input": FilterPattern(includes=["test"], excludes=[]),
        "expected": "contains(name, 'test')",
    },
    # Case id spelling corrected ("withour" -> "without"); it is shown in
    # pytest ids and failure messages.
    "excludes_without_regex": {
        "input": FilterPattern(includes=[], excludes=["test"]),
        "expected": "not(contains(name, 'test'))",
    },
    "empty_patterns": {
        "input": FilterPattern(includes=[], excludes=[]),
        "expected": None,
    },
}
# Test double derived from PowerBiApiClient
class MockPowerBiApiClient(PowerBiApiClient):
    """PowerBiApiClient subclass whose constructor does nothing."""

    def __init__(self):
        """Intentionally skip the parent initialisation."""
@pytest.mark.parametrize("test_name,test_data", test_cases.items())
def test_filter_query(test_name, test_data):
    """Check create_filter_query against each entry of ``test_cases``."""
    pattern = test_data["input"]
    expected_query = test_data["expected"]
    actual_query = MockPowerBiApiClient().create_filter_query(pattern)
    assert actual_query == expected_query, f"Failed test: {test_name}"