mirror of
				https://github.com/open-metadata/OpenMetadata.git
				synced 2025-11-03 20:19:31 +00:00 
			
		
		
		
	* Powerbi filter query * error handling * Added test and minor changes * Checkstyle * resolved comments --------- Co-authored-by: Akash Verma <akashverma@Akashs-MacBook-Pro-2.local>
This commit is contained in:
		
							parent
							
								
									f29bad1c1c
								
							
						
					
					
						commit
						af9d7c9fa3
					
				@ -23,6 +23,7 @@ from pydantic import BaseModel, ConfigDict
 | 
			
		||||
from metadata.generated.schema.entity.services.connections.dashboard.powerBIConnection import (
 | 
			
		||||
    PowerBIConnection,
 | 
			
		||||
)
 | 
			
		||||
from metadata.generated.schema.type.filterPattern import FilterPattern
 | 
			
		||||
from metadata.ingestion.api.steps import InvalidSourceException
 | 
			
		||||
from metadata.ingestion.ometa.client import REST, ClientConfig
 | 
			
		||||
from metadata.ingestion.source.dashboard.powerbi.file_client import PowerBiFileClient
 | 
			
		||||
@ -43,6 +44,7 @@ from metadata.ingestion.source.dashboard.powerbi.models import (
 | 
			
		||||
    Workspaces,
 | 
			
		||||
    WorkSpaceScanResponse,
 | 
			
		||||
)
 | 
			
		||||
from metadata.utils.filters import validate_regex
 | 
			
		||||
from metadata.utils.logger import utils_logger
 | 
			
		||||
 | 
			
		||||
logger = utils_logger()
 | 
			
		||||
@ -253,8 +255,91 @@ class PowerBiApiClient:
 | 
			
		||||
 | 
			
		||||
        return None
 | 
			
		||||
 | 
			
		||||
    def regex_to_odata_condition(self, regex: str) -> str:
        """
        Convert a regex pattern to an OData filter condition

        Only a handful of simple pattern shapes are recognised; the pattern is
        inspected textually and never compiled:

        - ``^literal$``            -> ``trim(name) eq 'literal'``
        - ``^literal`` / ``^literal.*...`` -> ``startswith(name, 'literal')``
          (everything from the first ``.*`` onwards is dropped)
        - ``...literal$``          -> ``endswith(name, 'literal')``
          (only the text after the last ``.*`` is kept)
        - ``.*literal.*``          -> ``contains(name, 'literal')``
        - anything else            -> ``contains(name, '<pattern as given>')``

        Single quotes inside the literal are doubled, which is how OData
        escapes a quote inside a string literal; without this a pattern such
        as ``^O'Brien$`` would yield a malformed filter expression.

        Args:
            regex: the include/exclude pattern to translate.

        Returns:
            The OData condition, or an empty string when the pattern is empty
            or the conversion raised.
        """
        try:
            # Handle empty pattern
            if not regex:
                return ""

            if regex.startswith("^") and regex.endswith("$"):
                # Exact match: strip both anchors
                template = "trim(name) eq '{}'"
                literal = regex[1:-1]
            elif regex.startswith("^"):
                # Starts with: keep what precedes the first wildcard
                template = "startswith(name, '{}')"
                literal = regex[1:].split(".*", 1)[0]
            elif regex.endswith("$"):
                # Ends with: keep what follows the last wildcard
                template = "endswith(name, '{}')"
                literal = regex[:-1].split(".*")[-1]
            else:
                # Contains: either `.*literal.*` or the bare pattern itself
                template = "contains(name, '{}')"
                if regex.startswith(".*") and regex.endswith(".*"):
                    literal = regex.split(".*")[1]
                else:
                    literal = regex

            # Double embedded quotes so the OData string literal stays valid
            return template.format(literal.replace("'", "''"))
        except Exception as exc:
            # Best effort: an unusable pattern simply contributes no condition
            logger.warning(
                f"Error converting regex '{regex}' to OData condition: {exc}"
            )
            return ""
 | 
			
		||||
 | 
			
		||||
    def create_filter_query(self, filter_pattern) -> Optional[str]:
        """
        Create a complete filter query for workspaces from filter_pattern

        Every include pattern is converted with ``regex_to_odata_condition``
        and the results are OR-ed together; every exclude pattern is converted,
        wrapped in ``not(...)`` and AND-ed together. The two groups are then
        AND-ed with each other.

        Args:
            filter_pattern: object exposing ``includes`` and ``excludes``
                pattern lists (either may be empty or None).

        Returns:
            The OData ``$filter`` expression, or None when no condition could
            be produced or when building the query raised.
        """
        try:
            validate_regex(filter_pattern.includes)
            validate_regex(filter_pattern.excludes)

            include_conditions = [
                condition
                for condition in map(
                    self.regex_to_odata_condition, filter_pattern.includes or []
                )
                if condition
            ]
            exclude_conditions = [
                f"not({condition})"
                for condition in map(
                    self.regex_to_odata_condition, filter_pattern.excludes or []
                )
                if condition
            ]

            include_clause = " or ".join(include_conditions)
            exclude_clause = " and ".join(exclude_conditions)

            # OData evaluates `and` before `or`, so an unparenthesised
            # "a or b and not(c)" would apply the exclusion to `b` only.
            # Group the includes whenever they are combined with excludes.
            if exclude_clause and len(include_conditions) > 1:
                include_clause = f"({include_clause})"

            filter_query = " and ".join(
                clause for clause in (include_clause, exclude_clause) if clause
            )
            return filter_query or None
        except Exception as exc:
            # Best effort: the caller falls back to an unfiltered request
            logger.warning(
                f"Creating filter query from the project filter pattern failed: {exc}. "
                "The projects will be filtered further inside OpenMetadata."
            )
            return None
 | 
			
		||||
 | 
			
		||||
    # pylint: disable=too-many-branches,too-many-statements
 | 
			
		||||
    def fetch_all_workspaces(self) -> Optional[List[Group]]:
 | 
			
		||||
    def fetch_all_workspaces(
 | 
			
		||||
        self, filter_pattern: Optional[FilterPattern] = None
 | 
			
		||||
    ) -> Optional[List[Group]]:
 | 
			
		||||
        """Method to fetch all powerbi workspace details
 | 
			
		||||
        Returns:
 | 
			
		||||
            Group
 | 
			
		||||
@ -293,6 +378,9 @@ class PowerBiApiClient:
 | 
			
		||||
                    "$top": str(entities_per_page),
 | 
			
		||||
                    "$skip": str(index * entities_per_page),
 | 
			
		||||
                }
 | 
			
		||||
                if filter_pattern:
 | 
			
		||||
                    params_data["$filter"] = self.create_filter_query(filter_pattern)
 | 
			
		||||
 | 
			
		||||
                response = self.client.get(api_url, data=params_data)
 | 
			
		||||
                if (
 | 
			
		||||
                    not response
 | 
			
		||||
 | 
			
		||||
@ -132,7 +132,8 @@ class PowerbiSource(DashboardServiceSource):
 | 
			
		||||
        """
 | 
			
		||||
        fetch all the group workspace ids
 | 
			
		||||
        """
 | 
			
		||||
        groups = self.client.api_client.fetch_all_workspaces()
 | 
			
		||||
        filter_pattern = self.source_config.projectFilterPattern
 | 
			
		||||
        groups = self.client.api_client.fetch_all_workspaces(filter_pattern)
 | 
			
		||||
        for group in groups:
 | 
			
		||||
            # add the dashboards to the groups
 | 
			
		||||
            group.dashboards.extend(
 | 
			
		||||
@ -171,7 +172,8 @@ class PowerbiSource(DashboardServiceSource):
 | 
			
		||||
        fetch all the workspace ids
 | 
			
		||||
        """
 | 
			
		||||
        groups = []
 | 
			
		||||
        workspaces = self.client.api_client.fetch_all_workspaces()
 | 
			
		||||
        filter_pattern = self.source_config.projectFilterPattern
 | 
			
		||||
        workspaces = self.client.api_client.fetch_all_workspaces(filter_pattern)
 | 
			
		||||
        if workspaces:
 | 
			
		||||
            workspace_id_list = [workspace.id for workspace in workspaces]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										60
									
								
								ingestion/tests/unit/test_powerbi_filter_query.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								ingestion/tests/unit/test_powerbi_filter_query.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,60 @@
 | 
			
		||||
import pytest
 | 
			
		||||
 | 
			
		||||
from metadata.generated.schema.type.filterPattern import FilterPattern
 | 
			
		||||
from metadata.ingestion.source.dashboard.powerbi.client import PowerBiApiClient
 | 
			
		||||
 | 
			
		||||
# Scenario table: scenario name -> {"input": FilterPattern, "expected": query}
def _case(includes, excludes, expected):
    """Build one scenario entry from include/exclude patterns and the expected query."""
    return {
        "input": FilterPattern(includes=includes, excludes=excludes),
        "expected": expected,
    }


test_cases = {
    "exact_match": _case(
        ["^exact_workspace$"], [], "trim(name) eq 'exact_workspace'"
    ),
    "starts_with": _case(["^dev.*"], [], "startswith(name, 'dev')"),
    "ends_with": _case([".*prod$"], [], "endswith(name, 'prod')"),
    "contains": _case([".*test.*"], [], "contains(name, 'test')"),
    "multiple_includes": _case(
        ["^dev.*", ".*prod$"],
        [],
        "startswith(name, 'dev') or endswith(name, 'prod')",
    ),
    "multiple_excludes": _case(
        [],
        ["^test.*", ".*temp$"],
        "not(startswith(name, 'test')) and not(endswith(name, 'temp'))",
    ),
    "includes_and_excludes": _case(
        ["^prod.*"],
        [".*temp$"],
        "startswith(name, 'prod') and not(endswith(name, 'temp'))",
    ),
    "includes_without_regex": _case(["test"], [], "contains(name, 'test')"),
    "excludes_withour_regex": _case([], ["test"], "not(contains(name, 'test'))"),
    "empty_patterns": _case([], [], None),
}
 | 
			
		||||
 | 
			
		||||
# Test double derived from PowerBiApiClient
class MockPowerBiApiClient(PowerBiApiClient):
    """PowerBiApiClient subclass whose constructor performs no setup."""

    def __init__(self):
        """Do nothing: the parent initialiser is deliberately not invoked."""
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize("test_name,test_data", test_cases.items())
def test_filter_query(test_name, test_data):
    """Check that create_filter_query returns the expected query for a scenario."""
    api_client = MockPowerBiApiClient()
    actual = api_client.create_filter_query(test_data["input"])
    expected = test_data["expected"]
    assert actual == expected, f"Failed test: {test_name}"
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user