# datahub/metadata-ingestion/tests/integration/mode/test_mode.py

import json
import pathlib
from typing import Sequence
from unittest.mock import patch

import pytest
from freezegun import freeze_time
from requests.models import HTTPError

from datahub.configuration.common import PipelineExecutionError
from datahub.ingestion.api.source import StructuredLogEntry
from datahub.ingestion.run.pipeline import Pipeline
from tests.test_helpers import mce_helpers

# Timestamp passed to ``freeze_time`` on every test in this module.
FROZEN_TIME = "2021-12-07 07:00:00"

# Maps each Mode API URL the mocked session may be asked for to the name of the
# JSON fixture file (looked up under ``<test_resources_dir>/setup/``) whose
# contents are served as the response body.
JSON_RESPONSE_MAP = {
    "https://app.mode.com/api/verify": "verify.json",
    "https://app.mode.com/api/account": "user.json",
    "https://app.mode.com/api/acryl/spaces?filter=all&per_page=30&page=1": "spaces.json",
    "https://app.mode.com/api/acryl/spaces?filter=all&per_page=30&page=2": "spaces_empty.json",
    "https://app.mode.com/api/acryl/spaces/157933cc1168/reports": "reports_157933cc1168.json",
    "https://app.mode.com/api/acryl/spaces/75737b70402e/reports": "reports_75737b70402e.json",
    "https://app.mode.com/api/modeuser": "user.json",
    "https://app.mode.com/api/acryl/reports/9d2da37fa91e/queries": "queries.json",
    "https://app.mode.com/api/acryl/reports/9d2da37fa91e/queries/6e26a9f3d4e2/charts": "charts.json",
    "https://app.mode.com/api/acryl/data_sources": "data_sources.json",
    "https://app.mode.com/api/acryl/definitions": "definitions.json",
    "https://app.mode.com/api/acryl/spaces/157933cc1168/datasets": "datasets_157933cc1168.json",
    "https://app.mode.com/api/acryl/spaces/75737b70402e/datasets": "datasets_75737b70402e.json",
    "https://app.mode.com/api/acryl/reports/24f66e1701b6": "dataset_24f66e1701b6.json",
    "https://app.mode.com/api/acryl/reports/24f66e1701b6/queries": "dataset_queries_24f66e1701b6.json",
}

# URL placed in the mock's ``error_list`` by ``mocked_requests_failure`` and
# expected to appear in the reported error text.
ERROR_URL = "https://app.mode.com/api/acryl/spaces/75737b70402e/reports"

# Directory containing this test module; the ``setup/`` fixtures and the golden
# file are resolved relative to it.
test_resources_dir = pathlib.Path(__file__).parent


class MockResponse:
    """Test double that acts as both the HTTP session and the response it returns.

    The tests patch ``requests.Session`` so that constructing a session yields
    an instance of this class. ``get`` loads a JSON fixture for the requested
    URL and returns the same instance, so session state and response state
    live on one shared object.

    Args:
        error_list: URLs that should trigger a simulated ``HTTPError``, or
            ``None`` to disable simulated failures.
        status_code: Status code exposed on the response.

    NOTE(review): the failure check in ``get`` inspects the URL stored by the
    previous successful call rather than the ``url`` argument, and the stored
    URL is left unchanged when raising. Once a listed URL has been served,
    every later ``get`` raises. Confirm whether that ordering is intended
    before changing it; tests in this module may depend on it.
    """

    def __init__(self, error_list, status_code):
        # Failure simulation and response status.
        self.error_list = error_list
        self.status_code = status_code
        # Response state, filled in by ``get``.
        self.url = None
        self.json_data = None
        # Session-like attributes that the code under test may read or assign.
        self.headers = {}
        self.auth = None

    def json(self):
        """Return the payload loaded by the most recent successful ``get``."""
        return self.json_data

    def mount(self, prefix, adaptor):
        """Accept and ignore an adapter registration; returns this instance."""
        return self

    def get(self, url, timeout=40):
        """Serve the fixture mapped to *url* and return this instance.

        Raises:
            HTTPError: when ``error_list`` is set and the URL recorded by the
                previous call is contained in it.
        """
        last_url = self.url
        should_fail = self.error_list is not None and last_url in self.error_list
        if should_fail:
            message = "{} Client Error: {} for url: {}".format(
                400,
                "Simulate error",
                last_url,
            )
            raise HTTPError(message, response=self)

        self.url = url
        self.timeout = timeout
        fixture_path = f"{test_resources_dir}/setup/{JSON_RESPONSE_MAP.get(url)}"
        with open(fixture_path) as fixture:
            self.json_data = json.load(fixture)
        return self

    @property
    def text(self) -> str:
        """The current payload serialised back to a JSON string."""
        return json.dumps(self.json_data)

    def raise_for_status(self) -> None:
        """Raise ``HTTPError`` when the status code is 400 or above."""
        if self.status_code < 400:
            return
        raise HTTPError(
            f"MockResponse for {self.url} has status code {self.status_code}",
            response=self,
        )


class MockResponseJson(MockResponse):
    """``MockResponse`` variant that simulates empty or malformed JSON bodies.

    Args:
        status_code: Initial status code exposed on the response.
        json_empty_list: URLs answered with status 204; calling ``json`` for
            them fails because an empty string is parsed.
        json_error_list: URLs whose ``json`` call parses a truncated document
            and therefore raises a JSON decode error.
    """

    def __init__(
        self,
        status_code: int = 200,
        *,
        json_empty_list: Sequence[str] = (),
        json_error_list: Sequence[str] = (),
    ):
        super().__init__(None, status_code)
        self.json_error_list = json_error_list
        self.json_empty_list = json_empty_list

    def json(self):
        """Return the loaded payload, or raise by parsing a deliberately bad body."""
        if self.url in self.json_empty_list:
            # Shouldn't be called: a 204 response is not expected to be parsed.
            raw_body = ""
        elif self.url in self.json_error_list:
            raw_body = "{"
        else:
            return super().json()
        return json.loads(raw_body)

    def get(self, url, timeout=40):
        """Serve *url* via the base class, marking "empty" URLs as 204."""
        served = super().get(url, timeout)
        # Nothing in this class resets the status on later calls.
        if self.url in self.json_empty_list:
            served.status_code = 204
        return served


def mocked_requests_success(*args, **kwargs):
    """Session factory for ``patch``: ignores its arguments, no failing URLs."""
    return MockResponse(error_list=None, status_code=200)


def mocked_requests_failure(*args, **kwargs):
    """Session factory for ``patch``: ignores its arguments, fails on ``ERROR_URL``."""
    return MockResponse(error_list=[ERROR_URL], status_code=200)


@freeze_time(FROZEN_TIME)
def test_mode_ingest_success(pytestconfig, tmp_path):
    """Ingest from the mocked Mode API and compare the output with the golden file."""
    output_file = f"{tmp_path}/mode_mces.json"
    recipe = {
        "run_id": "mode-test",
        "source": {
            "type": "mode",
            "config": {
                "token": "xxxx",
                "password": "xxxx",
                "connect_uri": "https://app.mode.com/",
                "workspace": "acryl",
            },
        },
        "sink": {
            "type": "file",
            "config": {
                "filename": output_file,
            },
        },
    }

    # Every Session the source creates is replaced by a fixture-backed mock.
    with patch(
        "datahub.ingestion.source.mode.requests.Session",
        side_effect=mocked_requests_success,
    ):
        pipeline = Pipeline.create(recipe)
        pipeline.run()
        pipeline.raise_from_status()

        mce_helpers.check_golden_file(
            pytestconfig,
            output_path=output_file,
            golden_path=test_resources_dir / "mode_mces_golden.json",
            ignore_paths=mce_helpers.IGNORE_PATH_TIMESTAMPS,
        )


@freeze_time(FROZEN_TIME)
def test_mode_ingest_failure(pytestconfig, tmp_path):
    """A simulated HTTP error must surface as exactly one source error.

    The session mock is configured with ``ERROR_URL`` in its error list. After
    the run, ``raise_from_status`` is expected to raise with a single error
    entry whose first context string contains both the simulated message and
    the failing URL.
    """
    # Fixtures are resolved through the module-level ``test_resources_dir``,
    # which is derived from ``__file__``. It is intentionally not reassigned
    # here: rebinding the global from pytest's rootdir leaked state into other
    # tests and broke when pytest was started from a different root.
    with patch(
        "datahub.ingestion.source.mode.requests.Session",
        side_effect=mocked_requests_failure,
    ):
        pipeline = Pipeline.create(
            {
                "run_id": "mode-test",
                "source": {
                    "type": "mode",
                    "config": {
                        "token": "xxxx",
                        "password": "xxxx",
                        "connect_uri": "https://app.mode.com/",
                        "workspace": "acryl",
                    },
                },
                "sink": {
                    "type": "file",
                    "config": {
                        "filename": f"{tmp_path}/mode_mces.json",
                    },
                },
            }
        )
        pipeline.run()
        with pytest.raises(PipelineExecutionError) as exec_error:
            pipeline.raise_from_status()
        assert exec_error.value.args[0] == "Source reported errors"
        assert len(exec_error.value.args[1]) == 1
        error_dict: StructuredLogEntry
        # Each reported entry unpacks into a (level, structured log entry) pair.
        _level, error_dict = exec_error.value.args[1][0]
        error = next(iter(error_dict.context))
        assert "Simulate error" in error
        assert ERROR_URL in error


@freeze_time(FROZEN_TIME)
def test_mode_ingest_json_empty(pytestconfig, tmp_path):
    """A 204 response for the ``modeuser`` endpoint must not fail the run.

    The session mock answers ``https://app.mode.com/api/modeuser`` with status
    204. The run must then pass ``raise_from_status(raise_warnings=True)``
    without raising.
    """
    # Fixtures are resolved through the module-level ``test_resources_dir``,
    # which is derived from ``__file__``. It is intentionally not reassigned
    # here: rebinding the global from pytest's rootdir leaked state into other
    # tests and broke when pytest was started from a different root.
    with patch(
        "datahub.ingestion.source.mode.requests.Session",
        side_effect=lambda *args, **kwargs: MockResponseJson(
            json_empty_list=["https://app.mode.com/api/modeuser"]
        ),
    ):
        pipeline = Pipeline.create(
            {
                "run_id": "mode-test",
                "source": {
                    "type": "mode",
                    "config": {
                        "token": "xxxx",
                        "password": "xxxx",
                        "connect_uri": "https://app.mode.com/",
                        "workspace": "acryl",
                    },
                },
                "sink": {
                    "type": "file",
                    "config": {
                        "filename": f"{tmp_path}/mode_mces.json",
                    },
                },
            }
        )
        pipeline.run()
        pipeline.raise_from_status(raise_warnings=True)


@freeze_time(FROZEN_TIME)
def test_mode_ingest_json_failure(pytestconfig, tmp_path):
    """Malformed JSON from ``modeuser`` must be reported without failing as an error.

    The session mock makes ``json()`` raise a decode error for
    ``https://app.mode.com/api/modeuser``. The run must pass
    ``raise_from_status(raise_warnings=False)`` but raise when
    ``raise_warnings=True``, and the first reported entry's context must
    mention the JSON decode message.
    """
    # Fixtures are resolved through the module-level ``test_resources_dir``,
    # which is derived from ``__file__``. It is intentionally not reassigned
    # here: rebinding the global from pytest's rootdir leaked state into other
    # tests and broke when pytest was started from a different root.
    with patch(
        "datahub.ingestion.source.mode.requests.Session",
        side_effect=lambda *args, **kwargs: MockResponseJson(
            json_error_list=["https://app.mode.com/api/modeuser"]
        ),
    ):
        pipeline = Pipeline.create(
            {
                "run_id": "mode-test",
                "source": {
                    "type": "mode",
                    "config": {
                        "token": "xxxx",
                        "password": "xxxx",
                        "connect_uri": "https://app.mode.com/",
                        "workspace": "acryl",
                    },
                },
                "sink": {
                    "type": "file",
                    "config": {
                        "filename": f"{tmp_path}/mode_mces.json",
                    },
                },
            }
        )
        pipeline.run()
        # Must not raise while warnings are tolerated...
        pipeline.raise_from_status(raise_warnings=False)
        # ...but must raise once warnings are included.
        with pytest.raises(PipelineExecutionError) as exec_error:
            pipeline.raise_from_status(raise_warnings=True)
        assert len(exec_error.value.args[1]) > 0
        error_dict: StructuredLogEntry
        # Each reported entry unpacks into a (level, structured log entry) pair.
        _level, error_dict = exec_error.value.args[1][0]
        error = next(iter(error_dict.context))
        assert "Expecting property name enclosed in double quotes" in error
feat(mode): add mode analytics ingestion source (#3710) 2021-12-09 16:10:08 -08:00			`import json`
chore(ingest): enable flake8 bugbear linting (#7763) 2023-04-11 02:44:42 +05:30			`import pathlib`
fix(ingest/mode): Handle 204 response and invalid json (#12156) Co-authored-by: Aseem Bansal <asmbansal2@gmail.com> 2024-12-24 11:33:06 -08:00			`from typing import Sequence`
feat(mode): add mode analytics ingestion source (#3710) 2021-12-09 16:10:08 -08:00			`from unittest.mock import patch`

fix(ingest/mode): Handle 204 response and invalid json (#12156) Co-authored-by: Aseem Bansal <asmbansal2@gmail.com> 2024-12-24 11:33:06 -08:00			`import pytest`
feat(mode): add mode analytics ingestion source (#3710) 2021-12-09 16:10:08 -08:00			`from freezegun import freeze_time`
			`from requests.models import HTTPError`

			`from datahub.configuration.common import PipelineExecutionError`
fix(ingest/mode): Handle 204 response and invalid json (#12156) Co-authored-by: Aseem Bansal <asmbansal2@gmail.com> 2024-12-24 11:33:06 -08:00			`from datahub.ingestion.api.source import StructuredLogEntry`
feat(mode): add mode analytics ingestion source (#3710) 2021-12-09 16:10:08 -08:00			`from datahub.ingestion.run.pipeline import Pipeline`
			`from tests.test_helpers import mce_helpers`

			`FROZEN_TIME = "2021-12-07 07:00:00"`

			`JSON_RESPONSE_MAP = {`
feat(ingest/mode): Mode improvements (#10273) 2024-04-12 09:01:16 +02:00			`"https://app.mode.com/api/verify": "verify.json",`
feat(mode): add mode analytics ingestion source (#3710) 2021-12-09 16:10:08 -08:00			`"https://app.mode.com/api/account": "user.json",`
fix(ingest/mode): Add pagination and warn on missing reports (#13322) 2025-04-25 23:21:27 +00:00			`"https://app.mode.com/api/acryl/spaces?filter=all&per_page=30&page=1": "spaces.json",`
			`"https://app.mode.com/api/acryl/spaces?filter=all&per_page=30&page=2": "spaces_empty.json",`
fix(mode): support definitions in mode query (#3721) Co-authored-by: Jawad Qureshi <jqureshi@petabloc.com> 2021-12-10 17:56:39 -08:00			`"https://app.mode.com/api/acryl/spaces/157933cc1168/reports": "reports_157933cc1168.json",`
			`"https://app.mode.com/api/acryl/spaces/75737b70402e/reports": "reports_75737b70402e.json",`
feat(mode): add mode analytics ingestion source (#3710) 2021-12-09 16:10:08 -08:00			`"https://app.mode.com/api/modeuser": "user.json",`
fix(mode): support definitions in mode query (#3721) Co-authored-by: Jawad Qureshi <jqureshi@petabloc.com> 2021-12-10 17:56:39 -08:00			`"https://app.mode.com/api/acryl/reports/9d2da37fa91e/queries": "queries.json",`
			`"https://app.mode.com/api/acryl/reports/9d2da37fa91e/queries/6e26a9f3d4e2/charts": "charts.json",`
feat(mode): add mode analytics ingestion source (#3710) 2021-12-09 16:10:08 -08:00			`"https://app.mode.com/api/acryl/data_sources": "data_sources.json",`
fix(mode): support definitions in mode query (#3721) Co-authored-by: Jawad Qureshi <jqureshi@petabloc.com> 2021-12-10 17:56:39 -08:00			`"https://app.mode.com/api/acryl/definitions": "definitions.json",`
feat(mode/ingest): Add support for missing Mode datasets in lineage (#11290) 2024-09-10 22:54:55 +05:30			`"https://app.mode.com/api/acryl/spaces/157933cc1168/datasets": "datasets_157933cc1168.json",`
			`"https://app.mode.com/api/acryl/spaces/75737b70402e/datasets": "datasets_75737b70402e.json",`
			`"https://app.mode.com/api/acryl/reports/24f66e1701b6": "dataset_24f66e1701b6.json",`
			`"https://app.mode.com/api/acryl/reports/24f66e1701b6/queries": "dataset_queries_24f66e1701b6.json",`
feat(mode): add mode analytics ingestion source (#3710) 2021-12-09 16:10:08 -08:00			`}`

fix(ingest/mode): Handle 204 response and invalid json (#12156) Co-authored-by: Aseem Bansal <asmbansal2@gmail.com> 2024-12-24 11:33:06 -08:00			`ERROR_URL = "https://app.mode.com/api/acryl/spaces/75737b70402e/reports"`
feat(mode): add mode analytics ingestion source (#3710) 2021-12-09 16:10:08 -08:00
chore(ingest): enable flake8 bugbear linting (#7763) 2023-04-11 02:44:42 +05:30			`test_resources_dir = pathlib.Path(__file__).parent`
feat(mode): add mode analytics ingestion source (#3710) 2021-12-09 16:10:08 -08:00

			`class MockResponse:`
			`def __init__(self, error_list, status_code):`
			`self.json_data = None`
			`self.error_list = error_list`
			`self.status_code = status_code`
			`self.auth = None`
			`self.headers = {}`
			`self.url = None`

			`def json(self):`
			`return self.json_data`

fix(ingest/mode): add connection timeouts to avoid RemoteDisconnected errors (#11245) 2024-09-24 00:58:30 +05:30			`def mount(self, prefix, adaptor):`
			`return self`

			`def get(self, url, timeout=40):`
fix(ingest/mode): Handle 204 response and invalid json (#12156) Co-authored-by: Aseem Bansal <asmbansal2@gmail.com> 2024-12-24 11:33:06 -08:00			`if self.error_list is not None and self.url in self.error_list:`
			`http_error_msg = "{} Client Error: {} for url: {}".format(`
			`400,`
			`"Simulate error",`
			`self.url,`
			`)`
			`raise HTTPError(http_error_msg, response=self)`

feat(mode): add mode analytics ingestion source (#3710) 2021-12-09 16:10:08 -08:00			`self.url = url`
fix(ingest/mode): add connection timeouts to avoid RemoteDisconnected errors (#11245) 2024-09-24 00:58:30 +05:30			`self.timeout = timeout`
feat(mode): add mode analytics ingestion source (#3710) 2021-12-09 16:10:08 -08:00			`response_json_path = f"{test_resources_dir}/setup/{JSON_RESPONSE_MAP.get(url)}"`
			`with open(response_json_path) as file:`
			`data = json.loads(file.read())`
			`self.json_data = data`
			`return self`

feat(ingest/mode): fix issue in mode request validation (#12948) 2025-03-25 08:03:40 -07:00			`@property`
			`def text(self) -> str:`
			`return json.dumps(self.json_data)`

			`def raise_for_status(self) -> None:`
			`if self.status_code >= 400:`
			`raise HTTPError(`
			`f"MockResponse for {self.url} has status code {self.status_code}",`
			`response=self,`
			`)`

fix(ingest/mode): Handle 204 response and invalid json (#12156) Co-authored-by: Aseem Bansal <asmbansal2@gmail.com> 2024-12-24 11:33:06 -08:00
			`class MockResponseJson(MockResponse):`
			`def __init__(`
			`self,`
			`status_code: int = 200,`
			`*,`
			`json_empty_list: Sequence[str] = (),`
			`json_error_list: Sequence[str] = (),`
			`):`
			`super().__init__(None, status_code)`
			`self.json_empty_list = json_empty_list`
			`self.json_error_list = json_error_list`

			`def json(self):`
			`if self.url in self.json_empty_list:`
			`return json.loads("") # Shouldn't be called`
			`if self.url in self.json_error_list:`
			`return json.loads("{")`
			`return super().json()`

			`def get(self, url, timeout=40):`
			`response = super().get(url, timeout)`
			`if self.url in self.json_empty_list:`
			`response.status_code = 204`
			`return response`
feat(mode): add mode analytics ingestion source (#3710) 2021-12-09 16:10:08 -08:00

fix(ingest/mode): Handle 204 response and invalid json (#12156) Co-authored-by: Aseem Bansal <asmbansal2@gmail.com> 2024-12-24 11:33:06 -08:00			`def mocked_requests_success(args, *kwargs):`
feat(mode): add mode analytics ingestion source (#3710) 2021-12-09 16:10:08 -08:00			`return MockResponse(None, 200)`


			`def mocked_requests_failure(args, *kwargs):`
fix(ingest/mode): Handle 204 response and invalid json (#12156) Co-authored-by: Aseem Bansal <asmbansal2@gmail.com> 2024-12-24 11:33:06 -08:00			`return MockResponse([ERROR_URL], 200)`
feat(mode): add mode analytics ingestion source (#3710) 2021-12-09 16:10:08 -08:00

			`@freeze_time(FROZEN_TIME)`
			`def test_mode_ingest_success(pytestconfig, tmp_path):`
			`with patch(`
fix(ingest/mode): add connection timeouts to avoid RemoteDisconnected errors (#11245) 2024-09-24 00:58:30 +05:30			`"datahub.ingestion.source.mode.requests.Session",`
fix(ingest/mode): Handle 204 response and invalid json (#12156) Co-authored-by: Aseem Bansal <asmbansal2@gmail.com> 2024-12-24 11:33:06 -08:00			`side_effect=mocked_requests_success,`
feat(mode): add mode analytics ingestion source (#3710) 2021-12-09 16:10:08 -08:00			`):`
			`pipeline = Pipeline.create(`
			`{`
			`"run_id": "mode-test",`
			`"source": {`
			`"type": "mode",`
			`"config": {`
			`"token": "xxxx",`
			`"password": "xxxx",`
			`"connect_uri": "https://app.mode.com/",`
			`"workspace": "acryl",`
			`},`
			`},`
			`"sink": {`
			`"type": "file",`
			`"config": {`
			`"filename": f"{tmp_path}/mode_mces.json",`
			`},`
			`},`
			`}`
			`)`
			`pipeline.run()`
			`pipeline.raise_from_status()`

			`mce_helpers.check_golden_file(`
			`pytestconfig,`
			`output_path=f"{tmp_path}/mode_mces.json",`
			`golden_path=test_resources_dir / "mode_mces_golden.json",`
			`ignore_paths=mce_helpers.IGNORE_PATH_TIMESTAMPS,`
			`)`


			`@freeze_time(FROZEN_TIME)`
			`def test_mode_ingest_failure(pytestconfig, tmp_path):`
			`with patch(`
fix(ingest/mode): add connection timeouts to avoid RemoteDisconnected errors (#11245) 2024-09-24 00:58:30 +05:30			`"datahub.ingestion.source.mode.requests.Session",`
feat(mode): add mode analytics ingestion source (#3710) 2021-12-09 16:10:08 -08:00			`side_effect=mocked_requests_failure,`
			`):`
			`global test_resources_dir`
			`test_resources_dir = pytestconfig.rootpath / "tests/integration/mode"`

			`pipeline = Pipeline.create(`
			`{`
			`"run_id": "mode-test",`
			`"source": {`
			`"type": "mode",`
			`"config": {`
			`"token": "xxxx",`
			`"password": "xxxx",`
			`"connect_uri": "https://app.mode.com/",`
			`"workspace": "acryl",`
			`},`
			`},`
			`"sink": {`
			`"type": "file",`
			`"config": {`
			`"filename": f"{tmp_path}/mode_mces.json",`
			`},`
			`},`
			`}`
			`)`
			`pipeline.run()`
fix(ingest/mode): Handle 204 response and invalid json (#12156) Co-authored-by: Aseem Bansal <asmbansal2@gmail.com> 2024-12-24 11:33:06 -08:00			`with pytest.raises(PipelineExecutionError) as exec_error:`
feat(mode): add mode analytics ingestion source (#3710) 2021-12-09 16:10:08 -08:00			`pipeline.raise_from_status()`
fix(ingest/mode): Handle 204 response and invalid json (#12156) Co-authored-by: Aseem Bansal <asmbansal2@gmail.com> 2024-12-24 11:33:06 -08:00			`assert exec_error.value.args[0] == "Source reported errors"`
feat(ingest/tableau): Allow specifying asset types for ingest_hidden_assets (#13190) 2025-04-11 20:07:37 -07:00			`assert len(exec_error.value.args[1]) == 1`
fix(ingest/mode): Handle 204 response and invalid json (#12156) Co-authored-by: Aseem Bansal <asmbansal2@gmail.com> 2024-12-24 11:33:06 -08:00			`error_dict: StructuredLogEntry`
feat(ingest/tableau): Allow specifying asset types for ingest_hidden_assets (#13190) 2025-04-11 20:07:37 -07:00			`_level, error_dict = exec_error.value.args[1][0]`
fix(ingest/mode): Handle 204 response and invalid json (#12156) Co-authored-by: Aseem Bansal <asmbansal2@gmail.com> 2024-12-24 11:33:06 -08:00			`error = next(iter(error_dict.context))`
			`assert "Simulate error" in error`
			`assert ERROR_URL in error`


			`@freeze_time(FROZEN_TIME)`
			`def test_mode_ingest_json_empty(pytestconfig, tmp_path):`
			`with patch(`
			`"datahub.ingestion.source.mode.requests.Session",`
			`side_effect=lambda args, *kwargs: MockResponseJson(`
			`json_empty_list=["https://app.mode.com/api/modeuser"]`
			`),`
			`):`
			`global test_resources_dir`
			`test_resources_dir = pytestconfig.rootpath / "tests/integration/mode"`

			`pipeline = Pipeline.create(`
			`{`
			`"run_id": "mode-test",`
			`"source": {`
			`"type": "mode",`
			`"config": {`
			`"token": "xxxx",`
			`"password": "xxxx",`
			`"connect_uri": "https://app.mode.com/",`
			`"workspace": "acryl",`
			`},`
			`},`
			`"sink": {`
			`"type": "file",`
			`"config": {`
			`"filename": f"{tmp_path}/mode_mces.json",`
			`},`
			`},`
			`}`
			`)`
			`pipeline.run()`
			`pipeline.raise_from_status(raise_warnings=True)`


			`@freeze_time(FROZEN_TIME)`
			`def test_mode_ingest_json_failure(pytestconfig, tmp_path):`
			`with patch(`
			`"datahub.ingestion.source.mode.requests.Session",`
			`side_effect=lambda args, *kwargs: MockResponseJson(`
			`json_error_list=["https://app.mode.com/api/modeuser"]`
			`),`
			`):`
			`global test_resources_dir`
			`test_resources_dir = pytestconfig.rootpath / "tests/integration/mode"`

			`pipeline = Pipeline.create(`
			`{`
			`"run_id": "mode-test",`
			`"source": {`
			`"type": "mode",`
			`"config": {`
			`"token": "xxxx",`
			`"password": "xxxx",`
			`"connect_uri": "https://app.mode.com/",`
			`"workspace": "acryl",`
			`},`
			`},`
			`"sink": {`
			`"type": "file",`
			`"config": {`
			`"filename": f"{tmp_path}/mode_mces.json",`
			`},`
			`},`
			`}`
			`)`
			`pipeline.run()`
			`pipeline.raise_from_status(raise_warnings=False)`
			`with pytest.raises(PipelineExecutionError) as exec_error:`
			`pipeline.raise_from_status(raise_warnings=True)`
feat(ingest/tableau): Allow specifying asset types for ingest_hidden_assets (#13190) 2025-04-11 20:07:37 -07:00			`assert len(exec_error.value.args[1]) > 0`
fix(ingest/mode): Handle 204 response and invalid json (#12156) Co-authored-by: Aseem Bansal <asmbansal2@gmail.com> 2024-12-24 11:33:06 -08:00			`error_dict: StructuredLogEntry`
feat(ingest/tableau): Allow specifying asset types for ingest_hidden_assets (#13190) 2025-04-11 20:07:37 -07:00			`_level, error_dict = exec_error.value.args[1][0]`
fix(ingest/mode): Handle 204 response and invalid json (#12156) Co-authored-by: Aseem Bansal <asmbansal2@gmail.com> 2024-12-24 11:33:06 -08:00			`error = next(iter(error_dict.context))`
			`assert "Expecting property name enclosed in double quotes" in error`