# Mirror of https://github.com/datahub-project/datahub.git
# Synced 2025-07-14 04:31:02 +00:00 | 357 lines | 13 KiB | Python
import json
|
||
|
import os
|
||
|
import unittest
|
||
|
from datetime import datetime, timezone
|
||
|
from pathlib import Path
|
||
|
from unittest.mock import MagicMock, patch
|
||
|
|
||
|
import requests
|
||
|
from pydantic import ValidationError
|
||
|
|
||
|
from datahub.ingestion.source.hex.api import (
|
||
|
HexApi,
|
||
|
HexApiProjectApiResource,
|
||
|
HexApiProjectsListResponse,
|
||
|
HexApiReport,
|
||
|
)
|
||
|
from datahub.ingestion.source.hex.model import (
|
||
|
Component,
|
||
|
Project,
|
||
|
)
|
||
|
|
||
|
|
||
|
# Helper to load test data from JSON files
|
||
|
def load_json_data(filename: str):
    """Load and parse a JSON fixture from the ``test_data`` directory.

    The directory is located next to this module's own file, so the lookup
    does not depend on the current working directory.

    Args:
        filename: Name of the JSON file inside ``test_data``.

    Returns:
        The decoded JSON document, as produced by ``json.load``.

    Raises:
        OSError: If the fixture file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    fixture_path = Path(os.path.abspath(__file__)).parent / "test_data" / filename
    # JSON text is UTF-8; decode it explicitly instead of relying on the
    # platform's locale-dependent default encoding.
    with open(fixture_path, "r", encoding="utf-8") as f:
        return json.load(f)
|
||
|
|
||
|
|
||
|
class TestHexAPI(unittest.TestCase):
    """Unit tests for ``HexApi`` project/component listing and model mapping.

    Every test patches ``requests.get`` in the ``hex.api`` module, so the
    responses seen by ``HexApi`` are the mocks configured in each test.
    """

    def setUp(self):
        """Prepare the client settings and a fresh report for each test."""
        self.base_url = "https://test.hex.tech/api/v1"
        self.token = "test-token"
        self.page_size = 8  # Small page size to test pagination
        self.report = HexApiReport()

    def _build_api(self, **overrides):
        """Return a ``HexApi`` using this test's token, report and base URL.

        Extra keyword arguments (e.g. ``page_size``) are forwarded unchanged.
        """
        return HexApi(
            token=self.token,
            report=self.report,
            base_url=self.base_url,
            **overrides,
        )

    @patch("datahub.ingestion.source.hex.api.requests.get")
    def test_fetch_projects_pagination(self, mock_get):
        """Two mocked pages are both requested, counted and mapped to models."""
        # One canned response per fixture page, handed out in order.
        first_page = MagicMock()
        first_page.json.return_value = load_json_data("hex_projects_page1.json")
        second_page = MagicMock()
        second_page.json.return_value = load_json_data("hex_projects_page2.json")
        mock_get.side_effect = [first_page, second_page]

        client = self._build_api(page_size=self.page_size)
        fetched = list(client.fetch_projects())

        # check pagination
        assert mock_get.call_count == 2
        assert self.report.fetch_projects_page_calls == 2

        expected_count = sum(
            len(page.json()["values"]) for page in (first_page, second_page)
        )
        assert self.report.fetch_projects_page_items == expected_count

        # some random validations on the results
        assert len(fetched) == expected_count
        assert all(isinstance(entry, (Project, Component)) for entry in fetched)

        project_keys = {
            (entry.id, entry.title) for entry in fetched if isinstance(entry, Project)
        }
        assert project_keys == {
            ("827ea1f2-ed9a-425f-8d48-0ecc491c7c7c", "Welcome to Hex!-3"),
            ("e9d940fe-34ad-415b-ad12-cb4c201650dc", "Welcome to Hex!-4"),
            ("d73da67d-c87b-4dd8-9e7f-b79cb7f822cf", "PlayNotebook"),
            ("d05b0d81-6d00-4798-8967-6587b6731c0a", "Welcome to Hex!-6"),
            ("2ef730de-25ec-4131-94af-3517e743a738", "Welcome to Hex!"),
            ("c8f815c8-88c2-4dea-981f-69f544d6165d", "Welcome to Hex!-0"),
            ("89e64571-42d9-44ac-bf47-320a7440eb57", "Welcome to Hex!-5"),
            ("dd0f1e20-7586-4b8e-89ae-bfe3c924625b", "Welcome to Hex!-2"),
        }

        component_keys = {
            (entry.id, entry.title)
            for entry in fetched
            if isinstance(entry, Component)
        }
        assert component_keys == {
            ("0496a2c2-8656-475d-9946-6402320779e2", "Pet Profiles"),
            ("4759f33c-1ab9-403d-92e8-9bef48de00c4", "Cancelled Orders"),
        }

    @patch("datahub.ingestion.source.hex.api.requests.get")
    def test_map_data_project(self, mock_get):
        """A ``PROJECT`` API resource is mapped to a ``Project`` model."""
        payload = {
            "id": "project1",
            "title": "Test Project",
            "description": "A test project",
            "type": "PROJECT",
            "createdAt": "2022-01-01T12:00:00.000Z",
            "lastEditedAt": "2022-01-02T12:00:00.000Z",
            "status": {"name": "Published"},
            "categories": [{"name": "Category1", "description": "A category"}],
            "sharing": {"collections": [{"collection": {"name": "Collection1"}}]},
            "creator": {"email": "creator@example.com"},
            "owner": {"email": "owner@example.com"},
            "analytics": {
                "appViews": {
                    "allTime": 100,
                    "lastSevenDays": 10,
                    "lastFourteenDays": 20,
                    "lastThirtyDays": 30,
                },
                "lastViewedAt": "2022-01-03T12:00:00.000Z",
            },
        }

        client = self._build_api()
        resource = HexApiProjectApiResource.parse_obj(payload)
        mapped = client._map_data_from_model(resource)

        # Identity and descriptive fields
        assert isinstance(mapped, Project)
        assert mapped.id == "project1"
        assert mapped.title == "Test Project"
        assert mapped.description == "A test project"

        # Timestamps are expected as timezone-aware UTC datetimes
        expected_created = datetime(2022, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
        expected_edited = datetime(2022, 1, 2, 12, 0, 0, tzinfo=timezone.utc)
        assert mapped.created_at == expected_created
        assert mapped.last_edited_at == expected_edited

        # Optional nested values: check presence first, then content
        assert mapped.status
        assert mapped.status.name == "Published"

        assert mapped.categories
        assert len(mapped.categories) == 1
        assert mapped.categories[0].name == "Category1"

        assert mapped.collections
        assert len(mapped.collections) == 1
        assert mapped.collections[0].name == "Collection1"

        assert mapped.creator
        assert mapped.creator.email == "creator@example.com"
        assert mapped.owner
        assert mapped.owner.email == "owner@example.com"

        assert mapped.analytics
        assert mapped.analytics.appviews_all_time == 100
        assert mapped.analytics.last_viewed_at == datetime(
            2022, 1, 3, 12, 0, 0, tzinfo=timezone.utc
        )

    @patch("datahub.ingestion.source.hex.api.requests.get")
    def test_map_data_component(self, mock_get):
        """A ``COMPONENT`` API resource is mapped to a ``Component`` model."""
        payload = {
            "id": "component1",
            "title": "Test Component",
            "description": "A test component",
            "type": "COMPONENT",
            "createdAt": "2022-02-01T12:00:00.000Z",
            "lastEditedAt": "2022-02-02T12:00:00.000Z",
            "status": {"name": "Draft"},
            "categories": [{"name": "Category2"}],
            "sharing": {"collections": [{"collection": {"name": "Collection2"}}]},
            "creator": {"email": "creator@example.com"},
            "owner": {"email": "owner@example.com"},
            "analytics": {
                "appViews": {
                    "allTime": 50,
                    "lastSevenDays": 5,
                    "lastFourteenDays": 10,
                    "lastThirtyDays": 15,
                },
                "lastViewedAt": "2022-02-03T12:00:00.000Z",
            },
        }

        client = self._build_api()
        resource = HexApiProjectApiResource.parse_obj(payload)
        mapped = client._map_data_from_model(resource)

        # Identity and descriptive fields
        assert isinstance(mapped, Component)
        assert mapped.id == "component1"
        assert mapped.title == "Test Component"
        assert mapped.description == "A test component"

        # Timestamps are expected as timezone-aware UTC datetimes
        expected_created = datetime(2022, 2, 1, 12, 0, 0, tzinfo=timezone.utc)
        expected_edited = datetime(2022, 2, 2, 12, 0, 0, tzinfo=timezone.utc)
        assert mapped.created_at == expected_created
        assert mapped.last_edited_at == expected_edited

        # Optional nested values: check presence first, then content
        assert mapped.status
        assert mapped.status.name == "Draft"

        assert mapped.categories
        assert len(mapped.categories) == 1
        assert mapped.categories[0].name == "Category2"

        assert mapped.collections
        assert len(mapped.collections) == 1
        assert mapped.collections[0].name == "Collection2"

        assert mapped.creator
        assert mapped.creator.email == "creator@example.com"
        assert mapped.owner
        assert mapped.owner.email == "owner@example.com"

        assert mapped.analytics
        assert mapped.analytics.appviews_all_time == 50
        assert mapped.analytics.last_viewed_at == datetime(
            2022, 2, 3, 12, 0, 0, tzinfo=timezone.utc
        )

    @patch("datahub.ingestion.source.hex.api.requests.get")
    def test_fetch_projects_failure_http_error(self, mock_get):
        """An HTTP error yields no results and one reported failure."""
        failing_response = MagicMock()
        failing_response.raise_for_status.side_effect = requests.exceptions.HTTPError(
            "500 Server Error: Internal Server Error"
        )
        mock_get.return_value = failing_response

        client = self._build_api()

        # No exception should be raised; gracefully finish with no results and proper error reporting
        fetched = list(client.fetch_projects())

        # Verify results are empty and error was reported
        assert not fetched
        assert self.report.fetch_projects_page_calls == 1

        failures = list(self.report.failures)
        assert len(failures) == 1
        failure = failures[0]
        assert failure.title
        assert failure.title == "Listing Projects and Components API request error"
        assert failure.message
        assert (
            failure.message
            == "Error fetching Projects and Components and halting metadata ingestion"
        )
        assert failure.context

    @patch("datahub.ingestion.source.hex.api.requests.get")
    @patch("datahub.ingestion.source.hex.api.HexApiProjectsListResponse.parse_obj")
    def test_fetch_projects_failure_response_validation(self, mock_parse_obj, mock_get):
        """A response that fails validation yields no results and one failure."""
        # Create a dummy http response
        dummy_response = MagicMock()
        dummy_response.json.return_value = {"whatever": "json"}
        mock_get.return_value = dummy_response
        # and simulate ValidationError when parsing the response
        mock_parse_obj.side_effect = ValidationError([], model=HexApiProjectApiResource)

        client = self._build_api()

        # No exception should be raised; gracefully finish with no results and proper error reporting
        fetched = list(client.fetch_projects())

        # Verify results are empty and error was reported
        assert not fetched
        assert self.report.fetch_projects_page_calls == 1

        failures = list(self.report.failures)
        assert len(failures) == 1
        failure = failures[0]
        assert failure.title
        assert (
            failure.title
            == "Listing Projects and Components API response parsing error"
        )
        assert failure.message
        assert (
            failure.message
            == "Error parsing API response and halting metadata ingestion"
        )
        assert failure.context

    @patch("datahub.ingestion.source.hex.api.requests.get")
    @patch("datahub.ingestion.source.hex.api.HexApiProjectsListResponse.parse_obj")
    @patch("datahub.ingestion.source.hex.api.HexApi._map_data_from_model")
    def test_fetch_projects_warning_model_mapping(
        self, mock_map_data_from_model, mock_parse_obj, mock_get
    ):
        """An item that fails mapping is skipped with a warning; the rest are kept."""
        # Create a dummy http response
        dummy_response = MagicMock()
        dummy_response.json.return_value = {"values": [{"whatever": "json"}]}
        mock_get.return_value = dummy_response

        # create a couple of dummy project items
        problem_resource = HexApiProjectApiResource(
            id="problem_item", title="Problem Item", type="PROJECT"
        )
        valid_resource = HexApiProjectApiResource(
            id="valid_item", title="Valid Item", type="PROJECT"
        )
        mock_parse_obj.return_value = HexApiProjectsListResponse(
            values=[problem_resource, valid_resource]
        )

        # and simulate an Error when mapping the response to a model
        def fake_mapper(item_data):
            assert isinstance(item_data, HexApiProjectApiResource)
            if item_data.id == "problem_item":
                raise ValueError("Invalid data structure for problem_item")
            # Any other resource maps to a stand-in for the valid item.
            return MagicMock(
                id="valid_item",
                title="Valid Item",
                type="PROJECT",
                description="A valid project",
                created_at=None,
                last_edited_at=None,
                status=None,
                categories=[],
                sharing=MagicMock(collections=[]),
                creator=None,
                owner=None,
                analytics=None,
            )

        mock_map_data_from_model.side_effect = fake_mapper

        client = self._build_api()

        # Should not raise exception, but log warning
        fetched = list(client.fetch_projects())

        # We should still get the valid item but skip the problematic one
        assert len(fetched) == 1
        assert fetched[0].id == "valid_item"

        assert self.report.fetch_projects_page_calls == 1

        warnings = list(self.report.warnings)
        assert len(warnings) == 1
        warning = warnings[0]
        assert warning.title
        assert warning.title == "Incomplete metadata"
        assert warning.message
        assert warning.message == "Incomplete metadata because of error mapping item"
        assert warning.context
|