Data insight tests addition (#8814)

* Added additional tests around web analytics and data insight

* Added playwright dependency installation

* removed -m no-ui option

* Removed playwright tests

* Added event ingestion in setUp class
This commit is contained in:
Teddy 2022-11-17 10:44:52 +01:00 committed by GitHub
parent bd9c098bc5
commit 4ab6dbc201
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 253 additions and 14 deletions

View File

@ -16,9 +16,13 @@ To be used by OpenMetadata class
from __future__ import annotations from __future__ import annotations
from typing import Optional from typing import List, Optional
from metadata.generated.schema.analytics.basic import WebAnalyticEventType
from metadata.generated.schema.analytics.reportData import ReportData from metadata.generated.schema.analytics.reportData import ReportData
from metadata.generated.schema.analytics.webAnalyticEventData import (
WebAnalyticEventData,
)
from metadata.generated.schema.api.dataInsight.kpi.createKpiRequest import ( from metadata.generated.schema.api.dataInsight.kpi.createKpiRequest import (
CreateKpiRequest, CreateKpiRequest,
) )
@ -56,6 +60,16 @@ class DataInisghtMixin:
return resp return resp
def add_web_analytic_events(
self,
event_data: WebAnalyticEventData,
) -> List[WebAnalyticEventData]:
"""Get web analytic event"""
resp = self.client.put("/analytics/webAnalyticEvent/collect", event_data.json())
return resp
def get_data_insight_report_data( def get_data_insight_report_data(
self, start_ts: int, end_ts: int, report_data_type: str self, start_ts: int, end_ts: int, report_data_type: str
) -> dict[str, list[ReportData]]: ) -> dict[str, list[ReportData]]:
@ -135,3 +149,16 @@ class DataInisghtMixin:
resp = self.client.post("/kpi", create.json()) resp = self.client.post("/kpi", create.json())
return Kpi.parse_obj(resp) return Kpi.parse_obj(resp)
def get_web_analytic_events(
self, event_type: WebAnalyticEventType, start_ts: int, end_ts: int
) -> List[WebAnalyticEventData]:
"""Get web analytic event"""
event_type_value = event_type.value
params = {"eventType": event_type_value, "startTs": start_ts, "endTs": end_ts}
resp = self.client.get("/analytics/webAnalyticEvent/collect", params)
return [WebAnalyticEventData(**data) for data in resp["data"]]

View File

@ -16,8 +16,9 @@ Validate workflow configs and filters
from __future__ import annotations from __future__ import annotations
import unittest import unittest
import uuid
from copy import deepcopy from copy import deepcopy
from datetime import datetime, time from datetime import datetime, time, timedelta
from time import sleep from time import sleep
import pytest import pytest
@ -25,7 +26,14 @@ import requests
from metadata.data_insight.api.workflow import DataInsightWorkflow from metadata.data_insight.api.workflow import DataInsightWorkflow
from metadata.data_insight.helper.data_insight_es_index import DataInsightEsIndex from metadata.data_insight.helper.data_insight_es_index import DataInsightEsIndex
from metadata.generated.schema.analytics.basic import WebAnalyticEventType
from metadata.generated.schema.analytics.reportData import ReportDataType from metadata.generated.schema.analytics.reportData import ReportDataType
from metadata.generated.schema.analytics.webAnalyticEventData import (
WebAnalyticEventData,
)
from metadata.generated.schema.analytics.webAnalyticEventType.pageViewEvent import (
PageViewData,
)
from metadata.generated.schema.api.dataInsight.kpi.createKpiRequest import ( from metadata.generated.schema.api.dataInsight.kpi.createKpiRequest import (
CreateKpiRequest, CreateKpiRequest,
) )
@ -36,6 +44,10 @@ from metadata.generated.schema.dataInsight.dataInsightChartResult import (
) )
from metadata.generated.schema.dataInsight.kpi.basic import KpiResult, KpiTarget from metadata.generated.schema.dataInsight.kpi.basic import KpiResult, KpiTarget
from metadata.generated.schema.dataInsight.kpi.kpi import Kpi from metadata.generated.schema.dataInsight.kpi.kpi import Kpi
from metadata.generated.schema.dataInsight.type.dailyActiveUsers import DailyActiveUsers
from metadata.generated.schema.dataInsight.type.pageViewsByEntities import (
PageViewsByEntities,
)
from metadata.generated.schema.dataInsight.type.percentageOfEntitiesWithDescriptionByType import ( from metadata.generated.schema.dataInsight.type.percentageOfEntitiesWithDescriptionByType import (
PercentageOfEntitiesWithDescriptionByType, PercentageOfEntitiesWithDescriptionByType,
) )
@ -58,7 +70,7 @@ data_insight_config = {
"processor": {"type": "data-insight-processor", "config": {}}, "processor": {"type": "data-insight-processor", "config": {}},
"sink": { "sink": {
"type": "elasticsearch", "type": "elasticsearch",
"config": {"es_host": "localhost", "es_port": 9200, "recreate_indexes": True}, "config": {"es_host": "localhost", "es_port": 9200, "recreate_indexes": False},
}, },
"workflowConfig": { "workflowConfig": {
"openMetadataServerConfig": { "openMetadataServerConfig": {
@ -71,6 +83,41 @@ data_insight_config = {
}, },
} }
WEB_EVENT_DATA = [
WebAnalyticEventData(
eventId=None,
timestamp=int((datetime.utcnow() - timedelta(days=1)).timestamp() * 1000),
eventType=WebAnalyticEventType.PageView,
eventData=PageViewData(
fullUrl="http://localhost:8585/table/sample_data.ecommerce_db.shopify.%22dim.shop%22",
url="/table/sample_data.ecommerce_db.shopify.%22dim.shop%22",
hostname="localhost",
language="en-US",
screenSize="1280x720",
userId=uuid.uuid4(),
sessionId=uuid.uuid4(),
pageLoadTime=0.0,
referrer="",
),
),
WebAnalyticEventData(
eventId=None,
timestamp=int((datetime.utcnow() - timedelta(days=1)).timestamp() * 1000),
eventType=WebAnalyticEventType.PageView,
eventData=PageViewData(
fullUrl="http://localhost:8585/table/mysql.default.airflow_db.dag_run/profiler",
url="/table/mysql.default.airflow_db.dag_run/profiler",
hostname="localhost",
language="en-US",
screenSize="1280x720",
userId=uuid.uuid4(),
sessionId=uuid.uuid4(),
pageLoadTime=0.0,
referrer="",
),
),
]
class DataInsightWorkflowTests(unittest.TestCase): class DataInsightWorkflowTests(unittest.TestCase):
"""Test class for data insight workflow validation""" """Test class for data insight workflow validation"""
@ -111,6 +158,9 @@ class DataInsightWorkflowTests(unittest.TestCase):
cls.metadata.create_kpi(create) cls.metadata.create_kpi(create)
for event in WEB_EVENT_DATA:
cls.metadata.add_web_analytic_events(event)
def test_create_method(self): def test_create_method(self):
"""Test validation of the workflow config is properly happening""" """Test validation of the workflow config is properly happening"""
DataInsightWorkflow.create(data_insight_config) DataInsightWorkflow.create(data_insight_config)
@ -128,21 +178,32 @@ class DataInsightWorkflowTests(unittest.TestCase):
sleep(1) # wait for data to be available sleep(1) # wait for data to be available
# Test the indexes have been created as expected and the data have been loaded # Test the indexes have been created as expected and the data have been loaded
entity_report_indexes = requests.get( entity_report_docs = requests.get(
"http://localhost:9200/entity_report_data_index/_search", timeout=30 "http://localhost:9200/entity_report_data_index/_search", timeout=30
) )
requests.get( web_analytic_user_activity_report_data_docs = requests.get(
"http://localhost:9200/entity_report_data_index/_search", timeout=30 "http://localhost:9200/web_analytic_user_activity_report_data_index/_search",
) timeout=30,
requests.get( )
"http://localhost:9200/web_analytic_entity_view_report_data/_search", web_analytic_entity_view_report_data_docs = requests.get(
"http://localhost:9200/web_analytic_entity_view_report_data_index/_search",
timeout=30, timeout=30,
) )
assert (
entity_report_indexes.json()["hits"]["total"]["value"] > 0
) # check data have been correctly indexed in ES
# test report endpoint is returning data # check data have been correctly indexed in ES
# --------------------------------------------
assert entity_report_docs.json()["hits"]["total"]["value"] > 0
assert (
web_analytic_user_activity_report_data_docs.json()["hits"]["total"]["value"]
> 0
)
assert (
web_analytic_entity_view_report_data_docs.json()["hits"]["total"]["value"]
> 0
)
# test report endpoints are returning data
# --------------------------------------
report_data = self.metadata.get_data_insight_report_data( report_data = self.metadata.get_data_insight_report_data(
self.start_ts, self.start_ts,
self.end_ts, self.end_ts,
@ -150,7 +211,22 @@ class DataInsightWorkflowTests(unittest.TestCase):
) )
assert report_data.get("data") assert report_data.get("data")
# test data insight aggregation endpoint is returning data web_entity_analytics = self.metadata.get_data_insight_report_data(
self.start_ts,
self.end_ts,
ReportDataType.WebAnalyticEntityViewReportData.value,
)
assert web_entity_analytics.get("data")
web_user_analytics = self.metadata.get_data_insight_report_data(
self.start_ts,
self.end_ts,
ReportDataType.WebAnalyticUserActivityReportData.value,
)
assert web_user_analytics.get("data")
# test data insight aggregation endpoints are returning data
# ----------------------------------------------------------
resp = self.metadata.get_aggregated_data_insight_results( resp = self.metadata.get_aggregated_data_insight_results(
start_ts=self.start_ts, start_ts=self.start_ts,
end_ts=self.end_ts, end_ts=self.end_ts,
@ -172,6 +248,26 @@ class DataInsightWorkflowTests(unittest.TestCase):
assert resp.data assert resp.data
assert isinstance(resp.data[0], PercentageOfEntitiesWithOwnerByType) assert isinstance(resp.data[0], PercentageOfEntitiesWithOwnerByType)
resp = self.metadata.get_aggregated_data_insight_results(
start_ts=self.start_ts,
end_ts=self.end_ts,
data_insight_chart_nane=DataInsightChartType.DailyActiveUsers.value,
data_report_index=DataInsightEsIndex.WebAnalyticUserActivityReportData.value,
)
assert resp.data
assert isinstance(resp.data[0], DailyActiveUsers)
resp = self.metadata.get_aggregated_data_insight_results(
start_ts=self.start_ts,
end_ts=self.end_ts,
data_insight_chart_nane=DataInsightChartType.PageViewsByEntities.value,
data_report_index=DataInsightEsIndex.WebAnalyticEntityViewReportData.value,
)
assert resp.data
assert isinstance(resp.data[0], PageViewsByEntities)
def test_get_kpis(self): def test_get_kpis(self):
"""test Kpis are returned as expected""" """test Kpis are returned as expected"""
# TO DO: Add KPI creation step and deletion (setUp + tearDown) # TO DO: Add KPI creation step and deletion (setUp + tearDown)

View File

@ -0,0 +1,116 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Test web analytics events
"""
from __future__ import annotations
import unittest
import uuid
from datetime import datetime, timedelta
from metadata.generated.schema.analytics.basic import WebAnalyticEventType
from metadata.generated.schema.analytics.webAnalyticEventData import (
WebAnalyticEventData,
)
from metadata.generated.schema.analytics.webAnalyticEventType.pageViewEvent import (
PageViewData,
)
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
OpenMetadataConnection,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.utils.time_utils import (
get_beginning_of_day_timestamp_mill,
get_end_of_day_timestamp_mill,
)
data_insight_config = {
"source": {
"type": "dataInsight",
"serviceName": "dataInsightWorkflow",
"sourceConfig": {"config": {"type": "dataInsight"}},
},
"processor": {"type": "data-insight-processor", "config": {}},
"sink": {
"type": "elasticsearch",
"config": {},
},
"workflowConfig": {
"openMetadataServerConfig": {
"hostPort": "http://localhost:8585/api",
"authProvider": "openmetadata",
"securityConfig": {
"jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg" # pylint: disable=line-too-long
},
}
},
}
class WebAnalyticsEndpointsTests(unittest.TestCase):
"""Test class for data insight workflow validation"""
@classmethod
def setUpClass(cls) -> None:
"""Set up om client for the test class"""
cls.metadata = OpenMetadata(
OpenMetadataConnection.parse_obj(
data_insight_config["workflowConfig"]["openMetadataServerConfig"]
)
)
cls.start_ts = get_beginning_of_day_timestamp_mill(days=1)
cls.end_ts = get_end_of_day_timestamp_mill(days=1)
cls.yesterday = int((datetime.utcnow() - timedelta(days=1)).timestamp() * 1000)
def test_web_analytic_events(self):
"""Test web analytic get function"""
user_id = uuid.uuid4()
session_id = uuid.uuid4()
event_data = WebAnalyticEventData(
eventId=None,
timestamp=self.yesterday,
eventType=WebAnalyticEventType.PageView,
eventData=PageViewData(
fullUrl="http://localhost:8585/table/sample_data.ecommerce_db.shopify.%22dim.shop%22",
url="/table/sample_data.ecommerce_db.shopify.dim.foo",
hostname="localhost",
language="en-US",
screenSize="1280x720",
userId=user_id,
sessionId=session_id,
pageLoadTime=0.0,
referrer="",
),
)
self.metadata.add_web_analytic_events(
event_data,
)
web_events = self.metadata.get_web_analytic_events(
WebAnalyticEventType.PageView, self.start_ts, self.end_ts
)
test_event = next(
(
web_event
for web_event in web_events
if web_event.eventData.userId.__root__ == user_id
),
None,
)
assert test_event