diff --git a/ingestion/src/metadata/data_insight/processor/web_analytic_report_data_processor.py b/ingestion/src/metadata/data_insight/processor/web_analytic_report_data_processor.py index 4a35f277b07..af486ff9fb7 100644 --- a/ingestion/src/metadata/data_insight/processor/web_analytic_report_data_processor.py +++ b/ingestion/src/metadata/data_insight/processor/web_analytic_report_data_processor.py @@ -102,7 +102,7 @@ class WebAnalyticEntityViewReportDataProcessor(DataProcessor): while True: event = yield refined_data - split_url = [url for url in event.eventData.url.split("/") if url] # type: ignore + split_url = [url for url in event.eventData.url.__root__.split("/") if url] # type: ignore if not split_url or split_url[0] not in ENTITIES: continue @@ -118,9 +118,9 @@ class WebAnalyticEntityViewReportDataProcessor(DataProcessor): # if we've seen the entity previously but were not able to get # the URL we'll try again from the new event. try: - entity_href = re.search(re_pattern, event.eventData.fullUrl).group( - 1 - ) + entity_href = re.search( + re_pattern, event.eventData.fullUrl.__root__ + ).group(1) refined_data[entity_obj.fqn]["entityHref"] = entity_href except IndexError: logger.debug(f"Could not find entity Href for {entity_obj.fqn}") @@ -166,9 +166,9 @@ class WebAnalyticEntityViewReportDataProcessor(DataProcessor): ) try: - entity_href = re.search(re_pattern, event.eventData.fullUrl).group( - 1 - ) + entity_href = re.search( + re_pattern, event.eventData.fullUrl.__root__ + ).group(1) except IndexError: entity_href = None diff --git a/openmetadata-airflow-apis/openmetadata_managed_apis/workflows/ingestion/data_insight.py b/openmetadata-airflow-apis/openmetadata_managed_apis/workflows/ingestion/data_insight.py index 229740a4752..9323aa6564b 100644 --- a/openmetadata-airflow-apis/openmetadata_managed_apis/workflows/ingestion/data_insight.py +++ b/openmetadata-airflow-apis/openmetadata_managed_apis/workflows/ingestion/data_insight.py @@ -41,7 +41,6 @@ from metadata.generated.schema.metadataIngestion.workflow import WorkflowConfig from metadata.generated.schema.type.basic import ComponentConfig from metadata.ingestion.models.encoders import show_secrets_encoder from metadata.ingestion.ometa.ometa_api import OpenMetadata -from metadata.utils.constants import ES_SOURCE_TO_ES_OBJ_ARGS def data_insight_workflow(workflow_config: OpenMetadataWorkflowConfig): diff --git a/openmetadata-service/src/main/resources/json/data/analytics/webAnalyticEvents/customEvent.json b/openmetadata-service/src/main/resources/json/data/analytics/webAnalyticEvents/customEvent.json new file mode 100644 index 00000000000..fe1e1f216a4 --- /dev/null +++ b/openmetadata-service/src/main/resources/json/data/analytics/webAnalyticEvents/customEvent.json @@ -0,0 +1,6 @@ +{ + "name": "CustomEvent", + "fullyQualifiedName": "CustomEvent", + "description": "Captures web analytic custom events", + "eventType": "CustomEvent" +} diff --git a/openmetadata-spec/src/main/resources/json/schema/analytics/basic.json b/openmetadata-spec/src/main/resources/json/schema/analytics/basic.json index 7d971e45538..2bebd283ed2 100644 --- a/openmetadata-spec/src/main/resources/json/schema/analytics/basic.json +++ b/openmetadata-spec/src/main/resources/json/schema/analytics/basic.json @@ -9,8 +9,25 @@ "description": "event type", "type": "string", "enum": [ - "PageView" + "PageView", + "CustomEvent" ] + }, + "fullUrl": { + "description": "complete URL of the page", + "type": "string" + }, + "url": { + "description": "url part after the domain specification", + "type": "string" + }, + "hostname": { + "description": "domain name", + "type": "string" + }, + "sessionId": { + "description": "Unique ID identifying a session", + "$ref": "../type/basic.json#/definitions/uuid" } } } \ No newline at end of file diff --git a/openmetadata-spec/src/main/resources/json/schema/analytics/webAnalyticEventData.json b/openmetadata-spec/src/main/resources/json/schema/analytics/webAnalyticEventData.json index 17dde93360f..b4ec7148035 100644 --- a/openmetadata-spec/src/main/resources/json/schema/analytics/webAnalyticEventData.json +++ b/openmetadata-spec/src/main/resources/json/schema/analytics/webAnalyticEventData.json @@ -21,7 +21,8 @@ "eventData": { "description": "Web analytic data captured", "oneOf": [ - {"$ref": "webAnalyticEventType/pageViewEvent.json"} + {"$ref": "webAnalyticEventType/pageViewEvent.json"}, + {"$ref": "webAnalyticEventType/customEvent.json"} ] } }, diff --git a/openmetadata-spec/src/main/resources/json/schema/analytics/webAnalyticEventType/customEvent.json b/openmetadata-spec/src/main/resources/json/schema/analytics/webAnalyticEventType/customEvent.json new file mode 100644 index 00000000000..47e5c892315 --- /dev/null +++ b/openmetadata-spec/src/main/resources/json/schema/analytics/webAnalyticEventType/customEvent.json @@ -0,0 +1,43 @@ +{ + "$id": "https://open-metadata.org/schema/analytics/customEvent.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "customData", + "type": "object", + "javaType": "org.openmetadata.schema.analytics.CustomEvent", + "description": "Event tracker (e.g. clicks, etc.)", + "definitions": { + "customEventTypes": { + "description": "Type of events that can be performed", + "type": "string", + "enum": [ + "CLICK" + ] + } + }, + "properties": { + "fullUrl": { + "description": "complete URL of the page", + "$ref": "../basic.json#/definitions/fullUrl" + }, + "url": { + "description": "url part after the domain specification", + "$ref": "../basic.json#/definitions/url" + }, + "hostname": { + "description": "domain name", + "$ref": "../basic.json#/definitions/hostname" + }, + "sessionId": { + "description": "Unique ID identifying a session", + "$ref": "../basic.json#/definitions/sessionId" + }, + "eventType": { + "description": "Type of event that was performed", + "$ref": "#/definitions/customEventTypes" + }, + "eventValue": { + "description": "Value of the event", + "type": "string" + } + } +} \ No newline at end of file diff --git a/openmetadata-spec/src/main/resources/json/schema/analytics/webAnalyticEventType/pageViewEvent.json b/openmetadata-spec/src/main/resources/json/schema/analytics/webAnalyticEventType/pageViewEvent.json index 6d662e5cfde..809f889fe10 100644 --- a/openmetadata-spec/src/main/resources/json/schema/analytics/webAnalyticEventType/pageViewEvent.json +++ b/openmetadata-spec/src/main/resources/json/schema/analytics/webAnalyticEventType/pageViewEvent.json @@ -8,15 +8,15 @@ "properties": { "fullUrl": { "description": "complete URL of the page", - "type": "string" + "$ref": "../basic.json#/definitions/fullUrl" }, "url": { "description": "url part after the domain specification", - "type": "string" + "$ref": "../basic.json#/definitions/url" }, "hostname": { "description": "domain name", - "type": "string" + "$ref": "../basic.json#/definitions/hostname" }, "language": { "description": "language set on the page", @@ -32,7 +32,7 @@ }, "sessionId": { "description": "Unique ID identifying a session", - "$ref": "../../type/basic.json#/definitions/uuid" + "$ref": "../basic.json#/definitions/sessionId" }, "pageLoadTime": { "description": "time for the page to load in seconds",