mirror of
https://github.com/datahub-project/datahub.git
synced 2025-06-27 05:03:31 +00:00
fix(ingest): warn when API tracing is unexpectedly inactive (#13007)
This commit is contained in:
parent
c9999d4cf0
commit
29e09185eb
@ -74,6 +74,8 @@ filterwarnings =
|
|||||||
ignore::datahub.configuration.pydantic_migration_helpers.PydanticDeprecatedSince20
|
ignore::datahub.configuration.pydantic_migration_helpers.PydanticDeprecatedSince20
|
||||||
ignore::datahub.configuration.common.ConfigurationWarning
|
ignore::datahub.configuration.common.ConfigurationWarning
|
||||||
ignore:The new datahub SDK:datahub.errors.ExperimentalWarning
|
ignore:The new datahub SDK:datahub.errors.ExperimentalWarning
|
||||||
|
# We should not be unexpectedly seeing API tracing warnings.
|
||||||
|
error::datahub.errors.APITracingWarning
|
||||||
|
|
||||||
[coverage:run]
|
[coverage:run]
|
||||||
# Because of some quirks in the way setup.cfg, coverage.py, pytest-cov,
|
# Because of some quirks in the way setup.cfg, coverage.py, pytest-cov,
|
||||||
|
@ -125,9 +125,7 @@ def parse_ts_millis(ts: Optional[float]) -> Optional[datetime]:
|
|||||||
|
|
||||||
|
|
||||||
def make_data_platform_urn(platform: str) -> str:
|
def make_data_platform_urn(platform: str) -> str:
|
||||||
if platform.startswith("urn:li:dataPlatform:"):
|
return DataPlatformUrn(platform).urn()
|
||||||
return platform
|
|
||||||
return DataPlatformUrn.create_from_id(platform).urn()
|
|
||||||
|
|
||||||
|
|
||||||
def make_dataset_urn(platform: str, name: str, env: str = DEFAULT_ENV) -> str:
|
def make_dataset_urn(platform: str, name: str, env: str = DEFAULT_ENV) -> str:
|
||||||
|
@ -1,17 +1,21 @@
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import warnings
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Dict, List, Optional, Sequence, Union
|
from typing import Dict, List, Optional, Sequence, Union
|
||||||
|
|
||||||
from requests import Response
|
from requests import Response
|
||||||
|
|
||||||
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
||||||
|
from datahub.errors import APITracingWarning
|
||||||
from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
|
from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
|
||||||
MetadataChangeProposal,
|
MetadataChangeProposal,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_TRACE_HEADER_NAME = "traceparent"
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class TraceData:
|
class TraceData:
|
||||||
@ -25,14 +29,11 @@ class TraceData:
|
|||||||
raise TypeError("data must be a dictionary")
|
raise TypeError("data must be a dictionary")
|
||||||
|
|
||||||
|
|
||||||
def _extract_trace_id(
|
def _extract_trace_id(response: Response) -> Optional[str]:
|
||||||
response: Response, trace_header: str = "traceparent"
|
|
||||||
) -> Optional[str]:
|
|
||||||
"""
|
"""
|
||||||
Extract trace ID from response headers.
|
Extract trace ID from response headers.
|
||||||
Args:
|
Args:
|
||||||
response: HTTP response object
|
response: HTTP response object
|
||||||
trace_header: Name of the trace header to use
|
|
||||||
Returns:
|
Returns:
|
||||||
Trace ID if found and response is valid, None otherwise
|
Trace ID if found and response is valid, None otherwise
|
||||||
"""
|
"""
|
||||||
@ -40,9 +41,17 @@ def _extract_trace_id(
|
|||||||
logger.debug(f"Invalid status code: {response.status_code}")
|
logger.debug(f"Invalid status code: {response.status_code}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
trace_id = response.headers.get(trace_header)
|
trace_id = response.headers.get(_TRACE_HEADER_NAME)
|
||||||
if not trace_id:
|
if not trace_id:
|
||||||
logger.debug(f"Missing trace header: {trace_header}")
|
# This will only be printed if
|
||||||
|
# 1. we're in async mode (checked by the caller)
|
||||||
|
# 2. the server did not return a trace ID
|
||||||
|
logger.debug(f"Missing trace header: {_TRACE_HEADER_NAME}")
|
||||||
|
warnings.warn(
|
||||||
|
"No trace ID found in response headers. API tracing is not active - likely due to an outdated server version.",
|
||||||
|
APITracingWarning,
|
||||||
|
stacklevel=3,
|
||||||
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return trace_id
|
return trace_id
|
||||||
@ -51,20 +60,19 @@ def _extract_trace_id(
|
|||||||
def extract_trace_data(
|
def extract_trace_data(
|
||||||
response: Response,
|
response: Response,
|
||||||
aspects_to_trace: Optional[List[str]] = None,
|
aspects_to_trace: Optional[List[str]] = None,
|
||||||
trace_header: str = "traceparent",
|
|
||||||
) -> Optional[TraceData]:
|
) -> Optional[TraceData]:
|
||||||
"""
|
"""Extract trace data from a response object.
|
||||||
Extract trace data from a response object.
|
|
||||||
|
If we run into a JSONDecodeError, we'll log an error and return None.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
response: HTTP response object
|
response: HTTP response object
|
||||||
aspects_to_trace: Optional list of aspect names to extract. If None, extracts all aspects.
|
aspects_to_trace: Optional list of aspect names to extract. If None, extracts all aspects.
|
||||||
trace_header: Name of the trace header to use (default: "traceparent")
|
|
||||||
Returns:
|
Returns:
|
||||||
TraceData object if successful, None otherwise
|
TraceData object if successful, None otherwise
|
||||||
Raises:
|
|
||||||
JSONDecodeError: If response body cannot be decoded as JSON
|
|
||||||
"""
|
"""
|
||||||
trace_id = _extract_trace_id(response, trace_header)
|
trace_id = _extract_trace_id(response)
|
||||||
if not trace_id:
|
if not trace_id:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -104,19 +112,18 @@ def extract_trace_data_from_mcps(
|
|||||||
response: Response,
|
response: Response,
|
||||||
mcps: Sequence[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]],
|
mcps: Sequence[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]],
|
||||||
aspects_to_trace: Optional[List[str]] = None,
|
aspects_to_trace: Optional[List[str]] = None,
|
||||||
trace_header: str = "traceparent",
|
|
||||||
) -> Optional[TraceData]:
|
) -> Optional[TraceData]:
|
||||||
"""
|
"""Extract trace data from a response object and populate data from provided MCPs.
|
||||||
Extract trace data from a response object and populate data from provided MCPs.
|
|
||||||
Args:
|
Args:
|
||||||
response: HTTP response object used only for trace_id extraction
|
response: HTTP response object used only for trace_id extraction
|
||||||
mcps: List of MCP URN and aspect data
|
mcps: List of MCP URN and aspect data
|
||||||
aspects_to_trace: Optional list of aspect names to extract. If None, extracts all aspects.
|
aspects_to_trace: Optional list of aspect names to extract. If None, extracts all aspects.
|
||||||
trace_header: Name of the trace header to use (default: "traceparent")
|
|
||||||
Returns:
|
Returns:
|
||||||
TraceData object if successful, None otherwise
|
TraceData object if successful, None otherwise
|
||||||
"""
|
"""
|
||||||
trace_id = _extract_trace_id(response, trace_header)
|
trace_id = _extract_trace_id(response)
|
||||||
if not trace_id:
|
if not trace_id:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
import warnings
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
@ -50,6 +51,7 @@ from datahub.emitter.response_helper import (
|
|||||||
extract_trace_data_from_mcps,
|
extract_trace_data_from_mcps,
|
||||||
)
|
)
|
||||||
from datahub.emitter.serialization_helper import pre_json_transform
|
from datahub.emitter.serialization_helper import pre_json_transform
|
||||||
|
from datahub.errors import APITracingWarning
|
||||||
from datahub.ingestion.api.closeable import Closeable
|
from datahub.ingestion.api.closeable import Closeable
|
||||||
from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
|
from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
|
||||||
MetadataChangeEvent,
|
MetadataChangeEvent,
|
||||||
@ -749,6 +751,12 @@ class DataHubRestEmitter(Closeable, Emitter):
|
|||||||
trace_flag if trace_flag is not None else self._default_trace_mode
|
trace_flag if trace_flag is not None else self._default_trace_mode
|
||||||
)
|
)
|
||||||
resolved_async_flag = async_flag if async_flag is not None else async_default
|
resolved_async_flag = async_flag if async_flag is not None else async_default
|
||||||
|
if resolved_trace_flag and not resolved_async_flag:
|
||||||
|
warnings.warn(
|
||||||
|
"API tracing is only available with async ingestion. For sync mode, API errors will be surfaced as exceptions.",
|
||||||
|
APITracingWarning,
|
||||||
|
stacklevel=3,
|
||||||
|
)
|
||||||
return resolved_trace_flag and resolved_async_flag
|
return resolved_trace_flag and resolved_async_flag
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
|
@ -33,3 +33,7 @@ class MultipleSubtypesWarning(Warning):
|
|||||||
|
|
||||||
class ExperimentalWarning(Warning):
|
class ExperimentalWarning(Warning):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class APITracingWarning(Warning):
|
||||||
|
pass
|
||||||
|
@ -2,6 +2,7 @@ import json
|
|||||||
from typing import Any
|
from typing import Any
|
||||||
from unittest.mock import Mock
|
from unittest.mock import Mock
|
||||||
|
|
||||||
|
import pytest
|
||||||
from requests import Response
|
from requests import Response
|
||||||
|
|
||||||
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
||||||
@ -9,6 +10,7 @@ from datahub.emitter.response_helper import (
|
|||||||
extract_trace_data,
|
extract_trace_data,
|
||||||
extract_trace_data_from_mcps,
|
extract_trace_data_from_mcps,
|
||||||
)
|
)
|
||||||
|
from datahub.errors import APITracingWarning
|
||||||
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeProposal
|
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeProposal
|
||||||
|
|
||||||
|
|
||||||
@ -41,7 +43,8 @@ def test_invalid_status_code():
|
|||||||
def test_missing_trace_header():
|
def test_missing_trace_header():
|
||||||
"""Test that missing trace header returns None"""
|
"""Test that missing trace header returns None"""
|
||||||
response = create_response(status_code=200, headers={}, json_data=[])
|
response = create_response(status_code=200, headers={}, json_data=[])
|
||||||
result = extract_trace_data(response)
|
with pytest.warns(APITracingWarning):
|
||||||
|
result = extract_trace_data(response)
|
||||||
assert result is None
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
@ -138,22 +141,6 @@ def test_missing_urn():
|
|||||||
assert "test:2" in result.data
|
assert "test:2" in result.data
|
||||||
|
|
||||||
|
|
||||||
def test_custom_trace_header():
|
|
||||||
"""Test using a custom trace header"""
|
|
||||||
response = create_response(
|
|
||||||
status_code=200,
|
|
||||||
headers={"custom-trace": "test-trace-id"},
|
|
||||||
json_data=[{"urn": "test:1", "status": {"removed": False}}],
|
|
||||||
)
|
|
||||||
|
|
||||||
result = extract_trace_data(response, trace_header="custom-trace")
|
|
||||||
|
|
||||||
assert result is not None
|
|
||||||
assert result.trace_id == "test-trace-id"
|
|
||||||
assert "test:1" in result.data
|
|
||||||
assert len(result.data["test:1"]) == 1
|
|
||||||
|
|
||||||
|
|
||||||
def test_mcps_invalid_status_code():
|
def test_mcps_invalid_status_code():
|
||||||
"""Test that non-200 status codes return None for MCPs"""
|
"""Test that non-200 status codes return None for MCPs"""
|
||||||
response = create_response(
|
response = create_response(
|
||||||
@ -168,7 +155,8 @@ def test_mcps_missing_trace_header():
|
|||||||
"""Test that missing trace header returns None for MCPs"""
|
"""Test that missing trace header returns None for MCPs"""
|
||||||
response = create_response(status_code=200, headers={}, json_data=[])
|
response = create_response(status_code=200, headers={}, json_data=[])
|
||||||
mcps = [create_mcp("urn:test:1", "testAspect")]
|
mcps = [create_mcp("urn:test:1", "testAspect")]
|
||||||
result = extract_trace_data_from_mcps(response, mcps)
|
with pytest.warns(APITracingWarning):
|
||||||
|
result = extract_trace_data_from_mcps(response, mcps)
|
||||||
assert result is None
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
|
@ -16,6 +16,7 @@ from datahub.emitter.rest_emitter import (
|
|||||||
DatahubRestEmitter,
|
DatahubRestEmitter,
|
||||||
logger,
|
logger,
|
||||||
)
|
)
|
||||||
|
from datahub.errors import APITracingWarning
|
||||||
from datahub.metadata.com.linkedin.pegasus2avro.common import (
|
from datahub.metadata.com.linkedin.pegasus2avro.common import (
|
||||||
Status,
|
Status,
|
||||||
)
|
)
|
||||||
@ -514,10 +515,14 @@ def test_openapi_emitter_missing_trace_header(openapi_emitter):
|
|||||||
aspect=Status(removed=False),
|
aspect=Status(removed=False),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Should not raise exception but log warning
|
# Should not raise exception but log a warning.
|
||||||
openapi_emitter.emit_mcp(
|
with pytest.warns(APITracingWarning):
|
||||||
item, async_flag=True, trace_flag=True, trace_timeout=timedelta(seconds=10)
|
openapi_emitter.emit_mcp(
|
||||||
)
|
item,
|
||||||
|
async_flag=True,
|
||||||
|
trace_flag=True,
|
||||||
|
trace_timeout=timedelta(seconds=10),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_openapi_emitter_invalid_status_code(openapi_emitter):
|
def test_openapi_emitter_invalid_status_code(openapi_emitter):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user