mirror of
https://github.com/datahub-project/datahub.git
synced 2025-10-27 17:02:03 +00:00
fix(ingest/dremio): Fix platform_instance URN generation (#15076)
This commit is contained in:
parent
5a1569743a
commit
a865b6ba63
@ -338,10 +338,10 @@ class DremioSource(StatefulIngestionSourceBase):
|
||||
return
|
||||
|
||||
dataset_urn = make_dataset_urn_with_platform_instance(
|
||||
platform=make_data_platform_urn(self.get_platform()),
|
||||
name=f"dremio.{dataset_name}",
|
||||
env=self.config.env,
|
||||
platform=self.get_platform(),
|
||||
name=dataset_name,
|
||||
platform_instance=self.config.platform_instance,
|
||||
env=self.config.env,
|
||||
)
|
||||
|
||||
for dremio_mcp in self.dremio_aspects.populate_dataset_mcp(
|
||||
@ -421,10 +421,10 @@ class DremioSource(StatefulIngestionSourceBase):
|
||||
schema_str = ".".join(dataset_info.path)
|
||||
dataset_name = f"{schema_str}.{dataset_info.resource_name}".lower()
|
||||
dataset_urn = make_dataset_urn_with_platform_instance(
|
||||
platform=make_data_platform_urn(self.get_platform()),
|
||||
name=f"dremio.{dataset_name}",
|
||||
env=self.config.env,
|
||||
platform=self.get_platform(),
|
||||
name=dataset_name,
|
||||
platform_instance=self.config.platform_instance,
|
||||
env=self.config.env,
|
||||
)
|
||||
yield from self.profiler.get_workunits(dataset_info, dataset_urn)
|
||||
|
||||
@ -436,10 +436,10 @@ class DremioSource(StatefulIngestionSourceBase):
|
||||
"""
|
||||
upstream_urns = [
|
||||
make_dataset_urn_with_platform_instance(
|
||||
platform=make_data_platform_urn(self.get_platform()),
|
||||
name=f"dremio.{upstream_table.lower()}",
|
||||
env=self.config.env,
|
||||
platform=self.get_platform(),
|
||||
name=upstream_table.lower(),
|
||||
platform_instance=self.config.platform_instance,
|
||||
env=self.config.env,
|
||||
)
|
||||
for upstream_table in parents
|
||||
]
|
||||
@ -498,19 +498,19 @@ class DremioSource(StatefulIngestionSourceBase):
|
||||
if query.query and query.affected_dataset:
|
||||
upstream_urns = [
|
||||
make_dataset_urn_with_platform_instance(
|
||||
platform=make_data_platform_urn(self.get_platform()),
|
||||
name=f"dremio.{ds.lower()}",
|
||||
env=self.config.env,
|
||||
platform=self.get_platform(),
|
||||
name=ds.lower(),
|
||||
platform_instance=self.config.platform_instance,
|
||||
env=self.config.env,
|
||||
)
|
||||
for ds in query.queried_datasets
|
||||
]
|
||||
|
||||
downstream_urn = make_dataset_urn_with_platform_instance(
|
||||
platform=make_data_platform_urn(self.get_platform()),
|
||||
name=f"dremio.{query.affected_dataset.lower()}",
|
||||
env=self.config.env,
|
||||
platform=self.get_platform(),
|
||||
name=query.affected_dataset.lower(),
|
||||
platform_instance=self.config.platform_instance,
|
||||
env=self.config.env,
|
||||
)
|
||||
|
||||
# Add query to SqlParsingAggregator
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,4 @@
|
||||
import datetime
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
@ -6,7 +7,7 @@ from typing import Dict
|
||||
import boto3
|
||||
import pytest
|
||||
import requests
|
||||
from freezegun import freeze_time
|
||||
from time_machine import travel
|
||||
|
||||
from datahub.testing import mce_helpers
|
||||
from tests.test_helpers.click_helpers import run_datahub_cmd
|
||||
@ -14,7 +15,7 @@ from tests.test_helpers.docker_helpers import wait_for_port
|
||||
|
||||
pytestmark = pytest.mark.integration_batch_4
|
||||
|
||||
FROZEN_TIME = "2023-10-15 07:00:00"
|
||||
FROZEN_TIME = datetime.datetime(2023, 10, 15, 7, 0, tzinfo=datetime.timezone.utc)
|
||||
MINIO_PORT = 9000
|
||||
MYSQL_PORT = 3306
|
||||
|
||||
@ -469,7 +470,7 @@ def populate_minio(pytestconfig, s3_bkt):
|
||||
yield
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
@travel(FROZEN_TIME, tick=False)
|
||||
@pytest.mark.integration
|
||||
def test_dremio_ingest(
|
||||
test_resources_dir,
|
||||
@ -492,7 +493,7 @@ def test_dremio_ingest(
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
@travel(FROZEN_TIME, tick=False)
|
||||
@pytest.mark.integration
|
||||
def test_dremio_platform_instance_urns(
|
||||
test_resources_dir,
|
||||
@ -539,9 +540,12 @@ def test_dremio_platform_instance_urns(
|
||||
|
||||
# Check dataset URN structure
|
||||
if mce["entityType"] == "dataset" and "entityUrn" in mce:
|
||||
assert "test-platform.dremio" in mce["entityUrn"], (
|
||||
assert "test-platform." in mce["entityUrn"], (
|
||||
f"Platform instance missing in dataset URN: {mce['entityUrn']}"
|
||||
)
|
||||
assert "test-platform.dremio." not in mce["entityUrn"], (
|
||||
f"URN has incorrect double dremio prefix: {mce['entityUrn']}"
|
||||
)
|
||||
|
||||
# Check aspects for both datasets and containers
|
||||
if "aspectName" in mce:
|
||||
@ -573,7 +577,7 @@ def test_dremio_platform_instance_urns(
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
@travel(FROZEN_TIME, tick=False)
|
||||
@pytest.mark.integration
|
||||
def test_dremio_schema_filter(
|
||||
test_resources_dir,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user