mirror of
https://github.com/datahub-project/datahub.git
synced 2025-10-11 00:48:39 +00:00
fix(ingest): run sqllineage in process by default (#11650)
This commit is contained in:
parent
b8144699fd
commit
8b42ac8cde
@ -124,9 +124,6 @@ class LookMLSourceConfig(
|
||||
description="List of regex patterns for LookML views to include in the extraction.",
|
||||
)
|
||||
parse_table_names_from_sql: bool = Field(True, description="See note below.")
|
||||
sql_parser: str = Field(
|
||||
"datahub.utilities.sql_parser.DefaultSQLParser", description="See note below."
|
||||
)
|
||||
api: Optional[LookerAPIConfig]
|
||||
project_name: Optional[str] = Field(
|
||||
None,
|
||||
|
@ -2,7 +2,6 @@ import logging
|
||||
import math
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from multiprocessing.pool import ThreadPool
|
||||
from typing import Dict, Iterable, List, Optional, Set, Type
|
||||
|
||||
import dateutil.parser as dp
|
||||
@ -43,6 +42,7 @@ from datahub.metadata.schema_classes import (
|
||||
from datahub.utilities.lossy_collections import LossyDict, LossyList
|
||||
from datahub.utilities.perf_timer import PerfTimer
|
||||
from datahub.utilities.sql_parser import SQLParser
|
||||
from datahub.utilities.threaded_iterator_executor import ThreadedIteratorExecutor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.INFO)
|
||||
@ -646,11 +646,11 @@ class RedashSource(Source):
|
||||
self.report.total_dashboards = total_dashboards
|
||||
self.report.max_page_dashboards = max_page
|
||||
|
||||
dash_exec_pool = ThreadPool(self.config.parallelism)
|
||||
for response in dash_exec_pool.imap_unordered(
|
||||
self._process_dashboard_response, range(1, max_page + 1)
|
||||
):
|
||||
yield from response
|
||||
yield from ThreadedIteratorExecutor.process(
|
||||
self._process_dashboard_response,
|
||||
[(page,) for page in range(1, max_page + 1)],
|
||||
max_workers=self.config.parallelism,
|
||||
)
|
||||
|
||||
def _get_chart_type_from_viz_data(self, viz_data: Dict) -> str:
|
||||
"""
|
||||
@ -769,11 +769,12 @@ class RedashSource(Source):
|
||||
logger.info(f"/api/queries total count {total_queries} and max page {max_page}")
|
||||
self.report.total_queries = total_queries
|
||||
self.report.max_page_queries = max_page
|
||||
chart_exec_pool = ThreadPool(self.config.parallelism)
|
||||
for response in chart_exec_pool.imap_unordered(
|
||||
self._process_query_response, range(1, max_page + 1)
|
||||
):
|
||||
yield from response
|
||||
|
||||
yield from ThreadedIteratorExecutor.process(
|
||||
self._process_query_response,
|
||||
[(page,) for page in range(1, max_page + 1)],
|
||||
max_workers=self.config.parallelism,
|
||||
)
|
||||
|
||||
def add_config_to_report(self) -> None:
|
||||
self.report.api_page_limit = self.config.api_page_limit
|
||||
|
@ -46,7 +46,7 @@ class SqlLineageSQLParser(SQLParser):
|
||||
def __init__(
|
||||
self,
|
||||
sql_query: str,
|
||||
use_external_process: bool = True,
|
||||
use_external_process: bool = False,
|
||||
use_raw_names: bool = False,
|
||||
) -> None:
|
||||
super().__init__(sql_query, use_external_process)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -10,7 +10,6 @@ from deepdiff import DeepDiff
|
||||
from freezegun import freeze_time
|
||||
from looker_sdk.sdk.api40.models import DBConnection
|
||||
|
||||
from datahub.configuration.common import PipelineExecutionError
|
||||
from datahub.ingestion.run.pipeline import Pipeline
|
||||
from datahub.ingestion.source.file import read_metadata_file
|
||||
from datahub.ingestion.source.looker.looker_template_language import (
|
||||
@ -518,53 +517,6 @@ def ingestion_test(
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_lookml_bad_sql_parser(pytestconfig, tmp_path, mock_time):
|
||||
"""Incorrect specification of sql parser should not fail ingestion"""
|
||||
test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml"
|
||||
mce_out = "lookml_mces_badsql_parser.json"
|
||||
pipeline = Pipeline.create(
|
||||
{
|
||||
"run_id": "lookml-test",
|
||||
"source": {
|
||||
"type": "lookml",
|
||||
"config": {
|
||||
"base_folder": str(test_resources_dir / "lkml_samples"),
|
||||
"connection_to_platform_map": {
|
||||
"my_connection": {
|
||||
"platform": "snowflake",
|
||||
"default_db": "default_db",
|
||||
"default_schema": "default_schema",
|
||||
}
|
||||
},
|
||||
"parse_table_names_from_sql": True,
|
||||
"project_name": "lkml_samples",
|
||||
"sql_parser": "bad.sql.Parser",
|
||||
"emit_reachable_views_only": False,
|
||||
"process_refinements": False,
|
||||
},
|
||||
},
|
||||
"sink": {
|
||||
"type": "file",
|
||||
"config": {
|
||||
"filename": f"{tmp_path}/{mce_out}",
|
||||
},
|
||||
},
|
||||
}
|
||||
)
|
||||
pipeline.run()
|
||||
pipeline.pretty_print_summary()
|
||||
pipeline.raise_from_status(raise_warnings=False)
|
||||
with pytest.raises(PipelineExecutionError): # we expect the source to have warnings
|
||||
pipeline.raise_from_status(raise_warnings=True)
|
||||
|
||||
mce_helpers.check_golden_file(
|
||||
pytestconfig,
|
||||
output_path=tmp_path / mce_out,
|
||||
golden_path=test_resources_dir / mce_out,
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_lookml_git_info(pytestconfig, tmp_path, mock_time):
|
||||
"""Add github info to config"""
|
||||
|
Loading…
x
Reference in New Issue
Block a user