chore(ingest): cleanup various methods (#9221)

This commit is contained in:
Harshal Sheth 2023-11-13 21:51:11 -05:00 committed by GitHub
parent 19aa215068
commit 906a5b91a8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 7 additions and 10 deletions

View File

@ -1,4 +1,4 @@
#!/bin/sh
#!/bin/bash
SCRIPT_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]:-$0}" )" >/dev/null && pwd )"

View File

@ -7,11 +7,10 @@ import re
import sys
import textwrap
from importlib.metadata import metadata, requires
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
from typing import Any, Dict, Iterable, List, Optional
import click
from pydantic import BaseModel, Field
from pydantic.dataclasses import dataclass
from datahub.configuration.common import ConfigModel
from datahub.ingestion.api.decorators import (
@ -94,7 +93,6 @@ class FieldRow(BaseModel):
@staticmethod
def map_field_path_to_components(field_path: str) -> List[Component]:
m = re.match(FieldRow._V2_FIELD_PATH_TOKEN_MATCHER_PREFIX, field_path)
v = re.match(FieldRow._V2_FIELD_PATH_FIELD_NAME_MATCHER, field_path)
components: List[FieldRow.Component] = []
@ -197,7 +195,7 @@ class FieldRow(BaseModel):
# Use a non-breaking space so the checkbox is not wrapped
# onto a new line.
if not self.parent: # None and empty string both count
return f'&nbsp;<abbr title="Required">✅</abbr>'
return '&nbsp;<abbr title="Required">✅</abbr>'
else:
return f'&nbsp;<abbr title="Required if {self.parent} is set">❓</abbr>'
else:
@ -356,7 +354,6 @@ def priority_value(path: str) -> str:
def gen_md_table_from_struct(schema_dict: Dict[str, Any]) -> List[str]:
from datahub.ingestion.extractor.json_schema_util import JsonSchemaTranslator
# we don't want default field values to be injected into the description of the field
@ -460,7 +457,6 @@ def get_additional_deps_for_extra(extra_name: str) -> List[str]:
def relocate_path(orig_path: str, relative_path: str, relocated_path: str) -> str:
newPath = os.path.join(os.path.dirname(orig_path), relative_path)
assert os.path.exists(newPath)
@ -515,7 +511,6 @@ def generate(
if extra_docs:
for path in glob.glob(f"{extra_docs}/**/*[.md|.yaml|.yml]", recursive=True):
m = re.search("/docs/sources/(.*)/(.*).md", path)
if m:
platform_name = m.group(1).lower()
@ -741,7 +736,7 @@ def generate(
i += 1
f.write(f"---\nsidebar_position: {i}\n---\n\n")
f.write(
f"import Tabs from '@theme/Tabs';\nimport TabItem from '@theme/TabItem';\n\n"
"import Tabs from '@theme/Tabs';\nimport TabItem from '@theme/TabItem';\n\n"
)
f.write(f"# {platform_docs['name']}\n")

View File

@ -828,7 +828,7 @@ class LookerExplore:
)
else:
logger.warning(
f"Failed to extract explore {explore_name} from model {model}.", e
f"Failed to extract explore {explore_name} from model {model}: {e}"
)
except AssertionError:

View File

@ -218,6 +218,8 @@ class ModeSource(Source):
if creator is not None:
modified_actor = builder.make_user_urn(creator)
if report_info.get("last_saved_at") is None:
# Sometimes Mode returns null for last_saved_at.
# In that case, we use the created_at timestamp instead.
report_info["last_saved_at"] = report_info.get("created_at")
modified_ts = int(