docs(ingest): improve doc gen, docs for snowflake, looker (#5867)

commit d911b15da5 (parent 6f09f1025e)
@@ -1,23 +0,0 @@
#### Configuration Notes

See the [Looker authentication docs](https://docs.looker.com/reference/api-and-integration/api-auth#authentication_with_an_sdk) for the steps to create a client ID and secret.

You need to provide the following permissions for ingestion to work correctly.

```
access_data
explore
manage_models
see_datagroups
see_lookml
see_lookml_dashboards
see_looks
see_pdts
see_queries
see_schedules
see_sql
see_system_activity
see_user_dashboards
see_users
```

Here is an example permission set after configuration.


metadata-ingestion/docs/sources/looker/looker_pre.md (new file, 62 lines)
@@ -0,0 +1,62 @@
### Pre-Requisites

#### Set up the right permissions

You need to provide the following permissions for ingestion to work correctly.

```
access_data
explore
manage_models
see_datagroups
see_lookml
see_lookml_dashboards
see_looks
see_pdts
see_queries
see_schedules
see_sql
see_system_activity
see_user_dashboards
see_users
```

Here is an example permission set after configuration.



#### Get an API key

You need to get an API key for the account with the above privileges to perform ingestion. See the [Looker authentication docs](https://docs.looker.com/reference/api-and-integration/api-auth#authentication_with_an_sdk) for the steps to create a client ID and secret.

### Ingestion through UI

The following video shows you how to get started with ingesting Looker metadata through the UI.

:::note

You will need to run `lookml` ingestion through the CLI after you have ingested Looker metadata through the UI. Otherwise you will not be able to see Looker Views and their lineage to your warehouse tables.

:::

<div
  style={{
    position: "relative",
    paddingBottom: "57.692307692307686%",
    height: 0
  }}
>
  <iframe
    src="https://www.loom.com/embed/b8b9654e02714d20a44122cc1bffc1bb"
    frameBorder={0}
    webkitallowfullscreen=""
    mozallowfullscreen=""
    allowFullScreen=""
    style={{
      position: "absolute",
      top: 0,
      left: 0,
      width: "100%",
      height: "100%"
    }}
  />
</div>
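To make the two steps above concrete, here is a minimal programmatic sketch (not part of this commit) of where the client ID and secret end up. It assumes the standard `looker` source config keys and DataHub's `Pipeline` runner; all endpoint values are placeholders:

```python
# A minimal sketch, assuming the looker source's base_url/client_id/client_secret
# config keys; the URLs and credentials below are placeholders.
from datahub.ingestion.run.pipeline import Pipeline

pipeline = Pipeline.create(
    {
        "source": {
            "type": "looker",
            "config": {
                "base_url": "https://company.cloud.looker.com",  # placeholder
                "client_id": "<client id from the API key step>",
                "client_secret": "<client secret from the API key step>",
            },
        },
        "sink": {
            "type": "datahub-rest",
            "config": {"server": "http://localhost:8080"},  # placeholder GMS
        },
    }
)
pipeline.run()
pipeline.raise_from_status()  # fail loudly if ingestion reported errors
```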
@@ -1,13 +0,0 @@
#### Configuration Notes

See the [Looker authentication docs](https://docs.looker.com/reference/api-and-integration/api-auth#authentication_with_an_sdk) for the steps to create a client ID and secret.
You need to ensure that the API key is attached to a user that has Admin privileges. If that is not possible, read the configuration section to provide an offline specification of the `connection_to_platform_map` and the `project_name`.

:::note
The integration can use an SQL parser to try to parse the tables the views depends on.
:::
This parsing is disabled by default,
but can be enabled by setting `parse_table_names_from_sql: True`. The default parser is based on the [`sqllineage`](https://pypi.org/project/sqllineage/) package.
As this package doesn't officially support all the SQL dialects that Looker supports, the result might not be correct. You can, however, implement a
custom parser and take it into use by setting the `sql_parser` configuration value. A custom SQL parser must inherit from `datahub.utilities.sql_parser.SQLParser`
and must be made available to Datahub by ,for example, installing it. The configuration then needs to be set to `module_name.ClassName` of the parser.
metadata-ingestion/docs/sources/looker/lookml_post.md (new file, 11 lines)
@@ -0,0 +1,11 @@
#### Configuration Notes

:::note

The integration can use an SQL parser to try to parse the tables the views depend on.

:::

This parsing is disabled by default, but can be enabled by setting `parse_table_names_from_sql: True`. The default parser is based on the [`sqllineage`](https://pypi.org/project/sqllineage/) package.
As this package doesn't officially support all the SQL dialects that Looker supports, the result might not be correct. You can, however, implement a custom parser and use it by setting the `sql_parser` configuration value. A custom SQL parser must inherit from `datahub.utilities.sql_parser.SQLParser`
and must be made available to DataHub by, for example, installing it. The configuration then needs to be set to the `module_name.ClassName` of the parser.
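To make the `sql_parser` extension point concrete, here is a minimal sketch of a custom parser. It assumes the `SQLParser` base class receives the raw SQL in its constructor and exposes `get_tables`/`get_columns`; the class name and the token-scanning logic are purely illustrative:

```python
# A hypothetical custom parser; replace the naive token scan with a
# dialect-aware library for real use.
from typing import List

from datahub.utilities.sql_parser import SQLParser


class NaiveSQLParser(SQLParser):
    def __init__(self, sql_query: str) -> None:
        super().__init__(sql_query)
        self._query = sql_query

    def get_tables(self) -> List[str]:
        # Naive: take the token that follows each FROM/JOIN keyword.
        tokens = self._query.replace(",", " ").split()
        return [
            tokens[i + 1].strip("();")
            for i, tok in enumerate(tokens[:-1])
            if tok.upper() in ("FROM", "JOIN")
        ]

    def get_columns(self) -> List[str]:
        # Column detail is not needed for table-level lineage here.
        return []
```

If this were installed as a hypothetical package `my_parser_package`, the recipe would reference it as `sql_parser: my_parser_package.NaiveSQLParser`.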
metadata-ingestion/docs/sources/looker/lookml_pre.md (new file, 84 lines)
@@ -0,0 +1,84 @@
### Pre-requisites

#### [Optional] Create an API key

See the [Looker authentication docs](https://docs.looker.com/reference/api-and-integration/api-auth#authentication_with_an_sdk) for the steps to create a client ID and secret.
You need to ensure that the API key is attached to a user that has Admin privileges.

If that is not possible, read the configuration section and provide an offline specification of the `connection_to_platform_map` and the `project_name`.

### Ingestion through UI

Ingestion using the lookml connector is not supported through the UI.
However, you can set up ingestion using a GitHub Action to push metadata whenever your main lookml repo changes.

#### Sample GitHub Action

Drop this file into your `.github/workflows` directory inside your Looker GitHub repo.

```
name: lookml metadata upload
on:
  push:
    branches:
      - main
    paths-ignore:
      - "docs/**"
      - "**.md"
  pull_request:
    branches:
      - main
    paths-ignore:
      - "docs/**"
      - "**.md"
  release:
    types: [published, edited]
  workflow_dispatch:

jobs:
  lookml-metadata-upload:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-python@v4
        with:
          python-version: '3.9'
      - name: Run LookML ingestion
        run: |
          pip install 'acryl-datahub[lookml,datahub-rest]'
          cat << EOF > lookml_ingestion.yml
          # LookML ingestion configuration
          source:
            type: "lookml"
            config:
              base_folder: ${{ github.workspace }}
              parse_table_names_from_sql: true
              github_info:
                repo: ${{ github.repository }}
                branch: ${{ github.ref }}
              # Options
              #connection_to_platform_map:
              #  acryl-snow: snowflake
              #platform: snowflake
              #default_db: DEMO_PIPELINE
              api:
                client_id: ${LOOKER_CLIENT_ID}
                client_secret: ${LOOKER_CLIENT_SECRET}
                base_url: ${LOOKER_BASE_URL}
          sink:
            type: datahub-rest
            config:
              server: ${DATAHUB_GMS_HOST}
              token: ${DATAHUB_TOKEN}
          EOF
          datahub ingest -c lookml_ingestion.yml
        env:
          DATAHUB_GMS_HOST: ${{ secrets.DATAHUB_GMS_HOST }}
          DATAHUB_TOKEN: ${{ secrets.DATAHUB_TOKEN }}
          LOOKER_BASE_URL: https://acryl.cloud.looker.com # <--- replace with your Looker base URL
          LOOKER_CLIENT_ID: ${{ secrets.LOOKER_CLIENT_ID }}
          LOOKER_CLIENT_SECRET: ${{ secrets.LOOKER_CLIENT_SECRET }}
```

If you want to ingest lookml using the **datahub** CLI directly, read on for instructions and configuration details.
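Note that the workflow above reads `DATAHUB_GMS_HOST`, `DATAHUB_TOKEN`, `LOOKER_CLIENT_ID`, and `LOOKER_CLIENT_SECRET` from repository secrets, so those must be configured in the repo's Actions secrets before the first run, while `LOOKER_BASE_URL` is set inline and should be replaced with your own instance URL.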
@@ -31,6 +31,7 @@ source:
   # Optional additional github information. Used to add github links on the dataset's entity page.
   github_info:
     repo: org/repo-name
-# sink configs
+# Default sink is datahub-rest and doesn't need to be configured
+# See https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for customization options
@@ -1,4 +1,29 @@
-To get all metadata from Snowflake you need to use two plugins `snowflake` and `snowflake-usage`. Both of them are described in this page. These will require 2 separate recipes.
-
-We encourage you to try out new `snowflake-beta` plugin as alternative to running both `snowflake` and `snowflake-usage` plugins and share feedback. `snowflake-beta` is much faster than `snowflake` for extracting metadata .
+Ingesting metadata from Snowflake requires either using the **snowflake-beta** module with just one recipe (recommended) or the two separate modules **snowflake** and **snowflake-usage** (soon to be deprecated) with two separate recipes.
+
+All three modules are described on this page.
+
+We encourage you to try out the new **snowflake-beta** plugin as an alternative to running both the **snowflake** and **snowflake-usage** plugins, and to share feedback. `snowflake-beta` is much faster than `snowflake` at extracting metadata.
+
+## Snowflake Ingestion through the UI
+
+The following video shows you how to ingest Snowflake metadata through the UI.
+
+<div style={{ position: "relative", paddingBottom: "56.25%", height: 0 }}>
+  <iframe
+    src="https://www.loom.com/embed/15d0401caa1c4aa483afef1d351760db"
+    frameBorder={0}
+    webkitallowfullscreen=""
+    mozallowfullscreen=""
+    allowFullScreen=""
+    style={{
+      position: "absolute",
+      top: 0,
+      left: 0,
+      width: "100%",
+      height: "100%"
+    }}
+  />
+</div>
+
+Read on if you are interested in ingesting Snowflake metadata using the **datahub** CLI, or want to learn about all the configuration parameters that are supported by the connectors.
@@ -1,12 +1,11 @@
 source:
   type: snowflake-beta
   config:
-
     # This option is recommended to be used for the first time to ingest all lineage
     ignore_start_time_lineage: true
     # This is an alternative option to specify the start_time for lineage
     # if you don't want to look back since beginning
-    start_time: '2022-03-01T00:00:00Z'
+    start_time: "2022-03-01T00:00:00Z"

     # Coordinates
     account_id: "abc48144"
@@ -35,9 +34,7 @@ source:
     profile_table_level_only: true
     profile_pattern:
       allow:
-        - 'ACCOUNTING_DB.*.*'
-        - 'MARKETING_DB.*.*'
+        - "ACCOUNTING_DB.*.*"
+        - "MARKETING_DB.*.*"

-sink:
-  # sink configs
+# Default sink is datahub-rest and doesn't need to be configured
+# See https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for customization options
@@ -26,19 +26,25 @@ logger = logging.getLogger(__name__)
 @dataclass
 class FieldRow:
     path: str
+    parent: Optional[str]
     type_name: str
     required: bool
     default: str
     description: str
     inner_fields: List["FieldRow"] = Field(default_factory=list)

-    @staticmethod
-    def get_checkbox(enabled: bool) -> str:
-        return "✅" if enabled else ""
+    def get_checkbox(self) -> str:
+        if self.required:
+            if not self.parent:  # None and empty string both count
+                return "✅"
+            else:
+                return f"❓ (required if {self.parent} is set)"
+        else:
+            return ""

     def to_md_line(self) -> str:
         return (
-            f"| {self.path} | {self.get_checkbox(self.required)} | {self.type_name} | {self.description} | {self.default} |\n"
+            f"| {self.path} | {self.get_checkbox()} | {self.type_name} | {self.description} | {self.default} |\n"
             + "".join([inner_field.to_md_line() for inner_field in self.inner_fields])
         )
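For illustration, here is what the reworked checkbox renders for the three branches; the rows below are made up:

```python
# Made-up FieldRow instances covering the three branches of get_checkbox().
top_level = FieldRow(path="account_id", parent=None, type_name="string",
                     required=True, default="", description="account id")
nested = FieldRow(path="profiling.enabled", parent="profiling", type_name="boolean",
                  required=True, default="False", description="enable profiling")
optional = FieldRow(path="env", parent=None, type_name="string",
                    required=False, default="PROD", description="environment")

print(top_level.get_checkbox())  # ✅
print(nested.get_checkbox())     # ❓ (required if profiling is set)
print(optional.get_checkbox())   # "" (empty: not required)
```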
@@ -129,6 +135,7 @@ def gen_md_table(
         md_str.append(
             FieldRow(
                 path=get_prefixed_name(field_prefix, None),
+                parent=field_prefix,
                 type_name="Enum",
                 required=field_dict.get("required") or False,
                 description=f"one of {','.join(field_dict['enum'])}",
@@ -150,6 +157,7 @@ def gen_md_table(
         if "enum" in def_dict:
             row = FieldRow(
                 path=get_prefixed_name(field_prefix, field_name),
+                parent=field_prefix,
                 type_name=f"enum({reference.split('/')[-1]})",
                 description=get_enum_description(
                     value.get("description"), def_dict["enum"]
@@ -162,6 +170,7 @@ def gen_md_table(
             # object reference
             row = FieldRow(
                 path=get_prefixed_name(field_prefix, field_name),
+                parent=field_prefix,
                 type_name=f"{reference.split('/')[-1]} (see below for fields)",
                 description=value.get("description") or "",
                 default=str(value.get("default", "")),
@@ -188,6 +197,7 @@ def gen_md_table(
             md_str.append(
                 FieldRow(
                     path=get_prefixed_name(field_prefix, field_name),
+                    parent=field_prefix,
                     type_name="Enum",
                     description=f"one of {','.join(def_dict['enum'])}",
                     required=required_field,
@@ -210,6 +220,7 @@ def gen_md_table(

             row = FieldRow(
                 path=get_prefixed_name(field_prefix, field_name),
+                parent=field_prefix,
                 type_name=f"Dict[str, {value_ref.split('/')[-1]}]",
                 description=value.get("description") or "",
                 default=str(value.get("default", "")),
@@ -229,6 +240,7 @@ def gen_md_table(
             md_str.append(
                 FieldRow(
                     path=get_prefixed_name(field_prefix, field_name),
+                    parent=field_prefix,
                     type_name=f"Dict[str,{value_type}]"
                     if value_type
                     else "Dict",
@@ -241,6 +253,7 @@ def gen_md_table(
             object_definition = value["$ref"]
             row = FieldRow(
                 path=get_prefixed_name(field_prefix, field_name),
+                parent=field_prefix,
                 type_name=f"{object_definition.split('/')[-1]} (see below for fields)",
                 description=value.get("description") or "",
                 default=str(value.get("default", "")),
@@ -266,6 +279,7 @@ def gen_md_table(
             md_str.append(
                 FieldRow(
                     path=get_prefixed_name(field_prefix, field_name),
+                    parent=field_prefix,
                     type_name=f"Array of {items_type}",
                     description=value.get("description") or "",
                     default=str(value.get("default", "None")),
@@ -278,6 +292,7 @@ def gen_md_table(
             md_str.append(
                 FieldRow(
                     path=get_prefixed_name(field_prefix, field_name),
+                    parent=field_prefix,
                     type_name=value["type"],
                     description=value.get("description") or "",
                     default=str(value.get("default", "None")),
@@ -292,6 +307,7 @@ def gen_md_table(
             )
             row = FieldRow(
                 path=get_prefixed_name(field_prefix, field_name),
+                parent=field_prefix,
                 type_name=f"{object_definition.split('/')[-1]} (see below for fields)",
                 description=value.get("description") or "",
                 default=str(value.get("default", "")),
@@ -313,6 +329,7 @@ def gen_md_table(
             md_str.append(
                 FieldRow(
                     path=get_prefixed_name(field_prefix, field_name),
+                    parent=field_prefix,
                     type_name="Generic dict",
                     description=value.get("description", ""),
                     default=str(value.get("default", "None")),
@@ -481,11 +498,37 @@ def generate(
                 final_markdown,
             )
         else:
-            create_or_update(
-                source_documentation,
-                [platform_name, "plugins", file_name, "custom_docs"],
-                final_markdown,
-            )
+            if "_" in file_name:
+                plugin_doc_parts = file_name.split("_")
+                if len(plugin_doc_parts) != 2 or plugin_doc_parts[
+                    1
+                ] not in ["pre", "post"]:
+                    raise Exception(
+                        f"{file_name} needs to be of the form <plugin>_pre.md or <plugin>_post.md"
+                    )
+
+                docs_key_name = f"custom_docs_{plugin_doc_parts[1]}"
+                create_or_update(
+                    source_documentation,
+                    [
+                        platform_name,
+                        "plugins",
+                        plugin_doc_parts[0],
+                        docs_key_name,
+                    ],
+                    final_markdown,
+                )
+            else:
+                create_or_update(
+                    source_documentation,
+                    [
+                        platform_name,
+                        "plugins",
+                        file_name,
+                        "custom_docs_post",
+                    ],
+                    final_markdown,
+                )
     else:
         yml_match = re.search("/docs/sources/(.*)/(.*)_recipe.yml", path)
         if yml_match:
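In other words, a custom-doc file's name now selects both the target plugin and the injection point. A hypothetical trace of the convention:

```python
# Hypothetical file stems and the (plugin, docs key) they resolve to.
for file_name in ["looker_pre", "lookml_post", "kafka"]:
    if "_" in file_name:
        plugin, position = file_name.split("_")  # e.g. ("looker", "pre")
        print(plugin, f"custom_docs_{position}")
    else:
        print(file_name, "custom_docs_post")  # bare names default to the post slot
# looker custom_docs_pre
# lookml custom_docs_post
# kafka custom_docs_post
```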
@@ -543,6 +586,14 @@ def generate(
         if hasattr(source_type, "get_platform_id"):
             platform_id = source_type.get_platform_id()

+        if hasattr(source_type, "get_platform_doc_order"):
+            platform_doc_order = source_type.get_platform_doc_order()
+            create_or_update(
+                source_documentation,
+                [platform_id, "plugins", plugin_name, "doc_order"],
+                platform_doc_order,
+            )
+
         source_documentation[platform_id] = (
             source_documentation.get(platform_id) or {}
         )
@@ -637,6 +688,7 @@ def generate(
             warning_msg = f"Failed to find source classes for platform {platform_id}. Did you remember to annotate your source class with @platform_name({platform_id})?"
             logger.error(warning_msg)
             metrics["source_platforms"]["warnings"].append(warning_msg)
+            continue

         with open(platform_doc_file, "w") as f:
             if "name" in platform_docs:
|
|||||||
|
|
||||||
# f.write("| Source Module | Documentation |\n")
|
# f.write("| Source Module | Documentation |\n")
|
||||||
# f.write("| ------ | ---- |\n")
|
# f.write("| ------ | ---- |\n")
|
||||||
for plugin in sorted(platform_docs["plugins"]):
|
for plugin, plugin_docs in sorted(
|
||||||
|
platform_docs["plugins"].items(),
|
||||||
|
key=lambda x: str(x[1].get("doc_order"))
|
||||||
|
if x[1].get("doc_order")
|
||||||
|
else x[0],
|
||||||
|
):
|
||||||
f.write("<tr>\n")
|
f.write("<tr>\n")
|
||||||
f.write(f"<td>\n\n`{plugin}`\n\n</td>\n")
|
f.write(f"<td>\n\n`{plugin}`\n\n</td>\n")
|
||||||
f.write(
|
f.write(
|
||||||
@@ -671,8 +728,14 @@ def generate(
             f.write("</table>\n\n")
             # insert platform level custom docs before plugin section
             f.write(platform_docs.get("custom_docs") or "")
-            for plugin in sorted(platform_docs["plugins"]):
-                plugin_docs = platform_docs["plugins"][plugin]
+            # all_plugins = platform_docs["plugins"].keys()
+
+            for plugin, plugin_docs in sorted(
+                platform_docs["plugins"].items(),
+                key=lambda x: str(x[1].get("doc_order"))
+                if x[1].get("doc_order")
+                else x[0],
+            ):
                 f.write(f"\n\n## Module `{plugin}`\n")
                 if "support_status" in plugin_docs:
                     f.write(
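Note that the sort key stringifies `doc_order`, so plugins that declare one sort lexicographically by it and land ahead of plugins that fall back to their name (digits sort before letters in ASCII). A sketch with made-up entries:

```python
# Made-up plugin map demonstrating the doc_order sort key used above.
plugins = {
    "snowflake": {"doc_order": 2},
    "snowflake-beta": {"doc_order": 1},
    "snowflake-usage": {},  # no doc_order: falls back to the plugin name
}
ordered = sorted(
    plugins.items(),
    key=lambda x: str(x[1].get("doc_order")) if x[1].get("doc_order") else x[0],
)
print([name for name, _ in ordered])
# ['snowflake-beta', 'snowflake', 'snowflake-usage']
```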
@@ -692,8 +755,11 @@ def generate(
                 f.write("\n")

                 f.write(f"{plugin_docs.get('source_doc') or ''}\n")
+                # Insert custom pre section
+                f.write(plugin_docs.get("custom_docs_pre", ""))
+                f.write("\n### CLI based Ingestion\n")
                 if "extra_deps" in plugin_docs:
-                    f.write("### Install the Plugin\n")
+                    f.write("\n#### Install the Plugin\n")
                     if plugin_docs["extra_deps"] != []:
                         f.write("```shell\n")
                         f.write(f"pip install 'acryl-datahub[{plugin}]'\n")
@@ -703,7 +769,7 @@ def generate(
                             f"The `{plugin}` source works out of the box with `acryl-datahub`.\n"
                         )
                 if "recipe" in plugin_docs:
-                    f.write("\n### Quickstart Recipe\n")
+                    f.write("\n### Starter Recipe\n")
                     f.write(
                         "Check out the following recipe to get started with ingestion! See [below](#config-details) for full configuration options.\n\n\n"
                     )
@@ -740,7 +806,7 @@ The [JSONSchema](https://json-schema.org/) for this configuration is inlined below
                 </Tabs>\n\n"""
                 )
                 # insert custom plugin docs after config details
-                f.write(plugin_docs.get("custom_docs", ""))
+                f.write(plugin_docs.get("custom_docs_post", ""))
                 if "classname" in plugin_docs:
                     f.write("\n### Code Coordinates\n")
                     f.write(f"- Class Name: `{plugin_docs['classname']}`\n")
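Taken together with the `custom_docs_pre` hook added a few hunks up, each generated plugin page now flows: source overview, custom pre docs, CLI based Ingestion (install instructions and the renamed Starter Recipe), Config Details, custom post docs, and finally Code Coordinates.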
@@ -28,7 +28,7 @@ def config_class(config_cls: Type) -> Callable[[Type], Type]:


 def platform_name(
-    platform_name: str, id: Optional[str] = None
+    platform_name: str, id: Optional[str] = None, doc_order: Optional[int] = None
 ) -> Callable[[Type], Type]:
     """Adds a get_platform_name method to the decorated class"""

@@ -39,6 +39,7 @@ def platform_name(
         "get_platform_id",
         lambda: id or platform_name.lower().replace(" ", "-"),
     )
+    setattr(cls, "get_platform_doc_order", lambda: doc_order or None)

     return cls
@@ -148,7 +148,7 @@ SNOWFLAKE_FIELD_TYPE_MAPPINGS = {
 }


-@platform_name("Snowflake")
+@platform_name("Snowflake", doc_order=1)
 @config_class(SnowflakeV2Config)
 @support_status(SupportStatus.INCUBATING)
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
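Putting the decorator change and this usage together, a decorated class ends up exposing both lookup hooks; a small sketch with a hypothetical stand-in class:

```python
# Hypothetical stand-in; the real source class carries more decorators.
@platform_name("Snowflake", doc_order=1)
class DemoSource:
    pass

print(DemoSource.get_platform_id())         # 'snowflake'
print(DemoSource.get_platform_doc_order())  # 1
```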