mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-11-04 04:39:10 +00:00 
			
		
		
		
	feat(docs): support inlining code snippets from files (#7712)
This commit is contained in:
		
							parent
							
								
									25808478cb
								
							
						
					
					
						commit
						575909e41c
					
				@ -14,8 +14,8 @@ yarn install
 | 
			
		||||
# This command starts a local development server and open up a browser window.
 | 
			
		||||
../gradlew yarnStart
 | 
			
		||||
 | 
			
		||||
# Every time a markdown file is changed, update the site:
 | 
			
		||||
# If a more complex docs site change is made, you'll need to restart the server.
 | 
			
		||||
# Every time a markdown file is changed, update the site by running this in a separate terminal.
 | 
			
		||||
# If you're making changes to the docusaurus config, you'll still need to restart the server.
 | 
			
		||||
../gradlew fastReload
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
@ -27,14 +27,6 @@ yarn install
 | 
			
		||||
 | 
			
		||||
This command generates static content into the `dist` directory and can be served using any static content hosting service. You can preview the built static site using `../gradlew serve`, although we recommend following the local development instructions instead.
 | 
			
		||||
 | 
			
		||||
## Generating GraphQL API Docs
 | 
			
		||||
 | 
			
		||||
To regenerate GraphQL API docs, simply rebuild the docs-website directory. 
 | 
			
		||||
 | 
			
		||||
```console
 | 
			
		||||
./gradlew docs-website:build
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Managing Content
 | 
			
		||||
 | 
			
		||||
Please use the following steps when adding/managing content for the docs site.
 | 
			
		||||
@ -138,13 +130,35 @@ The purpose of this section is to provide developers & technical users with conc
 | 
			
		||||
This section aims to provide plain-language feature overviews for both technical and non-technical readers alike.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
## Docs Generation Features
 | 
			
		||||
 | 
			
		||||
**Includes all markdown files**
 | 
			
		||||
 | 
			
		||||
By default, all markdown files in the repository will be included in the docs site.
 | 
			
		||||
However, you can exclude files by modifying the `filter_patterns` array in `generateDocsDir.ts`.
 | 
			
		||||
 | 
			
		||||
Any file that is included in our docs site should be linked to from the sidebar.
 | 
			
		||||
You can suppress this check by adding the path to the file in a comment in `sidebar.js`:
 | 
			
		||||
 | 
			
		||||
**Inline Code Snippets**
 | 
			
		||||
 | 
			
		||||
Use an "inline" directive to include code snippets from other files. The `show_path_as_comment` option will include the path to the file as a comment at the top of the snippet.
 | 
			
		||||
 | 
			
		||||
  ```python
 | 
			
		||||
  {{ inline /metadata-ingestion/examples/library/data_quality_mcpw_rest.py show_path_as_comment }}
 | 
			
		||||
  ```
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
## Docs site generation process
 | 
			
		||||
 | 
			
		||||
This process is orchestrated by a combination of Gradle and Yarn tasks. The main entrypoint is via the `docs-website:yarnGenerate` task, which in turn eventually runs `yarn run generate`.
 | 
			
		||||
 | 
			
		||||
Steps:
 | 
			
		||||
1. Generate the GraphQL combined schema using the gradle's `docs-website:generateGraphQLSchema` task. This generates `./graphql/combined.graphql`.
 | 
			
		||||
2. Run `yarn run _generate-graphql` to produce some markdown in the `./docs` directory.
 | 
			
		||||
3. Run the `generateDocsDir.ts` script to add the remaining markdown files to the `./docs` directory.
 | 
			
		||||
4. Run a copy or rsync to copy the `./docs` directory to `./genDocs`.
 | 
			
		||||
5. The docusaurus build process will then use the `./genDocs` directory as the source for the docs site.
 | 
			
		||||
2. Generate docs for ingestion sources using the `:metadata-ingestion:docGen` gradle task.
 | 
			
		||||
3. Generate docs for our metadata model using the `:metadata-ingestion:modelDocGen` gradle task.
 | 
			
		||||
4. Run `yarn run _generate-graphql` to produce some markdown in the `./docs` directory.
 | 
			
		||||
5. Run `yarn run _generate-python-sdk` to generate the Python SDK reference docs in the `./docs` directory.
 | 
			
		||||
6. Run the `generateDocsDir.ts` script to add markdown files from elsewhere in our repo to the `./docs` directory.
 | 
			
		||||
7. Run a copy or rsync to copy the `./docs` directory to `./genDocs`, and delete the `./docs` directory.
 | 
			
		||||
8. The docusaurus build process will then use the `./genDocs` directory as the source for the docs site.
 | 
			
		||||
 | 
			
		||||
@ -405,6 +405,37 @@ function markdown_enable_specials(
 | 
			
		||||
  contents.content = new_content;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
function markdown_process_inline_directives(
 | 
			
		||||
  contents: matter.GrayMatterFile<string>,
 | 
			
		||||
  filepath: string
 | 
			
		||||
): void {
 | 
			
		||||
  const new_content = contents.content.replace(
 | 
			
		||||
    /^{{\s+inline\s+(\S+)\s+(show_path_as_comment\s+)?\s*}}$/gm,
 | 
			
		||||
    (_, inline_file_path: string, show_path_as_comment: string) => {
 | 
			
		||||
      if (!inline_file_path.startsWith("/")) {
 | 
			
		||||
        throw new Error(`inline path must be absolute: ${inline_file_path}`);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      console.log(`Inlining ${inline_file_path} into ${filepath}`);
 | 
			
		||||
      const referenced_file = fs.readFileSync(
 | 
			
		||||
        path.join("..", inline_file_path),
 | 
			
		||||
        "utf8"
 | 
			
		||||
      );
 | 
			
		||||
 | 
			
		||||
      // TODO: Add support for start_after_line and end_before_line arguments
 | 
			
		||||
      // that can be used to limit the inlined content to a specific range of lines.
 | 
			
		||||
      let new_contents = "";
 | 
			
		||||
      if (show_path_as_comment) {
 | 
			
		||||
        new_contents += `# Inlined from ${inline_file_path}\n`;
 | 
			
		||||
      }
 | 
			
		||||
      new_contents += referenced_file;
 | 
			
		||||
 | 
			
		||||
      return new_contents;
 | 
			
		||||
    }
 | 
			
		||||
  );
 | 
			
		||||
  contents.content = new_content;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
function markdown_sanitize_and_linkify(content: string): string {
 | 
			
		||||
  // MDX escaping
 | 
			
		||||
  content = content.replace(/</g, "<");
 | 
			
		||||
@ -570,6 +601,7 @@ function copy_python_wheels(): void {
 | 
			
		||||
    markdown_add_edit_url(contents, filepath);
 | 
			
		||||
    markdown_rewrite_urls(contents, filepath);
 | 
			
		||||
    markdown_enable_specials(contents, filepath);
 | 
			
		||||
    markdown_process_inline_directives(contents, filepath);
 | 
			
		||||
    //copy_platform_logos();
 | 
			
		||||
    // console.log(contents);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -12,7 +12,8 @@
 | 
			
		||||
    "clear": "docusaurus clear && rm -rf genDocs/*",
 | 
			
		||||
    "_generate-graphql": "docusaurus docs:generate:graphql",
 | 
			
		||||
    "_generate-python-sdk": "cd sphinx && make md",
 | 
			
		||||
    "_generate-docs": "rm -rf docs && mkdir docs && yarn _generate-graphql && yarn _generate-python-sdk && ts-node -O '{ \"lib\": [\"es2020\"], \"target\": \"es6\" }' generateDocsDir.ts",
 | 
			
		||||
    "_generate-docs-dir-script": "ts-node -O '{ \"lib\": [\"es2020\"], \"target\": \"es6\" }' generateDocsDir.ts",
 | 
			
		||||
    "_generate-docs": "rm -rf docs && mkdir docs && yarn _generate-graphql && yarn _generate-python-sdk && yarn run _generate-docs-dir-script",
 | 
			
		||||
    "generate": "rm -rf genDocs genStatic && mkdir genDocs genStatic && yarn _generate-docs && mv docs/* genDocs/ && rmdir docs",
 | 
			
		||||
    "generate-rsync": "mkdir -p genDocs genStatic && yarn _generate-docs && rsync -v --checksum -r -h -i --delete docs/ genDocs && rm -rf docs",
 | 
			
		||||
    "lint": "prettier -w generateDocsDir.ts sidebars.js src/pages/index.js",
 | 
			
		||||
 | 
			
		||||
@ -559,29 +559,6 @@ def get_sorted_entity_names(
 | 
			
		||||
    return sorted_entities
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def preprocess_markdown(markdown_contents: str) -> str:
    """Expand ``{{ inline <path> }}`` directives in a markdown string.

    Each directive is replaced by the full contents of the referenced file,
    which is opened relative to the current working directory.

    :param markdown_contents: raw markdown text, possibly containing directives.
    :return: the markdown with every inline directive replaced by the
        referenced file's contents; returned unchanged if no directives exist.
    :raises OSError: if a referenced file cannot be opened.
    """
    # Non-greedy (.*?) so that two directives on the same line are matched
    # separately; a greedy (.*) would fuse them into one over-wide match.
    inline_pattern = re.compile(r"{{ inline (.*?) }}")

    def _read_referenced_file(match: "re.Match") -> str:
        # One read per directive; I/O errors propagate to the caller.
        with open(match.group(1), "r") as fp:
            return fp.read()

    # re.sub performs the single-pass scan-and-splice that the previous
    # implementation did by hand with repeated .search() calls and a
    # span-keyed swap register.
    return inline_pattern.sub(_read_referenced_file, markdown_contents)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@click.command()
 | 
			
		||||
@click.argument("schemas_root", type=click.Path(exists=True), required=True)
 | 
			
		||||
@click.option("--registry", type=click.Path(exists=True), required=True)
 | 
			
		||||
@ -616,8 +593,7 @@ def generate(
 | 
			
		||||
                entity_name = m.group(1)
 | 
			
		||||
                with open(path, "r") as doc_file:
 | 
			
		||||
                    file_contents = doc_file.read()
 | 
			
		||||
                    final_markdown = preprocess_markdown(file_contents)
 | 
			
		||||
                    entity_extra_docs[entity_name] = final_markdown
 | 
			
		||||
                    entity_extra_docs[entity_name] = file_contents
 | 
			
		||||
 | 
			
		||||
    # registry file
 | 
			
		||||
    load_registry_file(registry)
 | 
			
		||||
 | 
			
		||||
@ -27,8 +27,7 @@ Evaluation status and results for an assertion tracked over time.
 | 
			
		||||
<summary>Python SDK: Emit assertion info and results for dataset </summary>
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
# inlined from examples/library/data_quality_mcpw_rest.py
 | 
			
		||||
{{ inline examples/library/data_quality_mcpw_rest.py }}
 | 
			
		||||
{{ inline /metadata-ingestion/examples/library/data_quality_mcpw_rest.py show_path_as_comment }}
 | 
			
		||||
```
 | 
			
		||||
</details>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -45,8 +45,7 @@ The following code snippet shows you how to add a Schema containing 3 fields to
 | 
			
		||||
<summary>Python SDK: Add a schema to a dataset</summary>
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
# inlined from metadata-ingestion/examples/library/dataset_schema.py
 | 
			
		||||
{{ inline examples/library/dataset_schema.py }}
 | 
			
		||||
{{ inline /metadata-ingestion/examples/library/dataset_schema.py show_path_as_comment }}
 | 
			
		||||
```
 | 
			
		||||
</details>
 | 
			
		||||
 | 
			
		||||
@ -65,8 +64,7 @@ Here is an example for how to add a tag to a dataset. Note that this involves re
 | 
			
		||||
<summary>Python SDK: Add a tag to a dataset at the top-level</summary>
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
# inlined from metadata-ingestion/examples/library/dataset_add_tag.py
 | 
			
		||||
{{ inline examples/library/dataset_add_tag.py }}
 | 
			
		||||
{{ inline /metadata-ingestion/examples/library/dataset_add_tag.py show_path_as_comment }}
 | 
			
		||||
```
 | 
			
		||||
</details>
 | 
			
		||||
 | 
			
		||||
@ -75,8 +73,7 @@ Here is an example of adding a term to a dataset. Note that this involves readin
 | 
			
		||||
<summary>Python SDK: Add a term to a dataset at the top-level</summary>
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
# inlined from metadata-ingestion/examples/library/dataset_add_term.py
 | 
			
		||||
{{ inline examples/library/dataset_add_term.py }}
 | 
			
		||||
{{ inline /metadata-ingestion/examples/library/dataset_add_term.py show_path_as_comment }}
 | 
			
		||||
```
 | 
			
		||||
</details>
 | 
			
		||||
 | 
			
		||||
@ -91,8 +88,7 @@ Here is an example of how you can add a tag to a field in a dataset using the lo
 | 
			
		||||
<summary>Python SDK: Add a tag to a column (field) of a dataset</summary>
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
# inlined from metadata-ingestion/examples/library/dataset_add_column_term.py
 | 
			
		||||
{{ inline examples/library/dataset_add_column_term.py }}
 | 
			
		||||
{{ inline /metadata-ingestion/examples/library/dataset_add_column_term.py show_path_as_comment }}
 | 
			
		||||
```
 | 
			
		||||
</details>
 | 
			
		||||
 | 
			
		||||
@ -101,8 +97,7 @@ Similarly, here is an example of how you would add a term to a field in a datase
 | 
			
		||||
<summary>Python SDK: Add a term to a column (field) of a dataset</summary>
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
# inlined from metadata-ingestion/examples/library/dataset_add_column_term.py
 | 
			
		||||
{{ inline examples/library/dataset_add_column_term.py }}
 | 
			
		||||
{{ inline /metadata-ingestion/examples/library/dataset_add_column_term.py show_path_as_comment }}
 | 
			
		||||
```
 | 
			
		||||
</details>
 | 
			
		||||
 | 
			
		||||
@ -118,8 +113,7 @@ The following script shows you how to add an owner to a dataset using the low-le
 | 
			
		||||
<summary>Python SDK: Add an owner to a dataset</summary>
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
# inlined from metadata-ingestion/examples/library/dataset_add_owner.py
 | 
			
		||||
{{ inline examples/library/dataset_add_owner.py }}
 | 
			
		||||
{{ inline /metadata-ingestion/examples/library/dataset_add_owner.py show_path_as_comment }}
 | 
			
		||||
```
 | 
			
		||||
</details>
 | 
			
		||||
 | 
			
		||||
@ -130,8 +124,7 @@ Fine-grained lineage at field level can be associated to a dataset in two ways -
 | 
			
		||||
<summary>Python SDK: Add fine-grained lineage to a dataset</summary>
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
# inlined from metadata-ingestion/examples/library/lineage_emitter_dataset_finegrained.py
 | 
			
		||||
{{ inline examples/library/lineage_emitter_dataset_finegrained.py }}
 | 
			
		||||
{{ inline /metadata-ingestion/examples/library/lineage_emitter_dataset_finegrained.py show_path_as_comment }}
 | 
			
		||||
```
 | 
			
		||||
</details>
 | 
			
		||||
 | 
			
		||||
@ -139,8 +132,7 @@ Fine-grained lineage at field level can be associated to a dataset in two ways -
 | 
			
		||||
<summary>Python SDK: Add fine-grained lineage to a datajob</summary>
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
# inlined from metadata-ingestion/examples/library/lineage_emitter_datajob_finegrained.py
 | 
			
		||||
{{ inline examples/library/lineage_emitter_datajob_finegrained.py }}
 | 
			
		||||
{{ inline /metadata-ingestion/examples/library/lineage_emitter_datajob_finegrained.py show_path_as_comment }}
 | 
			
		||||
```
 | 
			
		||||
</details>
 | 
			
		||||
 | 
			
		||||
@ -336,8 +328,7 @@ Here is a simple script that shows you how to add documentation for a dataset in
 | 
			
		||||
<summary>Python SDK: Add documentation, links to a dataset</summary>
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
# inlined from metadata-ingestion/examples/library/dataset_add_documentation.py
 | 
			
		||||
{{ inline examples/library/dataset_add_documentation.py }}
 | 
			
		||||
{{ inline /metadata-ingestion/examples/library/dataset_add_documentation.py show_path_as_comment }}
 | 
			
		||||
```
 | 
			
		||||
</details>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user