diff --git a/docs-website/README.md b/docs-website/README.md index 9621fcfc33..a8b5702bc6 100644 --- a/docs-website/README.md +++ b/docs-website/README.md @@ -14,8 +14,8 @@ yarn install # This command starts a local development server and open up a browser window. ../gradlew yarnStart -# Every time a markdown file is changed, update the site: -# If a more complex docs site change is made, you'll need to restart the server. +# Every time a markdown file is changed, update the site by running this in a separate terminal. +# If you're making changes to the docusaurus config, you'll still need to restart the server. ../gradlew fastReload ``` @@ -27,14 +27,6 @@ yarn install This command generates static content into the `dist` directory and can be served using any static contents hosting service. You can preview the built static site using `../gradlew serve`, although we're recommend using the local development instructions locally. -## Generating GraphQL API Docs - -To regenerate GraphQL API docs, simply rebuild the docs-website directory. - -```console -./gradlew docs-website:build -``` - ## Managing Content Please use the following steps when adding/managing content for the docs site. @@ -138,13 +130,35 @@ The purpose of this section is to provide developers & technical users with conc This section aims to provide plain-language feature overviews for both technical and non-technical readers alike. +## Docs Generation Features + +**Includes all markdown files** + +By default, all markdown files in the repository will be included in the docs site. +However, you can exclude files by modifying the `filter_patterns` array in `generateDocsDir.ts`. + +Any file that is included in our docs site should be linked to from the sidebar. +You can suppress this check by adding the path to the file in a comment in `sidebar.js`. + +**Inline Code Snippets** + +Use an "inline" directive to include code snippets from other files. 
The `show_path_as_comment` option will include the path to the file as a comment at the top of the snippet. + + ```python + {{ inline /metadata-ingestion/examples/library/data_quality_mcpw_rest.py show_path_as_comment }} + ``` + + ## Docs site generation process This process is orchestrated by a combination of Gradle and Yarn tasks. The main entrypoint is via the `docs-website:yarnGenerate` task, which in turn eventually runs `yarn run generate`. Steps: 1. Generate the GraphQL combined schema using the gradle's `docs-website:generateGraphQLSchema` task. This generates `./graphql/combined.graphql`. -2. Run `yarn run _generate-graphql` to produce some markdown in the `./docs` directory. -3. Run the `generateDocsDir.ts` script to add the remaining markdown files to the `./docs` directory. -4. Run a copy or rsync to copy the `./docs` directory to `./genDocs`. -5. The docusaurus build process will then use the `./genDocs` directory as the source for the docs site. +2. Generate docs for ingestion sources using the `:metadata-ingestion:docGen` gradle task. +3. Generate docs for our metadata model using the `:metadata-ingestion:modelDocGen` gradle task. +4. Run `yarn run _generate-graphql` to produce some markdown in the `./docs` directory. +5. Run `yarn run _generate-python-sdk` to generate the Python SDK reference docs in the `./docs` directory. +6. Run the `generateDocsDir.ts` script to add markdown files from elsewhere in our repo to the `./docs` directory. +7. Run a copy or rsync to copy the `./docs` directory to `./genDocs`, and delete the `./docs` directory. +8. The docusaurus build process will then use the `./genDocs` directory as the source for the docs site. 
diff --git a/docs-website/generateDocsDir.ts b/docs-website/generateDocsDir.ts index 66594ea353..fd010fd9c2 100644 --- a/docs-website/generateDocsDir.ts +++ b/docs-website/generateDocsDir.ts @@ -405,6 +405,37 @@ function markdown_enable_specials( contents.content = new_content; } +function markdown_process_inline_directives( + contents: matter.GrayMatterFile, + filepath: string +): void { + const new_content = contents.content.replace( + /^{{\s+inline\s+(\S+)\s+(show_path_as_comment\s+)?\s*}}$/gm, + (_, inline_file_path: string, show_path_as_comment: string) => { + if (!inline_file_path.startsWith("/")) { + throw new Error(`inline path must be absolute: ${inline_file_path}`); + } + + console.log(`Inlining ${inline_file_path} into ${filepath}`); + const referenced_file = fs.readFileSync( + path.join("..", inline_file_path), + "utf8" + ); + + // TODO: Add support for start_after_line and end_before_line arguments + // that can be used to limit the inlined content to a specific range of lines. 
+ let new_contents = ""; + if (show_path_as_comment) { + new_contents += `# Inlined from ${inline_file_path}\n`; + } + new_contents += referenced_file; + + return new_contents; + } + ); + contents.content = new_content; +} + function markdown_sanitize_and_linkify(content: string): string { // MDX escaping content = content.replace(/ str: - inline_pattern = re.compile(r"{{ inline (.*) }}") - pos = 0 - content_swap_register = {} - while inline_pattern.search(markdown_contents, pos=pos): - match = inline_pattern.search(markdown_contents, pos=pos) - assert match - file_name = match.group(1) - with open(file_name, "r") as fp: - inline_content = fp.read() - content_swap_register[match.span()] = inline_content - pos = match.span()[1] - processed_markdown = "" - cursor = 0 - for (start, end) in content_swap_register: - processed_markdown += ( - markdown_contents[cursor:start] + content_swap_register[(start, end)] - ) - cursor = end - processed_markdown += markdown_contents[cursor:] - return processed_markdown - - @click.command() @click.argument("schemas_root", type=click.Path(exists=True), required=True) @click.option("--registry", type=click.Path(exists=True), required=True) @@ -616,8 +593,7 @@ def generate( entity_name = m.group(1) with open(path, "r") as doc_file: file_contents = doc_file.read() - final_markdown = preprocess_markdown(file_contents) - entity_extra_docs[entity_name] = final_markdown + entity_extra_docs[entity_name] = file_contents # registry file load_registry_file(registry) diff --git a/metadata-models/docs/entities/assertion.md b/metadata-models/docs/entities/assertion.md index a1726cc76e..fa30c2dc7b 100644 --- a/metadata-models/docs/entities/assertion.md +++ b/metadata-models/docs/entities/assertion.md @@ -27,8 +27,7 @@ Evaluation status and results for an assertion tracked over time. 
Python SDK: Emit assertion info and results for dataset ```python -# inlined from examples/library/data_quality_mcpw_rest.py -{{ inline examples/library/data_quality_mcpw_rest.py }} +{{ inline /metadata-ingestion/examples/library/data_quality_mcpw_rest.py show_path_as_comment }} ``` diff --git a/metadata-models/docs/entities/dataset.md b/metadata-models/docs/entities/dataset.md index 5e984cc5b7..57bbf21943 100644 --- a/metadata-models/docs/entities/dataset.md +++ b/metadata-models/docs/entities/dataset.md @@ -45,8 +45,7 @@ The following code snippet shows you how to add a Schema containing 3 fields to Python SDK: Add a schema to a dataset ```python -# inlined from metadata-ingestion/examples/library/dataset_schema.py -{{ inline examples/library/dataset_schema.py }} +{{ inline /metadata-ingestion/examples/library/dataset_schema.py show_path_as_comment }} ``` @@ -65,8 +64,7 @@ Here is an example for how to add a tag to a dataset. Note that this involves re Python SDK: Add a tag to a dataset at the top-level ```python -# inlined from metadata-ingestion/examples/library/dataset_add_tag.py -{{ inline examples/library/dataset_add_tag.py }} +{{ inline /metadata-ingestion/examples/library/dataset_add_tag.py show_path_as_comment }} ``` @@ -75,8 +73,7 @@ Here is an example of adding a term to a dataset. 
Note that this involves readin Python SDK: Add a term to a dataset at the top-level ```python -# inlined from metadata-ingestion/examples/library/dataset_add_term.py -{{ inline examples/library/dataset_add_term.py }} +{{ inline /metadata-ingestion/examples/library/dataset_add_term.py show_path_as_comment }} ``` @@ -91,8 +88,7 @@ Here is an example of how you can add a tag to a field in a dataset using the lo Python SDK: Add a tag to a column (field) of a dataset ```python -# inlined from metadata-ingestion/examples/library/dataset_add_column_term.py -{{ inline examples/library/dataset_add_column_term.py }} +{{ inline /metadata-ingestion/examples/library/dataset_add_column_term.py show_path_as_comment }} ``` @@ -101,8 +97,7 @@ Similarly, here is an example of how you would add a term to a field in a datase Python SDK: Add a term to a column (field) of a dataset ```python -# inlined from metadata-ingestion/examples/library/dataset_add_column_term.py -{{ inline examples/library/dataset_add_column_term.py }} +{{ inline /metadata-ingestion/examples/library/dataset_add_column_term.py show_path_as_comment }} ``` @@ -118,8 +113,7 @@ The following script shows you how to add an owner to a dataset using the low-le Python SDK: Add an owner to a dataset ```python -# inlined from metadata-ingestion/examples/library/dataset_add_owner.py -{{ inline examples/library/dataset_add_owner.py }} +{{ inline /metadata-ingestion/examples/library/dataset_add_owner.py show_path_as_comment }} ``` @@ -130,8 +124,7 @@ Fine-grained lineage at field level can be associated to a dataset in two ways - Python SDK: Add fine-grained lineage to a dataset ```python -# inlined from metadata-ingestion/examples/library/lineage_emitter_dataset_finegrained.py -{{ inline examples/library/lineage_emitter_dataset_finegrained.py }} +{{ inline /metadata-ingestion/examples/library/lineage_emitter_dataset_finegrained.py show_path_as_comment }} ``` @@ -139,8 +132,7 @@ Fine-grained lineage at field level can be 
associated to a dataset in two ways - Python SDK: Add fine-grained lineage to a datajob ```python -# inlined from metadata-ingestion/examples/library/lineage_emitter_datajob_finegrained.py -{{ inline examples/library/lineage_emitter_datajob_finegrained.py }} +{{ inline /metadata-ingestion/examples/library/lineage_emitter_datajob_finegrained.py show_path_as_comment }} ``` @@ -336,8 +328,7 @@ Here is a simple script that shows you how to add documentation for a dataset in Python SDK: Add documentation, links to a dataset ```python -# inlined from metadata-ingestion/examples/library/dataset_add_documentation.py -{{ inline examples/library/dataset_add_documentation.py }} +{{ inline /metadata-ingestion/examples/library/dataset_add_documentation.py show_path_as_comment }} ```