haystack/.github/utils/convert_notebooks_into_webpages.py
Sara Zan 2d65c380f1
pre-commit hooks (#2819)
* Add pre-commit config

* update contributing guidelines

* try failing the workflow

* add pre-commit to the deps

* updating uninstall instructions

* separate jobs in CI

* make tutorials check fail

* make black check fail

* make openapi check fail

* make yaml schema and api docs checks fail

* highlight the instructions

* Update .pre-commit-config.yaml

Co-authored-by: Tobias Wochinger <mail@tobias-wochinger.de>

* Update CONTRIBUTING.md

Co-authored-by: Tobias Wochinger <mail@tobias-wochinger.de>

* Update CONTRIBUTING.md

Co-authored-by: Tobias Wochinger <mail@tobias-wochinger.de>

* Use black --check

* Add images of the CI

* title level

* feedback

Co-authored-by: Tobias Wochinger <mail@tobias-wochinger.de>
2022-07-26 15:02:15 +02:00

187 lines
4.1 KiB
Python
Executable File

#!/usr/bin/env python3
import re
from nbconvert import MarkdownExporter
import os
from pathlib import Path
# Comment-style header written at the top of every generated tutorial page.
# The placeholders are filled in per tutorial from _TUTORIALS below.
_HEADER_TEMPLATE = """<!---
title: "Tutorial {number}"
metaTitle: "{meta_title}"
metaDescription: ""
slug: "/docs/tutorial{number}"
date: "{date}"
id: "tutorial{number}md"
--->"""

# One (metaTitle, date) pair per tutorial, in tutorial order: the first entry
# belongs to tutorial 1, the second to tutorial 2, and so on.
_TUTORIALS = (
    ("Build Your First QA System", "2020-09-03"),
    ("Fine-tuning a model on your own data", "2020-09-03"),
    ("Build a QA System Without Elasticsearch", "2020-09-03"),
    ("Utilizing existing FAQs for Question Answering", "2020-09-03"),
    ("Evaluation of a QA System", "2020-09-03"),
    ("Better retrieval via Dense Passage Retrieval", "2020-09-03"),
    ("Generative QA with RAG", "2020-11-12"),
    ("Preprocessing", "2021-01-08"),
    ("Training a Dense Passage Retrieval model", "2021-01-08"),
    ("Knowledge Graph QA", "2021-04-06"),
    ("Pipelines", "2021-04-06"),
    ("Generative QA with LFQA", "2021-04-06"),
    ("Question Generation", "2021-08-23"),
    ("Query Classifier Tutorial", "2021-08-23"),
    ("TableQA Tutorial", "2021-10-28"),
    ("DocumentClassifier at Index Time Tutorial", "2021-11-05"),
    ("Audio Tutorial", "2022-06-15"),
    ("GPL Domain Adaptation", "2022-06-22"),
)

# Maps the 1-based tutorial number to the header text for that tutorial.
headers = {
    number: _HEADER_TEMPLATE.format(number=number, meta_title=meta_title, date=date)
    for number, (meta_title, date) in enumerate(_TUTORIALS, start=1)
}
def atoi(text):
    """Convert *text* to an int when it consists solely of decimal digits.

    :param text: the string to convert.
    :return: ``int(text)`` if every character is a decimal digit, otherwise
        *text* unchanged (this includes the empty string).
    """
    # str.isdecimal() matches exactly what int() can parse. str.isdigit() also
    # accepts characters such as superscripts ("²") on which int() raises
    # ValueError.
    return int(text) if text.isdecimal() else text
def natural_keys(text):
    """Build a sort key that orders the digit runs in *text* numerically.

    The string is split into alternating non-digit and digit pieces; each
    piece is passed through ``atoi``, so digit runs become ints. Used as a
    ``key=`` function, this sorts "10" after "9" instead of before it.

    :param text: the string to derive a key from.
    :return: a list of the pieces of *text*, in order, as returned by ``atoi``.
    """
    # Raw string: "\d" in a regular string literal is an invalid escape sequence.
    # The capture group makes re.split keep the digit runs in the result.
    return [atoi(piece) for piece in re.split(r"(\d+)", text)]
# Repository root, three levels up from this file.
repo_root = Path(__file__).parent.parent.parent
notebooks_dir = repo_root / "tutorials"
# Destination folder of the generated markdown pages (the same for every notebook).
tutorials_path = repo_root / "docs" / "_src" / "tutorials" / "tutorials"

# sort notebooks based on numbers within name of notebook
notebooks = sorted(
    (name for name in os.listdir(notebooks_dir) if name.endswith(".ipynb")),
    key=natural_keys,
)

# Export the notebooks to markdown without the cell outputs.
exporter = MarkdownExporter(exclude_output=True)
for number, nb in enumerate(notebooks, start=1):
    body, _ = exporter.from_filename(notebooks_dir / nb)
    print(f"Processing {notebooks_dir}/{nb}")

    # Look the header up before opening the output file, so that a missing
    # header does not leave an empty page behind. `headers` is a dict, hence
    # a missing entry raises KeyError (not IndexError).
    try:
        header = headers[number]
    except KeyError as err:
        raise KeyError(
            "Can't find the header for this tutorial. Have you added it in '.github/utils/convert_notebooks_into_webpages.py'?"
        ) from err

    # The page is named after the notebook's position in the sorted list.
    with open(tutorials_path / f"{number}.md", "w", encoding="utf-8") as f:
        f.write(header + "\n\n")
        f.write(body)