Roman/notion tests (#1072)

### Description
* Add ingest test for Notion docs
* Update the default cache dir for connectors to include the connector name,
which makes debugging the cached content easier.

---------

Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com>
Co-authored-by: rbiseck3 <rbiseck3@users.noreply.github.com>
This commit is contained in:
Roman Isecke 2023-08-21 15:16:50 -04:00 committed by GitHub
parent a35ff890e0
commit db8af4f5de
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
58 changed files with 3235 additions and 84 deletions

View File

@ -287,6 +287,7 @@ jobs:
SHAREPOINT_SITE: ${{secrets.SHAREPOINT_SITE}}
SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
UNS_API_KEY: ${{ secrets.UNS_API_KEY }}
NOTION_API_KEY: ${{ secrets.NOTION_API_KEY }}
run: |
source .venv/bin/activate
sudo apt-get update
@ -311,6 +312,7 @@ jobs:
make install-ingest-outlook
make install-ingest-slack
make install-ingest-wikipedia
make install-ingest-notion
./test_unstructured_ingest/test-ingest.sh
test_unstructured_api_unit:

View File

@ -78,6 +78,7 @@ jobs:
SHAREPOINT_SITE: ${{secrets.SHAREPOINT_SITE}}
SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
UNS_API_KEY: ${{ secrets.UNS_API_KEY }}
NOTION_API_KEY: ${{ secrets.NOTION_API_KEY }}
OVERWRITE_FIXTURES: "true"
run: |
source .venv/bin/activate
@ -103,6 +104,7 @@ jobs:
make install-ingest-outlook
make install-ingest-slack
make install-ingest-wikipedia
make install-ingest-notion
./test_unstructured_ingest/test-ingest.sh
- name: Save branch name to environment file

View File

@ -67,6 +67,7 @@
* Update `partition_xlsx` to always use `soupparser_fromstring` to parse `html text`
* Add functionality to switch `html` text parser based on whether the `html` text contains emoji
* Add functionality to check if a string contains any emoji characters
* Add CI tests around Notion
### Features
@ -91,6 +92,8 @@
- better caching of models
- another version of detectron2 available, though the default layout model is unchanged
* Added UUID option for element_id
* CI improvements to run ingest tests in parallel
### Features

View File

@ -21,11 +21,18 @@ RUN python3.8 -m pip install pip==${PIP_VERSION} && \
pip install --no-cache -r requirements/test.txt && \
pip install --no-cache -r requirements/huggingface.txt && \
pip install --no-cache -r requirements/dev.txt && \
pip install --no-cache -r requirements/ingest-azure.txt && \
pip install --no-cache -r requirements/ingest-box.txt && \
pip install --no-cache -r requirements/ingest-confluence.txt && \
pip install --no-cache -r requirements/ingest-discord.txt && \
pip install --no-cache -r requirements/ingest-dropbox.txt && \
pip install --no-cache -r requirements/ingest-elasticsearch.txt && \
pip install --no-cache -r requirements/ingest-gcs.txt && \
pip install --no-cache -r requirements/ingest-github.txt && \
pip install --no-cache -r requirements/ingest-gitlab.txt && \
pip install --no-cache -r requirements/ingest-google-drive.txt && \
pip install --no-cache -r requirements/ingest-notion.txt && \
pip install --no-cache -r requirements/ingest-onedrive.txt && \
pip install --no-cache -r requirements/ingest-outlook.txt && \
pip install --no-cache -r requirements/ingest-reddit.txt && \
pip install --no-cache -r requirements/ingest-s3.txt && \
pip install --no-cache -r requirements/ingest-slack.txt && \

View File

@ -132,6 +132,10 @@ install-ingest-discord:
install-ingest-github:
python3 -m pip install -r requirements/ingest-github.txt
.PHONY: install-ingest-biomed
install-ingest-biomed:
python3 -m pip install -r requirements/ingest-biomed.txt
.PHONY: install-ingest-gitlab
install-ingest-gitlab:
python3 -m pip install -r requirements/ingest-gitlab.txt
@ -172,6 +176,14 @@ install-ingest-airtable:
install-ingest-sharepoint:
python3 -m pip install -r requirements/ingest-sharepoint.txt
.PHONY: install-ingest-local
install-ingest-local:
echo "no unique dependencies for local connector"
.PHONY: install-ingest-notion
install-ingest-notion:
python3 -m pip install -r requirements/ingest-notion.txt
.PHONY: install-unstructured-inference
install-unstructured-inference:
python3 -m pip install -r requirements/local-inference.txt
@ -211,6 +223,7 @@ pip-compile:
# sphinx docs looks for additional requirements
cp requirements/build.txt docs/requirements.txt
pip-compile --upgrade requirements/ingest-s3.in
pip-compile --upgrade requirements/ingest-biomed.in
pip-compile --upgrade requirements/ingest-box.in
pip-compile --upgrade requirements/ingest-gcs.in
pip-compile --upgrade requirements/ingest-dropbox.in

View File

@ -0,0 +1,3 @@
-c constraints.in
-c base.txt
bs4

View File

@ -0,0 +1,12 @@
#
# This file is autogenerated by pip-compile with Python 3.8
# by the following command:
#
# pip-compile requirements/ingest-biomed.in
#
beautifulsoup4==4.12.2
# via bs4
bs4==0.0.1
# via -r requirements/ingest-biomed.in
soupsieve==2.4.1
# via beautifulsoup4

View File

@ -2,3 +2,4 @@
-c base.txt
gcsfs
fsspec
bs4

View File

@ -12,6 +12,10 @@ async-timeout==4.0.3
# via aiohttp
attrs==23.1.0
# via aiohttp
beautifulsoup4==4.12.2
# via bs4
bs4==0.0.1
# via -r requirements/ingest-gcs.in
cachetools==5.3.1
# via google-auth
certifi==2023.7.22
@ -94,6 +98,8 @@ rsa==4.9
# via google-auth
six==1.16.0
# via google-auth
soupsieve==2.4.1
# via beautifulsoup4
urllib3==1.26.16
# via
# -c requirements/base.txt

View File

@ -2,3 +2,4 @@
-c base.txt
msal
Office365-REST-Python-Client<2.4.3
bs4

View File

@ -4,6 +4,12 @@
#
# pip-compile requirements/ingest-onedrive.in
#
beautifulsoup4==4.12.2
# via
# -c requirements/base.txt
# bs4
bs4==0.0.1
# via -r requirements/ingest-onedrive.in
certifi==2023.7.22
# via
# -c requirements/base.txt
@ -42,6 +48,10 @@ requests==2.31.0
# -c requirements/base.txt
# msal
# office365-rest-python-client
soupsieve==2.4.1
# via
# -c requirements/base.txt
# beautifulsoup4
urllib3==1.26.16
# via
# -c requirements/base.txt

View File

@ -125,6 +125,7 @@ setup(
# Extra requirements for data connectors
"s3": load_requirements("requirements/ingest-s3.in"),
"azure": load_requirements("requirements/ingest-azure.in"),
"biomed": load_requirements("requirements/ingest-biomed.in"),
"discord": load_requirements("requirements/ingest-discord.in"),
"github": load_requirements("requirements/ingest-github.in"),
"gitlab": load_requirements("requirements/ingest-gitlab.in"),

View File

@ -0,0 +1,981 @@
[
{
"type": "UncategorizedText",
"element_id": "d8d2a2140ba63413c452dbefe499f90b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-04T18:31:00.000Z"
},
{
"type": "UncategorizedText",
"element_id": "d8d2a2140ba63413c452dbefe499f90b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-04T18:31:00.000Z"
},
{
"type": "Title",
"element_id": "548b1cea7491191a12465d055db621f4",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://lh3.googleusercontent.com/a/AAcHTtf2bisNRhNNHsC5OPcmNuCkfjmi4nmdyZxgYv27=s100"
],
"link_texts": [
"\n Roman Isecke\n "
],
"emphasized_text_contents": [
"Roman Isecke"
],
"emphasized_text_tags": [
"span"
]
},
"text": "Roman Isecke"
},
{
"type": "Title",
"element_id": "7b544ee99a84930c8049d5c91f8e7541",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "New Page"
},
{
"type": "Title",
"element_id": "97b7e2db799e2b79e65f418b42a7d305",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"unverified"
],
"emphasized_text_tags": [
"span"
]
},
"text": "unverified"
},
{
"type": "UncategorizedText",
"element_id": "d8d2a2140ba63413c452dbefe499f90b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-04T18:31:00.000Z"
},
{
"type": "UncategorizedText",
"element_id": "d8d2a2140ba63413c452dbefe499f90b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-04T18:31:00.000Z"
},
{
"type": "Title",
"element_id": "548b1cea7491191a12465d055db621f4",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://lh3.googleusercontent.com/a/AAcHTtf2bisNRhNNHsC5OPcmNuCkfjmi4nmdyZxgYv27=s100"
],
"link_texts": [
"\n Roman Isecke\n "
],
"emphasized_text_contents": [
"Roman Isecke"
],
"emphasized_text_tags": [
"span"
]
},
"text": "Roman Isecke"
},
{
"type": "Title",
"element_id": "a3bc48c9c0c00bd86bfcefcb833d3fd4",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Morale Events"
},
{
"type": "Title",
"element_id": "d3ad1f1f8c9c4f5a4a593571085513a4",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"Policies"
],
"emphasized_text_tags": [
"span"
]
},
"text": "Policies"
},
{
"type": "Title",
"element_id": "97b7e2db799e2b79e65f418b42a7d305",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"unverified"
],
"emphasized_text_tags": [
"span"
]
},
"text": "unverified"
},
{
"type": "UncategorizedText",
"element_id": "d8d2a2140ba63413c452dbefe499f90b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-04T18:31:00.000Z"
},
{
"type": "UncategorizedText",
"element_id": "9eca9d6f69bb98c4ec616c4aec38d0d2",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-04T19:02:00.000Z"
},
{
"type": "Title",
"element_id": "548b1cea7491191a12465d055db621f4",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://lh3.googleusercontent.com/a/AAcHTtf2bisNRhNNHsC5OPcmNuCkfjmi4nmdyZxgYv27=s100"
],
"link_texts": [
"\n Roman Isecke\n "
],
"emphasized_text_contents": [
"Roman Isecke"
],
"emphasized_text_tags": [
"span"
]
},
"text": "Roman Isecke"
},
{
"type": "Title",
"element_id": "c502fd59c2cdff4881f98c3ce019dc77",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "New Page With Verification"
},
{
"type": "UncategorizedText",
"element_id": "20079ac60749535ee21512b3091a61e0",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://lh3.googleusercontent.com/a/AAcHTtf2bisNRhNNHsC5OPcmNuCkfjmi4nmdyZxgYv27=s100"
],
"link_texts": [
"\n Roman Isecke\n "
],
"emphasized_text_contents": [
"expired"
],
"emphasized_text_tags": [
"span"
]
},
"text": "expired\n \n \n Roman Isecke\n \n \n 2023-08-04T04:00:00.000Z - 2023-08-11T04:00:00.000Z"
},
{
"type": "UncategorizedText",
"element_id": "d8d2a2140ba63413c452dbefe499f90b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-04T18:31:00.000Z"
},
{
"type": "UncategorizedText",
"element_id": "d8d2a2140ba63413c452dbefe499f90b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-04T18:31:00.000Z"
},
{
"type": "Title",
"element_id": "548b1cea7491191a12465d055db621f4",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://lh3.googleusercontent.com/a/AAcHTtf2bisNRhNNHsC5OPcmNuCkfjmi4nmdyZxgYv27=s100"
],
"link_texts": [
"\n Roman Isecke\n "
],
"emphasized_text_contents": [
"Roman Isecke"
],
"emphasized_text_tags": [
"span"
]
},
"text": "Roman Isecke"
},
{
"type": "Title",
"element_id": "c911244e369f9ee203656a820c260e4d",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Vacation Policy"
},
{
"type": "Title",
"element_id": "d3ad1f1f8c9c4f5a4a593571085513a4",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"Policies"
],
"emphasized_text_tags": [
"span"
]
},
"text": "Policies"
},
{
"type": "Title",
"element_id": "97b7e2db799e2b79e65f418b42a7d305",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"unverified"
],
"emphasized_text_tags": [
"span"
]
},
"text": "unverified"
},
{
"type": "UncategorizedText",
"element_id": "d8d2a2140ba63413c452dbefe499f90b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-04T18:31:00.000Z"
},
{
"type": "UncategorizedText",
"element_id": "d8d2a2140ba63413c452dbefe499f90b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-04T18:31:00.000Z"
},
{
"type": "Title",
"element_id": "548b1cea7491191a12465d055db621f4",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://lh3.googleusercontent.com/a/AAcHTtf2bisNRhNNHsC5OPcmNuCkfjmi4nmdyZxgYv27=s100"
],
"link_texts": [
"\n Roman Isecke\n "
],
"emphasized_text_contents": [
"Roman Isecke"
],
"emphasized_text_tags": [
"span"
]
},
"text": "Roman Isecke"
},
{
"type": "Title",
"element_id": "94efbf7307081f8f45b11a183ad99254",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Mission, Vision, Values"
},
{
"type": "UncategorizedText",
"element_id": "575d595cf4830f838cc79edf3a4bd5fc",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"Vision",
"Company Updates"
],
"emphasized_text_tags": [
"span",
"span"
]
},
"text": "Vision\n \n \n Company Updates"
},
{
"type": "Title",
"element_id": "97b7e2db799e2b79e65f418b42a7d305",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"unverified"
],
"emphasized_text_tags": [
"span"
]
},
"text": "unverified"
},
{
"type": "UncategorizedText",
"element_id": "d8d2a2140ba63413c452dbefe499f90b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-04T18:31:00.000Z"
},
{
"type": "UncategorizedText",
"element_id": "d8d2a2140ba63413c452dbefe499f90b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-04T18:31:00.000Z"
},
{
"type": "Title",
"element_id": "548b1cea7491191a12465d055db621f4",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://lh3.googleusercontent.com/a/AAcHTtf2bisNRhNNHsC5OPcmNuCkfjmi4nmdyZxgYv27=s100"
],
"link_texts": [
"\n Roman Isecke\n "
],
"emphasized_text_contents": [
"Roman Isecke"
],
"emphasized_text_tags": [
"span"
]
},
"text": "Roman Isecke"
},
{
"type": "Title",
"element_id": "b2d356b3e28717647c73b8767da6c485",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Recent Press"
},
{
"type": "Title",
"element_id": "67538900b235164b3f1debd8a8d80b44",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"Company Updates"
],
"emphasized_text_tags": [
"span"
]
},
"text": "Company Updates"
},
{
"type": "Title",
"element_id": "97b7e2db799e2b79e65f418b42a7d305",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"unverified"
],
"emphasized_text_tags": [
"span"
]
},
"text": "unverified"
},
{
"type": "UncategorizedText",
"element_id": "d8d2a2140ba63413c452dbefe499f90b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-04T18:31:00.000Z"
},
{
"type": "UncategorizedText",
"element_id": "d8d2a2140ba63413c452dbefe499f90b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-04T18:31:00.000Z"
},
{
"type": "Title",
"element_id": "548b1cea7491191a12465d055db621f4",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://lh3.googleusercontent.com/a/AAcHTtf2bisNRhNNHsC5OPcmNuCkfjmi4nmdyZxgYv27=s100"
],
"link_texts": [
"\n Roman Isecke\n "
],
"emphasized_text_contents": [
"Roman Isecke"
],
"emphasized_text_tags": [
"span"
]
},
"text": "Roman Isecke"
},
{
"type": "Title",
"element_id": "d00eca1bae6742803906ab42a831e8b5",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Getting Started"
},
{
"type": "Title",
"element_id": "97b7e2db799e2b79e65f418b42a7d305",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"unverified"
],
"emphasized_text_tags": [
"span"
]
},
"text": "unverified"
},
{
"type": "UncategorizedText",
"element_id": "d8d2a2140ba63413c452dbefe499f90b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-04T18:31:00.000Z"
},
{
"type": "UncategorizedText",
"element_id": "a9e87d3147c54fd5fa061709e15ed0bf",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-17T18:48:00.000Z"
},
{
"type": "Title",
"element_id": "548b1cea7491191a12465d055db621f4",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://lh3.googleusercontent.com/a/AAcHTtf2bisNRhNNHsC5OPcmNuCkfjmi4nmdyZxgYv27=s100"
],
"link_texts": [
"\n Roman Isecke\n "
],
"emphasized_text_contents": [
"Roman Isecke"
],
"emphasized_text_tags": [
"span"
]
},
"text": "Roman Isecke"
},
{
"type": "Title",
"element_id": "5687503bd741f54090d4c0557c0eea1a",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Page with every block"
},
{
"type": "UncategorizedText",
"element_id": "d7501f757bf490f053005b707829e343",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"Company Updates",
"Policies"
],
"emphasized_text_tags": [
"span",
"span"
]
},
"text": "Company Updates\n \n \n Policies"
},
{
"type": "UncategorizedText",
"element_id": "aad0d1a0dbac83ea1906db66ecbff086",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://lh3.googleusercontent.com/a/AAcHTtf2bisNRhNNHsC5OPcmNuCkfjmi4nmdyZxgYv27=s100"
],
"link_texts": [
"\n Roman Isecke\n "
],
"emphasized_text_contents": [
"verified"
],
"emphasized_text_tags": [
"span"
]
},
"text": "verified\n \n \n Roman Isecke\n \n \n 2023-08-04T04:00:00.000Z - 2023-11-02T04:00:00.000Z"
},
{
"type": "UncategorizedText",
"element_id": "d8d2a2140ba63413c452dbefe499f90b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-04T18:31:00.000Z"
},
{
"type": "UncategorizedText",
"element_id": "d8d2a2140ba63413c452dbefe499f90b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-04T18:31:00.000Z"
},
{
"type": "Title",
"element_id": "548b1cea7491191a12465d055db621f4",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://lh3.googleusercontent.com/a/AAcHTtf2bisNRhNNHsC5OPcmNuCkfjmi4nmdyZxgYv27=s100"
],
"link_texts": [
"\n Roman Isecke\n "
],
"emphasized_text_contents": [
"Roman Isecke"
],
"emphasized_text_tags": [
"span"
]
},
"text": "Roman Isecke"
},
{
"type": "Title",
"element_id": "b2c1cf36a9b45cdefac07d1899b96ff1",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Corporate Travel"
},
{
"type": "Title",
"element_id": "d3ad1f1f8c9c4f5a4a593571085513a4",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"Policies"
],
"emphasized_text_tags": [
"span"
]
},
"text": "Policies"
},
{
"type": "Title",
"element_id": "97b7e2db799e2b79e65f418b42a7d305",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"unverified"
],
"emphasized_text_tags": [
"span"
]
},
"text": "unverified"
},
{
"type": "UncategorizedText",
"element_id": "d8d2a2140ba63413c452dbefe499f90b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-04T18:31:00.000Z"
},
{
"type": "UncategorizedText",
"element_id": "d8d2a2140ba63413c452dbefe499f90b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-04T18:31:00.000Z"
},
{
"type": "Title",
"element_id": "548b1cea7491191a12465d055db621f4",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://lh3.googleusercontent.com/a/AAcHTtf2bisNRhNNHsC5OPcmNuCkfjmi4nmdyZxgYv27=s100"
],
"link_texts": [
"\n Roman Isecke\n "
],
"emphasized_text_contents": [
"Roman Isecke"
],
"emphasized_text_tags": [
"span"
]
},
"text": "Roman Isecke"
},
{
"type": "Title",
"element_id": "8bcdb5d9bc2bda33af04bae4495f5e37",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Benefits Policies"
},
{
"type": "Title",
"element_id": "d3ad1f1f8c9c4f5a4a593571085513a4",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"Policies"
],
"emphasized_text_tags": [
"span"
]
},
"text": "Policies"
},
{
"type": "Title",
"element_id": "97b7e2db799e2b79e65f418b42a7d305",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:44:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"unverified"
],
"emphasized_text_tags": [
"span"
]
},
"text": "unverified"
}
]

View File

@ -0,0 +1,28 @@
[
{
"type": "Title",
"element_id": "7b544ee99a84930c8049d5c91f8e7541",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "New Page"
},
{
"type": "NarrativeText",
"element_id": "be23aed1a36d4a5aa33b4dc454eff351",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "💡\n \n Notion Tip: When creating a page, it's important to give it a clear title and provide some content. This could include verifying the information, summarizing the topic, or sharing your thoughts and opinions on something that matters to you."
}
]

View File

@ -0,0 +1,28 @@
[
{
"type": "Title",
"element_id": "a3bc48c9c0c00bd86bfcefcb833d3fd4",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Morale Events"
},
{
"type": "NarrativeText",
"element_id": "89f7608e949e257e04b601964f7fab2d",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "💡\n \n Notion Tip: Morale events increase employee satisfaction, motivation, and well-being, while promoting community and teamwork, resulting in higher productivity and retention rates."
}
]

View File

@ -0,0 +1,41 @@
[
{
"type": "Title",
"element_id": "c502fd59c2cdff4881f98c3ce019dc77",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T19:02:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "New Page With Verification"
},
{
"type": "NarrativeText",
"element_id": "be23aed1a36d4a5aa33b4dc454eff351",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T19:02:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "💡\n \n Notion Tip: When creating a page, it's important to give it a clear title and provide some content. This could include verifying the information, summarizing the topic, or sharing your thoughts and opinions on something that matters to you."
},
{
"type": "NarrativeText",
"element_id": "b086a1d1099369a71964546dc89b2323",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T19:02:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "💡\n \n Notion Tip: An owner of a page can verify it by clicking on the verification button above and choosing to verify the page for either a set amount of time or indefinitely!"
}
]

View File

@ -0,0 +1,28 @@
[
{
"type": "Title",
"element_id": "c911244e369f9ee203656a820c260e4d",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Vacation Policy"
},
{
"type": "NarrativeText",
"element_id": "94bc9e2e465cfac3060a7f7ab8082e89",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "💡\n \n Notion Tip: Vacation policies are crucial for employee well-being and productivity. They provide rest and recharge, reduce burnout and increase job satisfaction."
}
]

View File

@ -0,0 +1,28 @@
[
{
"type": "Title",
"element_id": "94efbf7307081f8f45b11a183ad99254",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Mission, Vision, Values"
},
{
"type": "NarrativeText",
"element_id": "f116dc480f737022b3eef55d2095d808",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "💡\n \n Notion Tip: A company mission provides direction and purpose, aligning actions and decisions towards a common goal. It also helps attract like-minded individuals who share the same values and vision for the company."
}
]

View File

@ -0,0 +1,28 @@
[
{
"type": "Title",
"element_id": "b2d356b3e28717647c73b8767da6c485",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Recent Press"
},
{
"type": "NarrativeText",
"element_id": "22f92b2ebdefec36664fc1cb69221f2b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "💡\n \n Notion Tip: Telling employees about news about your company is important because it helps them stay informed about the direction of the company and their role in it."
}
]

View File

@ -0,0 +1,15 @@
[
{
"type": "Title",
"element_id": "18e350f89256491ebe1f8cce73a45231",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Sprint 3"
}
]

View File

@ -0,0 +1,293 @@
[
{
"type": "Title",
"element_id": "d00eca1bae6742803906ab42a831e8b5",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Getting Started"
},
{
"type": "NarrativeText",
"element_id": "be23aed1a36d4a5aa33b4dc454eff351",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "💡\n \n Notion Tip: When creating a page, it's important to give it a clear title and provide some content. This could include verifying the information, summarizing the topic, or sharing your thoughts and opinions on something that matters to you."
},
{
"type": "Title",
"element_id": "a5e729fb76c8c30039cbdb4c1a1f631f",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "The Basics"
},
{
"type": "Title",
"element_id": "c0afdd6fd0720c2d378ca3ea772d5746",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Create a Page"
},
{
"type": "NarrativeText",
"element_id": "6f7bd5b1aa870cb1bc65673b13c4f443",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"Workspace"
],
"emphasized_text_tags": [
"b"
]
},
"text": "\n In your sidebar, click the \n "
},
{
"type": "Title",
"element_id": "a3e7821cf681efe356a3725fb74e5afb",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Headings"
},
{
"type": "NarrativeText",
"element_id": "de4d0201434c605453f8882bc60ed14f",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "You can add headings and subheadings in one of two ways:"
},
{
"type": "ListItem",
"element_id": "e29f621fe1abe6865d3a18f3b9366bbb",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Type \n \n /heading\n \n or \n \n /h1\n \n , \n \n /h2\n \n , or \n \n /h3\n \n to choose the heading size you want."
},
{
"type": "ListItem",
"element_id": "9ff65a4dda597336f4b5800282db7239",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Use Markdown shortcuts, like \n \n #\n \n , \n \n ##\n \n , and \n \n ###\n \n ."
},
{
"type": "ListItem",
"element_id": "a64d13d5e52c832a2b5f7b5f6cfefc09",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Create inline code by wrapping text with \n \n `\n \n (or with the shortcut \n \n cmd/ctrl + e\n \n )."
},
{
"type": "Title",
"element_id": "8b218d87a7b0992b03cbba22ed07d466",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Toggle Lists"
},
{
"type": "NarrativeText",
"element_id": "de273473055b8b700d97642a29e6a787",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Toggle lists streamline your content. Click the arrow to open."
},
{
"type": "Title",
"element_id": "660cca99d9a1ad75e7c0d12644c68d71",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Callout Blocks"
},
{
"type": "Title",
"element_id": "60d3ab7e0452a72d68e57a722376ab19",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Code Blocks"
},
{
"type": "NarrativeText",
"element_id": "7b82449d1e44001326797d90184eae33",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "You can add code notation to any Notion page:"
},
{
"type": "ListItem",
"element_id": "66d838acacfc8d48cdd7714882dfc4ef",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Your teammates can select any code to comment on it."
},
{
"type": "Title",
"element_id": "d7b5c2955f161528fd051e6273cfe1aa",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Organizing Pages"
},
{
"type": "NarrativeText",
"element_id": "0a296a1b32283aa06e695312b049892b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "\n Instead of using folders, Notion lets you nest pages inside pages. Type \n "
},
{
"type": "Title",
"element_id": "54844ea8733bd62ab0d7c3b35bde1bc2",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Advanced Techniques"
},
{
"type": "NarrativeText",
"element_id": "b8736e38d7aeb7d60b821eb715ac9bbc",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://www.notion.so/notion/Notion-editor-101-create-and-edit-68c7c67047494fdb87d50185429df93e"
],
"link_texts": [
"\n Notion Editor 101\n "
]
},
"text": "\n Check out this \n "
},
{
"type": "UncategorizedText",
"element_id": "23b9158a48adb937cd37eae997f2dbe6",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://www.notion.so/notion/Notion-editor-101-create-and-edit-68c7c67047494fdb87d50185429df93e"
],
"link_texts": [
"\n Notion Editor 101\n "
]
},
"text": "Notion Editor 101\n \n guide for more advanced tips and how-to's."
}
]

View File

@ -0,0 +1,704 @@
[
{
"type": "Title",
"element_id": "5687503bd741f54090d4c0557c0eea1a",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Page with every block"
},
{
"type": "NarrativeText",
"element_id": "197dece4986f325b5d51a9e9d50eb0a6",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "💡\n \n Notion Tip: Tag pages to let collaborators know what they can expect to use the page for. You can add one or many tags to any page in a wiki."
},
{
"type": "Title",
"element_id": "3291955e900530da1940bdce97fa0f94",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Heading 2"
},
{
"type": "NarrativeText",
"element_id": "74ee7ebc5b884a20b9995eef3f2e90bb",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "This is some new text"
},
{
"type": "NarrativeText",
"element_id": "0e417d4322de881c074174e590eb664f",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"/9ba4d6da8a574cfc81ebceac1fde52bd"
],
"link_texts": [
"\n text\n "
],
"emphasized_text_contents": [
"formatted"
],
"emphasized_text_tags": [
"b"
]
},
"text": "\n Some/less → \n more\n \n "
},
{
"type": "UncategorizedText",
"element_id": "e302f58e2f42e9bcf16d78f5829ac32d",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"/9ba4d6da8a574cfc81ebceac1fde52bd"
],
"link_texts": [
"\n text\n "
]
},
"text": "text\n \n with other"
},
{
"type": "UncategorizedText",
"element_id": "8864784f943d9f832a3dce22ef8bcf01",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"content"
],
"emphasized_text_tags": [
"b"
]
},
"text": "c1r1 \n \n content"
},
{
"type": "UncategorizedText",
"element_id": "6f75c9d2993dbb3981c019741c7962a9",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "c2r1 table \n 2023-08-08T09:00:00.000-04:00\n cell"
},
{
"type": "Title",
"element_id": "5687503bd741f54090d4c0557c0eea1a",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://www.notion.so/c47a45664c7a488bac2a1292ee507fcb"
],
"link_texts": [
"\n Page with every block \n "
]
},
"text": "Page with every block"
},
{
"type": "UncategorizedText",
"element_id": "13686520a51e25584bb06ab189b38552",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"/122b2c22996b435b9de2ee0e9d2b04bc"
],
"link_texts": [
"\n content\n "
]
},
"text": "c1r2 more \n \n content"
},
{
"type": "UncategorizedText",
"element_id": "cf236cfe4b4c0ef644c37b4e491a4aa8",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"cell"
],
"emphasized_text_tags": [
"span"
]
},
"text": "c2r2 table \n \n cell"
},
{
"type": "Title",
"element_id": "f59ab8d1331b7b16952fbd388258f856",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://www.notion.so/9ba4d6da8a574cfc81ebceac1fde52bd"
],
"link_texts": [
"\n Untitled\n "
]
},
"text": "Untitled"
},
{
"type": "NarrativeText",
"element_id": "7d96ce60a66271ef79da4c492ca7db8a",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "this is some green text"
},
{
"type": "NarrativeText",
"element_id": "2d77a706008eebaf1f7c4e116bbe08b4",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"an",
"equation"
],
"emphasized_text_tags": [
"b",
"b"
]
},
"text": "this is \n \n an \n \n \n equation"
},
{
"type": "Title",
"element_id": "f59ab8d1331b7b16952fbd388258f856",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://www.notion.so/a1a5dff426f34b8f9a709d51b2a00c73"
],
"link_texts": [
"\n Untitled\n "
]
},
"text": "Untitled"
},
{
"type": "UncategorizedText",
"element_id": "7e921a403f1840728e2887990cfe640d",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"text2"
],
"emphasized_text_tags": [
"i"
]
},
"text": "text1\n\n\n \n text2\n \n \n\nMultiline cell"
},
{
"type": "Title",
"element_id": "7013d5bb5a17e0e782e8971e23640bdb",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Another cell"
},
{
"type": "Title",
"element_id": "f59ab8d1331b7b16952fbd388258f856",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://www.notion.so/84002066546448d0a030aa79b8d400b0"
],
"link_texts": [
"\n Untitled\n "
]
},
"text": "Untitled"
},
{
"type": "UncategorizedText",
"element_id": "07d8cee827eb828e2a5eb7de833bbbe5",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "E = {mc^2}"
},
{
"type": "ListItem",
"element_id": "cc62f6c7b2a82fd1677dd9f8bd7d22a0",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Numbered list"
},
{
"type": "ListItem",
"element_id": "6834fb0bd1686c896a94d373f3b4b775",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "A number child"
},
{
"type": "ListItem",
"element_id": "ca680621d39d5b8acce02dfc6a97a722",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "A number grandchild"
},
{
"type": "ListItem",
"element_id": "87765da6ccf0668238c1d27c35692e11",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "great"
},
{
"type": "ListItem",
"element_id": "5fe0c7e554be4e8a7c481fda450b4891",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "super great"
},
{
"type": "ListItem",
"element_id": "e3b0c44298fc1c149afbf4c8996fb924",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": ""
},
{
"type": "ListItem",
"element_id": "49e55c65ff5dc6e829e6ecc0c70ebadd",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "with test text"
},
{
"type": "ListItem",
"element_id": "fb92b200afd22befce69ed16445527cf",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Bullet one"
},
{
"type": "ListItem",
"element_id": "0428d7ddb4428600794364fabf9f4c1d",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "A child bullet"
},
{
"type": "ListItem",
"element_id": "fa1e5246d82a38dbdfc9190a05a99dfd",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "A grandchild bullet"
},
{
"type": "ListItem",
"element_id": "87765da6ccf0668238c1d27c35692e11",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "great"
},
{
"type": "ListItem",
"element_id": "5fe0c7e554be4e8a7c481fda450b4891",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "super great"
},
{
"type": "ListItem",
"element_id": "8adaf826c6c8658a2bebd16af3130ebe",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Bullet two"
},
{
"type": "NarrativeText",
"element_id": "2ee54c373bd30284b127eba65fd6b949",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "I quote myself testings Notion"
},
{
"type": "NarrativeText",
"element_id": "20c9899ff94a00676943e33f204aa191",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 2,
"link_urls": [
"https://www.notion.so/icons/airplane_brown.svg"
],
"link_texts": [
"\n https://www.notion.so/icons/airplane_brown.svg\n "
]
},
"text": "https://www.notion.so/icons/airplane_brown.svg\n \n I call this out"
},
{
"type": "Title",
"element_id": "9a9d50ec8631eafde241ce5b6991ebb9",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 2,
"link_urls": [
"https://www.wikipedia.org/"
],
"link_texts": [
"\n https://www.wikipedia.org/\n "
]
},
"text": "https://www.wikipedia.org/"
},
{
"type": "Title",
"element_id": "9f92a0dd3dc5f16dc2f03319a9593bd8",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 2,
"link_urls": [
"https://play-lh.googleusercontent.com/KwUBNPbMTk9jDXYS2AeX3illtVRTkrKVh5xR1Mg4WHd0CG2tV4mrh1z3kXi5z_warlk"
],
"link_texts": [
"\n https://play-lh.googleusercontent.com/KwUBNPbMTk9jDXYS2AeX3illtVRTkrKVh5xR1Mg4WHd0CG2tV4mrh1z3kXi5z_warlk\n "
]
},
"text": "https://play-lh.googleusercontent.com/KwUBNPbMTk9jDXYS2AeX3illtVRTkrKVh5xR1Mg4WHd0CG2tV4mrh1z3kXi5z_warlk"
},
{
"type": "Title",
"element_id": "7b7f33d452d3a0e4110676710b2fa20c",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 2
},
"text": "Child Database:"
},
{
"type": "Title",
"element_id": "94c116ee118a72998db6cd10b586a9ef",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 2,
"link_urls": [
"https://www.notion.so/d1fad658f1cf4eedb0b5ee72b9f0b530"
],
"link_texts": [
"\n Analytics\n "
]
},
"text": "Analytics"
},
{
"type": "Title",
"element_id": "f1807acca7417b3a1017baa0f6786223",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 2
},
"text": "Child Page:"
},
{
"type": "Title",
"element_id": "f59ab8d1331b7b16952fbd388258f856",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 2,
"link_urls": [
"https://www.notion.so/9ba4d6da8a574cfc81ebceac1fde52bd"
],
"link_texts": [
"\n Untitled\n "
]
},
"text": "Untitled"
},
{
"type": "NarrativeText",
"element_id": "7416417e6b88ffae5fafbcb1f29adaa8",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 2
},
"text": "This is my code caption"
},
{
"type": "NarrativeText",
"element_id": "2263d8dd95ccfe1ad45d732c6eaaf59b",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 2
},
"text": "This is some text"
},
{
"type": "NarrativeText",
"element_id": "13a9d985856d4b7da7ce4cd1acf2f87c",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 2
},
"text": "This is text in next column"
},
{
"type": "Title",
"element_id": "5ebfbce3fcf25d957554aed984ab57ed",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 2
},
"text": "Final text in column"
},
{
"type": "NarrativeText",
"element_id": "700cbb1f3f71b5a21cda3fd79a9175e7",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 2
},
"text": "Heading 1 content"
},
{
"type": "UncategorizedText",
"element_id": "18905f6924829bb0d7ca89b5dfaa33ba",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-17T18:48:00.000Z"
},
"filetype": "text/html",
"page_number": 2
},
"text": "d3d87fc6-61cc-4bb5-89ed-e9dff0df1526"
}
]

View File

@ -0,0 +1,653 @@
[
{
"type": "UncategorizedText",
"element_id": "d6ec04f65fbb09dbefa4210ef201c9c0",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-14 - 2023-08-27"
},
{
"type": "Title",
"element_id": "548b1cea7491191a12465d055db621f4",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://lh3.googleusercontent.com/a/AAcHTtf2bisNRhNNHsC5OPcmNuCkfjmi4nmdyZxgYv27=s100"
],
"link_texts": [
"\n Roman Isecke\n "
]
},
"text": "Roman Isecke"
},
{
"type": "UncategorizedText",
"element_id": "9dfe062b68f15b3623944bd8ebb71b24",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-02T20:36:00.000Z"
},
{
"type": "UncategorizedText",
"element_id": "dece647865149e5a86e06c1af7c64aa5",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-31"
},
{
"type": "EmailAddress",
"element_id": "3357b6f2cc3b8584f1b7e66afbb46d34",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "email@custom.domaine"
},
{
"type": "Title",
"element_id": "f6c286e4b3078307fc8ae3635b4b2f5b",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"DevOps-Bot"
],
"emphasized_text_tags": [
"span"
]
},
"text": "DevOps-Bot"
},
{
"type": "Title",
"element_id": "60a33e6cf5151f2d52eddae9685cfa27",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "False"
},
{
"type": "Title",
"element_id": "fae2db093e1dd31042e8ab9427e8673a",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"Option 1"
],
"emphasized_text_tags": [
"span"
]
},
"text": "Option 1"
},
{
"type": "UncategorizedText",
"element_id": "6b51d431df5d7f141cbececcf79edf3d",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "12"
},
{
"type": "Title",
"element_id": "31eac5f6d8daefa258fc494a7e020bc8",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "SPRI1-2"
},
{
"type": "Title",
"element_id": "1ff57a29d7c9d11bdf61c1b80f2b289b",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Next"
},
{
"type": "Title",
"element_id": "dfcd7f16dd2d92ee4ec22516fb45abd6",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "notion://sprints/sprint_task_relation"
},
{
"type": "UncategorizedText",
"element_id": "188dc9ca72be97b25c9fff24f24ae74b",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-28 - 2023-09-10"
},
{
"type": "Title",
"element_id": "548b1cea7491191a12465d055db621f4",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://lh3.googleusercontent.com/a/AAcHTtf2bisNRhNNHsC5OPcmNuCkfjmi4nmdyZxgYv27=s100"
],
"link_texts": [
"\n Roman Isecke\n "
]
},
"text": "Roman Isecke"
},
{
"type": "UncategorizedText",
"element_id": "9dfe062b68f15b3623944bd8ebb71b24",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-02T20:36:00.000Z"
},
{
"type": "UncategorizedText",
"element_id": "c9b64468a792bcbf76cdce6d7ecc3bb9",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-29T00:00:00.000-04:00 - 2023-08-31T00:00:00.000-04:00"
},
{
"type": "Title",
"element_id": "982d9e3eb996f559e633f4d194def376",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "text"
},
{
"type": "UncategorizedText",
"element_id": "f5cbeacfbddd0de7391bc723762001a6",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"/51243b9d55dc4131b2ae03eff1ef1783"
],
"link_texts": [
"\n link\n "
],
"emphasized_text_contents": [
"More",
"text",
"text",
"with",
"link"
],
"emphasized_text_tags": [
"span",
"span",
"span",
"span",
"span"
]
},
"text": "More \n \n \n \n text\n \n \n \n with \n \n \n \n link"
},
{
"type": "Title",
"element_id": "60a33e6cf5151f2d52eddae9685cfa27",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "False"
},
{
"type": "UncategorizedText",
"element_id": "710375baee13b41d02266bd01d5f6b34",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "45666645345465454"
},
{
"type": "Title",
"element_id": "41a49f786d133c212cf1a35177700394",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "option 1"
},
{
"type": "Title",
"element_id": "502bb591b927c74b9f12ef78df9d5b1b",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "SPRI1-3"
},
{
"type": "Title",
"element_id": "18e350f89256491ebe1f8cce73a45231",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Sprint 3"
},
{
"type": "Title",
"element_id": "61636cdef547228389f0260d1dbb952b",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Future"
},
{
"type": "Title",
"element_id": "dfcd7f16dd2d92ee4ec22516fb45abd6",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "notion://sprints/sprint_task_relation"
},
{
"type": "UncategorizedText",
"element_id": "a30a043314fa89294fa2c1c989a01fbb",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "0.25"
},
{
"type": "UncategorizedText",
"element_id": "3dee6959f0ef3a4e6147de48fc70a814",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-07-31 - 2023-08-13"
},
{
"type": "Title",
"element_id": "548b1cea7491191a12465d055db621f4",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://lh3.googleusercontent.com/a/AAcHTtf2bisNRhNNHsC5OPcmNuCkfjmi4nmdyZxgYv27=s100"
],
"link_texts": [
"\n Roman Isecke\n "
]
},
"text": "Roman Isecke"
},
{
"type": "UncategorizedText",
"element_id": "9dfe062b68f15b3623944bd8ebb71b24",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-02T20:36:00.000Z"
},
{
"type": "UncategorizedText",
"element_id": "7e4059ebb0ebf24caae1f12cb79b8c9c",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "2023-08-07"
},
{
"type": "EmailAddress",
"element_id": "1ae8f7599f4f616683d2a69d29658afa",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "roman@unstructured.io"
},
{
"type": "UncategorizedText",
"element_id": "5d2e9bcd00123dd21fc54731fef97129",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"https://lh3.googleusercontent.com/a/AAcHTtf2bisNRhNNHsC5OPcmNuCkfjmi4nmdyZxgYv27=s100",
"https://lh3.googleusercontent.com/a/AAcHTtdiriiUNnUcm1dkAp7cbmmQyeO-acsViQHFS9v0=s100"
],
"link_texts": [
"\n Roman Isecke\n ",
"\n Jason Scheirer\n "
],
"emphasized_text_contents": [
"Roman Isecke",
"Jason Scheirer"
],
"emphasized_text_tags": [
"span",
"span"
]
},
"text": "Roman Isecke\n \n \n \n \n Jason Scheirer"
},
{
"type": "NarrativeText",
"element_id": "495e614a2084bd7c40e34b0b69534e67",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"This is some",
"formatted",
"formatted",
"text"
],
"emphasized_text_tags": [
"span",
"span",
"b",
"span"
]
},
"text": "This is some \n \n \n \n formatted\n \n \n \n text"
},
{
"type": "Title",
"element_id": "3cbc87c7681f34db4617feaa2c880193",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "True"
},
{
"type": "UncategorizedText",
"element_id": "4b2e896ce5416db25c44f6918648d0f4",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"emphasized_text_contents": [
"Option 2",
"Option 1"
],
"emphasized_text_tags": [
"span",
"span"
]
},
"text": "Option 2\n \n \n Option 1"
},
{
"type": "UncategorizedText",
"element_id": "e29c9c180c6279b0b02abd6a1801c7c0",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "32"
},
{
"type": "UncategorizedText",
"element_id": "03ac674216f3e15c761ee1a5e255f067",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "1234"
},
{
"type": "Title",
"element_id": "209ef9fc4dfe2166bcf2460b80334276",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "option 2"
},
{
"type": "Title",
"element_id": "3fafa60b6782f5d52caf7be755d82232",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "SPRI1-1"
},
{
"type": "Title",
"element_id": "f931bdb912a40a788890924578a0cff7",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Sprint 1"
},
{
"type": "Title",
"element_id": "e0d1b68224bf0b31ef16b206c65b5f8f",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Current"
},
{
"type": "Title",
"element_id": "dfcd7f16dd2d92ee4ec22516fb45abd6",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "notion://sprints/sprint_task_relation"
},
{
"type": "Title",
"element_id": "191347bfe55d0ca9a574db77bc864827",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1,
"link_urls": [
"www.google.com"
],
"link_texts": [
"\n www.google.com\n "
]
},
"text": "www.google.com"
}
]

View File

@ -0,0 +1,28 @@
[
{
"type": "Title",
"element_id": "b2c1cf36a9b45cdefac07d1899b96ff1",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Corporate Travel"
},
{
"type": "NarrativeText",
"element_id": "ea6b271473e6accb79547f998e68f3d2",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "💡\n \n Notion Tip: A corporate travel policy is crucial for controlling costs, ensuring compliance, and guaranteeing the safety of employees when traveling for the company."
}
]

View File

@ -0,0 +1,28 @@
[
{
"type": "Title",
"element_id": "8bcdb5d9bc2bda33af04bae4495f5e37",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Benefits Policies"
},
{
"type": "NarrativeText",
"element_id": "eef6bb1dab52f5a27ddff24998d3b614",
"metadata": {
"data_source": {
"date_created": "2023-08-04T18:31:00.000Z",
"date_modified": "2023-08-04T18:31:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "💡\n \n Notion Tip: Benefits policies can attract and retain employees, promote well-being, create positive culture, differentiate from competitors, and increase morale and satisfaction."
}
]

View File

@ -0,0 +1,15 @@
[
{
"type": "Title",
"element_id": "f931bdb912a40a788890924578a0cff7",
"metadata": {
"data_source": {
"date_created": "2023-08-02T20:36:00.000Z",
"date_modified": "2023-08-17T18:49:00.000Z"
},
"filetype": "text/html",
"page_number": 1
},
"text": "Sprint 1"
}
]

View File

@ -0,0 +1,28 @@
#!/usr/bin/env bash

# Ingest test for the Notion connector.
#
# Runs the ingest CLI against a fixed Notion database and then hands the
# output folder name to check-diff-expected-output.sh.
# The test is skipped (exit 0) when NOTION_API_KEY is not set.

set -e

# Resolve paths relative to this script and run from its parent directory.
SCRIPT_DIR=$(dirname "$(realpath "$0")")
cd "$SCRIPT_DIR"/.. || exit 1
OUTPUT_FOLDER_NAME=notion
OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME
DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME

# Without credentials there is nothing to test; exit successfully.
if [ -z "$NOTION_API_KEY" ]; then
   echo "Skipping Notion ingest test because the NOTION_API_KEY env var is not set."
   exit 0
fi

PYTHONPATH=. ./unstructured/ingest/main.py \
    notion \
    --metadata-exclude coordinates,filename,file_directory,metadata.last_modified,metadata.data_source.date_processed \
    --download-dir "$DOWNLOAD_DIR" \
    --api-key "$NOTION_API_KEY" \
    --structured-output-dir "$OUTPUT_DIR" \
    --database-ids "122b2c22996b435b9de2ee0e9d2b04bc" \
    --num-processes 2 \
    --recursive \
    --verbose

# Quote the argument so it is always passed as exactly one word.
sh "$SCRIPT_DIR"/check-diff-expected-output.sh "$OUTPUT_FOLDER_NAME"

View File

@ -33,6 +33,7 @@ export OMP_THREAD_LIMIT=1
./test_unstructured_ingest/test-ingest-local-single-file.sh
./test_unstructured_ingest/test-ingest-local-single-file-with-encoding.sh
./test_unstructured_ingest/test-ingest-local-single-file-with-pdf-infer-table-structure.sh
./test_unstructured_ingest/test-ingest-notion.sh
# NOTE(yuming): The following test should be put after any tests with --preserve-downloads option
./test_unstructured_ingest/test-ingest-pdf-fast-reprocess.sh
./test_unstructured_ingest/test-ingest-sharepoint.sh

View File

@ -107,6 +107,7 @@ class DiscordIngestDoc(IngestDocCleanupMixin, BaseIngestDoc):
bot.run(self.token)
self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True)
with open(self._tmp_download_file(), "w") as f:
for m in messages:
f.write(m.content + "\n")
@ -131,7 +132,7 @@ class DiscordConnector(ConnectorCleanupMixin, BaseConnector):
def initialize(self):
"""Verify that can get metadata for an object, validates connections info."""
os.mkdir(self.standard_config.download_dir)
pass
def get_ingest_docs(self):
return [

View File

@ -1,5 +1,6 @@
from typing import Any, Generator, List, Tuple
import httpx
from notion_client import Client as NotionClient
from notion_client.api_endpoints import (
BlocksChildrenEndpoint as NotionBlocksChildrenEndpoint,
@ -7,6 +8,7 @@ from notion_client.api_endpoints import (
from notion_client.api_endpoints import BlocksEndpoint as NotionBlocksEndpoint
from notion_client.api_endpoints import DatabasesEndpoint as NotionDatabasesEndpoint
from notion_client.api_endpoints import PagesEndpoint as NotionPagesEndpoint
from notion_client.errors import RequestTimeoutError
from unstructured.ingest.connector.notion.types.block import Block
from unstructured.ingest.connector.notion.types.database import Database
@ -42,6 +44,18 @@ class DatabasesEndpoint(NotionDatabasesEndpoint):
resp: dict = super().retrieve(database_id=database_id, **kwargs) # type: ignore
return Database.from_dict(data=resp)
def retrieve_status(self, database_id: str, **kwargs: Any) -> int:
    """Return the HTTP status code of a ``HEAD`` request for a database.

    The request is built with the parent client's ``_build_request`` for the
    path ``databases/{database_id}`` and sent with the parent's HTTP client.

    Args:
        database_id: ID of the database to probe.
        **kwargs: Only ``auth`` is read; it is forwarded to the request
            builder (``None`` when absent).

    Returns:
        The status code of the response.

    Raises:
        RequestTimeoutError: If the underlying httpx call times out. The
            original httpx exception is attached as the cause.
    """
    request = self.parent._build_request(
        method="HEAD",
        path=f"databases/{database_id}",
        auth=kwargs.get("auth"),
    )
    try:
        response: httpx.Response = self.parent.client.send(request)  # type: ignore
    except httpx.TimeoutException as timeout_error:
        # Chain explicitly so the traceback shows the httpx timeout as the cause.
        raise RequestTimeoutError() from timeout_error
    return response.status_code
def query(self, database_id: str, **kwargs: Any) -> Tuple[List[Page], dict]:
"""Get a list of [Pages](https://developers.notion.com/reference/page) contained in the database.
@ -81,6 +95,18 @@ class PagesEndpoint(NotionPagesEndpoint):
resp: dict = super().retrieve(page_id=page_id, **kwargs) # type: ignore
return Page.from_dict(data=resp)
def retrieve_status(self, page_id: str, **kwargs: Any) -> int:
    """Return the HTTP status code of a ``HEAD`` request for a page.

    The request is built with the parent client's ``_build_request`` for the
    path ``pages/{page_id}`` and sent with the parent's HTTP client.

    Args:
        page_id: ID of the page to probe.
        **kwargs: Only ``auth`` is read; it is forwarded to the request
            builder (``None`` when absent).

    Returns:
        The status code of the response.

    Raises:
        RequestTimeoutError: If the underlying httpx call times out. The
            original httpx exception is attached as the cause.
    """
    request = self.parent._build_request(
        method="HEAD",
        path=f"pages/{page_id}",
        auth=kwargs.get("auth"),
    )
    try:
        response: httpx.Response = self.parent.client.send(request)  # type: ignore
    except httpx.TimeoutException as timeout_error:
        # Chain explicitly so the traceback shows the httpx timeout as the cause.
        raise RequestTimeoutError() from timeout_error
    return response.status_code
class Client(NotionClient):
def __init__(self, *args: Any, **kwargs: Any) -> None:

View File

@ -289,6 +289,7 @@ class NotionConnector(ConnectorCleanupMixin, BaseConnector):
config=config,
)
@requires_dependencies(dependencies=["notion_client"])
def initialize(self):
    """Connector initialization hook; the body is currently a no-op.

    No metadata is fetched and no connection info is validated here.
    NOTE(review): the decorator presumably checks that ``notion_client`` is
    installed before this runs -- confirm against ``requires_dependencies``.
    """
    pass
@ -302,6 +303,13 @@ class NotionConnector(ConnectorCleanupMixin, BaseConnector):
client = NotionClient(auth=self.config.api_key, logger=self.config.get_logger())
# sanity check that database id is valid
resp_code = client.pages.retrieve_status(page_id=page_id)
if resp_code != 200:
raise ValueError(
f"page associated with page id could not be found: {page_id}",
)
child_content = get_recursive_content_from_page(
client=client,
page_id=page_id,
@ -333,6 +341,13 @@ class NotionConnector(ConnectorCleanupMixin, BaseConnector):
client = NotionClient(auth=self.config.api_key, logger=self.config.get_logger())
# sanity check that database id is valid
resp_code = client.databases.retrieve_status(database_id=database_id)
if resp_code != 200:
raise ValueError(
f"database associated with database id could not be found: {database_id}",
)
child_content = get_recursive_content_from_database(
client=client,
database_id=database_id,
@ -363,6 +378,7 @@ class NotionConnector(ConnectorCleanupMixin, BaseConnector):
for database_id in self.config.database_ids
]
if self.config.recursive:
self.config.get_logger().info("Getting recursive content")
child_pages = []
child_databases = []
for page_id in self.config.page_ids:

View File

@ -20,6 +20,7 @@ from htmlBuilder.tags import (
Tr,
Ul,
)
from notion_client.errors import APIResponseError
import unstructured.ingest.connector.notion.types.blocks as notion_blocks
from unstructured.ingest.connector.notion.client import Client
@ -160,9 +161,9 @@ def extract_database_html(
logger.debug(f"Creating {len(all_pages)} rows")
for page in all_pages:
if is_database_url(page.url):
if is_database_url(client=client, url=page.url):
child_databases.append(page.id)
if is_page_url(page.url):
if is_page_url(client=client, url=page.url):
child_pages.append(page.id)
properties = page.properties
inner_html = [properties.get(k).get_html() for k in property_keys] # type: ignore
@ -229,90 +230,138 @@ def get_recursive_content(
logger: logging.Logger,
) -> ChildExtractionResponse:
parents: List[QueueEntry] = [init_entry]
child_pages = []
child_dbs = []
processed = []
child_pages: List[str] = []
child_dbs: List[str] = []
processed: List[str] = []
while len(parents) > 0:
parent: QueueEntry = parents.pop()
processed.append(parent.id)
processed.append(str(parent.id))
if parent.type == QueueEntryType.PAGE:
logger.debug(f"Getting child data from page: {parent.id}")
for children in client.blocks.children.iterate_list( # type: ignore
block_id=str(parent.id),
):
child_pages_from_page = [
c for c in children if isinstance(c.block, notion_blocks.ChildPage)
]
if child_pages_from_page:
child_page_blocks: List[notion_blocks.ChildPage] = [
p.block
for p in child_pages_from_page
if isinstance(p.block, notion_blocks.ChildPage)
]
logger.debug(
"found child pages from parent page {}: {}".format(
parent.id,
", ".join([block.title for block in child_page_blocks]),
),
)
new_pages = [p.id for p in child_pages_from_page if p.id not in processed]
child_pages.extend(new_pages)
parents.extend(
[QueueEntry(type=QueueEntryType.PAGE, id=UUID(i)) for i in new_pages],
)
page_children = []
try:
for children_block in client.blocks.children.iterate_list( # type: ignore
block_id=str(parent.id),
):
page_children.extend(children_block)
except APIResponseError as api_error:
logger.error(f"failed to get page with id {parent.id}: {api_error}")
if str(parent.id) in child_pages:
child_pages.remove(str(parent.id))
continue
if not page_children:
continue
child_dbs_from_page = [
c for c in children if isinstance(c.block, notion_blocks.ChildDatabase)
# Extract child pages
child_pages_from_page = [
c for c in page_children if isinstance(c.block, notion_blocks.ChildPage)
]
if child_pages_from_page:
child_page_blocks: List[notion_blocks.ChildPage] = [
p.block
for p in child_pages_from_page
if isinstance(p.block, notion_blocks.ChildPage)
]
if child_dbs_from_page:
child_db_blocks: List[notion_blocks.ChildDatabase] = [
c.block
for c in children
if isinstance(c.block, notion_blocks.ChildDatabase)
]
logger.debug(
"found child database from parent page {}: {}".format(
parent.id,
", ".join([block.title for block in child_db_blocks]),
),
)
new_dbs = [db.id for db in child_dbs_from_page if db.id not in processed]
child_dbs.extend(new_dbs)
parents.extend(
[QueueEntry(type=QueueEntryType.DATABASE, id=UUID(i)) for i in new_dbs],
logger.debug(
"found child pages from parent page {}: {}".format(
parent.id,
", ".join([block.title for block in child_page_blocks]),
),
)
new_pages = [p.id for p in child_pages_from_page if p.id not in processed]
new_pages = list(set(new_pages))
child_pages.extend(new_pages)
parents.extend(
[QueueEntry(type=QueueEntryType.PAGE, id=UUID(i)) for i in new_pages],
)
# Extract child databases
child_dbs_from_page = [
c for c in page_children if isinstance(c.block, notion_blocks.ChildDatabase)
]
if child_dbs_from_page:
child_db_blocks: List[notion_blocks.ChildDatabase] = [
c.block
for c in page_children
if isinstance(c.block, notion_blocks.ChildDatabase)
]
logger.debug(
"found child database from parent page {}: {}".format(
parent.id,
", ".join([block.title for block in child_db_blocks]),
),
)
new_dbs = [db.id for db in child_dbs_from_page if db.id not in processed]
new_dbs = list(set(new_dbs))
child_dbs.extend(new_dbs)
parents.extend(
[QueueEntry(type=QueueEntryType.DATABASE, id=UUID(i)) for i in new_dbs],
)
linked_to_others: List[notion_blocks.LinkToPage] = [
c.block for c in page_children if isinstance(c.block, notion_blocks.LinkToPage)
]
for link in linked_to_others:
if (page_id := link.page_id) and (
page_id not in processed and page_id not in child_pages
):
child_pages.append(page_id)
parents.append(QueueEntry(type=QueueEntryType.PAGE, id=UUID(page_id)))
if (database_id := link.database_id) and (
database_id not in processed and database_id not in child_dbs
):
child_dbs.append(database_id)
parents.append(
QueueEntry(type=QueueEntryType.DATABASE, id=UUID(database_id)),
)
elif parent.type == QueueEntryType.DATABASE:
logger.debug(f"Getting child data from database: {parent.id}")
for page_entries in client.databases.iterate_query( # type: ignore
database_id=str(parent.id),
):
child_pages_from_db = [p for p in page_entries if is_page_url(p.url)]
if child_pages_from_db:
logger.debug(
"found child pages from parent database {}: {}".format(
parent.id,
", ".join([p.url for p in child_pages_from_db]),
),
)
new_pages = [p.id for p in child_pages_from_db if p.id not in processed]
child_pages.extend(new_pages)
parents.extend(
[QueueEntry(type=QueueEntryType.PAGE, id=UUID(i)) for i in new_pages],
)
database_pages = []
try:
for page_entries in client.databases.iterate_query( # type: ignore
database_id=str(parent.id),
):
database_pages.extend(page_entries)
except APIResponseError as api_error:
logger.error(f"failed to get database with id {parent.id}: {api_error}")
if str(parent.id) in child_dbs:
child_dbs.remove(str(parent.id))
continue
if not database_pages:
continue
child_dbs_from_db = [p for p in page_entries if is_database_url(p.url)]
if child_dbs_from_db:
logger.debug(
"found child database from parent database {}: {}".format(
parent.id,
", ".join([db.url for db in child_dbs_from_db]),
),
)
new_dbs = [db.id for db in child_dbs_from_db if db.id not in processed]
child_dbs.extend(new_dbs)
parents.extend(
[QueueEntry(type=QueueEntryType.DATABASE, id=UUID(i)) for i in new_dbs],
child_pages_from_db = [
p for p in database_pages if is_page_url(client=client, url=p.url)
]
if child_pages_from_db:
logger.debug(
"found child pages from parent database {}: {}".format(
parent.id,
", ".join([p.url for p in child_pages_from_db]),
),
)
new_pages = [p.id for p in child_pages_from_db if p.id not in processed]
child_pages.extend(new_pages)
parents.extend(
[QueueEntry(type=QueueEntryType.PAGE, id=UUID(i)) for i in new_pages],
)
child_dbs_from_db = [
p for p in database_pages if is_database_url(client=client, url=p.url)
]
if child_dbs_from_db:
logger.debug(
"found child database from parent database {}: {}".format(
parent.id,
", ".join([db.url for db in child_dbs_from_db]),
),
)
new_dbs = [db.id for db in child_dbs_from_db if db.id not in processed]
child_dbs.extend(new_dbs)
parents.extend(
[QueueEntry(type=QueueEntryType.DATABASE, id=UUID(i)) for i in new_dbs],
)
return ChildExtractionResponse(
child_pages=child_pages,
@ -328,25 +377,39 @@ def is_valid_uuid(uuid_str: str) -> bool:
return False
def is_page_url(url: str):
def get_uuid_from_url(path: str) -> Optional[str]:
    """Extract a trailing UUID from a "-"-separated path segment.

    Takes the text after the last "-" in ``path`` (or all of ``path`` when it
    contains no "-") and returns it if ``is_valid_uuid`` accepts it;
    otherwise returns ``None``.
    """
    candidate = path.rsplit("-", 1)[-1]
    return candidate if is_valid_uuid(candidate) else None
def is_page_url(client: Client, url: str):
parsed_url = urlparse(url)
path = parsed_url.path.split("/")[-1]
if parsed_url.netloc != "www.notion.so":
return False
if is_valid_uuid(path):
page_uuid = get_uuid_from_url(path=path)
if not page_uuid:
return False
strings = path.split("-")
if len(strings) > 0 and is_valid_uuid(strings[-1]):
check_resp = client.pages.retrieve_status(page_id=page_uuid)
if check_resp == 200:
return True
return False
def is_database_url(url: str):
def is_database_url(client: Client, url: str):
parsed_url = urlparse(url)
path = parsed_url.path.split("/")[-1]
if parsed_url.netloc != "www.notion.so":
return False
return is_valid_uuid(path)
database_uuid = get_uuid_from_url(path=path)
if not database_uuid:
return False
check_resp = client.databases.retrieve_status(database_id=database_uuid)
if check_resp == 200:
return True
return False
@dataclass

View File

@ -32,6 +32,7 @@ block_type_mapping = {
"file": blocks.File,
"image": blocks.Image,
"link_preview": blocks.LinkPreview,
"link_to_page": blocks.LinkToPage,
"numbered_list_item": blocks.NumberedListItem,
"paragraph": blocks.Paragraph,
"pdf": blocks.PDF,

View File

@ -13,6 +13,7 @@ from .file import File
from .heading import Heading
from .image import Image
from .link_preview import LinkPreview
from .link_to_page import LinkToPage
from .numbered_list import NumberedListItem
from .paragraph import Paragraph
from .pdf import PDF
@ -43,6 +44,7 @@ __all__ = [
"Heading",
"Image",
"LinkPreview",
"LinkToPage",
"NumberedListItem",
"Paragraph",
"PDF",

View File

@ -0,0 +1,29 @@
# https://developers.notion.com/reference/block#link-to-page
from dataclasses import dataclass
from typing import Optional
from htmlBuilder.tags import Div, HtmlTag
from unstructured.ingest.connector.notion.interfaces import BlockBase
@dataclass
class LinkToPage(BlockBase):
    """Block type registered under the ``link_to_page`` key.

    Holds the link's ``type`` plus an optional page ID and an optional
    database ID; both IDs default to ``None``.
    """

    type: str
    page_id: Optional[str] = None
    database_id: Optional[str] = None

    @staticmethod
    def can_have_children() -> bool:
        """Report that this block type never has child blocks."""
        return False

    @classmethod
    def from_dict(cls, data: dict):
        """Build an instance by passing the entries of ``data`` as keyword arguments."""
        return cls(**data)

    def get_html(self) -> Optional[HtmlTag]:
        """Render the linked ID inside a ``Div``.

        The page ID takes precedence over the database ID; ``None`` is
        returned when neither is set.
        """
        target_id = self.page_id or self.database_id
        if not target_id:
            return None
        return Div([], target_id)

View File

@ -22,6 +22,7 @@ def airtable(
personal_access_token.encode("utf-8"),
)
connector_config.download_dir = update_download_dir_hash(
connector_name="airtable",
connector_config=connector_config,
hashed_dir_name=hashed_dir_name,
logger=logger,

View File

@ -26,6 +26,7 @@ def azure(
)
connector_config.download_dir = update_download_dir_remote_url(
connector_name="azure",
connector_config=connector_config,
remote_url=remote_url,
logger=logger,

View File

@ -37,6 +37,7 @@ def biomed(
)
connector_config.download_dir = update_download_dir_hash(
connector_name="biomed",
connector_config=connector_config,
hashed_dir_name=hashed_dir_name,
logger=logger,

View File

@ -19,6 +19,7 @@ def box(
ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO)
connector_config.download_dir = update_download_dir_remote_url(
connector_name="box",
connector_config=connector_config,
remote_url=remote_url,
logger=logger,

View File

@ -26,6 +26,7 @@ def confluence(
url.encode("utf-8"),
)
connector_config.download_dir = update_download_dir_hash(
connector_name="confluence",
connector_config=connector_config,
hashed_dir_name=hashed_dir_name,
logger=logger,

View File

@ -23,6 +23,7 @@ def discord(
channels.encode("utf-8"),
)
connector_config.download_dir = update_download_dir_hash(
connector_name="discord",
connector_config=connector_config,
hashed_dir_name=hashed_dir_name,
logger=logger,

View File

@ -19,6 +19,7 @@ def dropbox(
ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO)
connector_config.download_dir = update_download_dir_remote_url(
connector_name="dropbox",
connector_config=connector_config,
remote_url=remote_url,
logger=logger,

View File

@ -25,6 +25,7 @@ def elasticsearch(
),
)
connector_config.download_dir = update_download_dir_hash(
connector_name="elasticsearch",
connector_config=connector_config,
hashed_dir_name=hashed_dir_name,
logger=logger,

View File

@ -19,6 +19,7 @@ def fsspec(
ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO)
connector_config.download_dir = update_download_dir_remote_url(
connector_name="fsspec",
connector_config=connector_config,
remote_url=remote_url,
logger=logger,

View File

@ -19,6 +19,7 @@ def gcs(
ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO)
connector_config.download_dir = update_download_dir_remote_url(
connector_name="gcs",
connector_config=connector_config,
remote_url=remote_url,
logger=logger,

View File

@ -26,6 +26,7 @@ def github(
),
)
connector_config.download_dir = update_download_dir_hash(
connector_name="github",
connector_config=connector_config,
hashed_dir_name=hashed_dir_name,
logger=logger,

View File

@ -26,6 +26,7 @@ def gitlab(
),
)
connector_config.download_dir = update_download_dir_hash(
connector_name="gitlab",
connector_config=connector_config,
hashed_dir_name=hashed_dir_name,
logger=logger,

View File

@ -24,6 +24,7 @@ def gdrive(
drive_id.encode("utf-8"),
)
connector_config.download_dir = update_download_dir_hash(
connector_name="gdrive",
connector_config=connector_config,
hashed_dir_name=hashed_dir_name,
logger=logger,

View File

@ -37,6 +37,7 @@ def notion(
else:
raise ValueError("could not create local cache directory name")
connector_config.download_dir = update_download_dir_hash(
connector_name="notion",
connector_config=connector_config,
hashed_dir_name=hashed_dir_name,
logger=logger,

View File

@ -27,6 +27,7 @@ def onedrive(
f"{tenant}_{user_pname}".encode("utf-8"),
)
connector_config.download_dir = update_download_dir_hash(
connector_name="onedrive",
connector_config=connector_config,
hashed_dir_name=hashed_dir_name,
logger=logger,

View File

@ -25,6 +25,7 @@ def outlook(
hashed_dir_name = hashlib.sha256(user_email.encode("utf-8"))
connector_config.download_dir = update_download_dir_hash(
connector_name="outlook",
connector_config=connector_config,
hashed_dir_name=hashed_dir_name,
logger=logger,

View File

@ -26,6 +26,7 @@ def reddit(
subreddit_name.encode("utf-8"),
)
connector_config.download_dir = update_download_dir_hash(
connector_name="reddit",
connector_config=connector_config,
hashed_dir_name=hashed_dir_name,
logger=logger,

View File

@ -18,6 +18,7 @@ def s3(
ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO)
connector_config.download_dir = update_download_dir_remote_url(
connector_name="s3",
connector_config=connector_config,
remote_url=remote_url,
logger=logger,

View File

@ -25,6 +25,7 @@ def sharepoint(
f"{site}_{path}".encode("utf-8"),
)
connector_config.download_dir = update_download_dir_hash(
connector_name="sharepoint",
connector_config=connector_config,
hashed_dir_name=hashed_dir_name,
logger=logger,

View File

@ -24,6 +24,7 @@ def slack(
channels.encode("utf-8"),
)
connector_config.download_dir = update_download_dir_hash(
connector_name="slack",
connector_config=connector_config,
hashed_dir_name=hashed_dir_name,
logger=logger,

View File

@ -10,12 +10,14 @@ from unstructured.ingest.interfaces import (
def update_download_dir_remote_url(
connector_name: str,
connector_config: StandardConnectorConfig,
remote_url: str,
logger: logging.Logger,
) -> str:
hashed_dir_name = hashlib.sha256(remote_url.encode("utf-8"))
return update_download_dir_hash(
connector_name=connector_name,
connector_config=connector_config,
hashed_dir_name=hashed_dir_name,
logger=logger,
@ -23,6 +25,7 @@ def update_download_dir_remote_url(
def update_download_dir_hash(
connector_name: str,
connector_config: StandardConnectorConfig,
hashed_dir_name: hashlib._Hash,
logger: logging.Logger,
@ -32,7 +35,7 @@ def update_download_dir_hash(
cache_path = Path.home() / ".cache" / "unstructured" / "ingest"
if not cache_path.exists():
cache_path.mkdir(parents=True, exist_ok=True)
download_dir = cache_path / hashed_dir_name.hexdigest()[:10]
download_dir = cache_path / connector_name / hashed_dir_name.hexdigest()[:10]
if connector_config.preserve_downloads:
logger.warning(
f"Preserving downloaded files but download_dir is not specified,"

View File

@ -21,6 +21,7 @@ def wikipedia(
page_title.encode("utf-8"),
)
connector_config.download_dir = update_download_dir_hash(
connector_name="wikipedia",
connector_config=connector_config,
hashed_dir_name=hashed_dir_name,
logger=logger,