chore[ingest]: fix confluence ingest diff tests (#1082)

* trigger CI

* trigger CI

* trigger CI

* do not ingest personal spaces in the diff test

* fix argument

* Update ingest test fixtures (#1083)

Co-authored-by: ahmetmeleq <ahmetmeleq@users.noreply.github.com>

---------

Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com>
Co-authored-by: ahmetmeleq <ahmetmeleq@users.noreply.github.com>
This commit is contained in:
Ahmet Melek 2023-08-10 20:45:17 +03:00 committed by GitHub
parent dee9b405cd
commit 64a1930c46
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 2 additions and 1716 deletions

View File

@ -1,176 +0,0 @@
[
{
"type": "NarrativeText",
"element_id": "68c92682cad5bd43120483571a7ecc5f",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1,
"emphasized_texts": [
{
"text": "Say hello to your colleagues who want to know your name, pronouns, role, team and location (or if you're remote).",
"tag": "span"
}
]
},
"text": "Say hello to your colleagues who want to know your name, pronouns, role, team and location (or if you're remote)."
},
{
"type": "Title",
"element_id": "1749e90b61ae5ea8859b927415ecf3de",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1
},
"text": "📄 Recent pages that I've worked on"
},
{
"type": "Title",
"element_id": "0ee6f0f9b05015cb4adbd8b305bec671",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1
},
"text": "Recently Updated"
},
{
"type": "ListItem",
"element_id": "90a1ffa1cadd422cfe05d6efa6503e50",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1,
"links": [
{
"text": "Overview",
"url": "/wiki/spaces/~64083457896d10ebd4738661/overview"
},
{
"text": "ryan",
"url": "/wiki/display/~64083457896d10ebd4738661"
}
],
"emphasized_texts": [
{
"text": "•",
"tag": "span"
}
]
},
"text": "Overview\n \n \n Jun 30, 2023 • contributed by ryan"
},
{
"type": "ListItem",
"element_id": "817b22f2ed823a49b608b969010fd111",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1,
"links": [
{
"text": "Getting started in Confluence",
"url": "/wiki/spaces/~64083457896d10ebd4738661/pages/65627/Getting+started+in+Confluence"
},
{
"text": "ryan",
"url": "/wiki/display/~64083457896d10ebd4738661"
}
],
"emphasized_texts": [
{
"text": "•",
"tag": "span"
}
]
},
"text": "Getting started in Confluence\n \n \n Jun 30, 2023 • contributed by ryan"
},
{
"type": "ListItem",
"element_id": "82a425617f6ff688335d227aab77adc5",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1,
"links": [
{
"text": "ryan",
"url": "/wiki/spaces/~64083457896d10ebd4738661"
},
{
"text": "ryan",
"url": "/wiki/display/~64083457896d10ebd4738661"
}
],
"emphasized_texts": [
{
"text": "•",
"tag": "span"
}
]
},
"text": "ryan\n \n \n Jun 30, 2023 • contributed by ryan"
},
{
"type": "Title",
"element_id": "903f4f48016cafdf332e01f2d004487f",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1
},
"text": "Blog stream"
},
{
"type": "NarrativeText",
"element_id": "cfe366b5f8c8991d1e2af6e507e35a4e",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1
},
"text": "Create a blog post to share news and announcements with your team and company."
},
{
"type": "Title",
"element_id": "8fd2c85152eb5e0750f65b52027e68c4",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1
},
"text": "🖐 Get in touch"
},
{
"type": "UncategorizedText",
"element_id": "93b2d3447a7fdd7df7cef8c0d0599d04",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1
},
"text": "✉️"
},
{
"type": "NarrativeText",
"element_id": "4685ab1f21cb52cdf8973f8281fa9915",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1,
"emphasized_texts": [
{
"text": "End with a bang! Some options are:",
"tag": "span"
},
{
"text": "<Insert company name>",
"tag": "span"
}
]
},
"text": "End with a bang! Some options are: \"I am so grateful to be here at <Insert company name> and very excited to get started!\" or \"Looking forward to meeting all of you!\" or \"Can't wait to get to know all of you!\""
}
]

View File

@ -1,150 +0,0 @@
[
{
"type": "NarrativeText",
"element_id": "68c92682cad5bd43120483571a7ecc5f",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1,
"emphasized_texts": [
{
"text": "Say hello to your colleagues who want to know your name, pronouns, role, team and location (or if you're remote).",
"tag": "span"
}
]
},
"text": "Say hello to your colleagues who want to know your name, pronouns, role, team and location (or if you're remote)."
},
{
"type": "Title",
"element_id": "1749e90b61ae5ea8859b927415ecf3de",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1
},
"text": "📄 Recent pages that I've worked on"
},
{
"type": "Title",
"element_id": "0ee6f0f9b05015cb4adbd8b305bec671",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1
},
"text": "Recently Updated"
},
{
"type": "ListItem",
"element_id": "47b53f02cfa28a04fe0d27e5712c0d70",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1,
"links": [
{
"text": "Overview",
"url": "/wiki/spaces/~7120205368eedfcecd43e18b25b2221316ee6f/overview"
},
{
"text": "ahmet",
"url": "/wiki/display/~712020%3A5368eedf-cecd-43e1-8b25-b2221316ee6f"
}
],
"emphasized_texts": [
{
"text": "•",
"tag": "span"
}
]
},
"text": "Overview\n \n \n Jul 12, 2023 • contributed by ahmet"
},
{
"type": "ListItem",
"element_id": "f3a94277c2df7e6c07d9b4ebd8d047ac",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1,
"links": [
{
"text": "ahmet",
"url": "/wiki/spaces/~7120205368eedfcecd43e18b25b2221316ee6f"
},
{
"text": "ahmet",
"url": "/wiki/display/~712020%3A5368eedf-cecd-43e1-8b25-b2221316ee6f"
}
],
"emphasized_texts": [
{
"text": "•",
"tag": "span"
}
]
},
"text": "ahmet\n \n \n Jul 12, 2023 • contributed by ahmet"
},
{
"type": "Title",
"element_id": "903f4f48016cafdf332e01f2d004487f",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1
},
"text": "Blog stream"
},
{
"type": "NarrativeText",
"element_id": "cfe366b5f8c8991d1e2af6e507e35a4e",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1
},
"text": "Create a blog post to share news and announcements with your team and company."
},
{
"type": "Title",
"element_id": "8fd2c85152eb5e0750f65b52027e68c4",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1
},
"text": "🖐 Get in touch"
},
{
"type": "UncategorizedText",
"element_id": "93b2d3447a7fdd7df7cef8c0d0599d04",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1
},
"text": "✉️"
},
{
"type": "NarrativeText",
"element_id": "4685ab1f21cb52cdf8973f8281fa9915",
"metadata": {
"data_source": {},
"filetype": "text/html",
"page_number": 1,
"emphasized_texts": [
{
"text": "End with a bang! Some options are:",
"tag": "span"
},
{
"text": "<Insert company name>",
"tag": "span"
}
]
},
"text": "End with a bang! Some options are: \"I am so grateful to be here at <Insert company name> and very excited to get started!\" or \"Looking forward to meeting all of you!\" or \"Can't wait to get to know all of you!\""
}
]

View File

@ -1,7 +1,7 @@
#!/usr/bin/env bash
set -e
# Description: This test checks if all the processed content is the same as the expected outputs.
# Description: This test checks if all the processed content is the same as the expected outputs
SCRIPT_DIR=$(dirname "$(realpath "$0")")
cd "$SCRIPT_DIR"/.. || exit 1
@ -27,6 +27,6 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
--url https://unstructured-ingest-test.atlassian.net \
--user-email "$CONFLUENCE_USER_EMAIL" \
--api-token "$CONFLUENCE_API_TOKEN" \
--max-num-of-spaces 4
--list-of-spaces testteamsp,MFS \
sh "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME