diff --git a/test_unstructured_ingest/expected-structured-output/confluence-diff/~64083457896d10ebd4738661/65627.json b/test_unstructured_ingest/expected-structured-output/confluence-diff/~64083457896d10ebd4738661/65627.json deleted file mode 100644 index c38698729..000000000 --- a/test_unstructured_ingest/expected-structured-output/confluence-diff/~64083457896d10ebd4738661/65627.json +++ /dev/null @@ -1,1388 +0,0 @@ -[ - { - "type": "Title", - "element_id": "2eb65d726e8ba12178bfa04801fe648e", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "emphasized_texts": [ - { - "text": "Welcome to Confluence!", - "tag": "strong" - }, - { - "text": "Welcome to Confluence!", - "tag": "span" - } - ] - }, - "text": "Welcome to Confluence!" - }, - { - "type": "NarrativeText", - "element_id": "885974053233ceee944e2ed659d52934", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "You can use Confluence to create a rich knowledge base or to collaborate with others using words, videos, images, graphics, charts, and graphs." - }, - { - "type": "NarrativeText", - "element_id": "9e83d40fe3248324776df09212a64b72", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "This page walks you through some Confluence basics like spaces, pages, and elements that allow you to create and manage impactful content for your teams." - }, - { - "type": "Title", - "element_id": "b5658fc8edda24f9c94eabac9e536e0a", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "emphasized_texts": [ - { - "text": "On this page", - "tag": "strong" - } - ] - }, - "text": "On this page" - }, - { - "type": "Title", - "element_id": "322b82eeb1c9c41db929de03e7c63f57", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "emphasized_texts": [ - { - "text": "Confluence 101", - "tag": "strong" - }, - { - "text": "Confluence 101", - "tag": "span" - } - ] - }, - "text": "Confluence 101" - }, - { - "type": "Title", - "element_id": "5e578427b078b7c347188513581c3597", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "links": [ - { - "text": "https://www.youtube.com/embed/kaNPNbAT-as%22", - "url": "https://www.youtube.com/embed/kaNPNbAT-as%22" - } - ] - }, - "text": "https://www.youtube.com/embed/kaNPNbAT-as%22" - }, - { - "type": "Title", - "element_id": "afa2cab2afcf2ddc402d0f2ec2ead209", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "🗃️ Spaces" - }, - { - "type": "NarrativeText", - "element_id": "3911a4c80dea2fb9473733a565e8606b", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "links": [ - { - "text": "Create", - "url": "https://confluence.atlassian.com/conf73/create-a-space-991927526.html" - } - ] - }, - "text": "Spaces are places for individuals, teams, and companies to organize and work on ideas, projects, documentation, and announcements. Spaces can be customized and integrated with both Atlassian tools and others. Create as many spaces as you need to get things done:" - }, - { - "type": "Title", - "element_id": "0be04b4f929d6cfbc549f9295e8ca818", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "emphasized_texts": [ - { - "text": "TEAM SPACES", - "tag": "span" - } - ] - }, - "text": "TEAM SPACES" - }, - { - "type": "UncategorizedText", - "element_id": "ef203bcba5af304868013f9b46956c11", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "Give each team their own space so they can work closely and make information easier to find." - }, - { - "type": "Title", - "element_id": "930e59c3783e8cc919849cb2387603f1", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "emphasized_texts": [ - { - "text": "PROJECT SPACES", - "tag": "span" - } - ] - }, - "text": "PROJECT SPACES" - }, - { - "type": "UncategorizedText", - "element_id": "cee47208a54c4d037978ed55566f6795", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "Put all the information related to your project in one place so everyone can work together." - }, - { - "type": "Title", - "element_id": "5e9251fb244b96c2af03e698ab29f6d0", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "emphasized_texts": [ - { - "text": "PERSONAL SPACES", - "tag": "span" - } - ] - }, - "text": "PERSONAL SPACES" - }, - { - "type": "UncategorizedText", - "element_id": "a1525a152f649a4587bc5856f42db2e7", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "Store anything you’re working on, keep your to-do lists, and polish content before moving it into a shared space." - }, - { - "type": "ListItem", - "element_id": "323630e08465ec3e6e5d87e48a99f000", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "emphasized_texts": [ - { - "text": "In the main navigation bar, select Spaces→ Create space to create a new space.", - "tag": "span" - }, - { - "text": "In the main navigation bar, select", - "tag": "strong" - }, - { - "text": "In the main navigation bar, select", - "tag": "span" - }, - { - "text": "→", - "tag": "strong" - }, - { - "text": "→", - "tag": "span" - }, - { - "text": "to create a new space.", - "tag": "strong" - }, - { - "text": "to create a new space.", - "tag": "span" - } - ] - }, - "text": "In the main navigation bar, select Spaces→ Create space to create a new space." - }, - { - "type": "Title", - "element_id": "ca92c463a8ce188551e58f906ec4713f", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "links": [ - { - "text": "Learn more about spaces", - "url": "https://www.atlassian.com/software/confluence/guides/get-started/set-up" - } - ] - }, - "text": "Learn more about spaces" - }, - { - "type": "Title", - "element_id": "44da24e6b3b9ac500c6688579a01dc10", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "📄 Pages" - }, - { - "type": "NarrativeText", - "element_id": "5dd3cb98e4d150a1ce441c8c21f8910b", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "Pages are where teams define, collaborate on, and document their work. They’re the building blocks of Confluence." - }, - { - "type": "NarrativeText", - "element_id": "be8d1d73ac41b88c9ac02d43ab9b40b9", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "To create a page within a space, select the + to create a new page. You can also create pages using the Create button in the main navigation bar." - }, - { - "type": "ListItem", - "element_id": "eedc3d95eca759390cdf26b0339a112b", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "emphasized_texts": [ - { - "text": "Press c to create a page at any time.", - "tag": "span" - }, - { - "text": "Press", - "tag": "strong" - }, - { - "text": "Press", - "tag": "span" - }, - { - "text": "to create a page at any time.", - "tag": "strong" - }, - { - "text": "to create a page at any time.", - "tag": "span" - } - ] - }, - "text": "Press c to create a page at any time." - }, - { - "type": "Title", - "element_id": "45c5cc250ae3792fc2dd98720929982c", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "links": [ - { - "text": "Learn more about pages", - "url": "https://www.atlassian.com/software/confluence/guides/get-started/create-content" - } - ] - }, - "text": "Learn more about pages" - }, - { - "type": "NarrativeText", - "element_id": "a663ab71541b65391ee9312ea801f6c6", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "emphasized_texts": [ - { - "text": "Visualize the relationship between spaces and pages", - "tag": "span" - } - ] - }, - "text": "Visualize the relationship between spaces and pages" - }, - { - "type": "NarrativeText", - "element_id": "d9d301e195f57025d88adc0e3e5eca9e", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "links": [ - { - "text": "Back to top ⤴", - "url": "#On-this-page" - } - ] - }, - "text": "Back to top ⤴" - }, - { - "type": "Title", - "element_id": "1d4055434bd0d9919762c6b588ee821a", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 2, - "emphasized_texts": [ - { - "text": "Making purposeful content", - "tag": "strong" - }, - { - "text": "Making purposeful content", - "tag": "span" - } - ] - }, - "text": "Making purposeful content" - }, - { - "type": "Title", - "element_id": "49094b8b972fbfe4e477dc20741a2610", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 2 - }, - "text": "✏️ Confluence editor" - }, - { - "type": "NarrativeText", - "element_id": "6d9bc96712ae22b223997def67c66849", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 2 - }, - "text": "Changes can only be made to page content when in the editor. Changes will save automatically and be pushed live the next time the page is published." - }, - { - "type": "Title", - "element_id": "26c2d8cd9807363e7dd97484b1017483", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 2 - }, - "text": "🛠️ Templates" - }, - { - "type": "NarrativeText", - "element_id": "731470a0f1e19159b3c28497268422ce", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 2 - }, - "text": "If you’re not sure where to start when setting up a new space or page, save time with a space or page template." - }, - { - "type": "ListItem", - "element_id": "1c04e4f623afaa1b595b0b07feae65a7", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 2, - "emphasized_texts": [ - { - "text": "Press e when on a page to open the editor.", - "tag": "span" - }, - { - "text": "Press", - "tag": "strong" - }, - { - "text": "Press", - "tag": "span" - }, - { - "text": "when on a page to open the editor.", - "tag": "strong" - }, - { - "text": "when on a page to open the editor.", - "tag": "span" - } - ] - }, - "text": "Press e when on a page to open the editor." - }, - { - "type": "NarrativeText", - "element_id": "7d9795cd9464c1afe382cace7ffbc400", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 2, - "links": [ - { - "text": "Learn more about page templates", - "url": "https://support.atlassian.com/confluence-cloud/docs/create-a-page-from-a-template" - }, - { - "text": "Browse space templates", - "url": "https://support.atlassian.com/confluence-cloud/docs/create-a-space-from-a-template" - } - ], - "emphasized_texts": [ - { - "text": "|", - "tag": "span" - } - ] - }, - "text": "Learn more about page templates | Browse space templates" - }, - { - "type": "Title", - "element_id": "8f7c5f98176b74776bec64720653bcd6", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 2 - }, - "text": "🧰 Elements" - }, - { - "type": "NarrativeText", - "element_id": "b4969b1758e502d396a65cd3d45198e8", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 2 - }, - "text": "Elements are tools like calendars, note panels, and layouts, designed to help you make your pages more powerful." - }, - { - "type": "ListItem", - "element_id": "ed6edefd9f38f6f5a1d21c0d684c37da", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 2, - "emphasized_texts": [ - { - "text": "Press/to see a list of elements.", - "tag": "span" - }, - { - "text": "Press", - "tag": "strong" - }, - { - "text": "Press", - "tag": "span" - }, - { - "text": "to see a list of elements.", - "tag": "strong" - }, - { - "text": "to see a list of elements.", - "tag": "span" - } - ] - }, - "text": "Press/to see a list of elements." - }, - { - "type": "Title", - "element_id": "8c8c8f77eab8c52c8fcdbd788a5b0b0a", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 2, - "links": [ - { - "text": "Learn more about elements", - "url": "https://support.atlassian.com/confluence-cloud/docs/create-a-space-from-a-template" - } - ] - }, - "text": "Learn more about elements" - }, - { - "type": "Title", - "element_id": "ee774e2387c977ecc598e5e70942a0e4", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 2 - }, - "text": "📸 Header images" - }, - { - "type": "NarrativeText", - "element_id": "f7bf164fadd9c3aaaf967152f665abc3", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 2 - }, - "text": "Make your pages more visually impactful with emojis and header images. 🍕 Emojis aren’t just a fun way to spice up your page—they also show up in search, so using them strategically makes it easier to find pages later on." - }, - { - "type": "ListItem", - "element_id": "02fc62c06b4c6557c36886667b99ed9a", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 2, - "emphasized_texts": [ - { - "text": "Select Add status at the top of the page, so teammates can see your progress at a glance.", - "tag": "span" - }, - { - "text": "Select", - "tag": "strong" - }, - { - "text": "Select", - "tag": "span" - }, - { - "text": "at the top of the page, so teammates can see your progress at a glance.", - "tag": "strong" - }, - { - "text": "at the top of the page, so teammates can see your progress at a glance.", - "tag": "span" - } - ] - }, - "text": "Select Add status at the top of the page, so teammates can see your progress at a glance." - }, - { - "type": "Title", - "element_id": "7bda962700659dfe393a4eca2ff9a906", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 2, - "links": [ - { - "text": "Learn more about page header images", - "url": "https://support.atlassian.com/confluence-cloud/docs/make-your-page-and-its-title-more-memorable" - } - ] - }, - "text": "Learn more about page header images" - }, - { - "type": "NarrativeText", - "element_id": "d9d301e195f57025d88adc0e3e5eca9e", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 2, - "links": [ - { - "text": "Back to top ⤴", - "url": "#On-this-page" - } - ] - }, - "text": "Back to top ⤴" - }, - { - "type": "Title", - "element_id": "8777f709dc562ff498cdde6436c20843", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 3, - "emphasized_texts": [ - { - "text": "Collaborating with teammates", - "tag": "strong" - }, - { - "text": "Collaborating with teammates", - "tag": "span" - } - ] - }, - "text": "Collaborating with teammates" - }, - { - "type": "Title", - "element_id": "deff602bf2306622e81cf3cf7155ad3d", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 3 - }, - "text": "🧑‍💻 Real-time editing" - }, - { - "type": "NarrativeText", - "element_id": "b2cbd4a7cf0f6f7069295a4236711630", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 3, - "emphasized_texts": [ - { - "text": "Edit", - "tag": "strong" - } - ] - }, - "text": "You and up to 11 teammates can edit a page together in real time. Changes save and sync automatically so that everyone editing sees the same thing. Start editing together by selecting the Edit icon in the header." - }, - { - "type": "ListItem", - "element_id": "dac5e5932ac80b4a6f2733c213027826", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 3, - "emphasized_texts": [ - { - "text": "Press Command + Enter when in the editor to publish.", - "tag": "span" - }, - { - "text": "Press", - "tag": "strong" - }, - { - "text": "Press", - "tag": "span" - }, - { - "text": "when in the editor to publish.", - "tag": "strong" - }, - { - "text": "when in the editor to publish.", - "tag": "span" - } - ] - }, - "text": "Press Command + Enter when in the editor to publish." - }, - { - "type": "Title", - "element_id": "756118beff3227cddcaf9a163438edf4", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 3, - "links": [ - { - "text": "Learn more about collaborative editing", - "url": "https://support.atlassian.com/confluence-cloud/docs/create-edit-and-publish-a-page/#Collaborative-editing" - } - ] - }, - "text": "Learn more about collaborative editing" - }, - { - "type": "Title", - "element_id": "840ee21583a77b36e1ce4b908f9f84f8", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 3 - }, - "text": "💬 Mentions and comments" - }, - { - "type": "NarrativeText", - "element_id": "f102ad85bb98dfdd392b7f266cfebe81", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 3 - }, - "text": "Comments are a great way to start a conversation about a page or blog post. They allow you to remark on content, add important information, ask questions, and generally drive collaboration and teamwork." - }, - { - "type": "NarrativeText", - "element_id": "cf6f438d3c493da66de8e1204269cd7c", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 3 - }, - "text": "@mention a teammate to bring them into the conversation, and they can reply to and/or like comments." - }, - { - "type": "NarrativeText", - "element_id": "98e25b5594cbab6b3f042a80180cd5c0", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 3, - "emphasized_texts": [ - { - "text": "Leave comments for your team in different places, depending on the type of feedback you have:", - "tag": "span" - } - ] - }, - "text": "Leave comments for your team in different places, depending on the type of feedback you have:" - }, - { - "type": "ListItem", - "element_id": "7ce59a1b8b7ed875bfbf82b5b87f7efd", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 3, - "emphasized_texts": [ - { - "text": "Inline comment →Added to a highlighted section of the page or post you are editing or viewing; good for targeted comments on a specific word or phrase.", - "tag": "span" - }, - { - "text": "Inline comment →", - "tag": "strong" - } - ] - }, - "text": "Inline comment →Added to a highlighted section of the page or post you are editing or viewing; good for targeted comments on a specific word or phrase." - }, - { - "type": "ListItem", - "element_id": "3bc1388235b382478e69bdbf7d9913f8", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 3, - "emphasized_texts": [ - { - "text": "Page comment →Added below the content of the page or post you are viewing; good for comments that apply to the content as a whole.", - "tag": "span" - }, - { - "text": "Page comment →", - "tag": "strong" - } - ] - }, - "text": "Page comment →Added below the content of the page or post you are viewing; good for comments that apply to the content as a whole." - }, - { - "type": "ListItem", - "element_id": "ec246ca35fd56b693d6e775a12b74ded", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 3, - "emphasized_texts": [ - { - "text": "Leave an inline comment when viewing a page or in the editor.", - "tag": "span" - }, - { - "text": "Leave an inline comment when viewing a page or in the editor.", - "tag": "strong" - }, - { - "text": "Leave an inline comment when viewing a page or in the editor.", - "tag": "span" - } - ] - }, - "text": "Leave an inline comment when viewing a page or in the editor." - }, - { - "type": "Title", - "element_id": "0bac4308c7b2a10251091520deb6d525", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 3, - "links": [ - { - "text": "Learn more about comments", - "url": "https://support.atlassian.com/confluence-cloud/docs/comment-on-pages-and-blog-posts/" - } - ] - }, - "text": "Learn more about comments" - }, - { - "type": "NarrativeText", - "element_id": "d9d301e195f57025d88adc0e3e5eca9e", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 3, - "links": [ - { - "text": "Back to top ⤴", - "url": "#On-this-page" - } - ] - }, - "text": "Back to top ⤴" - }, - { - "type": "Title", - "element_id": "1167726f191da8408505ec6b775b0fed", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4, - "emphasized_texts": [ - { - "text": "Organizing your content", - "tag": "strong" - }, - { - "text": "Organizing your content", - "tag": "span" - } - ] - }, - "text": "Organizing your content" - }, - { - "type": "Title", - "element_id": "083eecafc6a7a2328e74bab5d4916b9b", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4 - }, - "text": "🌳 Page tree" - }, - { - "type": "NarrativeText", - "element_id": "399e4d064dd823d07e9a45029dc28a1f", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4 - }, - "text": "The page tree is where your content is organized within a space. Drag and drop to reorder or nest pages, so you can more effectively share your work with others." - }, - { - "type": "Title", - "element_id": "b894d89eee30f587c47d93a800ee20ed", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4, - "links": [ - { - "text": "Learn more about the page tree", - "url": "https://support.atlassian.com/confluence-cloud/docs/move-copy-and-hide-pages/" - } - ] - }, - "text": "Learn more about the page tree" - }, - { - "type": "Title", - "element_id": "2de8473d8ed31d3c4ed72c29c207648c", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4 - }, - "text": "📣 Space overview" - }, - { - "type": "NarrativeText", - "element_id": "fbca24e571e80c6a6e456c6f6a28b2a7", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4 - }, - "text": "To help your teammates find relevant content, curate your space overview with useful elements like calendars and page statuses, and include information about what is in your space." - }, - { - "type": "ListItem", - "element_id": "1c5b9983f916ff7129cd9a61fe48d0d5", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4, - "emphasized_texts": [ - { - "text": "Use the /info panel element to highlight important information in your space overview.", - "tag": "span" - }, - { - "text": "Use the", - "tag": "strong" - }, - { - "text": "Use the", - "tag": "span" - }, - { - "text": "panel element to highlight important information in your space overview.", - "tag": "strong" - }, - { - "text": "panel element to highlight important information in your space overview.", - "tag": "span" - } - ] - }, - "text": "Use the /info panel element to highlight important information in your space overview." - }, - { - "type": "Title", - "element_id": "84658a882a94ffaebcfe48fe4caf598c", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4, - "links": [ - { - "text": "Learn more about space overview", - "url": "https://support.atlassian.com/confluence-cloud/docs/set-up-your-space-homepage/" - } - ] - }, - "text": "Learn more about space overview" - }, - { - "type": "Title", - "element_id": "511fe8b133462b55b2d09a545bd65e2e", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4 - }, - "text": "🌻 Content types" - }, - { - "type": "Title", - "element_id": "6987ba20ee85caf876c39383a25849b0", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4, - "emphasized_texts": [ - { - "text": "BLOG POST", - "tag": "span" - } - ] - }, - "text": "BLOG POST" - }, - { - "type": "UncategorizedText", - "element_id": "45e075fb15755ea5fe6e6b15b055c026", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4 - }, - "text": "If your content that won’t change over time, create it as a blog post. The blog feed for each space displays the most recent posts first." - }, - { - "type": "Title", - "element_id": "c62ae84fb20004173d3831696a1cd120", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4, - "emphasized_texts": [ - { - "text": "PAGE", - "tag": "span" - } - ] - }, - "text": "PAGE" - }, - { - "type": "UncategorizedText", - "element_id": "b5a441979d22f951eaede916dabc86cd", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4 - }, - "text": "If your content may change over time, create it as a page. Pages nest, so you can create categories and subcategories." - }, - { - "type": "ListItem", - "element_id": "a636107d401acb3e244fd106ae9772e6", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4, - "emphasized_texts": [ - { - "text": "Press k for a shareable link to your page or post.", - "tag": "span" - }, - { - "text": "Press", - "tag": "strong" - }, - { - "text": "Press", - "tag": "span" - }, - { - "text": "for a shareable link to your page or post.", - "tag": "strong" - }, - { - "text": "for a shareable link to your page or post.", - "tag": "span" - } - ] - }, - "text": "Press k for a shareable link to your page or post." - }, - { - "type": "Title", - "element_id": "2be9941d2236b667b1bc1a866ec1540e", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4, - "links": [ - { - "text": "Learn more about sharing pages and blog posts", - "url": "https://support.atlassian.com/confluence-cloud/docs/share-a-page-or-blog-post/" - } - ] - }, - "text": "Learn more about sharing pages and blog posts" - }, - { - "type": "Title", - "element_id": "d8a0ed1f66735721e002342525dea8e3", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4 - }, - "text": "📏 Tables" - }, - { - "type": "NarrativeText", - "element_id": "607eeed35565b21421d2622df8eae4e8", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4 - }, - "text": "Tables allow you to organize important information on a page to discuss it with your team. Present the information the way it makes sense to you—resize columns, color cells, rows and columns, and sort the table by clicking the column headers." - }, - { - "type": "ListItem", - "element_id": "d2d32c8488611e1541d3b312ee7826c6", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4, - "emphasized_texts": [ - { - "text": "Type /table to add a table at any time.", - "tag": "span" - }, - { - "text": "Type", - "tag": "strong" - }, - { - "text": "Type", - "tag": "span" - }, - { - "text": "to add a table at any time.", - "tag": "strong" - }, - { - "text": "to add a table at any time.", - "tag": "span" - } - ] - }, - "text": "Type /table to add a table at any time." - }, - { - "type": "Title", - "element_id": "2376efe008eb533cdfa2184bb149c844", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4, - "links": [ - { - "text": "Learn more about tables", - "url": "https://confluence.atlassian.com/conf73/tables-991927743.html" - } - ] - }, - "text": "Learn more about tables" - }, - { - "type": "Title", - "element_id": "d15c5a32974887b38bfa6a6e64251ee7", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4 - }, - "text": "🌱 Drafts" - }, - { - "type": "NarrativeText", - "element_id": "e45490c8c3158aa549797fc4aed77e2c", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4 - }, - "text": "Confluence autosaves your content as you work—if you select Close in the editor without publishing, you’ll create a draft, or an unpublished page you can get back to at any time." - }, - { - "type": "NarrativeText", - "element_id": "41bce51561cefdacb74ce1c1d12116fe", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4, - "emphasized_texts": [ - { - "text": "DRAFT", - "tag": "span" - }, - { - "text": "Only you can see your draft, unless you share a link to it.", - "tag": "strong" - }, - { - "text": "Only you can see your draft, unless you share a link to it.", - "tag": "span" - } - ] - }, - "text": "DRAFTOnly you can see your draft, unless you share a link to it." - }, - { - "type": "Title", - "element_id": "816fa495cd358505837131d2eead8331", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4, - "links": [ - { - "text": "Learn more about drafts", - "url": "https://confluence.atlassian.com/doc/drafts-149040.html" - } - ] - }, - "text": "Learn more about drafts" - }, - { - "type": "NarrativeText", - "element_id": "d9d301e195f57025d88adc0e3e5eca9e", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 4, - "links": [ - { - "text": "Back to top ⤴", - "url": "#On-this-page" - } - ] - }, - "text": "Back to top ⤴" - }, - { - "type": "Title", - "element_id": "7ad7aa27712dc30ee04368fcc781d711", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 5, - "emphasized_texts": [ - { - "text": "What’s next?", - "tag": "strong" - }, - { - "text": "What’s next?", - "tag": "span" - } - ] - }, - "text": "What’s next?" - }, - { - "type": "Title", - "element_id": "00ddc2307a560d6e27415b805809b368", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 5 - }, - "text": "🧐 TL;DR" - }, - { - "type": "NarrativeText", - "element_id": "2d9008ebc9f99c5ef68b82d4766b0976", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 5 - }, - "text": "These are the top three actions we think will help you get the most from Confluence." - }, - { - "type": "ListItem", - "element_id": "7a36504d447735da39b2bc5d03384bdc", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 5, - "emphasized_texts": [ - { - "text": "Explore a Space", - "tag": "span" - } - ] - }, - "text": "Explore a Space" - }, - { - "type": "ListItem", - "element_id": "b682233a5002907c4bc71179c53d6a86", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 5, - "emphasized_texts": [ - { - "text": "Create a page", - "tag": "span" - } - ] - }, - "text": "Create a page" - }, - { - "type": "ListItem", - "element_id": "3e14c44747ea9d34bbd57115d8aa1ee4", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 5, - "emphasized_texts": [ - { - "text": "Invite a teammate", - "tag": "span" - } - ] - }, - "text": "Invite a teammate" - }, - { - "type": "Title", - "element_id": "4a5633c76f38e0f0e27b59720676c8c7", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 5 - }, - "text": "Jira ❤️ Confluence" - }, - { - "type": "NarrativeText", - "element_id": "e429bc4dad5a02945818207ba51078e5", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 5 - }, - "text": "Choose from 70+ templates to help you structure your thinking, improve documentation quality, and adopt best practices for agile development." - }, - { - "type": "Title", - "element_id": "8595d1d3ae853ede2a6bd371b30275ef", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 5, - "links": [ - { - "text": "https://www.youtube.com/watch?v=ohtDFXNAUns", - "url": "https://www.youtube.com/watch?v=ohtDFXNAUns" - } - ] - }, - "text": "https://www.youtube.com/watch?v=ohtDFXNAUns" - }, - { - "type": "NarrativeText", - "element_id": "d9d301e195f57025d88adc0e3e5eca9e", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 5, - "links": [ - { - "text": "Back to top ⤴", - "url": "#On-this-page" - } - ] - }, - "text": "Back to top ⤴" - } -] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/confluence-diff/~64083457896d10ebd4738661/65628.json b/test_unstructured_ingest/expected-structured-output/confluence-diff/~64083457896d10ebd4738661/65628.json deleted file mode 100644 index e85df962c..000000000 --- a/test_unstructured_ingest/expected-structured-output/confluence-diff/~64083457896d10ebd4738661/65628.json +++ /dev/null @@ -1,176 +0,0 @@ -[ - { - "type": "NarrativeText", - "element_id": "68c92682cad5bd43120483571a7ecc5f", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "emphasized_texts": [ - { - "text": "Say hello to your colleagues who want to know your name, pronouns, role, team and location (or if you're remote).", - "tag": "span" - } - ] - }, - "text": "Say hello to your colleagues who want to know your name, pronouns, role, team and location (or if you're remote)." - }, - { - "type": "Title", - "element_id": "1749e90b61ae5ea8859b927415ecf3de", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "📄 Recent pages that I've worked on" - }, - { - "type": "Title", - "element_id": "0ee6f0f9b05015cb4adbd8b305bec671", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "Recently Updated" - }, - { - "type": "ListItem", - "element_id": "90a1ffa1cadd422cfe05d6efa6503e50", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "links": [ - { - "text": "Overview", - "url": "/wiki/spaces/~64083457896d10ebd4738661/overview" - }, - { - "text": "ryan", - "url": "/wiki/display/~64083457896d10ebd4738661" - } - ], - "emphasized_texts": [ - { - "text": "•", - "tag": "span" - } - ] - }, - "text": "Overview\n \n \n Jun 30, 2023 • contributed by ryan" - }, - { - "type": "ListItem", - "element_id": "817b22f2ed823a49b608b969010fd111", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "links": [ - { - "text": "Getting started in Confluence", - "url": "/wiki/spaces/~64083457896d10ebd4738661/pages/65627/Getting+started+in+Confluence" - }, - { - "text": "ryan", - "url": "/wiki/display/~64083457896d10ebd4738661" - } - ], - "emphasized_texts": [ - { - "text": "•", - "tag": "span" - } - ] - }, - "text": "Getting started in Confluence\n \n \n Jun 30, 2023 • contributed by ryan" - }, - { - "type": "ListItem", - "element_id": "82a425617f6ff688335d227aab77adc5", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "links": [ - { - "text": "ryan", - "url": "/wiki/spaces/~64083457896d10ebd4738661" - }, - { - "text": "ryan", - "url": "/wiki/display/~64083457896d10ebd4738661" - } - ], - "emphasized_texts": [ - { - "text": "•", - "tag": "span" - } - ] - }, - "text": "ryan\n \n \n Jun 30, 2023 • contributed by ryan" - }, - { - "type": "Title", - "element_id": "903f4f48016cafdf332e01f2d004487f", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "Blog stream" - }, - { - "type": "NarrativeText", - "element_id": "cfe366b5f8c8991d1e2af6e507e35a4e", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "Create a blog post to share news and announcements with your team and company." - }, - { - "type": "Title", - "element_id": "8fd2c85152eb5e0750f65b52027e68c4", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "🖐 Get in touch" - }, - { - "type": "UncategorizedText", - "element_id": "93b2d3447a7fdd7df7cef8c0d0599d04", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "✉️" - }, - { - "type": "NarrativeText", - "element_id": "4685ab1f21cb52cdf8973f8281fa9915", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "emphasized_texts": [ - { - "text": "End with a bang! Some options are:", - "tag": "span" - }, - { - "text": "", - "tag": "span" - } - ] - }, - "text": "End with a bang! Some options are: \"I am so grateful to be here at and very excited to get started!\" or \"Looking forward to meeting all of you!\" or \"Can't wait to get to know all of you!\"" - } -] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/confluence-diff/~7120205368eedfcecd43e18b25b2221316ee6f/2130016.json b/test_unstructured_ingest/expected-structured-output/confluence-diff/~7120205368eedfcecd43e18b25b2221316ee6f/2130016.json deleted file mode 100644 index d6c2c547d..000000000 --- a/test_unstructured_ingest/expected-structured-output/confluence-diff/~7120205368eedfcecd43e18b25b2221316ee6f/2130016.json +++ /dev/null @@ -1,150 +0,0 @@ -[ - { - "type": "NarrativeText", - "element_id": "68c92682cad5bd43120483571a7ecc5f", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "emphasized_texts": [ - { - "text": "Say hello to your colleagues who want to know your name, pronouns, role, team and location (or if you're remote).", - "tag": "span" - } - ] - }, - "text": "Say hello to your colleagues who want to know your name, pronouns, role, team and location (or if you're remote)." - }, - { - "type": "Title", - "element_id": "1749e90b61ae5ea8859b927415ecf3de", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "📄 Recent pages that I've worked on" - }, - { - "type": "Title", - "element_id": "0ee6f0f9b05015cb4adbd8b305bec671", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "Recently Updated" - }, - { - "type": "ListItem", - "element_id": "47b53f02cfa28a04fe0d27e5712c0d70", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "links": [ - { - "text": "Overview", - "url": "/wiki/spaces/~7120205368eedfcecd43e18b25b2221316ee6f/overview" - }, - { - "text": "ahmet", - "url": "/wiki/display/~712020%3A5368eedf-cecd-43e1-8b25-b2221316ee6f" - } - ], - "emphasized_texts": [ - { - "text": "•", - "tag": "span" - } - ] - }, - "text": "Overview\n \n \n Jul 12, 2023 • contributed by ahmet" - }, - { - "type": "ListItem", - "element_id": "f3a94277c2df7e6c07d9b4ebd8d047ac", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "links": [ - { - "text": "ahmet", - "url": "/wiki/spaces/~7120205368eedfcecd43e18b25b2221316ee6f" - }, - { - "text": "ahmet", - "url": "/wiki/display/~712020%3A5368eedf-cecd-43e1-8b25-b2221316ee6f" - } - ], - "emphasized_texts": [ - { - "text": "•", - "tag": "span" - } - ] - }, - "text": "ahmet\n \n \n Jul 12, 2023 • contributed by ahmet" - }, - { - "type": "Title", - "element_id": "903f4f48016cafdf332e01f2d004487f", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "Blog stream" - }, - { - "type": "NarrativeText", - "element_id": "cfe366b5f8c8991d1e2af6e507e35a4e", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "Create a blog post to share news and announcements with your team and company." - }, - { - "type": "Title", - "element_id": "8fd2c85152eb5e0750f65b52027e68c4", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "🖐 Get in touch" - }, - { - "type": "UncategorizedText", - "element_id": "93b2d3447a7fdd7df7cef8c0d0599d04", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1 - }, - "text": "✉️" - }, - { - "type": "NarrativeText", - "element_id": "4685ab1f21cb52cdf8973f8281fa9915", - "metadata": { - "data_source": {}, - "filetype": "text/html", - "page_number": 1, - "emphasized_texts": [ - { - "text": "End with a bang! Some options are:", - "tag": "span" - }, - { - "text": "", - "tag": "span" - } - ] - }, - "text": "End with a bang! Some options are: \"I am so grateful to be here at and very excited to get started!\" or \"Looking forward to meeting all of you!\" or \"Can't wait to get to know all of you!\"" - } -] \ No newline at end of file diff --git a/test_unstructured_ingest/test-ingest-confluence-diff.sh b/test_unstructured_ingest/test-ingest-confluence-diff.sh index 43e1f89f1..a7c7321f2 100755 --- a/test_unstructured_ingest/test-ingest-confluence-diff.sh +++ b/test_unstructured_ingest/test-ingest-confluence-diff.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -e -# Description: This test checks if all the processed content is the same as the expected outputs. +# Description: This test checks if all the processed content is the same as the expected outputs SCRIPT_DIR=$(dirname "$(realpath "$0")") cd "$SCRIPT_DIR"/.. || exit 1 @@ -27,6 +27,6 @@ PYTHONPATH=. ./unstructured/ingest/main.py \ --url https://unstructured-ingest-test.atlassian.net \ --user-email "$CONFLUENCE_USER_EMAIL" \ --api-token "$CONFLUENCE_API_TOKEN" \ - --max-num-of-spaces 4 + --list-of-spaces testteamsp,MFS \ sh "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME