mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00
feat: include images when partitioning html (#3945)
Currently we [filter img tags](https://github.com/Unstructured-IO/unstructured/blob/2addb19473/unstructured/partition/html/partition.py#L226-L229)
before tags are converted to Elements by the html partitioner. More
importantly, we also don’t currently have a defined “block” / mapping to
support these. This adds these mappings and the logic to process them.
It also respects `extract_image_block_types` and
`extract_image_block_to_payload` (as we do with pdfs) to determine
whether base64 is included in the metadata.
Each partitioned Image element sets its text to the img tag’s alt text,
if available.
The partitioned Image Elements include the [url in the
metadata](https://github.com/Unstructured-IO/unstructured/blob/main/unstructured/documents/elements.py#L209)
(rather than image_base64) if the img tag src is a url.
## Testing
Unit tests have been added for explicit coverage.
Existing integration tests and other unit test fixtures have been
updated to account for the `Image` elements now present.
---------
Co-authored-by: ryannikolaidis <ryannikolaidis@users.noreply.github.com>
This commit is contained in:
parent
74b0647aa2
commit
c0457c1cc3
10
CHANGELOG.md
10
CHANGELOG.md
@ -1,3 +1,13 @@
|
||||
## 0.16.26-dev1
|
||||
|
||||
### Enhancements
|
||||
|
||||
- **Add support for images in html partitioner** `<img>` tags will now be parsed as `Image` elements. When `extract_image_block_types` includes `Image` and `extract_image_block_to_payload=True`, the `image_base64` will be included for images that specify the base64 data (rather than a url) as the source.
|
||||
|
||||
### Features
|
||||
|
||||
### Fixes
|
||||
|
||||
## 0.16.25
|
||||
|
||||
### Enhancements
|
||||
|
@ -6,7 +6,7 @@ from __future__ import annotations
|
||||
|
||||
import io
|
||||
import pathlib
|
||||
from typing import Any
|
||||
from typing import Any, Optional
|
||||
|
||||
import pytest
|
||||
from lxml import etree
|
||||
@ -24,6 +24,7 @@ from unstructured.cleaners.core import clean_extra_whitespace
|
||||
from unstructured.documents.elements import (
|
||||
Address,
|
||||
CompositeElement,
|
||||
ElementType,
|
||||
ListItem,
|
||||
NarrativeText,
|
||||
Table,
|
||||
@ -296,6 +297,68 @@ def test_it_does_not_extract_text_in_style_tags():
|
||||
assert element.text == "Lorem ipsum dolor"
|
||||
|
||||
|
||||
# -- image parsing behaviors ---------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("extract_to_payload", "extract_types", "expect_base64"),
    [
        (True, ["Image"], True),
        (True, [], False),
        (True, None, False),
        (False, ["Image"], False),
    ],
)
def test_partition_html_base64_for_images(
    opts_args: dict[str, Any],
    extract_to_payload: bool,
    extract_types: Optional[list[str]],
    expect_base64: bool,
):
    """Check when an `<img>` with a base64 data-URI source exposes `image_base64`.

    The parametrized cases expect the payload in metadata only when
    `extract_image_block_to_payload` is true and `extract_image_block_types` contains
    "Image"; in every other case `image_base64` is expected to be `None`. The element
    category, alt text and MIME type are asserted in all cases.
    """
    encoded_png = (
        "iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/"
        "w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg=="
    )
    data_uri = "data:image/png;base64," + encoded_png
    alt_text = "Base64 Image"

    html = f"""
    <div class="Page">
        <img src="{data_uri}" alt="{alt_text}">
    </div>
    """
    opts_args.update(
        text=html,
        extract_image_block_to_payload=extract_to_payload,
        extract_image_block_types=extract_types,
    )
    opts = HtmlPartitionerOptions(**opts_args)

    # -- unpacking into a one-item list fails unless exactly one element is produced --
    [element] = list(_HtmlPartitioner.iter_elements(opts))

    assert element.category == ElementType.IMAGE
    assert element.text == alt_text
    assert element.metadata.image_mime_type == "image/png"

    payload = element.metadata.image_base64
    if not expect_base64:
        assert payload is None
        return
    assert payload == encoded_png
|
||||
|
||||
|
||||
def test_partition_html_includes_url_for_images():
    """An `<img>` whose `src` is a URL partitions to one Image element carrying that URL.

    Asserts the element's category, that its text is the tag's alt text, and that
    `metadata.url` equals the `src` value.
    """
    image_url = "https://example.com/image.png"
    expected_text = "URL Image"
    # language=HTML
    markup = f"""
    <div class="Page">
        <img src="{image_url}" alt="{expected_text}">
    </div>
    """

    # -- unpacking into a one-item list fails unless exactly one element is produced --
    [image] = partition_html(text=markup)

    assert image.category == ElementType.IMAGE
    assert image.text == expected_text
    assert image.metadata.url == image_url
|
||||
|
||||
|
||||
# -- table parsing behaviors ---------------------------------------------------------------------
|
||||
|
||||
|
||||
|
@ -210,7 +210,7 @@ def test_auto_partition_epub_from_filename():
|
||||
elements = partition(example_doc_path("winter-sports.epub"), strategy=PartitionStrategy.HI_RES)
|
||||
|
||||
assert len(elements) > 0
|
||||
assert elements[0].text.startswith("The Project Gutenberg eBook of Winter Sports")
|
||||
assert elements[2].text.startswith("The Project Gutenberg eBook of Winter Sports")
|
||||
|
||||
|
||||
def test_auto_partition_epub_from_file():
|
||||
@ -218,7 +218,7 @@ def test_auto_partition_epub_from_file():
|
||||
elements = partition(file=f, strategy=PartitionStrategy.HI_RES)
|
||||
|
||||
assert len(elements) > 0
|
||||
assert elements[0].text.startswith("The Project Gutenberg eBook of Winter Sports")
|
||||
assert elements[2].text.startswith("The Project Gutenberg eBook of Winter Sports")
|
||||
|
||||
|
||||
# ================================================================================================
|
||||
@ -430,7 +430,7 @@ def test_auto_partition_processes_simple_ndjson(tmp_path: pathlib.Path):
|
||||
def test_partition_md_from_url_works_with_embedded_html():
|
||||
url = "https://raw.githubusercontent.com/Unstructured-IO/unstructured/main/README.md"
|
||||
elements = partition(url=url, content_type="text/markdown", strategy=PartitionStrategy.HI_RES)
|
||||
assert "unstructured" in elements[0].text
|
||||
assert "unstructured" in elements[1].text
|
||||
|
||||
|
||||
# ================================================================================================
|
||||
|
@ -14,14 +14,14 @@ def test_partition_epub_from_filename():
|
||||
|
||||
assert len(elements) > 0
|
||||
assert isinstance(elements[0], Text)
|
||||
assert elements[0].text.startswith("a shared culture")
|
||||
assert elements[1].text.startswith("a shared culture")
|
||||
if UNSTRUCTURED_INCLUDE_DEBUG_METADATA:
|
||||
assert {element.metadata.detection_origin for element in elements} == {"epub"}
|
||||
|
||||
|
||||
def test_partition_epub_from_filename_returns_table_in_elements():
|
||||
elements = partition_epub(example_doc_path("winter-sports.epub"))
|
||||
assert elements[10] == Table(
|
||||
assert elements[12] == Table(
|
||||
"Contents. List of Illustrations (In certain versions of this etext [in certain\nbrowsers]"
|
||||
" clicking on the image will bring up a larger\nversion.) (etext transcriber's note)"
|
||||
)
|
||||
@ -32,7 +32,7 @@ def test_partition_epub_from_file():
|
||||
elements = partition_epub(file=f)
|
||||
|
||||
assert len(elements) > 0
|
||||
assert elements[0].text.startswith("The Project Gutenberg eBook of Winter Sports")
|
||||
assert elements[2].text.startswith("The Project Gutenberg eBook of Winter Sports")
|
||||
|
||||
|
||||
# -- .metadata.filename --------------------------------------------------------------------------
|
||||
|
@ -13,89 +13,98 @@
|
||||
<p class="NarrativeText" id="d36113941235a14bdacafa399698ee71">
|
||||
The overview is the first page visitors will see when they visit your space, so it helps to include some information on what the space is about and what your team is working on.
|
||||
</p>
|
||||
<p class="NarrativeText" id="21e1683c1bc71c40ea20081368bcc7f6">
|
||||
<img alt="" class="Image" id="2051072f068db11d81f2bcbd031f8c19"/>
|
||||
<p class="NarrativeText" id="156af6589ee1a114454df9aa55b88d85">
|
||||
Add a header image. This gives your overview visual appeal and makes it welcoming for visitors.
|
||||
</p>
|
||||
<p class="NarrativeText" id="65f03aec0f3637db38c5a3741968eeff">
|
||||
<p class="NarrativeText" id="618dd7e3cee45b5b0f04847b33879336">
|
||||
Explain what the space is for. Start by summarizing the purpose of the space. This could be your team's mission statement or a brief description of the kind of work you do.
|
||||
</p>
|
||||
<p class="NarrativeText" id="e2522f792c3c5ef32bf1ba342a282fdd">
|
||||
<p class="NarrativeText" id="ca6d9e5f81ae268b7bbf6b62dad3357b">
|
||||
Share team goals. Add links to your team's OKRs, project plans, and product roadmaps so visitors can quickly get a sense of your team's goals.
|
||||
</p>
|
||||
<p class="NarrativeText" id="bd058a2d2c45c92a3178e327564e135a">
|
||||
<p class="NarrativeText" id="cf63812b68970732916946496b13b763">
|
||||
Tell people how to contact you. Share your timezone and links to Slack channels, email aliases, or other contact details your team uses so visitors can contact you with questions or feedback about your team's work.
|
||||
</p>
|
||||
<h1 class="Title" id="eab79997042ec6e273d0a13383347a57">
|
||||
<h1 class="Title" id="82d520e252b220d5c4c6ce29ffb1ade1">
|
||||
Use shortcuts for easy access
|
||||
</h1>
|
||||
<p class="NarrativeText" id="29cdfa9dda669b1dac60890795ab526c">
|
||||
<p class="NarrativeText" id="b2d427efb6bb6f37c4afd368cefab926">
|
||||
Shortcuts are helpful for important pages that members of a space might need to get to often. These shortcuts are added and organized by the space administrator. Space admins can link to pages in the space, other related spaces, or relevant external web content as well as reorder the shortcuts as needed.
|
||||
</p>
|
||||
<h1 class="Title" id="3251fe353cdbb64ce5cf084aef00cd96">
|
||||
<img alt="" class="Image" id="d9f3cfd98a3c67adb56cfafae39d3e03"/>
|
||||
<h1 class="Title" id="84ef673952608f3ba8bc4d2fa9deab59">
|
||||
💭Start discussions with inline comments
|
||||
</h1>
|
||||
<p class="NarrativeText" id="29a93ef334092c2a12daf86b1c1b61fb">
|
||||
<p class="NarrativeText" id="bcb788a54a545e7f1448f6e4dacb91eb">
|
||||
Thoughtful responses can get lost and lose context as email replies pile up. And if you neglect to copy someone or want to add them later on, it's difficult for them to get up to speed. Inline comments allow anyone (or everyone) to huddle around an idea while referencing key information on the project page.
|
||||
</p>
|
||||
<p class="NarrativeText" id="15cc91b0ec273ab28ab202cd5e7836ea">
|
||||
<p class="NarrativeText" id="c9dd716e43dfb450e3ff4cf59a3b5c63">
|
||||
To leave an inline comment, highlight text on the page and the comment icon will appear.
|
||||
</p>
|
||||
<p class="NarrativeText" id="c606d30a11f8686a33c4f5305ab878fa">
|
||||
<img alt="" class="Image" id="46647a4ff2f932d50ca02a1ef0ac51a2"/>
|
||||
<p class="NarrativeText" id="3452f07fead697f48e719306657044a6">
|
||||
Team members with permission to access the page can respond to any comment. Plus, when a comment thread comes to its natural conclusion, comments can be resolved and cleared away.
|
||||
</p>
|
||||
<h1 class="Title" id="9cec5c4cb40b1424590a7d2255ba5d98">
|
||||
<h1 class="Title" id="025ce3293479133863a7a64723611197">
|
||||
👋Loop in team members with @mentions
|
||||
</h1>
|
||||
<p class="NarrativeText" id="158ce46e2f05121666d26652b44ce556">
|
||||
<p class="NarrativeText" id="0fa6faf7cc80d654c319b481e7c7ffce">
|
||||
@mentions on Confluence function like @mentions on social media platforms like Twitter, Instagram, and Slack. Type the @ symbol on a Confluence page or in a comment, begin spelling a team member's first name, and a list will appear. Select the individual to ask a question or assign a task.
|
||||
</p>
|
||||
<h1 class="Title" id="aedbcb95b475418adc9e82fb50e1832f">
|
||||
<img alt="" class="Image" id="df15c1a5963603656576632632e1dced"/>
|
||||
<h1 class="Title" id="964954bfb165e4c1aa687b78fba71144">
|
||||
👏Endorse ideas with reactions
|
||||
</h1>
|
||||
<p class="NarrativeText" id="9dcf5a605331e2e0db925a329a727df8">
|
||||
<p class="NarrativeText" id="fe5335fa2c3bc18a1cbb8425fe071e47">
|
||||
Use reactions when you want to support a comment or acknowledge you've seen one without clogging up the thread with another comment.
|
||||
</p>
|
||||
<p class="NarrativeText" id="a26e40b5555fb394e0844b7ae0118a90">
|
||||
<p class="NarrativeText" id="d336ac79f4cbd3245fad05bfbc4c8f2b">
|
||||
You can also use reactions on a page or blog post. The author of the content will be notified, and if enough team members react or add comments to the content, it'll be surfaced on Confluence home feed
|
||||
</p>
|
||||
<h1 class="Title" id="04dfe464a23b5192ca7465fca96e8a56">
|
||||
<img alt="" class="Image" id="984da83593997e86b62223f8d1b03a62"/>
|
||||
<h1 class="Title" id="9901914d311723f7f14e905d32ee94fd">
|
||||
Take your Confluence space to the next level
|
||||
</h1>
|
||||
<p class="NarrativeText" id="06b459a1ab6ee59cbf44705c24934f15">
|
||||
<p class="NarrativeText" id="30b4b4dc49d65a5a014b40312edbb424">
|
||||
Extend the capabilities of your Confluence pages by adding extra functionality or including dynamic content.
|
||||
</p>
|
||||
<p class="NarrativeText" id="7d4a53bc8e11c662ba62212041b24cf6">
|
||||
<p class="NarrativeText" id="a4d482bff56873324e2f2578c381e971">
|
||||
To add functionality:
|
||||
</p>
|
||||
<p class="NarrativeText" id="29eaf10632e9bd8a0f0c46ac3f6ff876">
|
||||
<p class="NarrativeText" id="f17948e62a99462cb4013796e97eea23">
|
||||
Type ' / ' to open the list of items available to use
|
||||
</p>
|
||||
<p class="NarrativeText" id="885e34b9230d70d0c3257eef2d3f6a0f">
|
||||
<p class="NarrativeText" id="62804fd3619c5c942cf3944315db132c">
|
||||
Find the item to be inserted and select it
|
||||
</p>
|
||||
<p class="UncategorizedText" id="258ee604863fd54e308f2925d07ebd79">
|
||||
<p class="UncategorizedText" id="80ba4f784cb65e206b17b76f79c55818">
|
||||
Select Insert
|
||||
</p>
|
||||
<h1 class="Title" id="04a5e0e0b40cb961c84088dcc67b26b7">
|
||||
<img alt="" class="Image" id="7927a0fdb568097efde58fdd68ed7e0a"/>
|
||||
<h1 class="Title" id="60a261f17ffc821a917909bfb88a6d70">
|
||||
Useful elements for Team space
|
||||
</h1>
|
||||
<p class="UncategorizedText" id="bd4f8d2535746efce21ce872c09ef973">
|
||||
<p class="UncategorizedText" id="39d32e21527ef07823ab779970d88f26">
|
||||
Introduce the team
|
||||
</p>
|
||||
<p class="NarrativeText" id="433789f2b20ca6275f62a944390e3c1d">
|
||||
<p class="NarrativeText" id="fd0d57485d0925b681a03e270faeeb06">
|
||||
Add user profiles to display a short summary of a given Confluence user's profile with their role, profile photo and contact details.
|
||||
</p>
|
||||
<p class="UncategorizedText" id="959ffe89453ca67c279ed576df24e196">
|
||||
<img alt="" class="Image" id="b82b06b66608a8353fc7f99608bd8b08"/>
|
||||
<img alt="" class="Image" id="32ce3055a4b209c2734306d8e7266c08"/>
|
||||
<p class="UncategorizedText" id="2bad3c29ae9bd81da3a1d4c52487b032">
|
||||
Share news and announcements with your team
|
||||
</p>
|
||||
<p class="NarrativeText" id="8b81b2db2cef191090cfa1d4204b8964">
|
||||
<p class="NarrativeText" id="aa92002440f8c5a41323b8f85d131665">
|
||||
Display a stream of latest blog posts so your team can easily see what's been going on.
|
||||
</p>
|
||||
<p class="NarrativeText" id="3fd46bb09e57e95f1211f475c45b575b">
|
||||
<p class="NarrativeText" id="b313e6521d8168c6c840f8113c0ebd27">
|
||||
Display a list of important pages
|
||||
</p>
|
||||
<p class="NarrativeText" id="5cbfe913e369743f1f14830c0b6572ab">
|
||||
<p class="NarrativeText" id="c4bffd5805a6c7d1cb196dcd505f13d1">
|
||||
Paste in page URLs to create smart links, or use the content report table to create a list of all the pages in the space.
|
||||
</p>
|
||||
<img alt="" class="Image" id="15e9a49d1413538015b1fd4d7dee1825"/>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -46,85 +46,91 @@
|
||||
<p class="UncategorizedText" id="9d2ea8da0d1c12bb3616cd3cb4e56128">
|
||||
Add team members to your space.
|
||||
</p>
|
||||
<h1 class="Title" id="8e206800f74b037f87bc91ce09a66587">
|
||||
<img alt="" class="Image" id="11d63c2d51214128c8caebb58f2bf06d"/>
|
||||
<h1 class="Title" id="3d68b97296629da6f56dbee7226fb9ea">
|
||||
Team member
|
||||
</h1>
|
||||
<p class="UncategorizedText" id="2c4cc93ed9393b0f05a3e564c436e13e">
|
||||
<p class="UncategorizedText" id="b14012a7e1df00e14688673e6836af91">
|
||||
Role
|
||||
</p>
|
||||
<p class="UncategorizedText" id="554c2527470d9fea2aaf8cefd8aa8ffc">
|
||||
<p class="UncategorizedText" id="2ee3fe067727e804a8089f8c0131cd7e">
|
||||
Responsibility
|
||||
</p>
|
||||
<h1 class="Title" id="feb3b3be79c77e3d661dc3fa522de26f">
|
||||
<img alt="" class="Image" id="e206acc35c25cd275875533feb308ecf"/>
|
||||
<h1 class="Title" id="e9f3973e622aaacb42556e6f29d140c0">
|
||||
Team member
|
||||
</h1>
|
||||
<p class="UncategorizedText" id="5a73ff028549542468675768deee0430">
|
||||
<p class="UncategorizedText" id="2b43cb7e0a29b1411d109e9a682940fa">
|
||||
Role
|
||||
</p>
|
||||
<p class="UncategorizedText" id="94d211691238a7f3f74db151876c6734">
|
||||
<p class="UncategorizedText" id="3560a31004a2e271125262ae3435cd80">
|
||||
Responsibility
|
||||
</p>
|
||||
<h1 class="Title" id="198d8ad5606c445ba4dcafd19926c65e">
|
||||
<img alt="" class="Image" id="48a5d1f209c8025b1cfb1d882658743e"/>
|
||||
<h1 class="Title" id="64c696a8ba912e8c86e3dacc55bcfd09">
|
||||
Team member
|
||||
</h1>
|
||||
<p class="UncategorizedText" id="776f1a1125f787afd3d193ede37edbf3">
|
||||
<p class="UncategorizedText" id="60781a8a6086a335e6ef8efa6e767f74">
|
||||
Role
|
||||
</p>
|
||||
<p class="UncategorizedText" id="7d9faf5ffc93c10998801ec69e82969d">
|
||||
<p class="UncategorizedText" id="47137487152e9d98851e213658f3b212">
|
||||
Responsibility
|
||||
</p>
|
||||
<h1 class="Title" id="46bdd16cf46259b25d67480f1467e0b0">
|
||||
<h1 class="Title" id="5189c62c2edeed476df22eaa2bb5af21">
|
||||
Contact us
|
||||
</h1>
|
||||
<p class="NarrativeText" id="80dadf7b66548e15b0b7f73c59ee50cf">
|
||||
<p class="NarrativeText" id="43e843feeaed82e03996b90693f9c8eb">
|
||||
How can someone reach out to your team?
|
||||
</p>
|
||||
<div class="EmailAddress" id="23168bef3f665803fb9ec74644a65674">
|
||||
<div class="EmailAddress" id="0bae84d0e5cdc716a1dce4f739b86469">
|
||||
team@email.com
|
||||
</div>
|
||||
<p class="UncategorizedText" id="02510c1509479158e837ac5d13f84bf5">
|
||||
<p class="UncategorizedText" id="4d103f0c3f7f3527c37f34a8c4e86782">
|
||||
Tickets
|
||||
</p>
|
||||
<p class="UncategorizedText" id="c59943bccf5535ffd752fe52a2f6a184">
|
||||
<p class="UncategorizedText" id="deda95e4491b693fdb7bb978868beefd">
|
||||
Jira board
|
||||
</p>
|
||||
<p class="UncategorizedText" id="21d150625554235f8fe3270ed63d2921">
|
||||
<p class="UncategorizedText" id="e35c7cd3ecffe9ca0e65935f3feebfbd">
|
||||
#channel
|
||||
</p>
|
||||
<h1 class="Title" id="29c4e13f95e215957a8d697601c3d1cc">
|
||||
<h1 class="Title" id="f953d1e45bf1cf4cd4985b61255a41e3">
|
||||
Important Pages
|
||||
</h1>
|
||||
<p class="NarrativeText" id="8bdacdf1a36489a491926616432b7b8e">
|
||||
<p class="NarrativeText" id="53c5427b05c4256bd7c7e03346e58b9f">
|
||||
List them here
|
||||
</p>
|
||||
<h1 class="Title" id="68accd9d0365712f54b96da661cce03d">
|
||||
<img alt="" class="Image" id="6e5310473567927ff094c33ba42ff201"/>
|
||||
<h1 class="Title" id="a139fb30a2382364053eb57aa180550f">
|
||||
Onboarding FAQs
|
||||
</h1>
|
||||
<p class="UncategorizedText" id="35aa0d02a38ad72c0ca0534155dbdeb8">
|
||||
<p class="UncategorizedText" id="eb784ba0d48bf9e06b53aed2ac3fbd72">
|
||||
Add resources for new hires
|
||||
</p>
|
||||
<h1 class="Title" id="ea538f1ebdd2ced67e8c86dcf50bc164">
|
||||
<img alt="" class="Image" id="768cfb8a51125da06add3109e7d155b3"/>
|
||||
<h1 class="Title" id="67503783d98953e33cdc2846b90c21fd">
|
||||
Meeting notes
|
||||
</h1>
|
||||
<p class="NarrativeText" id="6f4ae84a8d8a1d9005384f35e2ce793c">
|
||||
<p class="NarrativeText" id="27194483431e4365b86572cbc73b9af5">
|
||||
Add links to meeting notes
|
||||
</p>
|
||||
<h1 class="Title" id="9616030a71ad0e0654b28e61578d0443">
|
||||
<img alt="" class="Image" id="c941c078ee573a2bbca654a7b5ce68f4"/>
|
||||
<h1 class="Title" id="885ad7169d419802971c64780c7a7968">
|
||||
Team goals
|
||||
</h1>
|
||||
<p class="NarrativeText" id="d81cb76df56721595c0495e4f5e6094f">
|
||||
<p class="NarrativeText" id="4d12c0c0f2d8211bc2b3eae35ac4f854">
|
||||
List them here
|
||||
</p>
|
||||
<h1 class="Title" id="46c3bd98dbea47cb63923597c929b932">
|
||||
<h1 class="Title" id="019ded9026166e1794b589358870fe60">
|
||||
Team news
|
||||
</h1>
|
||||
<p class="NarrativeText" id="1558d5e9d97c1cbb5cbb5cb2b077f83d">
|
||||
<p class="NarrativeText" id="7cdd15b42c50cc95a64aa83149e72aec">
|
||||
Create a blog post to share team news. It will automatically appear here once it's published.
|
||||
</p>
|
||||
<h1 class="Title" id="c281ed85f2e1125c9aaf318fd5178d4d">
|
||||
<h1 class="Title" id="008813f1d7a4380879ff001294f8bc6e">
|
||||
Blog stream
|
||||
</h1>
|
||||
<p class="NarrativeText" id="4b401fd3bc190fce17f70000e0164772">
|
||||
<p class="NarrativeText" id="2a28d14ef4ba44c8f0098df26a520f23">
|
||||
Create a blog post to share news and announcements with your team and company.
|
||||
</p>
|
||||
</body>
|
||||
|
@ -46,85 +46,91 @@
|
||||
<p class="UncategorizedText" id="af3236ec30847a0d5e80d5c4c48d24b3">
|
||||
Add team members to your space.
|
||||
</p>
|
||||
<h1 class="Title" id="93eecf0cb223bb9b38800c595a2c1ce2">
|
||||
<img alt="" class="Image" id="33831fbc138ef739d88d4f83b4cfc58d"/>
|
||||
<h1 class="Title" id="240725efee18f416b470f886d83e54a3">
|
||||
Team member
|
||||
</h1>
|
||||
<p class="UncategorizedText" id="75ee4a303fc5ab8639c7bca973f29e30">
|
||||
<p class="UncategorizedText" id="a8359a51dc7bc16fc9f2f412dfad01d7">
|
||||
Role
|
||||
</p>
|
||||
<p class="UncategorizedText" id="22731d9c17747fc4708fd7f418e9dd57">
|
||||
<p class="UncategorizedText" id="4d2982f8ec1f943ba5887ea5e1c41722">
|
||||
Responsibility
|
||||
</p>
|
||||
<h1 class="Title" id="c4327bb8ec4ea8444a6307fcdf6928cd">
|
||||
<img alt="" class="Image" id="1709eac9e1289421c96b86fa773e85ba"/>
|
||||
<h1 class="Title" id="8e408d997b6afdcc6dc7c5d2f60d51fe">
|
||||
Team member
|
||||
</h1>
|
||||
<p class="UncategorizedText" id="aa48062270f019242d68093284c4fa0c">
|
||||
<p class="UncategorizedText" id="f86b21d5900d7c26053ce0d49624e22b">
|
||||
Role
|
||||
</p>
|
||||
<p class="UncategorizedText" id="4bdb6fa86fd59b0729ecb9b6dbbf1ba7">
|
||||
<p class="UncategorizedText" id="ad6b52393cba4295aa11d461df801ec9">
|
||||
Responsibility
|
||||
</p>
|
||||
<h1 class="Title" id="07671349c39424db27fcf99634ed95d2">
|
||||
<img alt="" class="Image" id="3fa16ff3939638c6415d5d1367aa01be"/>
|
||||
<h1 class="Title" id="92cda6e10ddc39a6274a39bd28d78fd6">
|
||||
Team member
|
||||
</h1>
|
||||
<p class="UncategorizedText" id="bb14e5c4bda33439f627d9d0484b603c">
|
||||
<p class="UncategorizedText" id="c6bb501cb86fef4a7e6af33b44408860">
|
||||
Role
|
||||
</p>
|
||||
<p class="UncategorizedText" id="a85a7425fe31f85a4aa6ae0a3d5c4251">
|
||||
<p class="UncategorizedText" id="1e867147aebd2e2042c0b79216eb8ad6">
|
||||
Responsibility
|
||||
</p>
|
||||
<h1 class="Title" id="c250d32242e3900d71e3dc6a4a6ac3c4">
|
||||
<h1 class="Title" id="72969103d9798a14b6937a5f17e95250">
|
||||
Contact us
|
||||
</h1>
|
||||
<p class="NarrativeText" id="2cba66c761cce97def3ee35ad7e841a1">
|
||||
<p class="NarrativeText" id="a1f62f9caaa9e0ab38abfecc9992beb6">
|
||||
How can someone reach out to your team?
|
||||
</p>
|
||||
<div class="EmailAddress" id="bb593264cda1392498158b2ce65053ac">
|
||||
<div class="EmailAddress" id="d6507473bd42ae2c5043ef9682f5b71f">
|
||||
team@email.com
|
||||
</div>
|
||||
<p class="UncategorizedText" id="ad4aa408f6abd52bd1e2adf149fed96d">
|
||||
<p class="UncategorizedText" id="d68042b1765da182a599d7f147d2abef">
|
||||
Tickets
|
||||
</p>
|
||||
<p class="UncategorizedText" id="f98b3b59b55313381052f1cfa1194bc5">
|
||||
<p class="UncategorizedText" id="717b067188e80741597eb37455bf4fbe">
|
||||
Jira board
|
||||
</p>
|
||||
<p class="UncategorizedText" id="a9f4432dce00417cc8a4c304e424c28b">
|
||||
<p class="UncategorizedText" id="16455e060585b3e0817764ca31c32151">
|
||||
#channel
|
||||
</p>
|
||||
<h1 class="Title" id="3716407dd9d7c3bc756ab8ee46ea7770">
|
||||
<h1 class="Title" id="f773ae2bc874cb28cff580d0b63a627a">
|
||||
Important Pages
|
||||
</h1>
|
||||
<p class="NarrativeText" id="6fda3a7478f59f5290ac529d13bbceaf">
|
||||
<p class="NarrativeText" id="8a7363b7d1eb2cb37430121d27168de0">
|
||||
List them here
|
||||
</p>
|
||||
<h1 class="Title" id="f4186b4e1cec5ef7009560d11cb74087">
|
||||
<img alt="" class="Image" id="030568cacd3b66ce8ee6c6c3c9be840f"/>
|
||||
<h1 class="Title" id="fd9d745f22dffbb155b2e8022e2dc2e4">
|
||||
Onboarding FAQs
|
||||
</h1>
|
||||
<p class="UncategorizedText" id="a27a3099dea44c05dfea1e0e125abac5">
|
||||
<p class="UncategorizedText" id="71d0ef13e2b308bf6c79c3153f3ed35f">
|
||||
Add resources for new hires
|
||||
</p>
|
||||
<h1 class="Title" id="44d083c5ce62947d874c568db0dbc01b">
|
||||
<img alt="" class="Image" id="7e882f807cf95f54e80ea3d7b75f6edd"/>
|
||||
<h1 class="Title" id="16fda0efe288d0c8d1cf18b1037b5b0e">
|
||||
Meeting notes
|
||||
</h1>
|
||||
<p class="NarrativeText" id="23d9d3b7eb1b506a1031e99b28243136">
|
||||
<p class="NarrativeText" id="8bf5be7f0d4a4b5248347885f68f6b89">
|
||||
Add links to meeting notes
|
||||
</p>
|
||||
<h1 class="Title" id="5d12aca2ca2b8aba5c9dee48f1475f55">
|
||||
<img alt="" class="Image" id="56b696bc7b11d0f3e1165cb157426dcc"/>
|
||||
<h1 class="Title" id="c6fe156426f03a42912623025777f8c8">
|
||||
Team goals
|
||||
</h1>
|
||||
<p class="NarrativeText" id="70182a5acbdac0041ee51b85dfca692f">
|
||||
<p class="NarrativeText" id="d8f7425068e3b4e6e99affa00d268060">
|
||||
List them here
|
||||
</p>
|
||||
<h1 class="Title" id="243fc77b8eebdbcf00a6a108a8159b69">
|
||||
<h1 class="Title" id="e4589df20d851e29530dbf5f97444eca">
|
||||
Team news
|
||||
</h1>
|
||||
<p class="NarrativeText" id="3014e5236eb14590a7c13e83c36b20ce">
|
||||
<p class="NarrativeText" id="37a3e4a1755417a6944ff64115257147">
|
||||
Create a blog post to share team news. It will automatically appear here once it's published.
|
||||
</p>
|
||||
<h1 class="Title" id="800f984e0d3456624dce9630abfd873a">
|
||||
<h1 class="Title" id="e26ff7fd8e8e12c8aa704e6f97275fbf">
|
||||
Blog stream
|
||||
</h1>
|
||||
<p class="NarrativeText" id="800885acdda14ccb63621293f9a3aa2f">
|
||||
<p class="NarrativeText" id="18220fb2182492f64b3504513de4fbef">
|
||||
Create a blog post to share news and announcements with your team and company.
|
||||
</p>
|
||||
</body>
|
||||
|
@ -49,64 +49,65 @@
|
||||
<p class="UncategorizedText" id="ca6e8673360d0f9a946786edc086f26e">
|
||||
testtext3 testtext3 testtext3 testtext3 testtext3 testtext3 testtext3 testtext3 testtext3 testtext3
|
||||
</p>
|
||||
<p class="UncategorizedText" id="d0b45e375f3a7207caacb7be289ebd62">
|
||||
<img alt="" class="Image" id="f8085d2948c73dfb968f7b221f3e8fab"/>
|
||||
<p class="UncategorizedText" id="48d494bb12fd182b0106bff99dd2e3be">
|
||||
Testdoc3 List Item 1
|
||||
</p>
|
||||
<p class="UncategorizedText" id="351fc6ff4a9a491bf863ed7aa20fd5c5">
|
||||
<p class="UncategorizedText" id="3f1b3ecb6515a47b94579cf7de892f09">
|
||||
Testdoc3 List Item 1 Nested Item A
|
||||
</p>
|
||||
<p class="UncategorizedText" id="6688bffe9c19dca7cb61ee039a6ffa10">
|
||||
<p class="UncategorizedText" id="171423f703a966d2616837ed489f6975">
|
||||
Testdoc3 List Item 1 Nested Item B
|
||||
</p>
|
||||
<p class="UncategorizedText" id="e50d0b83f51c65bda0620ccec0368a41">
|
||||
<p class="UncategorizedText" id="87daeeb71306ae76a90c0e6ccac0dd47">
|
||||
Testdoc3 List Item 2
|
||||
</p>
|
||||
<p class="UncategorizedText" id="91697f192743d0583d02cb3e232d3c83">
|
||||
<p class="UncategorizedText" id="c4d15cc61c5d6a3f2350f758b82e487f">
|
||||
Testdoc3 List Item 3
|
||||
</p>
|
||||
<p class="UncategorizedText" id="7e1f204c284d5e878639feca87a022c4">
|
||||
<p class="UncategorizedText" id="e069a6333ef83f6f250880a500439da3">
|
||||
Testdoc3 List Item 4
|
||||
</p>
|
||||
<p class="UncategorizedText" id="24fc2762132dbbf33824a2c8575f6c14">
|
||||
<p class="UncategorizedText" id="2f030590e85c72dd4a2fc739cc05affe">
|
||||
Testdoc3 List Item 5
|
||||
</p>
|
||||
<p class="NarrativeText" id="c8d91bf0f74cf2d7474b81fa319cc0e5">
|
||||
<p class="NarrativeText" id="2db738cf60bf0471df90b6141fc6a8e5">
|
||||
This is the link for unstructured . io.
|
||||
</p>
|
||||
<li class="ListItem" id="ce274d7699a4270e902e3617c7cf6e36">
|
||||
<li class="ListItem" id="ab3005ca05b4f48396361646916154b4">
|
||||
Testdoc3 Checklist Item 1
|
||||
</li>
|
||||
<li class="ListItem" id="b54b171c49bbdb6f51308ff765b7f121">
|
||||
<li class="ListItem" id="ac70c0a823f0a1d56777036e77e77fd9">
|
||||
Testdoc3 Checklist Item 2 (checked)
|
||||
</li>
|
||||
<li class="ListItem" id="712a4752864712c0ec58730edb76b2f1">
|
||||
<li class="ListItem" id="099fca1cec6f3eaa5f71ed9c2ed235e4">
|
||||
Testdoc3 Checklist Item 3
|
||||
</li>
|
||||
<p class="UncategorizedText" id="f2e9daed509db420ecf36984f431900f">
|
||||
<p class="UncategorizedText" id="85d4a299ad3ee61201530bf0030808b1">
|
||||
😃 😃 😃 😃 😃 😃 😃 😃 😃 😃 😃 😃
|
||||
</p>
|
||||
<p class="NarrativeText" id="3cb5f4888419631affdc50af8f020348">
|
||||
<p class="NarrativeText" id="a1a4f27d3b3cc32777e25b3bb0766083">
|
||||
Testdoc3 bold text
|
||||
</p>
|
||||
<p class="UncategorizedText" id="9f5d86ea05eeb0bf570d9141a5b8994b">
|
||||
<p class="UncategorizedText" id="976749a5d532d1f18195d61fe8c04be3">
|
||||
Testdoc3 italic text
|
||||
</p>
|
||||
<h1 class="Title" id="64ba474681b32c7dbc2a00fb9ec3e757">
|
||||
<h1 class="Title" id="079d83c4a7622c70baab0336e3128ec4">
|
||||
Testdoc3 Heading 1 Sized Text
|
||||
</h1>
|
||||
<h1 class="Title" id="2d4a5727cd260bb321af0f777e2e699f">
|
||||
<h1 class="Title" id="68e58e6fec19f4ec291fd5bcca3dadd8">
|
||||
Testdoc3 Heading 2 Sized Text
|
||||
</h1>
|
||||
<h1 class="Title" id="0a5dc104636145b04136d2eee7c4469b">
|
||||
<h1 class="Title" id="8dbaefc9dcf7af80f14871cdd2b0c1d6">
|
||||
Testdoc3 Heading 3 Sized Text
|
||||
</h1>
|
||||
<h1 class="Title" id="470c624d2702678b94685d916908136a">
|
||||
<h1 class="Title" id="2ca9400f81b794c7d595f9bcd719b18a">
|
||||
Testdoc3 Heading 4 Sized Text
|
||||
</h1>
|
||||
<h1 class="Title" id="bd8ca6285c9717384bd456685550304b">
|
||||
<h1 class="Title" id="191672694e36e804e20214e8c9bd2d44">
|
||||
Testdoc3 Heading 5 Sized Text
|
||||
</h1>
|
||||
<table class="Table" id="99613e61c98f3e61b28c98d096524077" style="border: 1px solid black; border-collapse: collapse;">
|
||||
<table class="Table" id="10d98c7ebc4b2cd6e5508aa9563cc788" style="border: 1px solid black; border-collapse: collapse;">
|
||||
<tr style="border: 1px solid black;">
|
||||
<td style="border: 1px solid black;">
|
||||
Testdoc3 Table: Column 1 Row 0
|
||||
@ -141,5 +142,6 @@
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<img alt="" class="Image" id="45b142b9287e8ebadbc1dfb48ee5245a"/>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -49,64 +49,65 @@
|
||||
<p class="UncategorizedText" id="caab6974e98b9e03c78191c02591775e">
|
||||
testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2
|
||||
</p>
|
||||
<p class="UncategorizedText" id="a931e049fc3bd99cf74ef09502a71938">
|
||||
<img alt="" class="Image" id="61525bb0c406b033be2849cb375e9fcd"/>
|
||||
<p class="UncategorizedText" id="7e8cf6622ad36f28966178e194feaad8">
|
||||
Testdoc2 List Item 1
|
||||
</p>
|
||||
<p class="UncategorizedText" id="59e566b7776eba69071658b586226bd0">
|
||||
<p class="UncategorizedText" id="eeb9717b5c634b6bb4b4bb1b83101500">
|
||||
Testdoc2 List Item 1 Nested Item A
|
||||
</p>
|
||||
<p class="UncategorizedText" id="971bd18c2de3ef14a26ba1d8e4ef8668">
|
||||
<p class="UncategorizedText" id="c5bc1b1b90fd002ca136bc3d76e3d482">
|
||||
Testdoc2 List Item 1 Nested Item B
|
||||
</p>
|
||||
<p class="UncategorizedText" id="c688b4f7d2e49c8d8d7c77d28ddf5ecc">
|
||||
<p class="UncategorizedText" id="9ecb8d523cdd75be94294fa2ca440799">
|
||||
Testdoc2 List Item 2
|
||||
</p>
|
||||
<p class="UncategorizedText" id="9ce074ac38046f414a5f16cd9c7308b3">
|
||||
<p class="UncategorizedText" id="4909ea47e524d3b8fbac470c0663f589">
|
||||
Testdoc2 List Item 3
|
||||
</p>
|
||||
<p class="UncategorizedText" id="a4fbf964d1efe50e1c1ee181b453d4d6">
|
||||
<p class="UncategorizedText" id="ab916d1a2aa844a463b48a24637a8b2d">
|
||||
Testdoc2 List Item 4
|
||||
</p>
|
||||
<p class="UncategorizedText" id="b7c108f30be7dfb550213536c197e563">
|
||||
<p class="UncategorizedText" id="a9060bb59dfa0b37c1ec13e97228470a">
|
||||
Testdoc2 List Item 5
|
||||
</p>
|
||||
<p class="NarrativeText" id="9e78d28a1e5c130197f6a909ec74c987">
|
||||
<p class="NarrativeText" id="0dbff111f50ea5a58eee83c85a1c30b5">
|
||||
This is the link for unstructured . io.
|
||||
</p>
|
||||
<li class="ListItem" id="d8ae65b075a2f46c394461d4e393f0d5">
|
||||
<li class="ListItem" id="ed95477f3bff4586983201b6387c875b">
|
||||
Testdoc2 Checklist Item 1
|
||||
</li>
|
||||
<li class="ListItem" id="7f3784563903fdf80ca26e027ca7376d">
|
||||
<li class="ListItem" id="260cee6fb6f1562e7b1cb0f7644ea64d">
|
||||
Testdoc2 Checklist Item 2 (checked)
|
||||
</li>
|
||||
<li class="ListItem" id="81f723fb10893947353084829f8b5f68">
|
||||
<li class="ListItem" id="6820bf233e6604b7bd6750d0d2b62192">
|
||||
Testdoc2 Checklist Item 3
|
||||
</li>
|
||||
<p class="UncategorizedText" id="a28747bf65c9c6ad4981e57ec35822a3">
|
||||
<p class="UncategorizedText" id="8f13efbe5d5b289c6ce8eb00e2b5fae2">
|
||||
😃 😃 😃 😃 😃 😃 😃 😃 😃 😃 😃 😃
|
||||
</p>
|
||||
<p class="NarrativeText" id="ec5dbc92af9cfee5f32dba0e9919b1f7">
|
||||
<p class="NarrativeText" id="ecc13666f56ab3fb01917335016cd9c3">
|
||||
Testdoc2 bold text
|
||||
</p>
|
||||
<p class="UncategorizedText" id="f09110aa418d33cbaccc7b380e0fe0c6">
|
||||
<p class="UncategorizedText" id="93ec1210a0bf6e8b0c6c8504648e7489">
|
||||
Testdoc2 italic text
|
||||
</p>
|
||||
<h1 class="Title" id="fa11e4585afb53a4d046e095f08ac084">
|
||||
<h1 class="Title" id="f3c0e02138cb55302a075d5508843876">
|
||||
Testdoc2 Heading 1 Sized Text
|
||||
</h1>
|
||||
<h1 class="Title" id="2f06add07bf5f930085d334e1d1fdb6c">
|
||||
<h1 class="Title" id="5bc45f2fc513158f644f2c217cc9e54d">
|
||||
Testdoc2 Heading 2 Sized Text
|
||||
</h1>
|
||||
<h1 class="Title" id="0c493dc4e25a1447702be5bd7d8a156f">
|
||||
<h1 class="Title" id="81bebaf32dff5511a7856e553b526fa3">
|
||||
Testdoc2 Heading 3 Sized Text
|
||||
</h1>
|
||||
<h1 class="Title" id="dcf629a3cf73037815d0b85bf9878bd5">
|
||||
<h1 class="Title" id="833cc590b6815b09fd40d1d73a752420">
|
||||
Testdoc2 Heading 4 Sized Text
|
||||
</h1>
|
||||
<h1 class="Title" id="31d39e1ce259ec5bc37463b03c993697">
|
||||
<h1 class="Title" id="bff43fdce7563ccbd5cf3354090e8cf3">
|
||||
Testdoc2 Heading 5 Sized Text
|
||||
</h1>
|
||||
<table class="Table" id="8083af07d9148f975b439cdb91a216cf" style="border: 1px solid black; border-collapse: collapse;">
|
||||
<table class="Table" id="69b2cf7ade2f1034892b2b38b186fdaa" style="border: 1px solid black; border-collapse: collapse;">
|
||||
<tr style="border: 1px solid black;">
|
||||
<td style="border: 1px solid black;">
|
||||
Testdoc2 Table: Column 1 Row 0
|
||||
@ -141,5 +142,6 @@
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<img alt="" class="Image" id="825a8cbb41eecc2f1b29d4b34cb05c2f"/>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -164,19 +164,20 @@
|
||||
<p class="NarrativeText" id="fa3e9d761730605036aaf854d9edd5b4">
|
||||
Heading 1 content
|
||||
</p>
|
||||
<p class="UncategorizedText" id="c087a92c7251ca836ff023d35cb0a1aa">
|
||||
<img alt="" class="Image" id="387c4d334f8e9650a56b3b444b2ad5f6"/>
|
||||
<p class="UncategorizedText" id="60d9f47b086264ea72277b741e3b2bdd">
|
||||
d3d87fc6-61cc-4bb5-89ed-e9dff0df1526
|
||||
</p>
|
||||
<p class="UncategorizedText" id="3126a68fa0a12481ca6dc64c16511a7e">
|
||||
<p class="UncategorizedText" id="b39f61345657ccc5e201c20a6a90fad7">
|
||||
Stuff todo
|
||||
</p>
|
||||
<p class="UncategorizedText" id="8cfa5b216c8d3f774f8e1def029681e6">
|
||||
<p class="UncategorizedText" id="b95452fe8c6616a1ce1311457526c302">
|
||||
more stuff todo
|
||||
</p>
|
||||
<p class="NarrativeText" id="b538abdbf0aff3f9f1ab11d79bb5bc26">
|
||||
<p class="NarrativeText" id="a7c3ee9360b2020e28aa31835ef5283c">
|
||||
More things to do
|
||||
</p>
|
||||
<p class="NarrativeText" id="570c50d8758c5639a1dfd0f238f609d5">
|
||||
<p class="NarrativeText" id="349f058fcce7e32bb68b620841f40c9e">
|
||||
Something to do
|
||||
</p>
|
||||
</body>
|
||||
|
@ -10,5 +10,6 @@
|
||||
<p class="NarrativeText" id="4196fe41da19e8657761ecffcafd3d2f">
|
||||
Jane. This is a test of sending you an email from Salesforce! _____________________________________________________________________ Powered by Salesforce http://www.salesforce.com/
|
||||
</p>
|
||||
<img alt="" class="Image" id="f714fa214dac2f441515c4f28370d279"/>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -10,5 +10,6 @@
|
||||
<p class="NarrativeText" id="6f168cd430b41fc0d66a3691ef3caa0f">
|
||||
Hey Sean. Testing email parsing here. Type: email Just testing the email system _____________________________________________________________________ Powered by Salesforce http://www.salesforce.com/
|
||||
</p>
|
||||
<img alt="" class="Image" id="68870d055535f48c7439ce67092768f6"/>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -42,7 +42,28 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "21e1683c1bc71c40ea20081368bcc7f6",
|
||||
"element_id": "2051072f068db11d81f2bcbd031f8c19",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
"date_modified": "2023-07-09T12:54:45.288000",
|
||||
"record_locator": {
|
||||
"page_id": "1605956",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1605956",
|
||||
"version": "1"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "156af6589ee1a114454df9aa55b88d85",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -69,7 +90,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "65f03aec0f3637db38c5a3741968eeff",
|
||||
"element_id": "618dd7e3cee45b5b0f04847b33879336",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -96,7 +117,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "e2522f792c3c5ef32bf1ba342a282fdd",
|
||||
"element_id": "ca6d9e5f81ae268b7bbf6b62dad3357b",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -133,7 +154,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "bd058a2d2c45c92a3178e327564e135a",
|
||||
"element_id": "cf63812b68970732916946496b13b763",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -160,7 +181,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "eab79997042ec6e273d0a13383347a57",
|
||||
"element_id": "82d520e252b220d5c4c6ce29ffb1ade1",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -181,7 +202,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "29cdfa9dda669b1dac60890795ab526c",
|
||||
"element_id": "b2d427efb6bb6f37c4afd368cefab926",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -202,7 +223,28 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "3251fe353cdbb64ce5cf084aef00cd96",
|
||||
"element_id": "d9f3cfd98a3c67adb56cfafae39d3e03",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
"date_modified": "2023-07-09T12:54:45.288000",
|
||||
"record_locator": {
|
||||
"page_id": "1605956",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1605956",
|
||||
"version": "1"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "84ef673952608f3ba8bc4d2fa9deab59",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -223,7 +265,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "29a93ef334092c2a12daf86b1c1b61fb",
|
||||
"element_id": "bcb788a54a545e7f1448f6e4dacb91eb",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -250,7 +292,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "15cc91b0ec273ab28ab202cd5e7836ea",
|
||||
"element_id": "c9dd716e43dfb450e3ff4cf59a3b5c63",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -271,7 +313,28 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "c606d30a11f8686a33c4f5305ab878fa",
|
||||
"element_id": "46647a4ff2f932d50ca02a1ef0ac51a2",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
"date_modified": "2023-07-09T12:54:45.288000",
|
||||
"record_locator": {
|
||||
"page_id": "1605956",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1605956",
|
||||
"version": "1"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "3452f07fead697f48e719306657044a6",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -292,7 +355,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "9cec5c4cb40b1424590a7d2255ba5d98",
|
||||
"element_id": "025ce3293479133863a7a64723611197",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -313,7 +376,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "158ce46e2f05121666d26652b44ce556",
|
||||
"element_id": "0fa6faf7cc80d654c319b481e7c7ffce",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -340,7 +403,28 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "aedbcb95b475418adc9e82fb50e1832f",
|
||||
"element_id": "df15c1a5963603656576632632e1dced",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
"date_modified": "2023-07-09T12:54:45.288000",
|
||||
"record_locator": {
|
||||
"page_id": "1605956",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1605956",
|
||||
"version": "1"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "964954bfb165e4c1aa687b78fba71144",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -361,7 +445,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "9dcf5a605331e2e0db925a329a727df8",
|
||||
"element_id": "fe5335fa2c3bc18a1cbb8425fe071e47",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -382,7 +466,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "a26e40b5555fb394e0844b7ae0118a90",
|
||||
"element_id": "d336ac79f4cbd3245fad05bfbc4c8f2b",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -403,7 +487,28 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "04dfe464a23b5192ca7465fca96e8a56",
|
||||
"element_id": "984da83593997e86b62223f8d1b03a62",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
"date_modified": "2023-07-09T12:54:45.288000",
|
||||
"record_locator": {
|
||||
"page_id": "1605956",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1605956",
|
||||
"version": "1"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "9901914d311723f7f14e905d32ee94fd",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -424,7 +529,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "06b459a1ab6ee59cbf44705c24934f15",
|
||||
"element_id": "30b4b4dc49d65a5a014b40312edbb424",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -445,7 +550,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "7d4a53bc8e11c662ba62212041b24cf6",
|
||||
"element_id": "a4d482bff56873324e2f2578c381e971",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -472,7 +577,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "29eaf10632e9bd8a0f0c46ac3f6ff876",
|
||||
"element_id": "f17948e62a99462cb4013796e97eea23",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -493,7 +598,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "885e34b9230d70d0c3257eef2d3f6a0f",
|
||||
"element_id": "62804fd3619c5c942cf3944315db132c",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -514,7 +619,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "258ee604863fd54e308f2925d07ebd79",
|
||||
"element_id": "80ba4f784cb65e206b17b76f79c55818",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -541,7 +646,28 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "04a5e0e0b40cb961c84088dcc67b26b7",
|
||||
"element_id": "7927a0fdb568097efde58fdd68ed7e0a",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
"date_modified": "2023-07-09T12:54:45.288000",
|
||||
"record_locator": {
|
||||
"page_id": "1605956",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1605956",
|
||||
"version": "1"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "60a261f17ffc821a917909bfb88a6d70",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -562,7 +688,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "bd4f8d2535746efce21ce872c09ef973",
|
||||
"element_id": "39d32e21527ef07823ab779970d88f26",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -589,7 +715,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "433789f2b20ca6275f62a944390e3c1d",
|
||||
"element_id": "fd0d57485d0925b681a03e270faeeb06",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -616,7 +742,49 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "959ffe89453ca67c279ed576df24e196",
|
||||
"element_id": "b82b06b66608a8353fc7f99608bd8b08",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
"date_modified": "2023-07-09T12:54:45.288000",
|
||||
"record_locator": {
|
||||
"page_id": "1605956",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1605956",
|
||||
"version": "1"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "32ce3055a4b209c2734306d8e7266c08",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
"date_modified": "2023-07-09T12:54:45.288000",
|
||||
"record_locator": {
|
||||
"page_id": "1605956",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1605956",
|
||||
"version": "1"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "2bad3c29ae9bd81da3a1d4c52487b032",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -643,7 +811,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "8b81b2db2cef191090cfa1d4204b8964",
|
||||
"element_id": "aa92002440f8c5a41323b8f85d131665",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -670,7 +838,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "3fd46bb09e57e95f1211f475c45b575b",
|
||||
"element_id": "b313e6521d8168c6c840f8113c0ebd27",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -697,7 +865,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "5cbfe913e369743f1f14830c0b6572ab",
|
||||
"element_id": "c4bffd5805a6c7d1cb196dcd505f13d1",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
@ -722,5 +890,26 @@
|
||||
},
|
||||
"text": "Paste in page URLs to create smart links, or use the content report table to create a list of all the pages in the space.",
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "15e9a49d1413538015b1fd4d7dee1825",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:45.288000",
|
||||
"date_modified": "2023-07-09T12:54:45.288000",
|
||||
"record_locator": {
|
||||
"page_id": "1605956",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1605956",
|
||||
"version": "1"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
}
|
||||
]
|
@ -315,7 +315,28 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "8e206800f74b037f87bc91ce09a66587",
|
||||
"element_id": "11d63c2d51214128c8caebb58f2bf06d",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
"date_modified": "2023-06-30T17:25:30.898000",
|
||||
"record_locator": {
|
||||
"page_id": "229477",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/229477",
|
||||
"version": "1"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "3d68b97296629da6f56dbee7226fb9ea",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -336,7 +357,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "2c4cc93ed9393b0f05a3e564c436e13e",
|
||||
"element_id": "b14012a7e1df00e14688673e6836af91",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -357,7 +378,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "554c2527470d9fea2aaf8cefd8aa8ffc",
|
||||
"element_id": "2ee3fe067727e804a8089f8c0131cd7e",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -378,7 +399,28 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "feb3b3be79c77e3d661dc3fa522de26f",
|
||||
"element_id": "e206acc35c25cd275875533feb308ecf",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
"date_modified": "2023-06-30T17:25:30.898000",
|
||||
"record_locator": {
|
||||
"page_id": "229477",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/229477",
|
||||
"version": "1"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "e9f3973e622aaacb42556e6f29d140c0",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -399,7 +441,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "5a73ff028549542468675768deee0430",
|
||||
"element_id": "2b43cb7e0a29b1411d109e9a682940fa",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -420,7 +462,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "94d211691238a7f3f74db151876c6734",
|
||||
"element_id": "3560a31004a2e271125262ae3435cd80",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -441,7 +483,28 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "198d8ad5606c445ba4dcafd19926c65e",
|
||||
"element_id": "48a5d1f209c8025b1cfb1d882658743e",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
"date_modified": "2023-06-30T17:25:30.898000",
|
||||
"record_locator": {
|
||||
"page_id": "229477",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/229477",
|
||||
"version": "1"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "64c696a8ba912e8c86e3dacc55bcfd09",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -462,7 +525,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "776f1a1125f787afd3d193ede37edbf3",
|
||||
"element_id": "60781a8a6086a335e6ef8efa6e767f74",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -483,7 +546,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "7d9faf5ffc93c10998801ec69e82969d",
|
||||
"element_id": "47137487152e9d98851e213658f3b212",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -504,7 +567,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "46bdd16cf46259b25d67480f1467e0b0",
|
||||
"element_id": "5189c62c2edeed476df22eaa2bb5af21",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -525,7 +588,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "80dadf7b66548e15b0b7f73c59ee50cf",
|
||||
"element_id": "43e843feeaed82e03996b90693f9c8eb",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -552,7 +615,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "23168bef3f665803fb9ec74644a65674",
|
||||
"element_id": "0bae84d0e5cdc716a1dce4f739b86469",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -579,7 +642,7 @@
|
||||
"type": "EmailAddress"
|
||||
},
|
||||
{
|
||||
"element_id": "02510c1509479158e837ac5d13f84bf5",
|
||||
"element_id": "4d103f0c3f7f3527c37f34a8c4e86782",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -600,7 +663,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "c59943bccf5535ffd752fe52a2f6a184",
|
||||
"element_id": "deda95e4491b693fdb7bb978868beefd",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -621,7 +684,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "21d150625554235f8fe3270ed63d2921",
|
||||
"element_id": "e35c7cd3ecffe9ca0e65935f3feebfbd",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -642,7 +705,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "29c4e13f95e215957a8d697601c3d1cc",
|
||||
"element_id": "f953d1e45bf1cf4cd4985b61255a41e3",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -663,7 +726,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "8bdacdf1a36489a491926616432b7b8e",
|
||||
"element_id": "53c5427b05c4256bd7c7e03346e58b9f",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -690,7 +753,28 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "68accd9d0365712f54b96da661cce03d",
|
||||
"element_id": "6e5310473567927ff094c33ba42ff201",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
"date_modified": "2023-06-30T17:25:30.898000",
|
||||
"record_locator": {
|
||||
"page_id": "229477",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/229477",
|
||||
"version": "1"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "a139fb30a2382364053eb57aa180550f",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -711,7 +795,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "35aa0d02a38ad72c0ca0534155dbdeb8",
|
||||
"element_id": "eb784ba0d48bf9e06b53aed2ac3fbd72",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -738,7 +822,28 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "ea538f1ebdd2ced67e8c86dcf50bc164",
|
||||
"element_id": "768cfb8a51125da06add3109e7d155b3",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
"date_modified": "2023-06-30T17:25:30.898000",
|
||||
"record_locator": {
|
||||
"page_id": "229477",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/229477",
|
||||
"version": "1"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "67503783d98953e33cdc2846b90c21fd",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -759,7 +864,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "6f4ae84a8d8a1d9005384f35e2ce793c",
|
||||
"element_id": "27194483431e4365b86572cbc73b9af5",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -786,7 +891,28 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "9616030a71ad0e0654b28e61578d0443",
|
||||
"element_id": "c941c078ee573a2bbca654a7b5ce68f4",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
"date_modified": "2023-06-30T17:25:30.898000",
|
||||
"record_locator": {
|
||||
"page_id": "229477",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/229477",
|
||||
"version": "1"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "885ad7169d419802971c64780c7a7968",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -807,7 +933,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "d81cb76df56721595c0495e4f5e6094f",
|
||||
"element_id": "4d12c0c0f2d8211bc2b3eae35ac4f854",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -834,7 +960,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "46c3bd98dbea47cb63923597c929b932",
|
||||
"element_id": "019ded9026166e1794b589358870fe60",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -855,7 +981,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "1558d5e9d97c1cbb5cbb5cb2b077f83d",
|
||||
"element_id": "7cdd15b42c50cc95a64aa83149e72aec",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -882,7 +1008,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "c281ed85f2e1125c9aaf318fd5178d4d",
|
||||
"element_id": "008813f1d7a4380879ff001294f8bc6e",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
@ -903,7 +1029,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "4b401fd3bc190fce17f70000e0164772",
|
||||
"element_id": "2a28d14ef4ba44c8f0098df26a520f23",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-06-30T17:25:25.504000",
|
||||
|
@ -315,7 +315,28 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "93eecf0cb223bb9b38800c595a2c1ce2",
|
||||
"element_id": "33831fbc138ef739d88d4f83b4cfc58d",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
"date_modified": "2023-07-13T14:13:27.275000",
|
||||
"record_locator": {
|
||||
"page_id": "1605859",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1605859",
|
||||
"version": "2"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "240725efee18f416b470f886d83e54a3",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -336,7 +357,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "75ee4a303fc5ab8639c7bca973f29e30",
|
||||
"element_id": "a8359a51dc7bc16fc9f2f412dfad01d7",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -357,7 +378,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "22731d9c17747fc4708fd7f418e9dd57",
|
||||
"element_id": "4d2982f8ec1f943ba5887ea5e1c41722",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -378,7 +399,28 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "c4327bb8ec4ea8444a6307fcdf6928cd",
|
||||
"element_id": "1709eac9e1289421c96b86fa773e85ba",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
"date_modified": "2023-07-13T14:13:27.275000",
|
||||
"record_locator": {
|
||||
"page_id": "1605859",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1605859",
|
||||
"version": "2"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "8e408d997b6afdcc6dc7c5d2f60d51fe",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -399,7 +441,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "aa48062270f019242d68093284c4fa0c",
|
||||
"element_id": "f86b21d5900d7c26053ce0d49624e22b",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -420,7 +462,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "4bdb6fa86fd59b0729ecb9b6dbbf1ba7",
|
||||
"element_id": "ad6b52393cba4295aa11d461df801ec9",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -441,7 +483,28 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "07671349c39424db27fcf99634ed95d2",
|
||||
"element_id": "3fa16ff3939638c6415d5d1367aa01be",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
"date_modified": "2023-07-13T14:13:27.275000",
|
||||
"record_locator": {
|
||||
"page_id": "1605859",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1605859",
|
||||
"version": "2"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "92cda6e10ddc39a6274a39bd28d78fd6",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -462,7 +525,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "bb14e5c4bda33439f627d9d0484b603c",
|
||||
"element_id": "c6bb501cb86fef4a7e6af33b44408860",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -483,7 +546,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "a85a7425fe31f85a4aa6ae0a3d5c4251",
|
||||
"element_id": "1e867147aebd2e2042c0b79216eb8ad6",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -504,7 +567,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "c250d32242e3900d71e3dc6a4a6ac3c4",
|
||||
"element_id": "72969103d9798a14b6937a5f17e95250",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -525,7 +588,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "2cba66c761cce97def3ee35ad7e841a1",
|
||||
"element_id": "a1f62f9caaa9e0ab38abfecc9992beb6",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -552,7 +615,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "bb593264cda1392498158b2ce65053ac",
|
||||
"element_id": "d6507473bd42ae2c5043ef9682f5b71f",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -579,7 +642,7 @@
|
||||
"type": "EmailAddress"
|
||||
},
|
||||
{
|
||||
"element_id": "ad4aa408f6abd52bd1e2adf149fed96d",
|
||||
"element_id": "d68042b1765da182a599d7f147d2abef",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -600,7 +663,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "f98b3b59b55313381052f1cfa1194bc5",
|
||||
"element_id": "717b067188e80741597eb37455bf4fbe",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -621,7 +684,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "a9f4432dce00417cc8a4c304e424c28b",
|
||||
"element_id": "16455e060585b3e0817764ca31c32151",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -642,7 +705,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "3716407dd9d7c3bc756ab8ee46ea7770",
|
||||
"element_id": "f773ae2bc874cb28cff580d0b63a627a",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -663,7 +726,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "6fda3a7478f59f5290ac529d13bbceaf",
|
||||
"element_id": "8a7363b7d1eb2cb37430121d27168de0",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -690,7 +753,28 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "f4186b4e1cec5ef7009560d11cb74087",
|
||||
"element_id": "030568cacd3b66ce8ee6c6c3c9be840f",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
"date_modified": "2023-07-13T14:13:27.275000",
|
||||
"record_locator": {
|
||||
"page_id": "1605859",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1605859",
|
||||
"version": "2"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "fd9d745f22dffbb155b2e8022e2dc2e4",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -711,7 +795,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "a27a3099dea44c05dfea1e0e125abac5",
|
||||
"element_id": "71d0ef13e2b308bf6c79c3153f3ed35f",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -738,7 +822,28 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "44d083c5ce62947d874c568db0dbc01b",
|
||||
"element_id": "7e882f807cf95f54e80ea3d7b75f6edd",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
"date_modified": "2023-07-13T14:13:27.275000",
|
||||
"record_locator": {
|
||||
"page_id": "1605859",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1605859",
|
||||
"version": "2"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "16fda0efe288d0c8d1cf18b1037b5b0e",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -759,7 +864,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "23d9d3b7eb1b506a1031e99b28243136",
|
||||
"element_id": "8bf5be7f0d4a4b5248347885f68f6b89",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -786,7 +891,28 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "5d12aca2ca2b8aba5c9dee48f1475f55",
|
||||
"element_id": "56b696bc7b11d0f3e1165cb157426dcc",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
"date_modified": "2023-07-13T14:13:27.275000",
|
||||
"record_locator": {
|
||||
"page_id": "1605859",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1605859",
|
||||
"version": "2"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "c6fe156426f03a42912623025777f8c8",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -807,7 +933,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "70182a5acbdac0041ee51b85dfca692f",
|
||||
"element_id": "d8f7425068e3b4e6e99affa00d268060",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -834,7 +960,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "243fc77b8eebdbcf00a6a108a8159b69",
|
||||
"element_id": "e4589df20d851e29530dbf5f97444eca",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -855,7 +981,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "3014e5236eb14590a7c13e83c36b20ce",
|
||||
"element_id": "37a3e4a1755417a6944ff64115257147",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -882,7 +1008,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "800f984e0d3456624dce9630abfd873a",
|
||||
"element_id": "e26ff7fd8e8e12c8aa704e6f97275fbf",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
@ -903,7 +1029,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "800885acdda14ccb63621293f9a3aa2f",
|
||||
"element_id": "18220fb2182492f64b3504513de4fbef",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:54:40.304000",
|
||||
|
@ -308,7 +308,29 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "d0b45e375f3a7207caacb7be289ebd62",
|
||||
"element_id": "f8085d2948c73dfb968f7b221f3e8fab",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
"date_modified": "2023-07-09T12:57:59.173000",
|
||||
"record_locator": {
|
||||
"page_id": "1605989",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1605989",
|
||||
"version": "1"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "48d494bb12fd182b0106bff99dd2e3be",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -330,7 +352,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "351fc6ff4a9a491bf863ed7aa20fd5c5",
|
||||
"element_id": "3f1b3ecb6515a47b94579cf7de892f09",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -352,7 +374,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "6688bffe9c19dca7cb61ee039a6ffa10",
|
||||
"element_id": "171423f703a966d2616837ed489f6975",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -374,7 +396,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "e50d0b83f51c65bda0620ccec0368a41",
|
||||
"element_id": "87daeeb71306ae76a90c0e6ccac0dd47",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -396,7 +418,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "91697f192743d0583d02cb3e232d3c83",
|
||||
"element_id": "c4d15cc61c5d6a3f2350f758b82e487f",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -418,7 +440,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "7e1f204c284d5e878639feca87a022c4",
|
||||
"element_id": "e069a6333ef83f6f250880a500439da3",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -440,7 +462,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "24fc2762132dbbf33824a2c8575f6c14",
|
||||
"element_id": "2f030590e85c72dd4a2fc739cc05affe",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -462,7 +484,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "c8d91bf0f74cf2d7474b81fa319cc0e5",
|
||||
"element_id": "2db738cf60bf0471df90b6141fc6a8e5",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -490,7 +512,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "ce274d7699a4270e902e3617c7cf6e36",
|
||||
"element_id": "ab3005ca05b4f48396361646916154b4",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -512,7 +534,7 @@
|
||||
"type": "ListItem"
|
||||
},
|
||||
{
|
||||
"element_id": "b54b171c49bbdb6f51308ff765b7f121",
|
||||
"element_id": "ac70c0a823f0a1d56777036e77e77fd9",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -534,7 +556,7 @@
|
||||
"type": "ListItem"
|
||||
},
|
||||
{
|
||||
"element_id": "712a4752864712c0ec58730edb76b2f1",
|
||||
"element_id": "099fca1cec6f3eaa5f71ed9c2ed235e4",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -556,7 +578,7 @@
|
||||
"type": "ListItem"
|
||||
},
|
||||
{
|
||||
"element_id": "f2e9daed509db420ecf36984f431900f",
|
||||
"element_id": "85d4a299ad3ee61201530bf0030808b1",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -578,7 +600,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "3cb5f4888419631affdc50af8f020348",
|
||||
"element_id": "a1a4f27d3b3cc32777e25b3bb0766083",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -606,7 +628,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "9f5d86ea05eeb0bf570d9141a5b8994b",
|
||||
"element_id": "976749a5d532d1f18195d61fe8c04be3",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -634,7 +656,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "64ba474681b32c7dbc2a00fb9ec3e757",
|
||||
"element_id": "079d83c4a7622c70baab0336e3128ec4",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -656,7 +678,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "2d4a5727cd260bb321af0f777e2e699f",
|
||||
"element_id": "68e58e6fec19f4ec291fd5bcca3dadd8",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -678,7 +700,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "0a5dc104636145b04136d2eee7c4469b",
|
||||
"element_id": "8dbaefc9dcf7af80f14871cdd2b0c1d6",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -700,7 +722,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "470c624d2702678b94685d916908136a",
|
||||
"element_id": "2ca9400f81b794c7d595f9bcd719b18a",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -722,7 +744,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "bd8ca6285c9717384bd456685550304b",
|
||||
"element_id": "191672694e36e804e20214e8c9bd2d44",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -744,7 +766,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "99613e61c98f3e61b28c98d096524077",
|
||||
"element_id": "10d98c7ebc4b2cd6e5508aa9563cc788",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
@ -765,5 +787,27 @@
|
||||
},
|
||||
"text": "Testdoc3 Table: Column 1 Row 0 Testdoc3 Table: Column 2 Row 0 Testdoc3 Table: Column 3 Row 0 Testdoc3 Table: Column 1 Row 1 Testdoc3 Table: Column 2 Row 1 Testdoc3 Table: Column 3 Row 1 Testdoc3 Table: Column 1 Row 2 Testdoc3 Table: Column 2 Row 2 Testdoc3 Table: Column 3 Row 2",
|
||||
"type": "Table"
|
||||
},
|
||||
{
|
||||
"element_id": "45b142b9287e8ebadbc1dfb48ee5245a",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-09T12:56:40.842000",
|
||||
"date_modified": "2023-07-09T12:57:59.173000",
|
||||
"record_locator": {
|
||||
"page_id": "1605989",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1605989",
|
||||
"version": "1"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
}
|
||||
]
|
@ -308,7 +308,29 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "a931e049fc3bd99cf74ef09502a71938",
|
||||
"element_id": "61525bb0c406b033be2849cb375e9fcd",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
"date_modified": "2023-07-11T17:01:47.340000",
|
||||
"record_locator": {
|
||||
"page_id": "1802252",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
||||
"version": "1"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "7e8cf6622ad36f28966178e194feaad8",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -330,7 +352,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "59e566b7776eba69071658b586226bd0",
|
||||
"element_id": "eeb9717b5c634b6bb4b4bb1b83101500",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -352,7 +374,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "971bd18c2de3ef14a26ba1d8e4ef8668",
|
||||
"element_id": "c5bc1b1b90fd002ca136bc3d76e3d482",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -374,7 +396,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "c688b4f7d2e49c8d8d7c77d28ddf5ecc",
|
||||
"element_id": "9ecb8d523cdd75be94294fa2ca440799",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -396,7 +418,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "9ce074ac38046f414a5f16cd9c7308b3",
|
||||
"element_id": "4909ea47e524d3b8fbac470c0663f589",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -418,7 +440,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "a4fbf964d1efe50e1c1ee181b453d4d6",
|
||||
"element_id": "ab916d1a2aa844a463b48a24637a8b2d",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -440,7 +462,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "b7c108f30be7dfb550213536c197e563",
|
||||
"element_id": "a9060bb59dfa0b37c1ec13e97228470a",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -462,7 +484,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "9e78d28a1e5c130197f6a909ec74c987",
|
||||
"element_id": "0dbff111f50ea5a58eee83c85a1c30b5",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -490,7 +512,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "d8ae65b075a2f46c394461d4e393f0d5",
|
||||
"element_id": "ed95477f3bff4586983201b6387c875b",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -512,7 +534,7 @@
|
||||
"type": "ListItem"
|
||||
},
|
||||
{
|
||||
"element_id": "7f3784563903fdf80ca26e027ca7376d",
|
||||
"element_id": "260cee6fb6f1562e7b1cb0f7644ea64d",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -534,7 +556,7 @@
|
||||
"type": "ListItem"
|
||||
},
|
||||
{
|
||||
"element_id": "81f723fb10893947353084829f8b5f68",
|
||||
"element_id": "6820bf233e6604b7bd6750d0d2b62192",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -556,7 +578,7 @@
|
||||
"type": "ListItem"
|
||||
},
|
||||
{
|
||||
"element_id": "a28747bf65c9c6ad4981e57ec35822a3",
|
||||
"element_id": "8f13efbe5d5b289c6ce8eb00e2b5fae2",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -578,7 +600,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "ec5dbc92af9cfee5f32dba0e9919b1f7",
|
||||
"element_id": "ecc13666f56ab3fb01917335016cd9c3",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -606,7 +628,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "f09110aa418d33cbaccc7b380e0fe0c6",
|
||||
"element_id": "93ec1210a0bf6e8b0c6c8504648e7489",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -634,7 +656,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "fa11e4585afb53a4d046e095f08ac084",
|
||||
"element_id": "f3c0e02138cb55302a075d5508843876",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -656,7 +678,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "2f06add07bf5f930085d334e1d1fdb6c",
|
||||
"element_id": "5bc45f2fc513158f644f2c217cc9e54d",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -678,7 +700,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "0c493dc4e25a1447702be5bd7d8a156f",
|
||||
"element_id": "81bebaf32dff5511a7856e553b526fa3",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -700,7 +722,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "dcf629a3cf73037815d0b85bf9878bd5",
|
||||
"element_id": "833cc590b6815b09fd40d1d73a752420",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -722,7 +744,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "31d39e1ce259ec5bc37463b03c993697",
|
||||
"element_id": "bff43fdce7563ccbd5cf3354090e8cf3",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -744,7 +766,7 @@
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "8083af07d9148f975b439cdb91a216cf",
|
||||
"element_id": "69b2cf7ade2f1034892b2b38b186fdaa",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
@ -765,5 +787,27 @@
|
||||
},
|
||||
"text": "Testdoc2 Table: Column 1 Row 0 Testdoc2 Table: Column 2 Row 0 Testdoc2 Table: Column 3 Row 0 Testdoc2 Table: Column 1 Row 1 Testdoc2 Table: Column 2 Row 1 Testdoc2 Table: Column 3 Row 1 Testdoc2 Table: Column 1 Row 2 Testdoc2 Table: Column 2 Row 2 Testdoc2 Table: Column 3 Row 2",
|
||||
"type": "Table"
|
||||
},
|
||||
{
|
||||
"element_id": "825a8cbb41eecc2f1b29d4b34cb05c2f",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-07-11T17:01:39.240000",
|
||||
"date_modified": "2023-07-11T17:01:47.340000",
|
||||
"record_locator": {
|
||||
"page_id": "1802252",
|
||||
"url": "https://unstructured-ingest-test.atlassian.net"
|
||||
},
|
||||
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
||||
"version": "1"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
}
|
||||
]
|
@ -538,7 +538,22 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "c087a92c7251ca836ff023d35cb0a1aa",
|
||||
"element_id": "387c4d334f8e9650a56b3b444b2ad5f6",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-04T18:31:00.000Z",
|
||||
"date_modified": "2023-08-17T18:48:00.000Z"
|
||||
},
|
||||
"filetype": "text/html",
|
||||
"languages": [
|
||||
"eng"
|
||||
]
|
||||
},
|
||||
"text": "",
|
||||
"type": "Image"
|
||||
},
|
||||
{
|
||||
"element_id": "60d9f47b086264ea72277b741e3b2bdd",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-04T18:31:00.000Z",
|
||||
@ -553,7 +568,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "3126a68fa0a12481ca6dc64c16511a7e",
|
||||
"element_id": "b39f61345657ccc5e201c20a6a90fad7",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-04T18:31:00.000Z",
|
||||
@ -568,7 +583,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "8cfa5b216c8d3f774f8e1def029681e6",
|
||||
"element_id": "b95452fe8c6616a1ce1311457526c302",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-04T18:31:00.000Z",
|
||||
@ -583,7 +598,7 @@
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "b538abdbf0aff3f9f1ab11d79bb5bc26",
|
||||
"element_id": "a7c3ee9360b2020e28aa31835ef5283c",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-04T18:31:00.000Z",
|
||||
@ -598,7 +613,7 @@
|
||||
"type": "NarrativeText"
|
||||
},
|
||||
{
|
||||
"element_id": "570c50d8758c5639a1dfd0f238f609d5",
|
||||
"element_id": "349f058fcce7e32bb68b620841f40c9e",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-04T18:31:00.000Z",
|
||||
|
@ -26,5 +26,33 @@
|
||||
"date_modified": "1692628456.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Image",
|
||||
"element_id": "f714fa214dac2f441515c4f28370d279",
|
||||
"text": "",
|
||||
"metadata": {
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "message/rfc822",
|
||||
"email_message_id": "KhIK4000000000000000000000000000000000000000000000RZP1T400CmuP1P5wTm2m679gi-mnIg@sfdc.net",
|
||||
"sent_from": [
|
||||
"devops+salesforce-connector@unstructured.io"
|
||||
],
|
||||
"sent_to": [
|
||||
"jane_gray@uoa.edu"
|
||||
],
|
||||
"subject": "Test of email 1",
|
||||
"data_source": {
|
||||
"url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErPIAU",
|
||||
"version": "1694691603.0",
|
||||
"record_locator": {
|
||||
"id": "02sHu00001efErPIAU"
|
||||
},
|
||||
"date_created": "1692542056.0",
|
||||
"date_modified": "1692628456.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
@ -26,5 +26,33 @@
|
||||
"date_modified": "1692542155.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Image",
|
||||
"element_id": "68870d055535f48c7439ce67092768f6",
|
||||
"text": "",
|
||||
"metadata": {
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "message/rfc822",
|
||||
"email_message_id": "CuWky000000000000000000000000000000000000000000000RZP1VO00MaLK8OmEQm2Bw-c3ek6uNg@sfdc.net",
|
||||
"sent_from": [
|
||||
"devops+salesforce-connector@unstructured.io"
|
||||
],
|
||||
"sent_to": [
|
||||
"sean@edge.com"
|
||||
],
|
||||
"subject": "Test of Salesforce 2",
|
||||
"data_source": {
|
||||
"url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErQIAU",
|
||||
"version": "1694691603.0",
|
||||
"record_locator": {
|
||||
"id": "02sHu00001efErQIAU"
|
||||
},
|
||||
"date_created": "1692542149.0",
|
||||
"date_modified": "1692542155.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
@ -1 +1 @@
|
||||
__version__ = "0.16.25" # pragma: no cover
|
||||
__version__ = "0.16.26-dev1" # pragma: no cover
|
||||
|
@ -75,6 +75,7 @@ Other background
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from collections import defaultdict, deque
|
||||
from types import MappingProxyType
|
||||
from typing import Any, Iterable, Iterator, Mapping, NamedTuple, Sequence, cast
|
||||
@ -89,6 +90,7 @@ from unstructured.documents.elements import (
|
||||
Element,
|
||||
ElementMetadata,
|
||||
EmailAddress,
|
||||
Image,
|
||||
ListItem,
|
||||
NarrativeText,
|
||||
Table,
|
||||
@ -477,6 +479,34 @@ class Pre(BlockItem):
|
||||
return _PreElementAccumulator(self)
|
||||
|
||||
|
||||
class ImageBlock(Flow):
    """Custom element-class for `<img>` elements.

    Emits at most one `Image` element per tag. The image source is taken from the `data-src`
    attribute when that is non-blank, otherwise from `src`. A source that is an inline
    `data:image/...;base64,` URI is split into MIME-type and base64 payload; any other source is
    recorded as the element's URL.
    """

    # -- matches an inline base64 image data-URI: group 1 is the MIME-type (e.g. "image/png"),
    # -- group 2 is everything after the "base64," marker
    BASE64_IMAGE_REGEX = re.compile(r"^data:(image/[^;]+);base64,(.*)")

    def iter_elements(self) -> Iterator[Element]:
        """Generate zero-or-one `Image` element from `src`/`data-src` and `alt`.

        Nothing is generated when both `data-src` and `src` are missing or blank. Otherwise the
        element text is the whitespace-stripped `alt` attribute (empty string when absent) and
        the metadata carries either (`image_mime_type`, `image_base64`) for an inline data-URI
        or `url` for any other source.
        """
        source = self.get("data-src", "").strip() or self.get("src", "").strip()
        alt_text = self.get("alt", "").strip()

        # -- an <img> without any source has nothing to reference or embed --
        if not source:
            return

        data_uri = self.BASE64_IMAGE_REGEX.match(source)
        if data_uri is None:
            mime_type, base64_payload = None, None
        else:
            mime_type, base64_payload = data_uri.groups()

        # -- the source is only reported as a URL when no (non-empty) base64 payload was found --
        url = source if not base64_payload else None

        metadata = ElementMetadata(
            image_mime_type=mime_type,
            image_base64=base64_payload,
            url=url,
        )
        yield Image(text=alt_text, metadata=metadata)
|
||||
|
||||
|
||||
class TableBlock(Flow):
|
||||
"""Custom element-class for `<table>` element."""
|
||||
|
||||
@ -928,6 +958,8 @@ element_class_lookup.get_namespace(None).update(
|
||||
"ol": ListBlock,
|
||||
"ul": ListBlock,
|
||||
"li": ListItemBlock,
|
||||
# -- image --
|
||||
"img": ImageBlock,
|
||||
# -- table --
|
||||
"table": TableBlock,
|
||||
# -- annotated phrasing --
|
||||
|
@ -10,7 +10,7 @@ import requests
|
||||
from lxml import etree
|
||||
|
||||
from unstructured.chunking import add_chunking_strategy
|
||||
from unstructured.documents.elements import Element
|
||||
from unstructured.documents.elements import Element, ElementType
|
||||
from unstructured.file_utils.encoding import read_txt_file
|
||||
from unstructured.file_utils.model import FileType
|
||||
from unstructured.partition.common.metadata import apply_metadata, get_last_modified_date
|
||||
@ -108,6 +108,8 @@ class HtmlPartitionerOptions:
|
||||
detection_origin: str | None,
|
||||
html_parser_version: Literal["v1", "v2"] = "v1",
|
||||
image_alt_mode: Optional[Literal["to_text"]] = "to_text",
|
||||
extract_image_block_types: Optional[list[str]] = None,
|
||||
extract_image_block_to_payload: bool = False,
|
||||
):
|
||||
self._file_path = file_path
|
||||
self._file = file
|
||||
@ -120,6 +122,8 @@ class HtmlPartitionerOptions:
|
||||
self._detection_origin = detection_origin
|
||||
self._html_parser_version = html_parser_version
|
||||
self._image_alt_mode = image_alt_mode
|
||||
self._extract_image_block_types = extract_image_block_types
|
||||
self._extract_image_block_to_payload = extract_image_block_to_payload
|
||||
|
||||
@lazyproperty
|
||||
def detection_origin(self) -> str | None:
|
||||
@ -183,6 +187,15 @@ class _HtmlPartitioner:
|
||||
def __init__(self, opts: HtmlPartitionerOptions):
|
||||
self._opts = opts
|
||||
|
||||
def _should_include_image_base64(self, element: Element) -> bool:
    """Decide whether `element` may keep its `image_base64` metadata in the output.

    The payload is kept only when all of these hold:
    - `element` is an Image element,
    - the `extract_image_block_to_payload` option is truthy,
    - the `extract_image_block_types` option is set and contains "Image".
    """
    # -- only Image elements are eligible --
    if element.category != ElementType.IMAGE:
        return False

    opts = self._opts

    # -- payload extraction must be switched on; a falsy flag is passed back unchanged --
    to_payload = opts._extract_image_block_to_payload
    if not to_payload:
        return to_payload

    # -- "Image" must be among the explicitly requested block types --
    block_types = opts._extract_image_block_types
    if block_types is None:
        return False
    return "Image" in block_types
|
||||
|
||||
@classmethod
|
||||
def iter_elements(cls, opts: HtmlPartitionerOptions) -> Iterator[Element]:
|
||||
"""Partition HTML document provided by `opts` into document-elements."""
|
||||
@ -202,6 +215,10 @@ class _HtmlPartitioner:
|
||||
for e in elements_iter:
|
||||
e.metadata.last_modified = self._opts.last_modified
|
||||
e.metadata.detection_origin = self._opts.detection_origin
|
||||
|
||||
# -- remove <image_base64> if not requested --
|
||||
if not self._should_include_image_base64(e):
|
||||
e.metadata.image_base64 = None
|
||||
yield e
|
||||
|
||||
@lazyproperty
|
||||
@ -224,7 +241,7 @@ class _HtmlPartitioner:
|
||||
# -- remove a variety of HTML element types like <script> and <style> that we prefer not
|
||||
# -- to encounter while parsing.
|
||||
etree.strip_elements(
|
||||
root, ["del", "img", "link", "meta", "noscript", "script", "style"], with_tail=False
|
||||
root, ["del", "link", "meta", "noscript", "script", "style"], with_tail=False
|
||||
)
|
||||
|
||||
# -- remove <header> and <footer> tags if the caller doesn't want their contents --
|
||||
|
Loading…
x
Reference in New Issue
Block a user