mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-12-24 13:44:05 +00:00
chore: bump inference to 0.6.6 (#1563)
- bump `unstructured-inference` to `0.6.6` - specify default model name for element detection to be `detectron2_onnx` to keep current behavior - NOTE: the updated inference package by default would use yolox as the element detection model; this will be evaluated and enabled in a separate PR --------- Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com> Co-authored-by: badGarnet <badGarnet@users.noreply.github.com>
This commit is contained in:
parent
af7639e23f
commit
ad59a879cc
11
CHANGELOG.md
11
CHANGELOG.md
@ -1,4 +1,13 @@
|
||||
## 0.10.19-dev0
|
||||
## 0.10.19-dev1
|
||||
|
||||
### Enhancements
|
||||
|
||||
* **bump `unstructured-inference` to `0.6.6`** The updated version of `unstructured-inference` makes table extraction in `hi_res` mode configurable to fine-tune table extraction performance; it also improves element detection by adding a deduplication post-processing step in the `hi_res` partitioning of PDFs and images.
|
||||
|
||||
### Features
|
||||
|
||||
### Fixes
|
||||
|
||||
|
||||
## 0.10.18
|
||||
|
||||
|
||||
@ -39,5 +39,8 @@ matplotlib==3.7.2
|
||||
# NOTE(crag) - pin to available pandas for python 3.8 (at least in CI)
|
||||
fsspec==2023.9.1
|
||||
pandas<2.0.4
|
||||
# langchain limits this to 3.1.7
|
||||
anyio==3.1.7
|
||||
# langchain limits anyio to below 4.0
|
||||
anyio<4.0
|
||||
# pinned in unstructured paddleocr
|
||||
opencv-python==4.8.0.76
|
||||
opencv-contrib-python==4.8.0.76
|
||||
|
||||
@ -4,8 +4,10 @@
|
||||
#
|
||||
# pip-compile requirements/dev.in
|
||||
#
|
||||
anyio==4.0.0
|
||||
# via jupyter-server
|
||||
anyio==3.7.1
|
||||
# via
|
||||
# -c requirements/constraints.in
|
||||
# jupyter-server
|
||||
appnope==0.1.3
|
||||
# via
|
||||
# ipykernel
|
||||
@ -42,7 +44,7 @@ certifi==2023.7.22
|
||||
# -c requirements/constraints.in
|
||||
# -c requirements/test.txt
|
||||
# requests
|
||||
cffi==1.15.1
|
||||
cffi==1.16.0
|
||||
# via argon2-cffi-bindings
|
||||
cfgv==3.4.0
|
||||
# via pre-commit
|
||||
@ -151,7 +153,7 @@ jupyter-client==8.3.1
|
||||
# qtconsole
|
||||
jupyter-console==6.6.3
|
||||
# via jupyter
|
||||
jupyter-core==5.3.1
|
||||
jupyter-core==5.3.2
|
||||
# via
|
||||
# -c requirements/constraints.in
|
||||
# ipykernel
|
||||
@ -393,7 +395,7 @@ urllib3==1.26.16
|
||||
# requests
|
||||
virtualenv==20.24.5
|
||||
# via pre-commit
|
||||
wcwidth==0.2.6
|
||||
wcwidth==0.2.7
|
||||
# via prompt-toolkit
|
||||
webcolors==1.13
|
||||
# via jsonschema
|
||||
|
||||
@ -33,7 +33,7 @@ cssselect==1.2.0
|
||||
# via premailer
|
||||
cssutils==2.7.1
|
||||
# via premailer
|
||||
cycler==0.11.0
|
||||
cycler==0.12.0
|
||||
# via matplotlib
|
||||
cython==3.0.2
|
||||
# via unstructured-paddleocr
|
||||
@ -112,9 +112,12 @@ numpy==1.24.4
|
||||
# unstructured-paddleocr
|
||||
# visualdl
|
||||
opencv-contrib-python==4.8.0.76
|
||||
# via unstructured-paddleocr
|
||||
# via
|
||||
# -c requirements/constraints.in
|
||||
# unstructured-paddleocr
|
||||
opencv-python==4.8.0.76
|
||||
# via
|
||||
# -c requirements/constraints.in
|
||||
# imgaug
|
||||
# unstructured-paddleocr
|
||||
openpyxl==3.1.2
|
||||
|
||||
@ -5,7 +5,7 @@ pdf2image
|
||||
pdfminer.six
|
||||
# Do not move to constraints.in, otherwise unstructured-inference will not be upgraded
|
||||
# when unstructured library is.
|
||||
unstructured-inference==0.5.31
|
||||
unstructured-inference==0.6.6
|
||||
# unstructured fork of pytesseract that provides an interface to allow for multiple output formats
|
||||
# from one tesseract call
|
||||
unstructured.pytesseract>=0.3.12
|
||||
|
||||
@ -11,7 +11,7 @@ certifi==2023.7.22
|
||||
# -c requirements/base.txt
|
||||
# -c requirements/constraints.in
|
||||
# requests
|
||||
cffi==1.15.1
|
||||
cffi==1.16.0
|
||||
# via cryptography
|
||||
charset-normalizer==3.2.0
|
||||
# via
|
||||
@ -24,7 +24,7 @@ contourpy==1.1.1
|
||||
# via matplotlib
|
||||
cryptography==41.0.4
|
||||
# via pdfminer-six
|
||||
cycler==0.11.0
|
||||
cycler==0.12.0
|
||||
# via matplotlib
|
||||
effdet==0.4.1
|
||||
# via layoutparser
|
||||
@ -95,6 +95,7 @@ onnxruntime==1.16.0
|
||||
# via unstructured-inference
|
||||
opencv-python==4.8.0.76
|
||||
# via
|
||||
# -c requirements/constraints.in
|
||||
# layoutparser
|
||||
# unstructured-inference
|
||||
packaging==23.1
|
||||
@ -213,7 +214,7 @@ tqdm==4.66.1
|
||||
# huggingface-hub
|
||||
# iopath
|
||||
# transformers
|
||||
transformers==4.33.2
|
||||
transformers==4.33.3
|
||||
# via unstructured-inference
|
||||
typing-extensions==4.8.0
|
||||
# via
|
||||
@ -224,7 +225,7 @@ typing-extensions==4.8.0
|
||||
# torch
|
||||
tzdata==2023.3
|
||||
# via pandas
|
||||
unstructured-inference==0.5.31
|
||||
unstructured-inference==0.6.6
|
||||
# via -r requirements/extra-pdf-image.in
|
||||
unstructured-pytesseract==0.3.12
|
||||
# via
|
||||
|
||||
@ -97,7 +97,7 @@ tqdm==4.66.1
|
||||
# huggingface-hub
|
||||
# sacremoses
|
||||
# transformers
|
||||
transformers==4.33.2
|
||||
transformers==4.33.3
|
||||
# via -r requirements/huggingface.in
|
||||
typing-extensions==4.8.0
|
||||
# via
|
||||
|
||||
@ -21,7 +21,7 @@ inflection==0.5.1
|
||||
# via pyairtable
|
||||
pyairtable==2.1.0.post1
|
||||
# via -r requirements/ingest-airtable.in
|
||||
pydantic==1.10.12
|
||||
pydantic==1.10.13
|
||||
# via
|
||||
# -c requirements/constraints.in
|
||||
# pyairtable
|
||||
|
||||
@ -30,7 +30,7 @@ certifi==2023.7.22
|
||||
# -c requirements/base.txt
|
||||
# -c requirements/constraints.in
|
||||
# requests
|
||||
cffi==1.15.1
|
||||
cffi==1.16.0
|
||||
# via
|
||||
# azure-datalake-store
|
||||
# cryptography
|
||||
|
||||
@ -15,7 +15,7 @@ certifi==2023.7.22
|
||||
# -c requirements/base.txt
|
||||
# -c requirements/constraints.in
|
||||
# requests
|
||||
cffi==1.15.1
|
||||
cffi==1.16.0
|
||||
# via cryptography
|
||||
charset-normalizer==3.2.0
|
||||
# via
|
||||
|
||||
@ -47,7 +47,7 @@ google-api-core==2.12.0
|
||||
# via
|
||||
# google-cloud-core
|
||||
# google-cloud-storage
|
||||
google-auth==2.23.0
|
||||
google-auth==2.23.2
|
||||
# via
|
||||
# gcsfs
|
||||
# google-api-core
|
||||
@ -107,7 +107,6 @@ urllib3==1.26.16
|
||||
# via
|
||||
# -c requirements/base.txt
|
||||
# -c requirements/constraints.in
|
||||
# google-auth
|
||||
# requests
|
||||
yarl==1.9.2
|
||||
# via aiohttp
|
||||
|
||||
@ -9,7 +9,7 @@ certifi==2023.7.22
|
||||
# -c requirements/base.txt
|
||||
# -c requirements/constraints.in
|
||||
# requests
|
||||
cffi==1.15.1
|
||||
cffi==1.16.0
|
||||
# via
|
||||
# cryptography
|
||||
# pynacl
|
||||
|
||||
@ -19,7 +19,7 @@ google-api-core==2.12.0
|
||||
# via google-api-python-client
|
||||
google-api-python-client==2.101.0
|
||||
# via -r requirements/ingest-google-drive.in
|
||||
google-auth==2.23.0
|
||||
google-auth==2.23.2
|
||||
# via
|
||||
# google-api-core
|
||||
# google-api-python-client
|
||||
@ -63,5 +63,4 @@ urllib3==1.26.16
|
||||
# via
|
||||
# -c requirements/base.txt
|
||||
# -c requirements/constraints.in
|
||||
# google-auth
|
||||
# requests
|
||||
|
||||
@ -4,33 +4,35 @@
|
||||
#
|
||||
# pip-compile requirements/ingest-notion.in
|
||||
#
|
||||
anyio==3.7.1
|
||||
# via
|
||||
# -c requirements/constraints.in
|
||||
# httpcore
|
||||
certifi==2023.7.22
|
||||
# via
|
||||
# -c requirements/base.txt
|
||||
# -c requirements/constraints.in
|
||||
# httpcore
|
||||
# httpx
|
||||
charset-normalizer==3.2.0
|
||||
# via
|
||||
# -c requirements/base.txt
|
||||
# httpx
|
||||
h11==0.12.0
|
||||
exceptiongroup==1.1.3
|
||||
# via anyio
|
||||
h11==0.14.0
|
||||
# via httpcore
|
||||
htmlbuilder==1.0.0
|
||||
# via -r requirements/ingest-notion.in
|
||||
httpcore==0.13.3
|
||||
httpcore==0.18.0
|
||||
# via httpx
|
||||
httpx==0.20.0
|
||||
httpx==0.25.0
|
||||
# via notion-client
|
||||
idna==3.4
|
||||
# via
|
||||
# -c requirements/base.txt
|
||||
# anyio
|
||||
# httpx
|
||||
# rfc3986
|
||||
notion-client==2.0.0
|
||||
# via -r requirements/ingest-notion.in
|
||||
rfc3986[idna2008]==1.5.0
|
||||
# via httpx
|
||||
sniffio==1.3.0
|
||||
# via
|
||||
# anyio
|
||||
# httpcore
|
||||
# httpx
|
||||
|
||||
@ -15,7 +15,7 @@ certifi==2023.7.22
|
||||
# -c requirements/base.txt
|
||||
# -c requirements/constraints.in
|
||||
# requests
|
||||
cffi==1.15.1
|
||||
cffi==1.16.0
|
||||
# via cryptography
|
||||
charset-normalizer==3.2.0
|
||||
# via
|
||||
|
||||
@ -10,6 +10,10 @@ aiohttp==3.8.5
|
||||
# openai
|
||||
aiosignal==1.3.1
|
||||
# via aiohttp
|
||||
anyio==3.7.1
|
||||
# via
|
||||
# -c requirements/constraints.in
|
||||
# langchain
|
||||
async-timeout==4.0.3
|
||||
# via
|
||||
# aiohttp
|
||||
@ -30,6 +34,8 @@ dataclasses-json==0.6.1
|
||||
# via
|
||||
# -c requirements/base.txt
|
||||
# langchain
|
||||
exceptiongroup==1.1.3
|
||||
# via anyio
|
||||
frozenlist==1.4.0
|
||||
# via
|
||||
# aiohttp
|
||||
@ -37,9 +43,14 @@ frozenlist==1.4.0
|
||||
idna==3.4
|
||||
# via
|
||||
# -c requirements/base.txt
|
||||
# anyio
|
||||
# requests
|
||||
# yarl
|
||||
langchain==0.0.298
|
||||
jsonpatch==1.33
|
||||
# via langchain
|
||||
jsonpointer==2.4
|
||||
# via jsonpatch
|
||||
langchain==0.0.304
|
||||
# via -r requirements/ingest-openai.in
|
||||
langsmith==0.0.41
|
||||
# via langchain
|
||||
@ -69,7 +80,7 @@ packaging==23.1
|
||||
# via
|
||||
# -c requirements/base.txt
|
||||
# marshmallow
|
||||
pydantic==1.10.12
|
||||
pydantic==1.10.13
|
||||
# via
|
||||
# -c requirements/constraints.in
|
||||
# langchain
|
||||
@ -87,6 +98,8 @@ requests==2.31.0
|
||||
# langsmith
|
||||
# openai
|
||||
# tiktoken
|
||||
sniffio==1.3.0
|
||||
# via anyio
|
||||
sqlalchemy==2.0.21
|
||||
# via langchain
|
||||
tenacity==8.2.3
|
||||
|
||||
@ -9,7 +9,7 @@ certifi==2023.7.22
|
||||
# -c requirements/base.txt
|
||||
# -c requirements/constraints.in
|
||||
# requests
|
||||
cffi==1.15.1
|
||||
cffi==1.16.0
|
||||
# via cryptography
|
||||
charset-normalizer==3.2.0
|
||||
# via
|
||||
|
||||
@ -11,7 +11,7 @@ certifi==2023.7.22
|
||||
# -c requirements/base.txt
|
||||
# -c requirements/constraints.in
|
||||
# requests
|
||||
cffi==1.15.1
|
||||
cffi==1.16.0
|
||||
# via cryptography
|
||||
charset-normalizer==3.2.0
|
||||
# via
|
||||
|
||||
@ -9,7 +9,7 @@ certifi==2023.7.22
|
||||
# -c requirements/base.txt
|
||||
# -c requirements/constraints.in
|
||||
# requests
|
||||
cffi==1.15.1
|
||||
cffi==1.16.0
|
||||
# via cryptography
|
||||
charset-normalizer==3.2.0
|
||||
# via
|
||||
|
||||
@ -74,7 +74,7 @@ pluggy==1.3.0
|
||||
# via pytest
|
||||
pycodestyle==2.11.0
|
||||
# via flake8
|
||||
pydantic==1.10.12
|
||||
pydantic==1.10.13
|
||||
# via
|
||||
# -c requirements/constraints.in
|
||||
# -r requirements/test.in
|
||||
@ -113,7 +113,7 @@ types-click==7.1.8
|
||||
# via -r requirements/test.in
|
||||
types-markdown==3.4.2.10
|
||||
# via -r requirements/test.in
|
||||
types-requests==2.31.0.5
|
||||
types-requests==2.31.0.6
|
||||
# via -r requirements/test.in
|
||||
types-tabulate==0.9.0.3
|
||||
# via -r requirements/test.in
|
||||
|
||||
@ -9,7 +9,7 @@ docker run -d --rm -p 9200:9200 -p 9300:9300 -e "xpack.security.enabled=false" -
|
||||
echo "Waiting for Elasticsearch container to start..."
|
||||
sleep 1
|
||||
|
||||
url="http://localhost:9200/_cluster/health"
|
||||
url="http://localhost:9200/_cluster/health?wait_for_status=green&timeout=50s"
|
||||
status_code=0
|
||||
retry_count=0
|
||||
max_retries=6
|
||||
|
||||
@ -440,7 +440,7 @@ def test_partition_image_formats_languages_for_tesseract():
|
||||
ocr_languages="jpn_vert",
|
||||
ocr_mode="entire_page",
|
||||
extract_tables=False,
|
||||
model_name=None,
|
||||
model_name="detectron2_onnx",
|
||||
)
|
||||
|
||||
|
||||
|
||||
@ -407,7 +407,7 @@ def test_partition_pdf_with_dpi():
|
||||
ocr_languages="eng",
|
||||
ocr_mode="entire_page",
|
||||
extract_tables=False,
|
||||
model_name=None,
|
||||
model_name="detectron2_onnx",
|
||||
pdf_image_dpi=100,
|
||||
)
|
||||
|
||||
@ -858,7 +858,7 @@ def test_partition_pdf_formats_languages_for_tesseract():
|
||||
ocr_languages="eng",
|
||||
ocr_mode="entire_page",
|
||||
extract_tables=False,
|
||||
model_name=None,
|
||||
model_name="detectron2_onnx",
|
||||
)
|
||||
|
||||
|
||||
|
||||
@ -381,7 +381,7 @@ def test_auto_partition_formats_languages_for_tesseract():
|
||||
ocr_languages="chi_sim+chi_sim_vert+chi_tra+chi_tra_vert",
|
||||
ocr_mode="entire_page",
|
||||
extract_tables=False,
|
||||
model_name=None,
|
||||
model_name="detectron2_onnx",
|
||||
)
|
||||
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -360,25 +360,6 @@
|
||||
},
|
||||
"text": "Long-term contracts.—If you are required to change your method of accounting for long-term contracts under section 460, see Notice 87-61 (9/21/87), 1987-38 IRB 40, for the notification procedures that must be followed. Other methods. —Unless the Service has published a regulation or procedure to the contrary, all other changes in accounting methods required by the Act are automatically considered to be approved by the Commissioner. Examples of method changes automatically approved by the Commissioner are those changes required to effect: (1) the repeal of the reserve method for bad debts of taxpayers other than financial institutions (Act section 805); (2) the repeal of the installment method for sales under a revolving credit plan (Act section 812); (3) the Inclusion of mcome attributable to the sale or furnishing of utility services no later than the year in which the services were provided to customers (Act section 821); and (4) the repeal of the deduction for qualified discount coupons (Act section 823). Do not file Form 3115 for these changes."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "7685df2334a5f6c8c8099dea61a8f1b4",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "abfs://container1/IRS-form-1987.png",
|
||||
"version": 328871203465633719836776597535876541325,
|
||||
"record_locator": {
|
||||
"protocol": "abfs",
|
||||
"remote_file_path": "container1/IRS-form-1987.png"
|
||||
},
|
||||
"date_created": "2023-03-10T09:44:55+00:00",
|
||||
"date_modified": "2023-03-10T09:44:55+00:00"
|
||||
},
|
||||
"filetype": "image/png",
|
||||
"page_number": 1
|
||||
},
|
||||
"text": "Long-term contracts.—If you are required to change your method of accounting for long-term contracts under section 460, see Notice 87-61 (9/21/87), 1987-38 IRB 40, for the notification procedures that must be followed."
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "5756fb398995bb6518a87637f24f426e",
|
||||
|
||||
@ -30,8 +30,8 @@
|
||||
"text": "Data in Brief"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "9234133787d0a6b3976b16569c0b5cf3",
|
||||
"type": "Title",
|
||||
"element_id": "0ca3f075fdccf9232449ff461b63ceb9",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
@ -161,63 +161,23 @@
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "b877cc5d670d770084dcc0bb41ac73a0",
|
||||
"element_id": "ac89a2886224c42ad15982cd34421ff8",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 1
|
||||
},
|
||||
"text": "Subject area More specific subject area Type of data"
|
||||
"text": "Subject area More specific subject area Surface science and engineering Type of data"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "b27e559f6c00d2bde61efba5db252e31",
|
||||
"type": "NarrativeText",
|
||||
"element_id": "0a789b33a0101a46f5a01d22d9a6ce2b",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 1
|
||||
},
|
||||
"text": "Materials engineering"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "a2c3879ecb580742973c6a914fb905bb",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 1
|
||||
},
|
||||
"text": "Surface science and engineering"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "1064dcef42380cfdb90c668aa3a670a3",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 1
|
||||
},
|
||||
"text": "Table and figure"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "e4359c72057b318ddf5a64f9b97539c4",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 1
|
||||
},
|
||||
"text": "n Corresponding author. tayo.sanni@yahoo.com; SanniO@tut.ac.za"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "e102dc7c1db28c29d5e4bde8062592ed",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 1
|
||||
},
|
||||
"text": "E-mail address: tayo.sanni@yahoo.com (O. Sanni)."
|
||||
"text": "* Corresponding author. tayo.sanni@yahoo.com; SanniO@tut.ac.za E-mail address: tayo.sanni@yahoo.com (O. Sanni)."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
@ -310,44 +270,14 @@
|
||||
"text": "Value of the data"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "682e6210329b84f8b00548088196ffc9",
|
||||
"type": "ListItem",
|
||||
"element_id": "7def44ffc91f3f064b85dc04b23767ec",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "(cid:1) Data presented here provide optimum conditions of waste material as inhibitor for stainless steel Type 316 in 0.5 M H2SO4 medium. The given data describe the inhibitive performance of eco-friendly egg shell powder on austenitic stainless steel Type 316 corrosion in sulphuric acid environment."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "1d61e3468bc681ba1a7e647000c6828c",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "(cid:1) The data obtained for the inhibition of waste product (egg shell powder) on stainless steel Type 316 can be used as basis in determining the inhibitive performance of the same inhibitor in other environments."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "39b6040280a179e1f8e4f4fb5ec4ae05",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "(cid:1) The data can be used to examine the relationship between the process variable as it affect the"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "1ddde62c3188f81dfc835b6f036f1734",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "nature of inhibition of metals."
|
||||
"text": "© Data presented here provide optimum conditions of waste material as inhibitor for stainless steel Type 316 in 0.5M H2SO4 medium. The given data describe the inhibitive performance of eco-friendly egg shell powder on austenitic stainless steel Type 316 corrosion in sulphuric acid environment. © The data obtained for the inhibition of waste product (egg shell powder) on stainless steel Type 316 can be used as basis in determining the inhibitive performance of the same inhibitor in other environments. © The data can be used to examine the relationship between the process variable as it affect the nature of inhibition of metals."
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
@ -529,25 +459,15 @@
|
||||
},
|
||||
"text": "Exposure Time (Hours)"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "25db7b1d2f5780559e1034d72bcb4050",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "Fig. 1. Weight loss versus exposure time for stainless steel presence of ES."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "cbd563dd2fcd7d0b5a0b2173465fd328",
|
||||
"element_id": "45cd54c64e38abe8c1128a5979ca8cd5",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "immersed in 0.5 M H2SO4 solution in the absence and"
|
||||
"text": "Fig. 1. Weight loss versus exposure time for stainless steel immersed in 0.5M H2SO, solution in the absence and presence of ES."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
@ -1080,14 +1000,14 @@
|
||||
"text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451–457"
|
||||
},
|
||||
{
|
||||
"type": "FigureCaption",
|
||||
"element_id": "27b45633a0f31b9e01d179d70d7dc282",
|
||||
"type": "Image",
|
||||
"element_id": "b5ee6af3d776b0bbd2e581a3ab2ab2e1",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "5 1 os = — 10; =o ° © —\" 205 i —~é é —ip a5 — Control -2 — & 2.5 T T T 0.0000001 + —-0.00001 0.001 O14 Current Density (A/cm2)"
|
||||
"text": "Potential (Vv)nm°in°}aryT T T0.00001 0.001 olCurrent Density (A/cm2)"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
@ -1450,34 +1370,34 @@
|
||||
"text": "455"
|
||||
},
|
||||
{
|
||||
"type": "FigureCaption",
|
||||
"element_id": "273fb301b173075f79b2cbdab962e2ff",
|
||||
"type": "Image",
|
||||
"element_id": "caa364fead90039aae1f13d64dcb8b37",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 5
|
||||
},
|
||||
"text": "SEM HV: Q0KY WD: 14.89 rmrm ‘9EM MAO: 209 x Det: DOE Pectomsence In nanospact"
|
||||
"text": "SEM HV: Q0KY WD: 14.89 rmrm‘DEM MAO: 209 x ‘Dor Pecforsence In nenospact"
|
||||
},
|
||||
{
|
||||
"type": "FigureCaption",
|
||||
"element_id": "520d1da08c86ce165cd2843e2dc27f98",
|
||||
"type": "Image",
|
||||
"element_id": "a0463ca888a6f2c8c3ba40ba47be0f2f",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 5
|
||||
},
|
||||
"text": "SEMHV: 20.0KV WD: 15.54 mm EM ING: ACO x Dei: OSE"
|
||||
"text": "gEOOwaeSemny. z00RV | WD: 1424 renn rtirint VEoa3 Tescan20 yin Fertormaros in nancepace|"
|
||||
},
|
||||
{
|
||||
"type": "FigureCaption",
|
||||
"element_id": "d04d110c16a4ebc184fa130f09b8d423",
|
||||
"type": "Image",
|
||||
"element_id": "88301d6b47b17df03b78789b9890a6f1",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 5
|
||||
},
|
||||
"text": "Sem ny. 200 Rv"
|
||||
"text": "°@¢Naafe«MgsSEM HY: 20.0KV 7 ETOP LU ULL UL OCT 0BEM IAAG: 400 x a"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
@ -1530,7 +1450,7 @@
|
||||
"text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451–457"
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"type": "Title",
|
||||
"element_id": "a80826543c9e0d0e9f6c2108ae3c3f73",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
@ -1560,17 +1480,7 @@
|
||||
"text": "Austenitic stainless steel Type 316 was used in this study with chemical composition reported in [1,2]. The chemicals used were of annular grade. The inhibitor concentrations are in the range of 2, 4, 6, 8 and 10 g [3–5]. The structural formula of egg shell powder is shown in Fig. 9."
|
||||
},
|
||||
{
|
||||
"type": "FigureCaption",
|
||||
"element_id": "060e14f01e484ba252e902cd5c6f94f9",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 6
|
||||
},
|
||||
"text": "ou H,;COCHNY OH"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "NarrativeText",
|
||||
"element_id": "1dc2692eee9b01e9a960f80c4dabe07b",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
@ -1890,74 +1800,14 @@
|
||||
"text": "References"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "d844a31ead19b2e2fae786d2a5495072",
|
||||
"type": "ListItem",
|
||||
"element_id": "86174db2f99ff948055caeda83334bb7",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 7
|
||||
},
|
||||
"text": "[1] O. Sanni, A.P.I. Popoola, O.S.I. Fayomi, Enhanced corrosion resistance of stainless steel type 316 in sulphuric acid solution"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "d0be94eaaf9c0f43bc51381f031e1381",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 7
|
||||
},
|
||||
"text": "using eco-friendly waste product, Results Phys. 9 (2018) 225–230."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "7e9cfcc1c32c353e319aae7d9be537bd",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 7
|
||||
},
|
||||
"text": "[2] O. Sanni, A.P.I. Popoola, A. Kolesnikov, Constitutive modeling for prediction of optimal process parameters in corrosion"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "c00e8be0806aa2ded72da0ef746a4291",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 7
|
||||
},
|
||||
"text": "inhibition of austenitic stainless steel (Type 316)/acidic medium, Mater. Res. Express. 5 (10) (2018) 1–15."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "1d76a4bb6ba7984cea4548ab574beb8f",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 7
|
||||
},
|
||||
"text": "[3] O. Sanni, A.P.I. Popoola, O.S.I. Fayomi, The inhibitive study of egg shell powder on UNS N08904 austenitic stainless steel"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "ffd9e4babdf76600a881851ebbf35d3f",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 7
|
||||
},
|
||||
"text": "corrosion in chloride solution, Def. Technol. 14 (2018) 463–468."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "dd7f4838500dd709556225fa3f6b7339",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 7
|
||||
},
|
||||
"text": "[4] O. Sanni, A.P.I. Popoola, O.S.I. Fayomi, C.A. Loto, A comparative study of inhibitive effect of waste product on stainless steel corrosion in sodium chloride/sulfuric acid environments, Metallogr. Microstruct. Anal. (2018) 1–17. https://doi.org/10.1007/ s13632-018-0495-5."
|
||||
"text": "[1] 0. Sanni, A.P.I. Popoola, O.S.I. Fayomi, Enhanced corrosion resistance of stainless steel type 316 in sulphuric acid solution using eco-friendly waste product, Results Phys. 9 (2018) 225-230. [2] O. Sanni, A.P.I. Popoola, A. Kolesnikov, Constitutive modeling for prediction of optimal process parameters in corrosion inhibition of austenitic stainless steel (Type 316)/acidic medium, Mater. Res. Express. 5 (10) (2018) 1-15. [3] O. Sanni, A.P.I. Popoola, O.S.I. Fayomi, The inhibitive study of egg shell powder on UNS N08904 austenitic stainless steel corrosion in chloride solution, Def. Technol. 14 (2018) 463-468. [4] O. Sanni, A.P.I. Popoola, 0.S.I. Fayomi, C.A. Loto, A comparative study of inhibitive effect of waste product on stainless steel corrosion in sodium chloride/sulfuric acid environments, Metallogr. Microstruct. Anal. (2018) 1-17. https://doi.org/10.1007/ $13632-018-0495-5, [5] O. Sanni, A-P.I. Popoola, O.S.1. Fayomi, Inhibition of engineering material in sulphuric acid solution using waste product, Contributed Papers from Materials Science and Technology (MS&T18), 2018. (lnttps://doi.org/10.7449/2018/MST_2018_254 261)."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
|
||||
@ -30,8 +30,8 @@
|
||||
"text": "Data in Brief"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "9234133787d0a6b3976b16569c0b5cf3",
|
||||
"type": "Title",
|
||||
"element_id": "0ca3f075fdccf9232449ff461b63ceb9",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
@ -249,6 +249,16 @@
|
||||
},
|
||||
"text": "Value of the data"
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "510d0bce379a0d3ba5ff46d536bdb7c5",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "© The dataset contains 60 different problem instances of the MDVSP that can be used to evaluate the performance of the algorithms for the MDVSP. © The data provide all the information that is required to model the MDVSP by using the existing mathematical formulations. e All the problem instances are available for use without any restrictions. e The benchmark solutions and solution time for the problem instances are presented in [3] and can be used for the comparison. © The dataset includes a program that can generate similar problem instances of different sizes."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "f2fdefc49840022ffb3a88bd4a3512d0",
|
||||
@ -261,66 +271,6 @@
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "bd7d750cb9f652c80c17a264072b8858",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "performance of the algorithms for the MDVSP."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "7c8bc2811f71480b433eb6fee2a3bb33",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "(cid:2) The data provide all the information that is required to model the MDVSP by using the existing"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "68d39f7bcfe99749cc221fa901314626",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "mathematical formulations."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "e69dab6e2bc16d11cfd2d80a804d89fb",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "(cid:2) All the problem instances are available for use without any restrictions. (cid:2) The benchmark solutions and solution time for the problem instances are presented in [3] and can"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "24d7f2ed4386a169639b93a5bf03fd79",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "be used for the comparison."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "1c1d6b35ac0925a35ea3bb4d018e675f",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "(cid:2) The dataset includes a program that can generate similar problem instances of different sizes."
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "c2b2b778d53cc9a1cb4dc340476bc5aa",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
@ -330,134 +280,24 @@
|
||||
"text": "1. Data"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "41ce7670e476aaf9a595bc28c13dbba0",
|
||||
"type": "ListItem",
|
||||
"element_id": "86e53159056da85c215281a9c68d46b9",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "The dataset contains 60 different problem instances of the multiple depot vehicle scheduling pro- blem (MDVSP). Each problem instance is provided in a separate file. Each file is named as ‘RN-m-n-k.dat’, where ‘m’, ‘n’, and ‘k’ denote the number of depots, the number of trips, and the instance number"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "10c22bcf4c768b515be4e94bcafc71bf",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "for"
|
||||
"text": "For each problem instance, the following information is provided: The number of depots (m), The number of trips (n), The number of locations (I), The number of vehicles at each depot, For each tripie 1,2,...,n,a start time, ft}, an end time, ff, a start location, i, and an end location, i, and"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "a18c70d23b71c51ddfe33311232c241c",
|
||||
"element_id": "07732da32c53fed3ffd5342c61ab643b",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "‘RN-8-1500-01.dat’, is the first problem instance with 8 depots and 1500 trips. For the number of depots, m, we used three values, 8, 12, and 16. The four values for the number of trips, n, are 1500, 2000, 2500, and 3000. For each size, (m,n), five instances are provided. The dataset can be downloaded from https://orlib.uqcloud.net."
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "aea66a7c89c6de4d3e3ed6c1ada31104",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "the size,"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "e0feab8a8888b2955af1cc1a2acff883",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "‘ðm; nÞ’,"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "0b113c91aaaf031e5d7b74747e1b4153",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "respectively. For example,"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "6dd3e9101394a1fbacb451c4c9ba03b9",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "the problem instance,"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "33d26eae1edf215a9677101c7147d671",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "For each problem instance, the following information is provided: The number of depots mð The number of trips ðnÞ, The number of locations ðlÞ, The number of vehicles at each depot, For each trip i A 1; 2; …; n, a start time, ts"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "c6490fc185478150e7816c45ef8a48d5",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "Þ,"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "5a15b4000add06e52b66591cd8cac950",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "i , an end time, te"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "7798ae4daad9264de38e67c98f2bd624",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "i , a start location, ls"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "801a0d00a5b76dbd0f039368ee45eda3",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "i , and an end location, le i ,"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "6201111b83a0cb5b0922cb37cc442b9a",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "and"
|
||||
"text": "The dataset contains 60 different problem instances of the multiple depot vehicle scheduling pro- blem (MDVSP). Each problem instance is provided in a separate file. Each file is named as ‘RN-m-n-k.dat’, where ‘m’, ‘n’, and ‘k’ denote the number of depots, the number of trips, and the instance number ‘RN-8–1500-01.dat’, for is the first problem instance with 8 depots and 1500 trips. For the number of depots, m, we used three values, 8, 12, and 16. The four values for the number of trips, n, are 1500, 2000, 2500, and 3000. For each size, ðm; nÞ, five instances are provided. The dataset can be downloaded from https://orlib.uqcloud.net."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
@ -511,13 +351,13 @@
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "faee1001fc912565a74ea2d69fa0d689",
|
||||
"element_id": "694b9c582265698bf49806b056c64adc",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 3
|
||||
},
|
||||
"text": "travel empty from —¢). Aschedule is given by the sequence in which a vehicle can cover the trips. The MDVSP is to determine the minimum number of schedules to cover all trips that minimizes total time in waiting and empty travel. The following requirements must be satisfied:"
|
||||
"text": "j , the vehicle must travel empty from le j (cid:3)te i Þ. A schedule is given by the sequence in which a vehicle can cover the trips. The MDVSP is to determine the minimum number of schedules to cover all trips that minimizes total time in waiting and empty travel. The following requirements must be satisfied:"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
@ -529,76 +369,6 @@
|
||||
},
|
||||
"text": "A trip j can be covered after trip i by the same vehicle, if ts j"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "3e549e73bba49a63f20841b5821cfda9",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 3
|
||||
},
|
||||
"text": "i to ls"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "43dad32a26a446c5a2c74f3f2328b849",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 3
|
||||
},
|
||||
"text": ". If le i ls le i j , otherwise, the vehicle may require waiting at le i for the duration of ðts"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "3feb623147ddb3265b5968ce2efb8f6b",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 3
|
||||
},
|
||||
"text": "Z te"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "5201e1037409ea15055e320409a9f5eb",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 3
|
||||
},
|
||||
"text": "i þδ"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "189f40034be7a199f1fa9891668ee3ab",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 3
|
||||
},
|
||||
"text": "j"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "a10959d132f2b0d3723ae6b8b77f86b7",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 3
|
||||
},
|
||||
"text": "a ls"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "4137b01e139589b7a1d3b3fc4da031d8",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 3
|
||||
},
|
||||
"text": "must"
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "2d6b506bd58a7dd7bbf1c8599ef630c8",
|
||||
@ -629,56 +399,6 @@
|
||||
},
|
||||
"text": "A sufficient number of vehicles are provided to maintain the feasibility of an instance. For each instance size ðm; nÞ, Table 1 provides the average of the number of locations, the number of times, the number of vehicles, and the number of possible empty travels, over five instances. The number of locations includes m distinct locations for depots and the number of locations at which various trips start or end. The number of times includes the start and the end time of the planning horizon and the start/end times for the trips. The number of vehicles is the total number of vehicles from all the depots. The number of possible empty travels is the number of possible connections between trips that require a vehicle travelling empty between two consecutive trips in a schedule."
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "252f10c83610ebca1a059c0bae8255eb",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 3
|
||||
},
|
||||
"text": "f"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "928fa0dcad70f173bc989ee5715375c5",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 3
|
||||
},
|
||||
"text": "The description of the file for each problem instance is presented in Table 2. The first line in the file provides the number of depots ðmÞ, the number of trips, ðnÞ, and the number of locations ðlÞ, in the problem instance. The next n lines present the information for n trips. Each line corresponds to a trip, i A 1; …; n g, and provides the start location, the start time, the end location, and the end time of trip i. The next l lines present the travel times between any two locations, i; jA 1; …; l"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "89507815c6b4a6f31e6d3da7fca6b561",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 3
|
||||
},
|
||||
"text": "(cid:1)"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "33a2b57b388470db1cb13defbe73dc18",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 3
|
||||
},
|
||||
"text": "(cid:3)"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "cdb4ee2aea69cc6a83331bbe96dc2caa",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 3
|
||||
},
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "e731dc92fddc0512e142bfb2bed62bbf",
|
||||
@ -689,6 +409,16 @@
|
||||
},
|
||||
"text": "The dataset also includes a program ‘GenerateInstance.cpp’ that can be used to generate new instances. The program takes three inputs, the number of depots ðmÞ, the number of trips ðnÞ, and the number of instances for each size ðm; nÞ."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "92b491d0e108ec13f263b16646ecac65",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 3
|
||||
},
|
||||
"text": "The description of the file for each problem instance is presented in Table 2. The first line in the file provides the number of depots (m), the number of trips, (n), and the number of locations (I), in the problem instance. The next n lines present the information for n trips. Each line corresponds to a trip, ie{1,...,n}, and provides the start location, the start time, the end location, and the end time of trip i. The next | lines present the travel times between any two locations, i,j e {1, wal}. The dataset also includes a program ‘Generatelnstance.cpp’ that can be used to generate new instances. The program takes three inputs, the number of depots (m), the number of trips (n), and the"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "d8e33a2b60213fb3cebaf5c3a36b0b63",
|
||||
@ -850,34 +580,14 @@
|
||||
"text": "Table 2 Description of file format for each problem instance."
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "151e509ce97fe40eecae3822c78adcf5",
|
||||
"type": "NarrativeText",
|
||||
"element_id": "444f48f6d4f0ee6d3a04b7bf76218980",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "Number of lines"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "0d42fdb9458af19413eee0a1227f415c",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "Number of columns in each line"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "526e0087cc3f254d9f86f6c7d8e23d95",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "Description"
|
||||
"text": "Number of Number of columns in Description lines each line"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
@ -919,48 +629,18 @@
|
||||
},
|
||||
"text": "l"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "336074805fc853987abe6f7fe3ad97a6",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "time"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "78f6ff03dfac8dfb7f319de1e369590d",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "The number of depots, the number of trips, and the number of locations. The number of vehicles rg at each depot d. One line for each trip, i= 1,2, ...,n. Each line provides the start location and the end time ¢¢ for the corresponding trip. Each element, 6j, where i,j ¢ 1,2, ...,1, refers to the travel time between location i and location j."
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "8ee69286d5f681913dbfdeb60bedc572",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "i , the end location le"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "08238905e7bba7115b7d7d58fef13ec6",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "i , the start"
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "f096a8499e50cac1f45ceb8340dace5a",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "The number of depots, the number of trips, and the number of locations. The number of vehicles rd at each depot d. One line for each trip, i ¼ 1; 2; …; n. Each line provides the start location ls time ts i and the end time te i for the corresponding trip. Each element, δij; where i; j A 1; 2; …; l, refers to the travel time between location i and location j."
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "764eef872135149aaf95224bab69c844",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
@ -1031,102 +711,12 @@
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "5a1d84f7d74fc4ceeacb634d524cc041",
|
||||
"element_id": "ba0af0b44e7cc27de119a1771c07dfc2",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "[1] G. Carpaneto, M. Dell'Amico, M. Fischetti, P. Toth, A branch and bound algorithm for the multiple depot vehicle scheduling"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "bec40b25a277a08de3415e33284fc76d",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "problem, Networks 19 (5) (1989) 531–548."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "19dee0a4e8fd073350e234b4352b8af6",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "[2] N. Kliewer, T. Mellouli, L. Suhl, A time–space network based exact optimization model for multi-depot bus scheduling, Eur."
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "5f5ca82752a3220998c06ea0c44eb80e",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "J. Oper. Res. 175 (3) (2006) 1616–1627."
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "64cd13c78330953bd999d37dacbeaf0e",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "[3] S. Kulkarni, M. Krishnamoorthy, A. Ranade, A.T. Ernst, R. Patil, A new formulation and a column generation-based heuristic"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "c4f2c64b5f38feaa921647abceebaec8",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "for the multiple depot vehicle scheduling problem, Transp. Res. Part B Methodol. 118 (2018) 457–487."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "16c341408703257ff517dcc76140e2c0",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "[4] A.S. Pepin, G. Desaulniers, A. Hertz, D. Huisman, A comparison of five heuristics for the multiple depot vehicle scheduling"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "aa252076bc877d1ba2b95aa13b73ff72",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "problem, J. Sched. 12 (1) (2009) 17."
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "2e00441177bee9377583470218bea299",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "[5] C.C. Ribeiro, F. Soumis, A column generation approach to the multiple-depot vehicle scheduling problem, Oper. Res. 42 (1)"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "4b1b8c9df00f25e26176a85d84c8c927",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "(1994) 41–52."
|
||||
"text": "[1] G. Carpaneto, M. Dell'Amico, M. Fischetti, P. Toth, A branch and bound algorithm for the multiple depot vehicle scheduling problem, Networks 19 (5) (1989) 531-548. [2] N. Kliewer, T. Mellouli, L. Suhl, A time-space network based exact optimization model for multi-depot bus scheduling, Eur. J. Oper. Res. 175 (3) (2006) 1616-1627. [3] S. Kulkarni, M. Krishnamoorthy, A. Ranade, A.T. Ernst, R. Patil, A new formulation and a column generation-based heuristic for the multiple depot vehicle scheduling problem, Transp. Res. Part B Methodol. 118 (2018) 457-487. [4] A.S. Pepin, G. Desaulniers, A. Hertz, D. Huisman, A comparison of five heuristics for the multiple depot vehicle scheduling problem, J. Sched. 12 (1) (2009) 17. [5] C.C. Ribeiro, F. Soumis, A column generation approach to the multiple-depot vehicle scheduling problem, Oper. Res. 42 (1) (1994) 41-52."
|
||||
}
|
||||
]
|
||||
@ -169,16 +169,6 @@
|
||||
},
|
||||
"text": "37], layout detection [38, 22], table detection [26], and scene text detection [4]. A generalized learning-based framework dramatically reduces the need for the manual specification of complicated rules, which is the status quo with traditional methods. DL has the potential to transform DIA pipelines and benefit a broad spectrum of large-scale document digitization projects."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "1f0f5df7c23d4f8e8de4de3085abd7d8",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "The library implements simple and intuitive Python APIs without sacrificing generalizability and versatility, and can be easily installed via pip. Its convenient functions for handling document image data can be seamlessly integrated with existing DIA pipelines. With detailed documentations and carefully curated tutorials, we hope this tool will benefit a variety of end-users, and will lead to advances in applications in both industry and academic research."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "c1f1ba1630bc19bd24c1dfbc1548f2d8",
|
||||
@ -201,73 +191,23 @@
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "074b2bd4ba1bf0caf3dbf1973217416a",
|
||||
"element_id": "dc2c331204369d29f5bdcd8dc88a8174",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "1. An off-the-shelf toolkit for applying DL models for layout detection, character"
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "569ce8891b02bc38f50a0cde0039e951",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "2. A rich repository of pre-trained neural network models (Model Zoo) that"
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "18dcbc2839f9783d2c91cbce75d3e685",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "3. Comprehensive tools for efficient document image data annotation and model"
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "efe6ba3afae54e3c7a05d81583543296",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "4. A DL model hub and community platform for the easy sharing, distribu- tion, and discussion of DIA models and pipelines, to promote reusability, reproducibility, and extensibility (Section 4)"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "50f59772d4134ececeaf37069d480784",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "underlies the off-the-shelf usage"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "c7f4b9a2c7b93fdcc32112de7d9563ba",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "recognition, and other DIA tasks (Section 3)"
|
||||
"text": "1. An off-the-shelf toolkit for applying DL models for recognition, and other DIA tasks (Section Bp ayout det ection, character 2. A rich repository of pre-trained neural network models (Model Zoo) that underlies the off-the-shelf usage 3. Comprehensive tools for efficient document image tuning to support different levels of customization 4. A DL model hub and community platform for t tion, and discussion of DIA models and pipeline: reproducibility, and extensibility (Section [4) ne easy S. ata annotation and model haring, distribu- s, to promote reusability,"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "9a576fe6eb4355cdf1e772cf462a9eb7",
|
||||
"element_id": "1f0f5df7c23d4f8e8de4de3085abd7d8",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "tuning to support different levels of customization"
|
||||
"text": "The library implements simple and intuitive Python APIs without sacrificing generalizability and versatility, and can be easily installed via pip. Its convenient functions for handling document image data can be seamlessly integrated with existing DIA pipelines. With detailed documentations and carefully curated tutorials, we hope this tool will benefit a variety of end-users, and will lead to advances in applications in both industry and academic research."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
@ -496,7 +436,7 @@
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 5,
|
||||
"text_as_html": "<table><thead><th>Dataset</th><th>| Base Model'|</th><th>Large Model</th><th>Notes</th></thead><tr><td>PubLayNet B8]|</td><td>F/M</td><td>M</td><td>Layouts of modern scientific documents</td></tr><tr><td></td><td>M</td><td>-</td><td>Layouts of scanned modern magazines and scientific reports</td></tr><tr><td></td><td>F</td><td>-</td><td>Layouts of scanned US newspapers from the 20th century</td></tr><tr><td>TableBank</td><td>F</td><td>F</td><td>nd business document. Table region on modern scientific</td></tr><tr><td>HJDataset</td><td>F/M</td><td>-</td><td>Layouts of history Japanese documents</td></tr></table>"
|
||||
"text_as_html": "<table><thead><th>Dataset</th><th>| Base Model'|</th><th>Large Model</th><th>| Notes</th></thead><tr><td>PubLayNet B8]|</td><td>F/M</td><td>M</td><td>Layouts of modern scientific documents</td></tr><tr><td></td><td>M</td><td>-</td><td>Layouts of scanned modern magazines and scientific reports</td></tr><tr><td></td><td>F</td><td>-</td><td>Layouts of scanned US newspapers from the 20th century</td></tr><tr><td>TableBank</td><td>F</td><td>F</td><td>nd business document. Table region on modern scientific</td></tr><tr><td>HJDataset</td><td>F/M</td><td>-</td><td>Layouts of history Japanese documents</td></tr></table>"
|
||||
},
|
||||
"text": "Dataset | Base Model'| Large Model | Notes PubLayNet B8]| F/M M Layouts of modern scientific documents PRImA M - nned modern magazines and scientific reports Newspapei F - canned US newspapers from the 20th century TableBank F F Table region on modern scientific and business document HJDataset F/M - Layouts of history Japanese documents"
|
||||
},
|
||||
@ -591,34 +531,14 @@
|
||||
"text": "layout data structures, which are optimized for efficiency and versatility. 3) When necessary, users can employ existing or customized OCR models via the unified API provided in the OCR module. 4) LayoutParser comes with a set of utility functions for the visualization and storage of the layout data. 5) LayoutParser is also highly customizable, via its integration with functions for layout data annotation and model training. We now provide detailed descriptions for each component."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "65f9f864775ddef6f9895c53e16c50d4",
|
||||
"type": "ListItem",
|
||||
"element_id": "e416e69991bf6a4b338df18ebdb6e712",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 5
|
||||
},
|
||||
"text": "1 import layoutparser as lp 2 image = cv2 . imread ( \" image_file \" ) # load images 3 model = lp . De t e c tro n2 Lay outM odel ("
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "61b33f079528d200f91471f41645cdc6",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 5
|
||||
},
|
||||
"text": "4 5 layout = model . detect ( image )"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "6cd3a9e132c1264a05ec11a2df6b8066",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 5
|
||||
},
|
||||
"text": "\" lp :// PubLayNet / f as t er _ r c nn _ R _ 50 _ F P N_ 3 x / config \" )"
|
||||
"text": "import layoutparser as lp image = cv2.imread(\"image_file\") # load images model = lp.Detectron2LayoutModel ( \"1p://PubLayNet/faster_rcnn_R_50_FPN_3x/config\") layout = model.detect (image)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
@ -651,14 +571,14 @@
|
||||
"text": "Z. Shen et al."
|
||||
},
|
||||
{
|
||||
"type": "FigureCaption",
|
||||
"element_id": "185e67615d123b35d38ea72e0cdb6d99",
|
||||
"type": "Image",
|
||||
"element_id": "2f498bdd91739a7083490999507420a5",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 6
|
||||
},
|
||||
"text": "- ° . 3 a a 4 a 3 oo er ‘ 2 § 8 a 8 3 3 ‘ £ 4 A g a 9 ‘ 3 ¥ Coordinate g 4 5 3 + § 3 H Extra Features [O=\") [Bo] eaing i Text | | Type | | ower ° & a ¢ o [ coordinatel textblock1, 3 3 ’ g Q 3 , textblock2 , layoutl ] 4 q ® A list of the layout elements Ff"
|
||||
"text": "33§3 fectange vada8883 Coordinate83 +*Block | [Block | [Read8 Extra features Tet | [Tye | [oder[ coordinatel textblock1 |» , see383 , textblock2 , layout] ]4A list of the layout elementsThe same transformation and operation APIs"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
@ -1102,14 +1022,14 @@
|
||||
"text": "9"
|
||||
},
|
||||
{
|
||||
"type": "FigureCaption",
|
||||
"element_id": "975d6cb141cb0a0313375630ae063fa8",
|
||||
"type": "Image",
|
||||
"element_id": "6df6057f894a166cf24fd34f64267f09",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 9
|
||||
},
|
||||
"text": "x09 Burpunog uayor Aeydsiq 1 vondo 10g Guypunog usyoy apir:z uondo Mode I: Showing Layout on the Original Image Mode Il: Drawing OCR'd Text at the Correspoding Position"
|
||||
"text": "a ESStee eaeoooMode I: Showing Layout on the Original ImageMode Il: Drawing OCR'd Text at the Correspoding Position10g Bpunog vayoy feyds1q :1 vondo‘xog Burpunog vay apiH z word"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
@ -1172,14 +1092,14 @@
|
||||
"text": "Z. Shen et al."
|
||||
},
|
||||
{
|
||||
"type": "FigureCaption",
|
||||
"element_id": "2680b3c7a55754a3ba2738cb3d9d5e8b",
|
||||
"type": "Image",
|
||||
"element_id": "cd0055b04f6049e9d9bf49a4f309f7e9",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "et Intra-column reading order Token Categories tie (Adress 2) tee (NE sumber Variable HEE company type Column Categories (J tite we) adaress —_ (7) section Header by ‘e * Column reading order a a (a) Illustration of the original Japanese Maximum Allowed Height BRE B>e EER eR (b) Illustration of the recreated document with dense text structure for better OCR performance"
|
||||
"text": "Text‘Token CategoriestieAddress(Numberig:3pio Bupeas uwunjog(a) Illustration of the original Japanese document with detected layout elements highlighted in colored boxesColumn CategoriesCRE) OR REKER te setPikes enceee+41ybiay pamoyy wnwrxey(b) Illustration of the recreated document with dense text structure for better OCR performance"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
@ -1302,14 +1222,14 @@
|
||||
"text": "The digitization of historical documents can unlock valuable data that can shed light on many important social, economic, and historical questions. Yet due to scan noises, page wearing, and the prevalence of complicated layout structures, ob- taining a structured representation of historical document scans is often extremely complicated. In this example, LayoutParser was used to develop a comprehensive pipeline, shown in Figure 5, to gener- ate high-quality structured data from historical Japanese firm financial ta- bles with complicated layouts. The pipeline applies two layout models to identify different levels of document structures and two customized OCR engines for optimized character recog- nition accuracy."
|
||||
},
|
||||
{
|
||||
"type": "FigureCaption",
|
||||
"element_id": "b33b2bc3b9c416673c7f74c6a00c49d8",
|
||||
"type": "Image",
|
||||
"element_id": "d32d5d93079c0053b7ef655185e47bb4",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 11
|
||||
},
|
||||
"text": "(spe peepee, ‘Active Learning Layout Annotate Layout Dataset | + ‘Annotation Toolkit ¥ a Deep Leaming Layout Model Training & Inference, ¥ ; Handy Data Structures & Post-processing El Apis for Layout Det a LAR ror tye eats) 4 Text Recognition | <—— Default ane Customized ¥ ee Layout Structure Visualization & Export | <—— | visualization & Storage The Japanese Document Helpful LayoutParser Digitization Pipeline Modules"
|
||||
"text": "Annotate Layout Dataset(spe peepee,Active Learning LayoutAnnotation Toolkit4Layout Detection<—Deep Learning LayoutModel Training & Inference,4Post-processin Handy Data Structures &pl 9 APIs for Layout DataText Recognition Default and Customized: r OCR ModelsVisualization & Export |], bayou StructureVisualization & StorageThe Japanese DocumentDigitization PipelineHelpful LayoutParserModules"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
@ -1323,23 +1243,13 @@
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "4005fd5e1a8a65c8e989071255cd7386",
|
||||
"element_id": "de8f09a4156ca73defac521bb354a297",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 11
|
||||
},
|
||||
"text": "15 A document page consists of eight rows like this. For simplicity we skip the row"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "5d0786de7b188a10caffb32c951327a2",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 11
|
||||
},
|
||||
"text": "segmentation discussion and refer readers to the source code when available."
|
||||
"text": "& document page consists of eight rows like this. For simplicity we skip the row segmentation discussion and refer readers to the source code when available."
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
@ -1412,44 +1322,14 @@
|
||||
"text": "Overall, it is possible to create an intricate and highly accurate digitization pipeline for large-scale digitization using LayoutParser. The pipeline avoids specifying the complicated rules used in traditional methods, is straightforward to develop, and is robust to outliers. The DL models also generate fine-grained results that enable creative approaches like page reorganization for OCR."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "d11adbfd88959ce24fbfdc7f8155e777",
|
||||
"type": "ListItem",
|
||||
"element_id": "122f0a4bde97c6e10e95c6e54479e34e",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 12
|
||||
},
|
||||
"text": "16 This measures the overlap between the detected and ground-truth characters, and"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "5b6b4f6a5766bdb4f09f0a0387a3a373",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 12
|
||||
},
|
||||
"text": "the maximum is 1."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "48033291e6d72fefde1a56827e6dacfb",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 12
|
||||
},
|
||||
"text": "17 This measures the number of edits from the ground-truth text to the predicted text,"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "5737ba23368c5333b0c39f7e8e474d03",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 12
|
||||
},
|
||||
"text": "and lower is better."
|
||||
"text": "16 This measures the overlap between the detected and ground-truth characters, and the maximum is 1. '7 This measures the number of edits from the ground-truth text to the predicted text, and lower is better."
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
@ -1472,14 +1352,14 @@
|
||||
"text": "13"
|
||||
},
|
||||
{
|
||||
"type": "FigureCaption",
|
||||
"element_id": "7d42bb6af1404a95a6e8870d5c4d07bf",
|
||||
"type": "Image",
|
||||
"element_id": "f58d47bde7ebddd81c4a678c918a8f1b",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 13
|
||||
},
|
||||
"text": "(@) Partial table at the bottom (&) Full page table (6) Partial table at the top (d) Mis-detected tet line"
|
||||
"text": "(2) Partial table atthe bottom (&) Full page table (6) Partial table at the top (d) Mis-detected tet line"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
@ -1592,84 +1472,14 @@
|
||||
"text": "References"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "b5bf13691648f2be7e686436513a7366",
|
||||
"type": "ListItem",
|
||||
"element_id": "af2a971baba0e022d1e53fc0e44b1d94",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 14
|
||||
},
|
||||
"text": "[1] Abadi, M., Agarwal, A., Barham, P., Brevdo, E., Chen, Z., Citro, C., Corrado, G.S., Davis, A., Dean, J., Devin, M., Ghemawat, S., Goodfellow, I., Harp, A., Irving, G., Isard, M., Jia, Y., Jozefowicz, R., Kaiser, L., Kudlur, M., Levenberg, J., Man´e, D., Monga, R., Moore, S., Murray, D., Olah, C., Schuster, M., Shlens, J., Steiner, B., Sutskever, I., Talwar, K., Tucker, P., Vanhoucke, V., Vasudevan, V., Vi´egas, F., Vinyals, O., Warden, P., Wattenberg, M., Wicke, M., Yu, Y., Zheng, X.: TensorFlow: Large-scale machine learning on heterogeneous systems (2015), https://www.tensorflow.org/, software available from tensorflow.org"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "098ca0ae774b51e7eba5dbe98641da88",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 14
|
||||
},
|
||||
"text": "[2] Alberti, M., Pondenkandath, V., W¨ursch, M., Ingold, R., Liwicki, M.: Deepdiva: a highly-functional python framework for reproducible experiments. In: 2018 16th International Conference on Frontiers in Handwriting Recognition (ICFHR). pp. 423–428. IEEE (2018)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "0054c11c9691968349806c35f6aa5f0f",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 14
|
||||
},
|
||||
"text": "[3] Antonacopoulos, A., Bridson, D., Papadopoulos, C., Pletschacher, S.: A realistic dataset for performance evaluation of document layout analysis. In: 2009 10th International Conference on Document Analysis and Recognition. pp. 296–300. IEEE (2009)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "607a64b13da109e96c62ecaedce91c4f",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 14
|
||||
},
|
||||
"text": "[4] Baek, Y., Lee, B., Han, D., Yun, S., Lee, H.: Character region awareness for text detection. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. pp. 9365–9374 (2019)"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "9409d20f2ee25336c2566bda8d8bb83c",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 14
|
||||
},
|
||||
"text": "[5] Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: A Large-Scale"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "44c5093519506610b07942b24d966d77",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 14
|
||||
},
|
||||
"text": "Hierarchical Image Database. In: CVPR09 (2009)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "ad1bf75fc53d123c878f8254f9304c9f",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 14
|
||||
},
|
||||
"text": "[6] Deng, Y., Kanervisto, A., Ling, J., Rush, A.M.: Image-to-markup generation with coarse-to-fine attention. In: International Conference on Machine Learning. pp. 980–989. PMLR (2017)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "c6e835fe03323406543926cc0f5a94de",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 14
|
||||
},
|
||||
"text": "[7] Ganin, Y., Lempitsky, V.: Unsupervised domain adaptation by backpropagation. In: International conference on machine learning. pp. 1180–1189. PMLR (2015)"
|
||||
"text": "[1] Abadi, M., Agarwal, A., Barham, P., Brevdo, E., Chen, Z., Citro, C., Corrado, ot G.S., Davis, A., Dean, J., Devin, M., Ghemawat, S., Goodfellow, I., Harp, A., Irving, G., Isard, M., Jia, Y., Jozefowicz, R., Kaiser, L., Kudlur, M., Levenberg, J., Mané, D., Monga, R., Moore, S., Murray, D., Olah, C., Schuster, M., Shlens, J., Steiner, B., Sutskever, I., Talwar, K., Tucker, P., Vanhoucke, V., Vasudevan, V., Viégas, F., Vinyals, O., Warden, P., Wattenberg, M., Wicke, M., Yu, Y., Zheng, X.: TensorFlow: Large-scale machine learning on heterogeneous systems (2015), software available from tensorflow.org Alberti, M., Pondenkandath, V., Wiirsch, M., Ingold, R., Liwicki, M.: Deepdiva: a highly-functional python framework for reproducible experiments. In: 2018 16th International Conference on Frontiers in Handwriting Recognition (ICFHR). pp. 423-428. IEEE (2018) Antonacopoulos, A., Bridson, D., Papadopoulos, C., Pletschacher, S.: A realistic dataset for performance evaluation of document layout analysis. In: 2009 10th International Conference on Document Analysis and Recognition. pp. 296-300. IEEE (2009) Baek, Y., Lee, B., Han, D., Yun, S., Lee, H.: Character region awareness for text detection. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. pp. 9365-9374 (2019) Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: A Large-Scale Hierarchical Image Database. In: CVPRO9 (2009) Deng, Y., Kanervisto, A., Ling, J., Rush, A.M.: Image-to-markup generation with coarse-to-fine attention. In: International Conference on Machine Learning. pp. 980-989. PMLR (2017) Ganin, Y., Lempitsky, V.: Unsupervised domain adaptation by backpropagation. In: International conference on machine learning. pp. 1180-1189. PMLR (2015)"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
@ -1692,164 +1502,14 @@
|
||||
"text": "15"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "16390873ae6b6a173fc894a873bab022",
|
||||
"type": "ListItem",
|
||||
"element_id": "ab02ce354f7464ee1d53d58faa93745f",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 15
|
||||
},
|
||||
"text": "[9]"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "068bf90a7743f50c4a00d4827035e42f",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 15
|
||||
},
|
||||
"text": "[11] Harley, A.W., Ufkes, A., Derpanis, K.G.: Evaluation of deep convolutional nets for document image classification and retrieval. In: 2015 13th International Conference on Document Analysis and Recognition (ICDAR). pp. 991–995. IEEE (2015) [12] He, K., Gkioxari, G., Doll´ar, P., Girshick, R.: Mask r-cnn. In: Proceedings of the"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "813cac1316043d454f3c928740435736",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 15
|
||||
},
|
||||
"text": "[10] Graves, A., Fern´andez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd international conference on Machine learning. pp. 369–376 (2006)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "2f103adde52e35a8853cbb476720a6ef",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 15
|
||||
},
|
||||
"text": "[8] Gardner, M., Grus, J., Neumann, M., Tafjord, O., Dasigi, P., Liu, N., Peters, M., Schmitz, M., Zettlemoyer, L.: Allennlp: A deep semantic natural language processing platform. arXiv preprint arXiv:1803.07640 (2018) (cid:32)Lukasz Garncarek, Powalski, R., Stanis(cid:32)lawek, T., Topolski, B., Halama, P., Grali´nski, F.: Lambert: Layout-aware (language) modeling using bert for in- formation extraction (2020)"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "4d54eb351d8fc3bfbbf7286aa15eabe3",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 15
|
||||
},
|
||||
"text": "IEEE international conference on computer vision. pp. 2961–2969 (2017)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "124b6b55da69fccc1c06568bda34f63c",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 15
|
||||
},
|
||||
"text": "[13] He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 770–778 (2016)"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "9b9688203e9cdea89ded788342be4032",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 15
|
||||
},
|
||||
"text": "[14] Kay, A.: Tesseract: An open-source optical character recognition engine. Linux J."
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "e90f44c0e10f9acb4d8f4c5895846d1e",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 15
|
||||
},
|
||||
"text": "2007(159), 2 (Jul 2007)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "3e0b97d540b7b43ad61292a89a58137f",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 15
|
||||
},
|
||||
"text": "[15] Lamiroy, B., Lopresti, D.: An open architecture for end-to-end document analysis benchmarking. In: 2011 International Conference on Document Analysis and Recognition. pp. 42–47. IEEE (2011)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "80498c312fd32cb744e5953dfef18604",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 15
|
||||
},
|
||||
"text": "[16] Lee, B.C., Weld, D.S.: Newspaper navigator: Open faceted search for 1.5 million images. In: Adjunct Publication of the 33rd Annual ACM Sym- posium on User Interface Software and Technology. p. 120–122. UIST ’20 Adjunct, Association for Computing Machinery, New York, NY, USA (2020). https://doi.org/10.1145/3379350.3416143, https://doi-org.offcampus. lib.washington.edu/10.1145/3379350.3416143"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "09cfad31b28b1315b0bc7bd219136057",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 15
|
||||
},
|
||||
"text": "[17] Lee, B.C.G., Mears, J., Jakeway, E., Ferriter, M., Adams, C., Yarasavage, N., Thomas, D., Zwaard, K., Weld, D.S.: The Newspaper Navigator Dataset: Extracting Headlines and Visual Content from 16 Million Historic Newspaper Pages in Chronicling America, p. 3055–3062. Association for Computing Machinery, New York, NY, USA (2020), https://doi.org/10.1145/3340531.3412767"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "be647bda3f1ca1b63554ef22d1313a43",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 15
|
||||
},
|
||||
"text": "[18] Li, M., Cui, L., Huang, S., Wei, F., Zhou, M., Li, Z.: Tablebank: Table benchmark for image-based table detection and recognition. arXiv preprint arXiv:1903.01949 (2019)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "890eb2d0b6b7dbf00a5e0a4ad2f82107",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 15
|
||||
},
|
||||
"text": "[19] Lin, T.Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll´ar, P., Zitnick, C.L.: Microsoft coco: Common objects in context. In: European conference on computer vision. pp. 740–755. Springer (2014)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "62b12089ccbd0d2dd2f6c292cfa6a6fb",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 15
|
||||
},
|
||||
"text": "[20] Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 3431–3440 (2015)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "f7cfa7ca2e7175d8bdba9c0cb26a7c98",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 15
|
||||
},
|
||||
"text": "[21] Neudecker, C., Schlarb, S., Dogan, Z.M., Missier, P., Sufi, S., Williams, A., Wolsten- croft, K.: An experimental workflow development platform for historical document digitisation and analysis. In: Proceedings of the 2011 workshop on historical document imaging and processing. pp. 161–168 (2011)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "aae12b8f70e03a3e35015ebda5974ebe",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 15
|
||||
},
|
||||
"text": "[22] Oliveira, S.A., Seguin, B., Kaplan, F.: dhsegment: A generic deep-learning approach for document segmentation. In: 2018 16th International Conference on Frontiers in Handwriting Recognition (ICFHR). pp. 7–12. IEEE (2018)"
|
||||
"text": "17 18 19 20 Gardner, M., Grus, J., Neumann, M., Tafjord, O., Dasigi, P., Liu, N., Peters, M., Schmitz, M., Zettlemoyer, L.: Allennlp: A deep semantic natural language processing platform. arXiv preprint arXiv:1803.07640 (2018) Lukasz Garncarek, Powalski, R., Stanistawek, T., Topolski, B., Halama, P., Graliriski, F.: Lambert: Layout-aware (language) modeling using bert for in- formation extraction (2020) Graves, A., Fernandez, $., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd international conference on Machine learning. pp. 369-376 (2006) Harley, A.W., Ufkes, A., Derpanis, K.G.: Evaluation of deep convolutional nets for document image classification and retrieval. In: 2015 13th International Conference on Document Analysis and Recognition (ICDAR). pp. 991-995. IEEE (2015) He, K., Gkioxari, G., Dollar, P., Girshick, R.: Mask r-cnn. In: Proceedings of the IEEE international conference on computer vision. pp. 2961-2969 (2017) He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 770-778 (2016) Kay, A.: Tesseract: An open-source optical character recognition engine. Linux J. 2007(159), 2 (Jul 2007) Lamiroy, B., Lopresti, D.: An open architecture for end-to-end document analysis benchmarking. In: 2011 International Conference on Document Analysis and Recognition. pp. 42-47. IEEE (2011) Lee, B.C., Weld, D.S.: Newspaper navigator: Open faceted search for 1.5 million images. In: Adjunct Publication of the 33rd Annual ACM Sym- posium on User Interface Software and Technology. p. 120-122. UIST 20 Adjunct, Association for Computing Machinery, New York, NY, USA (2020). 
https: //doi.org/10.1145/3379350.3416143 Lee, B.C.G., Mears, J., Jakeway, E., Ferriter, M., Adams, C., Yarasavage, N., Thomas, D., Zwaard, K., Weld, D.S.: The Newspaper Navigator Dataset: Extracting Headlines and Visual Content from 16 Million Historic Newspaper Pages in Chronicling America, p. 3055-3062. Association for Computing Machinery, New York, NY, USA (2020), Li, M., Cui, L., Huang, S., Wei, F., Zhou, M., Li, Z.: Tablebank: Table benchmark for image-based table detection and recognition. arXiv preprint arXiv:1903.01949 (2019) Lin, T.Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Dollar, P., Zitnick, C.L.: Microsoft coco: Common objects in context. In: European conference on computer vision. pp. 740-755. Springer (2014) Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 3431-3440 (2015) Neudecker, C., Schlarb, S., Dogan, Z.M., Missier, P., Sufi, $., Williams, A., Wolsten- croft, K.: An experimental workflow development platform for historical document digitisation and analysis. In: Proceedings of the 2011 workshop on historical document imaging and processing. pp. 161-168 (2011) Oliveira, S.A., Seguin, B., Kaplan, F.: dhsegment: A generic deep-learning approach for document segmentation. In: 2018 16th International Conference on Frontiers in Handwriting Recognition (ICFHR). pp. 7-12. IEEE (2018)"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
@ -1872,183 +1532,13 @@
|
||||
"text": "Z. Shen et al."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "1abcfa28cce9b0f5194dec0d534f28e5",
|
||||
"type": "ListItem",
|
||||
"element_id": "993f472d953f5d0e4054f1d4ad6fc4f0",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 16
|
||||
},
|
||||
"text": "[27] Qasim, S.R., Mahmood, H., Shafait, F.: Rethinking table recognition using graph neural networks. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 142–147. IEEE (2019)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "f7c67eae65521c3a753337d08c5a7cc3",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 16
|
||||
},
|
||||
"text": "[28] Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: Towards real-time object detection with region proposal networks. In: Advances in neural information processing systems. pp. 91–99 (2015)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "4f43b2e563a35ae0208a8626f7e3280e",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 16
|
||||
},
|
||||
"text": "[31] Shen, Z., Zhang, K., Dell, M.: A large dataset of historical japanese documents with complex layouts. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops. pp. 548–549 (2020)"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "b66713d3f2d1689f9174e1cb87429eed",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 16
|
||||
},
|
||||
"text": "[32] Shen, Z., Zhao, J., Dell, M., Yu, Y., Li, W.: Olala: Object-level active learning"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "10a3ff59f6157f21733e659a41031f83",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 16
|
||||
},
|
||||
"text": "[37] Xu, Y., Li, M., Cui, L., Huang, S., Wei, F., Zhou, M.: Layoutlm: Pre-training of"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "219033258f3fff3de33bed379610c8f3",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 16
|
||||
},
|
||||
"text": "[23] Paszke, A., Gross, S., Chintala, S., Chanan, G., Yang, E., DeVito, Z., Lin, Z., Desmaison, A., Antiga, L., Lerer, A.: Automatic differentiation in pytorch (2017) [24] Paszke, A., Gross, S., Massa, F., Lerer, A., Bradbury, J., Chanan, G., Killeen, T., Lin, Z., Gimelshein, N., Antiga, L., et al.: Pytorch: An imperative style, high-performance deep learning library. arXiv preprint arXiv:1912.01703 (2019) [25] Pletschacher, S., Antonacopoulos, A.: The page (page analysis and ground-truth elements) format framework. In: 2010 20th International Conference on Pattern Recognition. pp. 257–260. IEEE (2010)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "285ce5849d6fd9036e5d16724c024ab9",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 16
|
||||
},
|
||||
"text": "[26] Prasad, D., Gadpal, A., Kapadni, K., Visave, M., Sultanpure, K.: Cascadetabnet: An approach for end to end table detection and structure recognition from image- based documents. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops. pp. 572–573 (2020)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "a18dcb504d62cb9f8ed4641014b6eeb2",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 16
|
||||
},
|
||||
"text": "[29] Scarselli, F., Gori, M., Tsoi, A.C., Hagenbuchner, M., Monfardini, G.: The graph neural network model. IEEE transactions on neural networks 20(1), 61–80 (2008) [30] Schreiber, S., Agne, S., Wolf, I., Dengel, A., Ahmed, S.: Deepdesrt: Deep learning for detection and structure recognition of tables in document images. In: 2017 14th IAPR international conference on document analysis and recognition (ICDAR). vol. 1, pp. 1162–1167. IEEE (2017)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "da6733a53c75743361e9edcc1d36a20c",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 16
|
||||
},
|
||||
"text": "[33] Studer, L., Alberti, M., Pondenkandath, V., Goktepe, P., Kolonko, T., Fischer, A., Liwicki, M., Ingold, R.: A comprehensive study of imagenet pre-training for historical document image analysis. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 720–725. IEEE (2019)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "385c241b43ef196663b8d30a6b8768ed",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 16
|
||||
},
|
||||
"text": "[34] Wolf, T., Debut, L., Sanh, V., Chaumond, J., Delangue, C., Moi, A., Cistac, P., Rault, T., Louf, R., Funtowicz, M., et al.: Huggingface’s transformers: State-of- the-art natural language processing. arXiv preprint arXiv:1910.03771 (2019) [35] Wu, Y., Kirillov, A., Massa, F., Lo, W.Y., Girshick, R.: Detectron2. https://"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "d207e2724a17741e3ae1986d63cb5636",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 16
|
||||
},
|
||||
"text": "[36] Xu, Y., Xu, Y., Lv, T., Cui, L., Wei, F., Wang, G., Lu, Y., Florencio, D., Zhang, C., Che, W., et al.: Layoutlmv2: Multi-modal pre-training for visually-rich document understanding. arXiv preprint arXiv:2012.14740 (2020)"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "93d261a89a8422fb8d166e6cdf95d8f6",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 16
|
||||
},
|
||||
"text": "github.com/facebookresearch/detectron2 (2019)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "9dce913bddaa63724f5de64e539b7016",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 16
|
||||
},
|
||||
"text": "based layout annotation. arXiv preprint arXiv:2010.01762 (2020)"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "2625b6830768eac986cfee208c0270de",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 16
|
||||
},
|
||||
"text": "text and layout for document image understanding (2019)"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "21d399ba787aabbf69a8ca861cbcc4a3",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 16
|
||||
},
|
||||
"text": "[38] Zhong, X., Tang, J., Yepes, A.J.: Publaynet:"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "462753569cb801c6f858759742a93793",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 16
|
||||
},
|
||||
"text": "ument Analysis and Recognition (ICDAR). pp. 1015–1022. https://doi.org/10.1109/ICDAR.2019.00166"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "c7fc0ade487926854bb602bca85fad60",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 16
|
||||
},
|
||||
"text": "layout analysis."
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "96c49c3fbbb585f8062778e9a404b00f",
|
||||
"metadata": {
|
||||
"data_source": {},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 16
|
||||
},
|
||||
"text": "largest dataset ever for doc- In: 2019 International Conference on Document IEEE (Sep 2019)."
|
||||
"text": "23 github. com/facebookresearch/detectron2) (2019) Paszke, A., Gross, S., Chintala, S., Chanan, G., Yang, E., DeVito, Z., Lin, Z., Desmaison, A., Antiga, L., Lerer, A.: Automatic differentiation in pytorch (2017) Paszke, A., Gross, S., Massa, F., Lerer, A., Bradbury, J., Chanan, G., Killeen, T., Lin, Z., Gimelshein, N., Antiga, L., et al.: Pytorch: An imperative style, high-performance deep learning library. arXiv preprint arXiv:1912.01703 (2019) Pletschacher, S., Antonacopoulos, A.: The page (page analysis and ground-truth elements) format framework. In: 2010 20th International Conference on Pattern Recognition. pp. 257-260. IEEE (2010) Prasad, D., Gadpal, A., Kapadni, K., Visave, M., Sultanpure, K.: Cascadetabnet: An approach for end to end table detection and structure recognition from image- based documents. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops. pp. 572-573 (2020) Qasim, S.R., Mahmood, H., Shafait, F.: Rethinking table recognition using graph neural networks. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 142-147. IEEE (2019) Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: Towards real-time object detection with region proposal networks. In: Advances in neural information processing systems. pp. 91-99 (2015) Scarselli, F., Gori, M., Tsoi, A.C., Hagenbuchner, M., Monfardini, G.: The graph neural network model. IEEE transactions on neural networks 20(1), 61-80 (2008) Schreiber, S., Agne, S., Wolf, I., Dengel, A., Ahmed, S.: Deepdesrt: Deep learning for detection and structure recognition of tables in document images. In: 2017 14th IAPR international conference on document analysis and recognition (ICDAR). vol. 1, pp. 1162-1167. IEEE (2017) Shen, Z., Zhang, K., Dell, M.: A large dataset of historical japanese documents with complex layouts. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops. pp. 
548-549 (2020) Shen, Z., Zhao, J., Dell, M., Yu, Y., Li, W.: Olala: Object-level active learning based layout annotation. arXiv preprint arXiv:2010.01762 (2020) Studer, L., Alberti, M., Pondenkandath, V., Goktepe, P., Kolonko, T., Fischer, A., Liwicki, M., Ingold, R.: A comprehensive study of imagenet pre-training for historical document image analysis. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 720-725. IEEE (2019) Wolf, T., Debut, L., Sanh, V., Chaumond, J., Delangue, C., Moi, A., Cistac, P., Rault, T., Louf, R., Funtowicz, M., et al.: Huggingface’s transformers: State-of- the-art natural language processing. arXiv preprint arXiv:1910.03771 (2019) Wu, Y., Kirillov, A., Massa, F., Lo, W.Y., Girshick, R.: Detectron2. Xu, Y., Xu, Y., Lv, T., Cui, L., Wei, F., Wang, G., Lu, Y., Florencio, D., Zhang, C., Che, W., et al.: Layoutlmv2: Multi-modal pre-training for visually-rich document understanding. arXiv preprint arXiv:2012.14740 (2020) Xu, Y., Li, M., Cui, L., Huang, S., Wei, F., Zhou, M.: Layoutlm: Pre-training of text and layout for document image understanding (2019) Zhong, X., Tang, J., Yepes, A.J.: Publaynet: largest dataset ever for doc- ument layout analysis. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 1015-1022. IEEE (Sep 2019). https: //doi.org/10.1109/ICDAR.2019.00166"
|
||||
}
|
||||
]
|
||||
File diff suppressed because it is too large
Load Diff
@ -199,7 +199,7 @@
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "9c4387f669c689e9af0a712fd494b2d7",
|
||||
"element_id": "e18242a460d9d495ea7cffee38c1e647",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf",
|
||||
@ -213,43 +213,7 @@
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 3
|
||||
},
|
||||
"text": "The need for harmony in the nuclear regulatory environment"
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "93e7dedc9d334470067ad2de1f9ee788",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf",
|
||||
"version": 177372694731575984083482917563244941766,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:10:36"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 3
|
||||
},
|
||||
"text": "The need for a holistic safety paradigm for the whole electricity system."
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "3cc3e847449fed4fa13bbd94f86e43a9",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf",
|
||||
"version": 177372694731575984083482917563244941766,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:10:36"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 3
|
||||
},
|
||||
"text": "The need to create a level playing field that values reliability and energy security"
|
||||
"text": "° The need to create a level playing field that values reliability and energy security ° The need for harmony in the nuclear regulatory environment ° The need for a holistic safety paradigm for the whole electricity system."
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
@ -3439,7 +3403,7 @@
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "338d3e15917414641f2b559473f168f8",
|
||||
"element_id": "0ad07326f56e66781da5dbb9488eaa67",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf",
|
||||
@ -3453,7 +3417,7 @@
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 9
|
||||
},
|
||||
"text": "Figure 6. The lasting decarbonization of French electricity and nuclear’s ability to meet growing demand x"
|
||||
"text": "Figure 6. The lasting decarbonization of French electricity and nuclear’s ability to meet growing demand”"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
@ -3474,8 +3438,8 @@
|
||||
"text": "The incredible energy density of uranium means that just a few kilos is all that is required to provide one person with enough power for a lifetime. Uranium is abundant and can be found in many parts of the world, as well as in seawater. Furthermore, spent nuclear fuel is well managed and can in most cases be recycled to produce even more power. By using nuclear energy, countries are able to take charge of their own destinies by decreasing their reliance on imported energy – enhanced independence and security in uncertain times."
|
||||
},
|
||||
{
|
||||
"type": "FigureCaption",
|
||||
"element_id": "eeda9f9210dfe4be7e82b4385290d3ca",
|
||||
"type": "Image",
|
||||
"element_id": "36ca9b7cdbbcba729a46487cf86c07eb",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf",
|
||||
@ -3636,8 +3600,8 @@
|
||||
"text": "i"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "5d7f49449ab22deac22d767b89549c55",
|
||||
"type": "ListItem",
|
||||
"element_id": "ffc47b19bb43cce8c23421b5c78b17b4",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf",
|
||||
@ -3651,223 +3615,7 @@
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "ii"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "f5557d4fcf727a981a3c315aca733eef",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf",
|
||||
"version": 177372694731575984083482917563244941766,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:10:36"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "iii"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "0ab306823035661bb8dba21cc2535231",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf",
|
||||
"version": 177372694731575984083482917563244941766,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:10:36"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "iv"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "d3fc2842ddfad4c8d3859f84d4439bfd",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf",
|
||||
"version": 177372694731575984083482917563244941766,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:10:36"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "Vv"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "4c94485e0c21ae6c41ce1dfe7b6bface",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf",
|
||||
"version": 177372694731575984083482917563244941766,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:10:36"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "v"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "c0ff93ea8927a7366db0331e5fd9d19f",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf",
|
||||
"version": 177372694731575984083482917563244941766,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:10:36"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "vi"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "c0ff93ea8927a7366db0331e5fd9d19f",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf",
|
||||
"version": 177372694731575984083482917563244941766,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:10:36"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "vi"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "c1d2906220d1eef1b17422b7132872a8",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf",
|
||||
"version": 177372694731575984083482917563244941766,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:10:36"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "vii"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "de72de35f0092bdd3107011f3be18dc0",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf",
|
||||
"version": 177372694731575984083482917563244941766,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:10:36"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "International Energy Agency (2018), World Energy Outlook 2018. Data accessed from https://www.iea.org/weo/ – Based on the New Policies Scenario, which incorporates existing energy policies as well as an assessment of the results likely to stem from the implementation of announced policy intentions – with visual modification by World Nuclear Association. International Energy Agency (n.d.), Statistics. Accessed from: https://www.iea.org/statistics/?country=WORLD&year=2016&category=Electricity&indicator=ElecGenByFuel&mode =chart&dataTable=ELECTRICITYANDHEAT – with visual modifications by World Nuclear Association. International Energy Agency (2019), Nuclear Power in a Clean Energy System. Accessed from: https://www.iea.org/ publications/nuclear/ Intergovernmental Panel on Climate Change (2018), Special Report on Global Warming of 1.5 °C. Accessed from: https://www.ipcc.ch/sr15/ International Energy Agency (2019), Nuclear Power in a Clean Energy System. Accessed from: https://www.iea.org/ publications/nuclear/ International Energy Agency & OECD Nuclear Energy Agency (2015), Projected Costs of generating Electricity – 2015 Edition. Accessed from: https://www.oecd-nea.org/ndd/pubs/2015/7057-proj-costs-electricity-2015.pdf International Atomic Energy Agency (2015), Technical challenges in the application and licensing of digital instrumentation and control systems in nuclear power plants. Accessed from: https://www-pub.iaea.org/MTCD/ Publications/PDF/P1695_web.pdf"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "b6396ecd6f60e3dcca17c045c00846c1",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf",
|
||||
"version": 177372694731575984083482917563244941766,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:10:36"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "viii Paul-Scherrer Institute. Data for nuclear accidents modified to reflect UNSCEAR findings/recommendations (2012)"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "ed171375d0bf81eaa5512140c3a29b8f",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf",
|
||||
"version": 177372694731575984083482917563244941766,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:10:36"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "ix"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "2d711642b726b04401627ca9fbac32f5",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf",
|
||||
"version": 177372694731575984083482917563244941766,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:10:36"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "x"
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "5897aff759a5cc8d94710101c73af296",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf",
|
||||
"version": 177372694731575984083482917563244941766,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:10:36"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "and NRC SOARCA study 2015 International Energy Agency (2018), Electricity Information 2018 https://webstore.iea.org/electricity-information-2018-overview Ibid."
|
||||
"text": "i nternational Energy Agency (20 results Nuclear Association. ii nternational iii nternational Energy Agency (20 publications/nuclear/ 8), World Energy Outloo! Energy Agency (n.d.), Statistics. Accessed from: https://www.iea.org/statistics/?country=>WORLD&year=20 =chart&dataTable=ELECTRICITYANDHEAT - with visual modifications by World Nuclear Association. 9), Nuclear Power in a CI 2018. Data accessed from https://www.iea.org/weo/ — Based on the New Policies Scenario, which incorporates existing energy policies as well as an assessment of the ikely to stem from the implementation of announced policy intentions — with visual modification by World 6&category=Electricity&indicator=ElecGenByFuel&mode lean Energy System. Accessed from: https://www.iea.org/ iv Intergovernmental Panel on Climate Change (2018), Special Report on Global Warming of 1.5 °C. Accessed from: https:/Awww.ipce.ch/sr15/ Vv nternational Energy Agency (20 publications/nuclear/ vi nternational vii International Publications/PDF/P1695_web.pdf and NRC SOARCA study 2015 ix nternational x bid. 9), Nuclear Power in a CI Energy Agency & OECD Nuclear Energy Agency (2015), Projected Costs o 2015 Edition. Accessed from: https:/Awww.oecd-nea.org/ndd/pubs/2015/7057-proj-costs-electricity-2015.pdf Atomic Energy Agency (2015), Technical challenges in the application and instrumentation and control systems in nuclear power plants. Accessed from: https://www-pub.iaea.org/MTCD/ Energy Agency (2018), Electricity Information 2018 https://webstore.iea.org/electricity-information-2018-overview lean Energy System. Accessed from: https://www.iea.org/ generating Electricity — icensing of digital Paul-Scherrer Institute. Data for nuclear accidents modified to reflect UNSCEAR findings/recommendations (2012)"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
|
||||
@ -1549,7 +1549,7 @@
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "8921c0f3c29bc04c22c9c40f4eef6613",
|
||||
"element_id": "a9d31d88b0e2026dbed12c8b5536ab2b",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
@ -1563,7 +1563,7 @@
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 5
|
||||
},
|
||||
"text": "Figure 3. Comparison of number of fatalities due to electricity generation, including accidents and air pollution3"
|
||||
"text": "Figure 3. Comparison of number of fatalities due to electricity generation, including accidents and air pollution®"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
@ -1585,7 +1585,7 @@
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "e450813fe6430d87c4caa64e4792bc74",
|
||||
"element_id": "1ff44442b3a554331aaf4ffb30b7eda6",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
@ -1599,25 +1599,7 @@
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 5
|
||||
},
|
||||
"text": "2 Including 28 firefighters that were exposed to lethal amounts of radiation during the accident night, and 15 fatal cases of thyroid cancer. 3 Sources drawn upon: Markandya, A., & Wilkinson, P. (2007), Sovacool et al. (2016). Data for nuclear accidents modified to reflect the"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "31138d5dc0c297144d27d5dbd15d5ef0",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 5
|
||||
},
|
||||
"text": "2012 UNSCEAR report and the 2015 US NRC SOARCA study."
|
||||
"text": "2 Including 28 firefighters that were exposed to lethal amounts of radiation during the accident night, and 15 fatal cases of thyroid cancer. $ Sources drawn upon: Markandya, A., & Wilkinson, P. (2007), Sovacool et al. (2016). Data for nuclear accidents modified to reflect the 2012 UNSCEAR report and the 2015 US NRC SOARCA study."
|
||||
},
|
||||
{
|
||||
"type": "UncategorizedText",
|
||||
@ -2178,8 +2160,8 @@
|
||||
"text": "i"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "e72fdf383c0b4d8cba0284d4f7ff06d5",
|
||||
"type": "ListItem",
|
||||
"element_id": "158d56841d65947a9a91a3ca34163a4c",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
@ -2193,385 +2175,7 @@
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "World Health Organization (2020). Road traffic injuries. Available at: https://www.who.int/news-room/fact-sheets/ detail/road-traffic-injuries"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "5d7f49449ab22deac22d767b89549c55",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "ii"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "f5557d4fcf727a981a3c315aca733eef",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "iii"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "4c94485e0c21ae6c41ce1dfe7b6bface",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "v"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "c0ff93ea8927a7366db0331e5fd9d19f",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "vi"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "9d45931b60fa1041a13243a1ee1bb170",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "xii BP, 2020. BP Statistical Review of World Energy, London: BP."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "794a96b3ab9a3e860f65549c3a106704",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "viii National Cancer Institute (2020). Cancer statistics. Available at: https://www.cancer.gov/about-cancer/"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "94178a8c2e84bf4b8f2eed9c79d7cfd5",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "ix Cancer Research UK (n.d.). Cancer risk statistics. Available at: https://www.cancerresearchuk.org/health-"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "4051afedda98549176dc28aaa9087e81",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "iv United Nations Scientific Committee on the Effects of Radiation (2016). Report of the United Nations Scientific"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "d85940c91ae6b53fc4b41bd5137e7371",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "xi World Health Organization (2018). Climate change and health. Available at: https://www.who.int/news-room/fact-"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "9a236889bced20048d1619798291d194",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "vii World Health Organization. (2016). Updated tables 2016 for ‘Preventing disease through health environments: a"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "26a84724035df76d7d8a6610a6fa4627",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "x OECD-NEA (2019). The Full Costs of Electricity Provision. Available at: https://www.oecd-nea.org/jcms/pl_14998/"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "6e98dee26ce2439cd4b8af82426e894e",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "understanding/statistics"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "759772833f6756e511150b2a49233864",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "professional/cancer-statistics/risk"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "86c0a0cef7faa217f386f75ead17dbec",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "sheets/detail/climate-change-and-health"
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "7267222b91f507e040c69dad9af7941f",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "the-full-costs-of-electricity-provision?details=true"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "2ef1e8614bc32af635d2a0c894b2ed3c",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "Slovic, P., 2010. The Psychology of risk. Saúde e Sociedade, 19(4), pp. 731-747."
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "e4d7c811a799c3c8e706125556f8a370",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "BBC (2020). Plane crash fatalities fell more than 50% in 2019. Available at: https://www.bbc.co.uk/news/ business-50953712"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "98e5f594de0e79990a0650489fdf295c",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "Committee on the Effects of Atomic Radiation. Accessed from: https://www.unscear.org/docs/publications/2016/ UNSCEAR_2016_GA-Report-CORR.pdf"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "d5658e2a49995a2f4ca4b45d95f2058b",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "global assessment of the burden of disease from environmental risks’. Available at: https://www.who.int/data/gho/ data/themes/public-health-and-environment [Accessed on 8 April 2021]"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "c328c06c32c00c43471cd3c9d257c68b",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "International Energy Agency (2020). Global share of total energy supply by source, 2018. Key World Energy Statistics 2020. Available at: https://www.iea.org/data-and-statistics/charts/global-share-of-total-energy-supply-by- source-2018"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "6bbd046b939157389606adf4059fe1f3",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf",
|
||||
"version": 306475068461766865312866697521104206816,
|
||||
"record_locator": {
|
||||
"protocol": "s3",
|
||||
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf"
|
||||
},
|
||||
"date_modified": "2023-02-12T10:09:32"
|
||||
},
|
||||
"filetype": "application/pdf",
|
||||
"page_number": 10
|
||||
},
|
||||
"text": "Vohra, K., Vodonos, A., Schwartz, J., Marais, E., Sulprizio, M., & Mickley, L. (2021). Global mortality from outdoor fine particle pollution generated by fossil fuel combustion: Results from GEOS-Chem. Environmental Research, 195, p. 1-8"
|
||||
"text": "Vi VIL xi xii World Health Organization (2020). Road traffic injuries. Available at: https://www.who.int/news-room/fact-sheets/ detail/road-traffic-injuries BBC (2020). Plane crash fatalities fell more than 50% in 2019. Available at: https:/Awww.bbc.co.uk/news/ business-50953712 Slovic, P, 2010. The Psychology of risk. Sauide e Sociedade, 19(4), pp. 731-747. United Nations Scientific Committee on the Effects of Radiation (2016). Report of the United Nations Scientific Committee on the Effects of Atomic Radiation. Accessed from: https:/Avww.unscear.org/docs/publications/2016/ UNSCEAR_2016_GA-Report-CORR.pdf International Energy Agency (2020). Global share of total energy supply by source, 2018. Key World Energy Statistics 2020. Available at: https://www.iea.org/data-and-statistics/charts/global-share-of-total-energy-supply-by- source-2018 Vohra, K., Vodonos, A., Schwartz, J., Marais, E., Sulprizio, M., & Mickley, L. (2021). Global mortality from outdoor fine particle pollution generated by fossil fuel combustion: Results from GEOS-Chem. Environmental Research, 195, p. 1-8 World Health Organization. (2016). Updated tables 2016 for ‘Preventing disease through health environments: a global assessment of the burden of disease from environmental risks’. Available at: https://www.who.int/data/gho/ data/themes/public-health-and-environment [Accessed on 8 April 2021] National Cancer Institute (2020). Cancer statistics. Available at: https://www.cancer.gov/about-cancer/ understanding/statistics Cancer Research UK (n.d.). Cancer risk statistics. Available at: https:/Awww.cancerresearchuk.org/health- professional/cancer-statistics/risk OECD-NEA (2019). The Full Costs of Electricity Provision. Available at: https:/Avww.oecd-nea.org/jcms/pl_14998/ the-full-costs-of-electricity-provision?details=true World Health Organization (2018). Climate change and health. Available at: https:/Awww.who.int/news-room/fact- sheets/detail/climate-change-and-health BP 2020. 
BP Statistical Review of World Energy, London: BP"
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
|
||||
@ -1 +1 @@
|
||||
__version__ = "0.10.19-dev0" # pragma: no cover
|
||||
__version__ = "0.10.19-dev1" # pragma: no cover
|
||||
|
||||
@ -329,7 +329,11 @@ def _partition_pdf_or_image_local(
|
||||
|
||||
ocr_languages = prepare_languages_for_tesseract(languages)
|
||||
|
||||
model_name = model_name if model_name else os.environ.get("UNSTRUCTURED_HI_RES_MODEL_NAME")
|
||||
model_name = (
|
||||
model_name
|
||||
if model_name
|
||||
else os.environ.get("UNSTRUCTURED_HI_RES_MODEL_NAME", "detectron2_onnx")
|
||||
)
|
||||
pdf_image_dpi = kwargs.pop("pdf_image_dpi", None)
|
||||
extract_images_in_pdf = kwargs.get("extract_images_in_pdf", False)
|
||||
image_output_dir_path = kwargs.get("image_output_dir_path", None)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user