mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00
fix: hi_res PDF parsing: only uncategorized text for extracted elements (#3975)
This commit is contained in:
parent
8fc41811eb
commit
dfa17bd3a0
@ -1,10 +1,11 @@
|
||||
## 0.17.6-dev0
|
||||
## 0.17.6-dev1
|
||||
|
||||
### Enhancements
|
||||
|
||||
### Features
|
||||
|
||||
### Fixes
|
||||
- **Do not use NLP to determine element types for extracted elements with hi_res.** This avoids extraneous Title elements in hi_res outputs. This only applies to *extracted* elements, i.e. text objects found outside of the regions identified by object detection (those regions are mapped to *inferred* elements). *Extracted* and *inferred* elements are merged to form the list of `Element`s returned by `partition_pdf()`.
|
||||
|
||||
## 0.17.5
|
||||
|
||||
|
@ -823,8 +823,8 @@ def test_partition_categorization_backup():
|
||||
example_doc_path("pdf/layout-parser-paper-fast.pdf"),
|
||||
strategy=PartitionStrategy.HI_RES,
|
||||
)
|
||||
# Should have changed the element class from Text to Title
|
||||
assert isinstance(elements[0], Title)
|
||||
# Should NOT have changed the element class from Text to Title
|
||||
assert isinstance(elements[0], Text)
|
||||
assert elements[0].text == text
|
||||
|
||||
|
||||
|
@ -141,7 +141,7 @@ def test_partition_msg_can_process_attachments():
|
||||
"Text",
|
||||
"Text",
|
||||
"Image",
|
||||
"Title",
|
||||
"Text",
|
||||
"Text",
|
||||
"Title",
|
||||
"Title",
|
||||
|
@ -14,9 +14,9 @@
|
||||
<p class="NarrativeText" id="0ac27467f42b45650b5bf093d76055d6">
|
||||
Contents lists available at ScienceDirect
|
||||
</p>
|
||||
<h1 class="Title" id="2a9146ee3c09e107d11693c7b6e4725c">
|
||||
<p class="UncategorizedText" id="2a9146ee3c09e107d11693c7b6e4725c">
|
||||
Data in Brief
|
||||
</h1>
|
||||
</p>
|
||||
<p class="NarrativeText" id="97e80c6e7dc2754c9083b263ff65039e">
|
||||
journal homepage: www.elsevier.com/locate/dib
|
||||
</p>
|
||||
@ -28,19 +28,19 @@
|
||||
Data on environmental sustainable corrosion inhibitor for stainless steel in aggressive environment
|
||||
</h1>
|
||||
<img alt="" class="Image" id="151a01a072f2b18a3fda459fd6e71d79"/>
|
||||
<h1 class="Title" id="5781f22b6e47a24d5f7847e6a4720677">
|
||||
<p class="UncategorizedText" id="5781f22b6e47a24d5f7847e6a4720677">
|
||||
(Jee
|
||||
</h1>
|
||||
</p>
|
||||
<h1 class="Title" id="bddd1cbc864e9b44cc0715a1cccf8dbc">
|
||||
Omotayo Sanni n, Abimbola Patricia I. Popoola
|
||||
</h1>
|
||||
<p class="NarrativeText" id="589a383c831226a006e06ae55dba9b55">
|
||||
Department of Chemical, Metallurgical and Materials Engineering, Tshwane University of Technology, Pretoria, South Africa
|
||||
</p>
|
||||
<p class="NarrativeText" id="658c1a75d44888e4fe434dc3daf48818">
|
||||
<p class="UncategorizedText" id="658c1a75d44888e4fe434dc3daf48818">
|
||||
a r t i c l e i n f o
|
||||
</p>
|
||||
<p class="NarrativeText" id="b9e48f235de5b531427187eb6ea135fe">
|
||||
<p class="UncategorizedText" id="b9e48f235de5b531427187eb6ea135fe">
|
||||
a b s t r a c t
|
||||
</p>
|
||||
<h1 class="Title" id="911bfead9b546998812e2d1d615ecc87">
|
||||
@ -88,19 +88,19 @@
|
||||
<h1 class="Title" id="13fd694e1ff862d163b840a246964e58">
|
||||
Value of the data
|
||||
</h1>
|
||||
<p class="NarrativeText" id="5f1c4074c1b5d641b724b99be6f5ddfd">
|
||||
<p class="UncategorizedText" id="5f1c4074c1b5d641b724b99be6f5ddfd">
|
||||
© Data presented here provide optimum conditions of waste material as inhibitor for stainless steel
|
||||
</p>
|
||||
<li class="ListItem" id="afed004de4c50d761640b6c18729a988">
|
||||
Type 316 in 0.5 M H2SO4 medium. The given data describe the inhibitive performance of eco-friendly egg shell powder on austenitic stainless steel Type 316 corrosion in sulphuric acid environment.
|
||||
</li>
|
||||
<p class="NarrativeText" id="f93d89ccb971e2b60f44afbf710673c6">
|
||||
<p class="UncategorizedText" id="f93d89ccb971e2b60f44afbf710673c6">
|
||||
© The data obtained for the inhibition of waste product (egg shell powder) on stainless steel Type 316
|
||||
</p>
|
||||
<li class="ListItem" id="cb6e8acb9c24820b59f8973cc236ef35">
|
||||
can be used as basis in determining the inhibitive performance of the same inhibitor in other environments.
|
||||
</li>
|
||||
<p class="NarrativeText" id="5964ede27be8850de7a13e0dd32c1b21">
|
||||
<p class="UncategorizedText" id="5964ede27be8850de7a13e0dd32c1b21">
|
||||
© The data can be used to examine the relationship between the process variable as it affect the
|
||||
</p>
|
||||
<li class="ListItem" id="e1f7e635d8739a97d8d0000ba8004f61">
|
||||
@ -152,9 +152,9 @@
|
||||
<table class="Table" id="7e0388ec6fd4ec451d96232e30d41e7c" style="border: 1px solid black; border-collapse: collapse;">
|
||||
Inhibitor be (V/dec) ba (V/dec) Ecorr (V) icorr (A/cm?) Polarization Corrosion concentration (g) resistance (Q) rate (mm/year) oO 0.0335 0.0409 —0.9393 0.0003 24.0910 2.8163 2 1.9460 0.0596 —0.8276 0.0002 121.440 1.5054 4 0.0163 0.2369 —0.8825 0.0001 42.121 0.9476 6 0.3233 0.0540 —0.8027 5.39E-05 373.180 0.4318 8 0.1240 0.0556 —0.5896 5.46E-05 305.650 0.3772 10 0.0382 0.0086 —0.5356 1.24E-05 246.080 0.0919
|
||||
</table>
|
||||
<h1 class="Title" id="d61e56d1a4c761ad3c69f4b970ba4f3c">
|
||||
<p class="UncategorizedText" id="d61e56d1a4c761ad3c69f4b970ba4f3c">
|
||||
rate (mm/year)
|
||||
</h1>
|
||||
</p>
|
||||
<p class="NarrativeText" id="3a5534c2aafc2d8a4c0b65d530d00ab3">
|
||||
The plot of inhibitor concentration over degree of surface coverage versus inhibitor concentration gives a straight line as shown in Fig. 5. The strong correlation reveals that egg shell adsorption on stainless surface in 0.5 M H2SO4 follow Langmuir adsorption isotherm. Figs. 6–8 show the SEM/EDX surface morphology analysis of stainless steel. Figs. 7 and 8 are the SEM/EDX images of the stainless steel specimens without and with inhibitor after weight loss experiment in sulphuric acid medium. The stainless steel surface corrosion product layer in the absence of inhibitor was porous and as a result gives no corrosion protection. With the presence of ES, corrosion damage was minimized, with an evidence of ES present on the metal surface as shown in Fig. 8.
|
||||
</p>
|
||||
@ -232,12 +232,12 @@
|
||||
<p class="NarrativeText" id="25833fe4955e01b455cf77d0cfd7d71f">
|
||||
The potentiodynamic polarization method was performed on the prepared test samples immersed in 0.5 M H2SO4 solution in the presence and absence of different ES concentrations. A three electrode system was used; stainless steel Type 316 plate as working electrode with an exposed area of 1.0 cm2, platinum rod as counter electrode and silver chloride electrode as reference electrode. The electrode was polished, degreased in acetone and thoroughly rinsed with distilled water before the experiment. Current density against applied potential was plotted. The slope of the linear part in anodic and cathodic plots gives anodic and cathodic constants according to the Stern–Geary equation, and the
|
||||
</p>
|
||||
<h1 class="Title" id="57906367eca399b52f7eecbf78345bf4">
|
||||
<p class="UncategorizedText" id="57906367eca399b52f7eecbf78345bf4">
|
||||
ð2Þ
|
||||
</h1>
|
||||
<h1 class="Title" id="cff55ae1916232dbda5239f59c897cb9">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="cff55ae1916232dbda5239f59c897cb9">
|
||||
ð3Þ
|
||||
</h1>
|
||||
</p>
|
||||
<div class="Header" id="e40c3ee561b10ca5b7a76900c8d5b263">
|
||||
O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451–457
|
||||
</div>
|
||||
|
@ -14,9 +14,9 @@
|
||||
<p class="NarrativeText" id="fefc7aa600d4266a6cca6d017bc77306">
|
||||
Contents lists available at ScienceDirect
|
||||
</p>
|
||||
<h1 class="Title" id="6e552bae24f7a412e4b5764d0428a5eb">
|
||||
<p class="UncategorizedText" id="6e552bae24f7a412e4b5764d0428a5eb">
|
||||
Data in Brief
|
||||
</h1>
|
||||
</p>
|
||||
<p class="NarrativeText" id="c1b3d4f53698b892fcc23fc10a72e6fb">
|
||||
journal homepage: www.elsevier.com/locate/dib
|
||||
</p>
|
||||
@ -28,9 +28,9 @@
|
||||
A benchmark dataset for the multiple depot vehicle scheduling problem
|
||||
</h1>
|
||||
<img alt="" class="Image" id="3934d1d731466b344854fc9932fd9e3d"/>
|
||||
<h1 class="Title" id="cb34109c5030876248f9a9bbdd65093f">
|
||||
<p class="UncategorizedText" id="cb34109c5030876248f9a9bbdd65093f">
|
||||
(eee
|
||||
</h1>
|
||||
</p>
|
||||
<p class="NarrativeText" id="0cda4eb20070fdf01ec0d47b2a550241">
|
||||
Sarang Kulkarni a,b,c,n, Mohan Krishnamoorthy d,e, Abhiram Ranade f, Andreas T. Ernst c, Rahul Patil b
|
||||
</p>
|
||||
@ -52,16 +52,16 @@
|
||||
<p class="UncategorizedText" id="03b4116b32ee9de3beea142b52694b19">
|
||||
e School of Information Technology and Electrical Engineering, The University of Queensland, QLD 4072,
|
||||
</p>
|
||||
<h1 class="Title" id="bfcbabb9ed9169f6a4be19576064f702">
|
||||
<p class="UncategorizedText" id="bfcbabb9ed9169f6a4be19576064f702">
|
||||
Australia
|
||||
</h1>
|
||||
</p>
|
||||
<p class="NarrativeText" id="85875ebbc1de554e92edc54674add1d5">
|
||||
f Department of Computer Science and Engineering, IIT Bombay, Powai, Mumbai 400076, India
|
||||
</p>
|
||||
<p class="NarrativeText" id="f9f33fff8fbb981301df3055b60e12c7">
|
||||
<p class="UncategorizedText" id="f9f33fff8fbb981301df3055b60e12c7">
|
||||
a r t i c l e i n f o
|
||||
</p>
|
||||
<p class="NarrativeText" id="4f3f69dd17ddae776c656ec73d9837ae">
|
||||
<p class="UncategorizedText" id="4f3f69dd17ddae776c656ec73d9837ae">
|
||||
a b s t r a c t
|
||||
</p>
|
||||
<p class="NarrativeText" id="34522460857b10c63d8c2c8d2fbb3087">
|
||||
@ -106,13 +106,13 @@
|
||||
<li class="ListItem" id="26ac34f98623dc94e0854dc5e841d4e4">
|
||||
© The data provide all the information that is required to model the MDVSP by using the existing mathematical formulations.
|
||||
</li>
|
||||
<p class="NarrativeText" id="79e2a2e0c24e1e8befe2b6beb2f1df64">
|
||||
<p class="UncategorizedText" id="79e2a2e0c24e1e8befe2b6beb2f1df64">
|
||||
e All the problem instances are available for use without any restrictions.
|
||||
</p>
|
||||
<li class="ListItem" id="d401597b8ff2854bfb89f2833d02a763">
|
||||
e The benchmark solutions and solution time for the problem instances are presented in [3] and can be used for the comparison.
|
||||
</li>
|
||||
<p class="NarrativeText" id="c1cff3abe7c7915accab35910df1c5cd">
|
||||
<p class="UncategorizedText" id="c1cff3abe7c7915accab35910df1c5cd">
|
||||
© The dataset includes a program that can generate similar problem instances of different sizes.
|
||||
</p>
|
||||
<h1 class="Title" id="fb765d6762e6a423cb8b9dab27359732">
|
||||
@ -121,9 +121,9 @@
|
||||
<p class="NarrativeText" id="1f3d79f338b86fbfcfa7054f11de28f0">
|
||||
The dataset contains 60 different problem instances of the multiple depot vehicle scheduling pro- blem (MDVSP). Each problem instance is provided in a separate file. Each file is named as ‘RN-m-n-k.dat’, where ‘m’, ‘n’, and ‘k’ denote the number of depots, the number of trips, and the instance number for the size, ‘ðm;nÞ’, respectively. For example, the problem instance, ‘RN-8–1500-01.dat’, is the first problem instance with 8 depots and 1500 trips. For the number of depots, m, we used three values, 8,12, and 16. The four values for the number of trips, n, are 1500, 2000, 2500, and 3000. For each size, ðm;nÞ, five instances are provided. The dataset can be downloaded from https://orlib.uqcloud.net. For each problem instance, the following information is provided:
|
||||
</p>
|
||||
<h1 class="Title" id="fc547df12bfc22e91a0b5927670caa78">
|
||||
<p class="UncategorizedText" id="fc547df12bfc22e91a0b5927670caa78">
|
||||
The number of depots mð
|
||||
</h1>
|
||||
</p>
|
||||
<p class="UncategorizedText" id="320f6d28582c354d35673c2a4119851f">
|
||||
Þ,
|
||||
</p>
|
||||
@ -187,9 +187,9 @@
|
||||
<table class="Table" id="63de709cd751564fc9622864af4e9310" style="border: 1px solid black; border-collapse: collapse;">
|
||||
Instance size (m, n) Average number of Locations Times Vehicles (8, 1500) 568.40 975.20 652.20 668,279.40 (8, 2000) 672.80 1048.00 857.20 1,195,844.80 (8, 2500) 923.40 1078.00 1082.40 1,866,175.20 (8, 3000) 977.00 1113.20 1272.80 2,705,617.00 (12, 1500) 566.00 994.00 642.00 674,191.00 (12, 2000) 732.60 1040.60 861.20 1,199,659.80 (12, 2500) 875.00 1081.00 1096.00 1,878,745.20 (12, 3000) 1119.60 1107.40 1286.20 2,711,180.40 (16, 1500) 581.80 985.40 667.80 673,585.80 (16, 2000) 778.00 1040.60 872.40 1,200,560.80 (16, 2500) 879.00 1083.20 1076.40 1,879,387.00 (16, 3000) 1087.20 1101.60 1284.60 2,684,983.60
|
||||
</table>
|
||||
<h1 class="Title" id="ec04cd3d411eed35515b3ea80ebac5af">
|
||||
<p class="UncategorizedText" id="ec04cd3d411eed35515b3ea80ebac5af">
|
||||
Possible empty travels
|
||||
</h1>
|
||||
</p>
|
||||
<div class="Header" id="fa23407a7c3c99ae3b6fb79034698807">
|
||||
S. Kulkarni et al. / Data in Brief 22 (2019) 484–487
|
||||
</div>
|
||||
|
@ -76,8 +76,8 @@
|
||||
<p class="NarrativeText" id="ad58a94e747d9fe18e2550e58c54f6bc">
|
||||
Camila Loureiro*1, Corsi-Zuelli Fabiana1, Fachim Helene Aparecida1, Shuhama Rosana1, Menezes Paulo Rossi1, Dalton Caroline F2,
|
||||
</p>
|
||||
<h1 class="Title" id="6a0290d48528f40c9c2288fddff94e3e">
|
||||
<p class="UncategorizedText" id="6a0290d48528f40c9c2288fddff94e3e">
|
||||
AQ3
|
||||
</h1>
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -11,7 +11,7 @@
|
||||
WORLD ASSOCIATION
|
||||
</h1>
|
||||
<img alt="" class="Image" id="4ab4d4df6aeb3d4fb6d8102edd876ab8"/>
|
||||
<p class="NarrativeText" id="7137c1e14141fad3ad306fe68918a967">
|
||||
<p class="UncategorizedText" id="7137c1e14141fad3ad306fe68918a967">
|
||||
Recalibrating risk
|
||||
</p>
|
||||
<p class="NarrativeText" id="dbdc2d6c6381e4fa1c7b8058bf86abef">
|
||||
@ -89,69 +89,69 @@
|
||||
In terms of accidents, hydropower is the deadliest electricity generator, mostly due to collapsing dams and the consequences of flooding. The Banqiao Dam failure in 1975 led to at least 26,000 people drowning, and as many as 150,000 deaths resulting from the secondary effects of the accident. In comparison, radiation exposure following Chernobyl caused 54 deaths2, while no casualties due to radiation are likely to occur from the accident at Fukushima Daiichi.
|
||||
</p>
|
||||
<img alt="25 24.6 20 18.4 e 15 10 5 4.6 2.8 0 Coal Oil Bio m ass Natural gas 0.07 Wind 0.04 Hydropower 0.02 Solar 0.01 Nuclear " class="Image" id="c0a86e51afb417a3b057d7cf101bbed6"/>
|
||||
<h1 class="Title" id="a8706e82b3f90cffc996a24348e3b670">
|
||||
<p class="UncategorizedText" id="a8706e82b3f90cffc996a24348e3b670">
|
||||
r
|
||||
</h1>
|
||||
<h1 class="Title" id="da631c23500655c51b9311a61f55744f">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="da631c23500655c51b9311a61f55744f">
|
||||
a
|
||||
</h1>
|
||||
<h1 class="Title" id="d78a11e9e55235934c3a4922053c68e5">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="d78a11e9e55235934c3a4922053c68e5">
|
||||
e
|
||||
</h1>
|
||||
<h1 class="Title" id="8d14df8b7fd7744365fbf8e02d69415a">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="8d14df8b7fd7744365fbf8e02d69415a">
|
||||
y
|
||||
</h1>
|
||||
<h1 class="Title" id="f4df01bee1b8ffb973ac8539649c5189">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="f4df01bee1b8ffb973ac8539649c5189">
|
||||
W
|
||||
</h1>
|
||||
<h1 class="Title" id="b733cf49de269e22bed7c9883b958669">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="b733cf49de269e22bed7c9883b958669">
|
||||
T
|
||||
</h1>
|
||||
<h1 class="Title" id="c4b47d788b26c3d5c62ad462ed3ca2db">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="c4b47d788b26c3d5c62ad462ed3ca2db">
|
||||
r
|
||||
</h1>
|
||||
<h1 class="Title" id="bff4435574259239761670b31432cc8a">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="bff4435574259239761670b31432cc8a">
|
||||
e
|
||||
</h1>
|
||||
<h1 class="Title" id="8ba15a3a71eb0bb689c582098cce6730">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="8ba15a3a71eb0bb689c582098cce6730">
|
||||
p
|
||||
</h1>
|
||||
<h1 class="Title" id="5fde097ba00ad7647206ae11c721d28c">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="5fde097ba00ad7647206ae11c721d28c">
|
||||
s
|
||||
</h1>
|
||||
</p>
|
||||
<p class="UncategorizedText" id="81331ee9da4145c2651d6483696fe966">
|
||||
8
|
||||
</p>
|
||||
<h1 class="Title" id="81f1f3b9da6df38d938bf7871fa069b5">
|
||||
<p class="UncategorizedText" id="81f1f3b9da6df38d938bf7871fa069b5">
|
||||
e
|
||||
</h1>
|
||||
<h1 class="Title" id="aa4a79651a9a0087b66fcc40a2213113">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="aa4a79651a9a0087b66fcc40a2213113">
|
||||
i
|
||||
</h1>
|
||||
<h1 class="Title" id="6d1c0d05d3a424b43d9572188a76c2d4">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="6d1c0d05d3a424b43d9572188a76c2d4">
|
||||
t
|
||||
</h1>
|
||||
<h1 class="Title" id="392a17b2f3eba46f4bcf078e0b204514">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="392a17b2f3eba46f4bcf078e0b204514">
|
||||
i
|
||||
</h1>
|
||||
<h1 class="Title" id="d24a9a771e46fdd6b269f1ecaf0b5eec">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="d24a9a771e46fdd6b269f1ecaf0b5eec">
|
||||
l
|
||||
</h1>
|
||||
<h1 class="Title" id="9dc4537afa8ae0b959a542f9ba5c1e03">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="9dc4537afa8ae0b959a542f9ba5c1e03">
|
||||
S
|
||||
</h1>
|
||||
<h1 class="Title" id="919dac2487a4c860747318a132a54a72">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="919dac2487a4c860747318a132a54a72">
|
||||
a
|
||||
</h1>
|
||||
<h1 class="Title" id="04ee5d05c3fcfffd945762e803478600">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="04ee5d05c3fcfffd945762e803478600">
|
||||
t
|
||||
</h1>
|
||||
<h1 class="Title" id="63dabde368e2cf310d20a885fe50314a">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="63dabde368e2cf310d20a885fe50314a">
|
||||
a
|
||||
</h1>
|
||||
<h1 class="Title" id="796538927664e4d87312c428469428f5">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="796538927664e4d87312c428469428f5">
|
||||
F
|
||||
</h1>
|
||||
</p>
|
||||
<p class="FigureCaption" id="d1496d2dc28f6877646e280c0c47e9ab">
|
||||
Figure 3. Comparison of number of fatalities due to electricity generation, including accidents and air pollution3
|
||||
</p>
|
||||
@ -251,9 +251,9 @@
|
||||
<li class="ListItem" id="59f05d231c2357ab111ee31b0da3c25d">
|
||||
World Health Organization (2020). Road traffic injuries. Available at: https://www.who.int/news-room/fact-sheets/ detail/road-traffic-injuries
|
||||
</li>
|
||||
<h1 class="Title" id="a95a2add68d668b944cc332c88ea721e">
|
||||
<p class="UncategorizedText" id="a95a2add68d668b944cc332c88ea721e">
|
||||
i
|
||||
</h1>
|
||||
</p>
|
||||
<li class="ListItem" id="2ab37467d413d491735b002a679afdb8">
|
||||
ii BBC (2020). Plane crash fatalities fell more than 50% in 2019. Available at: https://www.bbc.co.uk/news/ business-50953712
|
||||
</li>
|
||||
|
@ -114,9 +114,9 @@
|
||||
<li class="ListItem" id="04d62ad595016d7b490dff67a00b9f35">
|
||||
import layoutparser as lp
|
||||
</li>
|
||||
<h1 class="Title" id="9d40bf1b2e2af1692f5689a1c44ab2ae">
|
||||
<p class="UncategorizedText" id="9d40bf1b2e2af1692f5689a1c44ab2ae">
|
||||
wwe
|
||||
</h1>
|
||||
</p>
|
||||
<li class="ListItem" id="cafbdebf75706654ed769cd9785e8697">
|
||||
image = cv2.imread("image_file") # load images
|
||||
</li>
|
||||
|
@ -22,24 +22,24 @@
|
||||
<div class="Header" id="e3a383b7e9439f39773c13ea769297b7">
|
||||
2 n u J 1 2 ] V C . s c [ 2 v 8 4 3 5 1 . 3 0 1 2 :
|
||||
</div>
|
||||
<h1 class="Title" id="4608f9aa33a0cab158565817b0d15743">
|
||||
<p class="UncategorizedText" id="4608f9aa33a0cab158565817b0d15743">
|
||||
v
|
||||
</h1>
|
||||
<h1 class="Title" id="6f69e5f921907e689f1a52bd84282b31">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="6f69e5f921907e689f1a52bd84282b31">
|
||||
arXiv
|
||||
</h1>
|
||||
<h1 class="Title" id="ed4e590932b333f40d0e1367b6b0e32e">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="ed4e590932b333f40d0e1367b6b0e32e">
|
||||
i
|
||||
</h1>
|
||||
<h1 class="Title" id="8cb024fb60457b7c572b167801037f75">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="8cb024fb60457b7c572b167801037f75">
|
||||
X
|
||||
</h1>
|
||||
<h1 class="Title" id="c202bdacd2daf4c52fa3a6ddd64a0728">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="c202bdacd2daf4c52fa3a6ddd64a0728">
|
||||
r
|
||||
</h1>
|
||||
<h1 class="Title" id="3db474893ec321c81ef9d1a2afd5f660">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="3db474893ec321c81ef9d1a2afd5f660">
|
||||
a
|
||||
</h1>
|
||||
</p>
|
||||
<h1 class="Title" id="d3be9e3d661e2a79f37257caa5b54d8c">
|
||||
LayoutParser: A Unified Toolkit for Deep Learning Based Document Image Analysis
|
||||
</h1>
|
||||
@ -115,28 +115,28 @@
|
||||
<div class="Footer" id="b1fa4bbd1bdda08489faab5bf3adf5cc">
|
||||
6 The number shown is obtained by specifying the search type as ‘code’.
|
||||
</div>
|
||||
<h1 class="Title" id="db639db124b6064248de0c0dc71510a4">
|
||||
<p class="UncategorizedText" id="db639db124b6064248de0c0dc71510a4">
|
||||
7 https://ocr-d.de/en/about
|
||||
</h1>
|
||||
<h1 class="Title" id="d881ce84f017d89f6e35e2bc4b133bfc">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="d881ce84f017d89f6e35e2bc4b133bfc">
|
||||
8 https://github.com/BobLd/DocumentLayoutAnalysis
|
||||
</h1>
|
||||
<h1 class="Title" id="9b96c128deddda1a32c739a2df157496">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="9b96c128deddda1a32c739a2df157496">
|
||||
9 https://github.com/leonlulu/DeepLayout
|
||||
</h1>
|
||||
<h1 class="Title" id="5cf72e821375f4480a1529bef97608ef">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="5cf72e821375f4480a1529bef97608ef">
|
||||
10 https://github.com/hpanwar08/detectron2
|
||||
</h1>
|
||||
<h1 class="Title" id="4ab94e79eedc3a7ac498aaf737ca8878">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="4ab94e79eedc3a7ac498aaf737ca8878">
|
||||
11 https://github.com/JaidedAI/EasyOCR
|
||||
</h1>
|
||||
<h1 class="Title" id="460b163c13ad7cad4fce325820a76481">
|
||||
</p>
|
||||
<p class="UncategorizedText" id="460b163c13ad7cad4fce325820a76481">
|
||||
12 https://github.com/PaddlePaddle/PaddleOCR
|
||||
</h1>
|
||||
</p>
|
||||
<p class="UncategorizedText" id="fe238f610fe610b8ce1abaa08a0e3e63">
|
||||
4
|
||||
</p>
|
||||
<p class="NarrativeText" id="92c4289ad4af7c0793e40d5662707e0a">
|
||||
<p class="UncategorizedText" id="92c4289ad4af7c0793e40d5662707e0a">
|
||||
Z. Shen et al.
|
||||
</p>
|
||||
<img alt="Efficient Data Annotation Model Customization Document Images Community Platform ‘a >) ¥ DIA Model Hub i .) Customized Model Training] == | Layout Detection Models | ——= DIA Pipeline Sharing ~ OCR Module = { Layout Data stuctue ) = (storage Visualization VY" class="Image" id="642416e5d6c99219b16dbba6f72392c5"/>
|
||||
@ -263,7 +263,7 @@
|
||||
<p class="UncategorizedText" id="676118b62c2261113a23a610c2ac50cb">
|
||||
6
|
||||
</p>
|
||||
<p class="NarrativeText" id="710ac103981c6363195774b02ee582d4">
|
||||
<p class="UncategorizedText" id="710ac103981c6363195774b02ee582d4">
|
||||
Z. Shen et al.
|
||||
</p>
|
||||
<img alt='- ° . 3 a a 4 a 3 oo er ‘ 2 § 8 a 8 3 3 ‘ £ 4 A g a 9 ‘ 3 ¥ Coordinate g 4 5 3 + § 3 H Extra Features [O=") [Bo] eaing i Text | | Type | | ower ° & a ¢ o [ coordinatel textblock1, 3 3 ’ g Q 3 , textblock2 , layoutl ] 4 q ® A list of the layout elements Ff' class="Image" id="6eb2bb6ca50b3be177565f9ff546bce8"/>
|
||||
@ -303,7 +303,7 @@
|
||||
<p class="NarrativeText" id="fa023ccf2ac1042ef254ecf47cc592ca">
|
||||
LayoutParser also comes with a DL-based CNN-RNN OCR model [6] trained with the Connectionist Temporal Classification (CTC) loss [10]. It can be used like the other OCR modules, and can be easily trained on customized datasets.
|
||||
</p>
|
||||
<p class="NarrativeText" id="a2a0a2ef0279f0710f3cd34474ca8645">
|
||||
<p class="UncategorizedText" id="a2a0a2ef0279f0710f3cd34474ca8645">
|
||||
13 This is also available in the LayoutParser documentation pages.
|
||||
</p>
|
||||
<li class="ListItem" id="5498a550b5367fa8dc935013956d09fa">
|
||||
|
@ -63,7 +63,7 @@
|
||||
"page_number": 1
|
||||
},
|
||||
"text": "Data in Brief",
|
||||
"type": "Title"
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "97e80c6e7dc2754c9083b263ff65039e",
|
||||
@ -148,7 +148,7 @@
|
||||
"page_number": 1
|
||||
},
|
||||
"text": "(Jee",
|
||||
"type": "Title"
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "bddd1cbc864e9b44cc0715a1cccf8dbc",
|
||||
@ -187,7 +187,7 @@
|
||||
"page_number": 1
|
||||
},
|
||||
"text": "a r t i c l e i n f o",
|
||||
"type": "NarrativeText"
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "b9e48f235de5b531427187eb6ea135fe",
|
||||
@ -200,7 +200,7 @@
|
||||
"page_number": 1
|
||||
},
|
||||
"text": "a b s t r a c t",
|
||||
"type": "NarrativeText"
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "911bfead9b546998812e2d1d615ecc87",
|
||||
@ -432,7 +432,7 @@
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "© Data presented here provide optimum conditions of waste material as inhibitor for stainless steel",
|
||||
"type": "NarrativeText"
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "afed004de4c50d761640b6c18729a988",
|
||||
@ -458,7 +458,7 @@
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "© The data obtained for the inhibition of waste product (egg shell powder) on stainless steel Type 316",
|
||||
"type": "NarrativeText"
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "cb6e8acb9c24820b59f8973cc236ef35",
|
||||
@ -484,7 +484,7 @@
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "© The data can be used to examine the relationship between the process variable as it affect the",
|
||||
"type": "NarrativeText"
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "e1f7e635d8739a97d8d0000ba8004f61",
|
||||
@ -744,7 +744,7 @@
|
||||
"page_number": 4
|
||||
},
|
||||
"text": "rate (mm/year)",
|
||||
"type": "Title"
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "3a5534c2aafc2d8a4c0b65d530d00ab3",
|
||||
@ -1134,7 +1134,7 @@
|
||||
"page_number": 6
|
||||
},
|
||||
"text": "ð2Þ",
|
||||
"type": "Title"
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "cff55ae1916232dbda5239f59c897cb9",
|
||||
@ -1147,7 +1147,7 @@
|
||||
"page_number": 6
|
||||
},
|
||||
"text": "ð3Þ",
|
||||
"type": "Title"
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "e40c3ee561b10ca5b7a76900c8d5b263",
|
||||
|
@ -63,7 +63,7 @@
|
||||
"page_number": 1
|
||||
},
|
||||
"text": "Data in Brief",
|
||||
"type": "Title"
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "c1b3d4f53698b892fcc23fc10a72e6fb",
|
||||
@ -148,7 +148,7 @@
|
||||
"page_number": 1
|
||||
},
|
||||
"text": "(eee",
|
||||
"type": "Title"
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "0cda4eb20070fdf01ec0d47b2a550241",
|
||||
@ -252,7 +252,7 @@
|
||||
"page_number": 1
|
||||
},
|
||||
"text": "Australia",
|
||||
"type": "Title"
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "85875ebbc1de554e92edc54674add1d5",
|
||||
@ -278,7 +278,7 @@
|
||||
"page_number": 1
|
||||
},
|
||||
"text": "a r t i c l e i n f o",
|
||||
"type": "NarrativeText"
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "4f3f69dd17ddae776c656ec73d9837ae",
|
||||
@ -291,7 +291,7 @@
|
||||
"page_number": 1
|
||||
},
|
||||
"text": "a b s t r a c t",
|
||||
"type": "NarrativeText"
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "34522460857b10c63d8c2c8d2fbb3087",
|
||||
@ -534,7 +534,7 @@
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "e All the problem instances are available for use without any restrictions.",
|
||||
"type": "NarrativeText"
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "d401597b8ff2854bfb89f2833d02a763",
|
||||
@ -560,7 +560,7 @@
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "© The dataset includes a program that can generate similar problem instances of different sizes.",
|
||||
"type": "NarrativeText"
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "fb765d6762e6a423cb8b9dab27359732",
|
||||
@ -606,7 +606,7 @@
|
||||
"page_number": 2
|
||||
},
|
||||
"text": "The number of depots mð",
|
||||
"type": "Title"
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "320f6d28582c354d35673c2a4119851f",
|
||||
@ -892,7 +892,7 @@
|
||||
"page_number": 3
|
||||
},
|
||||
"text": "Possible empty travels",
|
||||
"type": "Title"
|
||||
"type": "UncategorizedText"
|
||||
},
|
||||
{
|
||||
"element_id": "fa23407a7c3c99ae3b6fb79034698807",
|
||||
|
@ -309,6 +309,6 @@
|
||||
"page_number": 1
|
||||
},
|
||||
"text": "AQ3",
|
||||
"type": "Title"
|
||||
"type": "UncategorizedText"
|
||||
}
|
||||
]
|
@ -186,7 +186,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "7137c1e14141fad3ad306fe68918a967",
|
||||
"text": "Recalibrating risk",
|
||||
"metadata": {
|
||||
@ -2790,7 +2790,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "a8706e82b3f90cffc996a24348e3b670",
|
||||
"text": "r",
|
||||
"metadata": {
|
||||
@ -2883,7 +2883,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "da631c23500655c51b9311a61f55744f",
|
||||
"text": "a",
|
||||
"metadata": {
|
||||
@ -2976,7 +2976,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "d78a11e9e55235934c3a4922053c68e5",
|
||||
"text": "e",
|
||||
"metadata": {
|
||||
@ -3069,7 +3069,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "8d14df8b7fd7744365fbf8e02d69415a",
|
||||
"text": "y",
|
||||
"metadata": {
|
||||
@ -3162,7 +3162,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "f4df01bee1b8ffb973ac8539649c5189",
|
||||
"text": "W",
|
||||
"metadata": {
|
||||
@ -3255,7 +3255,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "b733cf49de269e22bed7c9883b958669",
|
||||
"text": "T",
|
||||
"metadata": {
|
||||
@ -3348,7 +3348,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "c4b47d788b26c3d5c62ad462ed3ca2db",
|
||||
"text": "r",
|
||||
"metadata": {
|
||||
@ -3441,7 +3441,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "bff4435574259239761670b31432cc8a",
|
||||
"text": "e",
|
||||
"metadata": {
|
||||
@ -3534,7 +3534,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "8ba15a3a71eb0bb689c582098cce6730",
|
||||
"text": "p",
|
||||
"metadata": {
|
||||
@ -3627,7 +3627,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "5fde097ba00ad7647206ae11c721d28c",
|
||||
"text": "s",
|
||||
"metadata": {
|
||||
@ -3813,7 +3813,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "81f1f3b9da6df38d938bf7871fa069b5",
|
||||
"text": "e",
|
||||
"metadata": {
|
||||
@ -3906,7 +3906,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "aa4a79651a9a0087b66fcc40a2213113",
|
||||
"text": "i",
|
||||
"metadata": {
|
||||
@ -3999,7 +3999,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "6d1c0d05d3a424b43d9572188a76c2d4",
|
||||
"text": "t",
|
||||
"metadata": {
|
||||
@ -4092,7 +4092,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "392a17b2f3eba46f4bcf078e0b204514",
|
||||
"text": "i",
|
||||
"metadata": {
|
||||
@ -4185,7 +4185,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "d24a9a771e46fdd6b269f1ecaf0b5eec",
|
||||
"text": "l",
|
||||
"metadata": {
|
||||
@ -4278,7 +4278,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "9dc4537afa8ae0b959a542f9ba5c1e03",
|
||||
"text": "S",
|
||||
"metadata": {
|
||||
@ -4371,7 +4371,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "919dac2487a4c860747318a132a54a72",
|
||||
"text": "a",
|
||||
"metadata": {
|
||||
@ -4464,7 +4464,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "04ee5d05c3fcfffd945762e803478600",
|
||||
"text": "t",
|
||||
"metadata": {
|
||||
@ -4557,7 +4557,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "63dabde368e2cf310d20a885fe50314a",
|
||||
"text": "a",
|
||||
"metadata": {
|
||||
@ -4650,7 +4650,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "796538927664e4d87312c428469428f5",
|
||||
"text": "F",
|
||||
"metadata": {
|
||||
@ -8184,7 +8184,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "a95a2add68d668b944cc332c88ea721e",
|
||||
"text": "i",
|
||||
"metadata": {
|
||||
|
@ -177,7 +177,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "9d40bf1b2e2af1692f5689a1c44ab2ae",
|
||||
"text": "wwe",
|
||||
"metadata": {
|
||||
|
@ -110,7 +110,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "4608f9aa33a0cab158565817b0d15743",
|
||||
"text": "v",
|
||||
"metadata": {
|
||||
@ -132,7 +132,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "6f69e5f921907e689f1a52bd84282b31",
|
||||
"text": "arXiv",
|
||||
"metadata": {
|
||||
@ -154,7 +154,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "ed4e590932b333f40d0e1367b6b0e32e",
|
||||
"text": "i",
|
||||
"metadata": {
|
||||
@ -176,7 +176,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "8cb024fb60457b7c572b167801037f75",
|
||||
"text": "X",
|
||||
"metadata": {
|
||||
@ -198,7 +198,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "c202bdacd2daf4c52fa3a6ddd64a0728",
|
||||
"text": "r",
|
||||
"metadata": {
|
||||
@ -220,7 +220,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "3db474893ec321c81ef9d1a2afd5f660",
|
||||
"text": "a",
|
||||
"metadata": {
|
||||
@ -1022,7 +1022,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "db639db124b6064248de0c0dc71510a4",
|
||||
"text": "7 https://ocr-d.de/en/about",
|
||||
"metadata": {
|
||||
@ -1044,7 +1044,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "d881ce84f017d89f6e35e2bc4b133bfc",
|
||||
"text": "8 https://github.com/BobLd/DocumentLayoutAnalysis",
|
||||
"metadata": {
|
||||
@ -1066,7 +1066,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "9b96c128deddda1a32c739a2df157496",
|
||||
"text": "9 https://github.com/leonlulu/DeepLayout",
|
||||
"metadata": {
|
||||
@ -1088,7 +1088,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "5cf72e821375f4480a1529bef97608ef",
|
||||
"text": "10 https://github.com/hpanwar08/detectron2",
|
||||
"metadata": {
|
||||
@ -1110,7 +1110,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "4ab94e79eedc3a7ac498aaf737ca8878",
|
||||
"text": "11 https://github.com/JaidedAI/EasyOCR",
|
||||
"metadata": {
|
||||
@ -1132,7 +1132,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "460b163c13ad7cad4fce325820a76481",
|
||||
"text": "12 https://github.com/PaddlePaddle/PaddleOCR",
|
||||
"metadata": {
|
||||
@ -1176,7 +1176,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "92c4289ad4af7c0793e40d5662707e0a",
|
||||
"text": "Z. Shen et al.",
|
||||
"metadata": {
|
||||
@ -1739,7 +1739,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "710ac103981c6363195774b02ee582d4",
|
||||
"text": "Z. Shen et al.",
|
||||
"metadata": {
|
||||
@ -2083,7 +2083,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"type": "UncategorizedText",
|
||||
"element_id": "a2a0a2ef0279f0710f3cd34474ca8645",
|
||||
"text": "13 This is also available in the LayoutParser documentation pages.",
|
||||
"metadata": {
|
||||
|
@ -1 +1 @@
|
||||
__version__ = "0.17.6-dev0" # pragma: no cover
|
||||
__version__ = "0.17.6-dev1" # pragma: no cover
|
||||
|
@ -362,7 +362,10 @@ def partition_pdf_or_image(
|
||||
table_ocr_agent=table_ocr_agent,
|
||||
**kwargs,
|
||||
)
|
||||
out_elements = _process_uncategorized_text_elements(elements)
|
||||
# NOTE(crag): do not call _process_uncategorized_text_elements here, because
|
||||
# extracted elements (which are text blocks outside of OD-determined blocks)
|
||||
# are likely not Titles and should not be identified as such.
|
||||
return elements
|
||||
|
||||
elif strategy == PartitionStrategy.FAST:
|
||||
out_elements = _partition_pdf_with_pdfparser(
|
||||
|
Loading…
x
Reference in New Issue
Block a user