chore: pass ocr_mode in partition_pdf_or_image (#1154)

Set to individual_blocks for now to work around [this
bug](https://github.com/Unstructured-IO/unstructured-inference/issues/179).

I verified by printing the current ocr_mode in inference. The
`entire_page` default is overridden.

---------

Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com>
Co-authored-by: awalker4 <awalker4@users.noreply.github.com>
This commit is contained in:
Austin Walker 2023-08-18 16:59:08 -04:00 committed by GitHub
parent 1456f06b2d
commit dd243b4fd9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 231 additions and 173 deletions

View File

@ -1,7 +1,8 @@
## 0.10.4-dev0
## 0.10.4
### Enhancements
* Adds ability to reuse connections per process in unstructured-ingest
* Pass ocr_mode in partition_pdf and set the default back to `individual_blocks` for now
### Features

View File

@ -177,6 +177,7 @@ def test_partition_pdf_with_model_name_env_var(
filename,
is_image=False,
ocr_languages="eng",
ocr_mode="individual_blocks",
extract_tables=False,
model_name="checkbox",
)
@ -197,6 +198,7 @@ def test_partition_pdf_with_model_name(
filename,
is_image=False,
ocr_languages="eng",
ocr_mode="individual_blocks",
extract_tables=False,
model_name="checkbox",
)
@ -402,6 +404,7 @@ def test_partition_pdf_with_dpi():
filename,
is_image=False,
ocr_languages="eng",
ocr_mode="individual_blocks",
extract_tables=False,
model_name=None,
pdf_image_dpi=100,

View File

@ -1,13 +1,13 @@
[
{
"type": "Title",
"element_id": "88591a76b54e47215c0827ae8838ec13",
"element_id": "05ca56aec1964bf626b4012a5b4a7c55",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 1
},
"text": "Instructions for Form 3115 (Rev. November 1987)"
"text": "SERGE EECHE PAW VIMY wuUl VivaInstructions for Form 3115(Rev. November 1987)BP nw wo BE oe oe et a ee fia fl ae ee iw OM ee eee Le ye RA. LL. J"
},
{
"type": "NarrativeText",
@ -31,7 +31,7 @@
},
{
"type": "ListItem",
"element_id": "14e8cc92f6875b7562c7b37b363a4271",
"element_id": "e8d040fcadaf595b3624579225028b80",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -121,7 +121,7 @@
},
{
"type": "ListItem",
"element_id": "2beea67e67a36c0708e98cba96d1779f",
"element_id": "64b044a845d6a903604d0edc68d5c8d1",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -131,7 +131,7 @@
},
{
"type": "ListItem",
"element_id": "5157d731aa6a97c9b166799db2295bce",
"element_id": "aed90f3480456a62ac47f6cf5c5e526a",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -151,7 +151,7 @@
},
{
"type": "ListItem",
"element_id": "34b66452ca63c465c69d849e4acf6d46",
"element_id": "fdf216c15df57c2716f392d4cc8b2fbe",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -171,13 +171,33 @@
},
{
"type": "ListItem",
"element_id": "353b10e26575591f537f9718242cd574",
"element_id": "4df2762fd804bd5859df0774d1d51796",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 1
},
"text": ") the Inclusion of income attributable to the sale or furnishing of utility services no later than the year In which the services were provided to customers (Act section"
"text": ") th"
},
{
"type": "ListItem",
"element_id": "6ea7ec2e8449de6c5c662bb59e333fa7",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 1
},
"text": "nclusion of income attributable to the sale orfurnishing of utility services no later than the yea"
},
{
"type": "ListItem",
"element_id": "2a639c819f6663cf3a9940f3528b3205",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 1
},
"text": "n which the services were provided to customers(Act section"
},
{
"type": "ListItem",
@ -191,7 +211,7 @@
},
{
"type": "ListItem",
"element_id": "13f155c0754434406190f3cf49c82c3c",
"element_id": "1a278d181295c8d1b6bfd86baca09eaf",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -211,7 +231,7 @@
},
{
"type": "ListItem",
"element_id": "178d6933ed193747b1c4aa1c048e7f94",
"element_id": "e9cae276abe56d0cb30fcf798f0c134e",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -241,13 +261,23 @@
},
{
"type": "ListItem",
"element_id": "f09181ea8ac5d177b8d2f79bbae03f18",
"element_id": "d4b18f9d6e11f561661bef4f8bc5fb7c",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 1
},
"text": "Signature Individuals. —An individual desiring the change should sign the application. If the application pertains to a husband and wife filing a joint income tax return, the names of both should appear in the heading and both should sign. Partnerships.—The form should be signed with the partnership name followed by the signature of one of the general partners and the words “General Partner.” Corporations, cooperatives, and insurance companies.—The form should show the name of the corporation, cooperative, or insurance company and the signature of the president, vice president, treasurer, assistant treasurer, or chief accounting officer (such as tax officer) authorized to sign, and his or her official title. Receivers, trustees, or assignees must sign any application they are required to file. For a subsidiary corporation filing a consolidated return with its parent, the form should be signed by an officer of the parent corporation. Fiduciaries.—The-form should show the name of the estate or trust and be signed by the fiduciary, personal representative, executor, executrix, administrator, administratrix, etc., having legal authority to sign, and his or her title. Preparer other than partner, officer, etc.—The signature of the individual preparing the application should appear in the space provided on page"
"text": "Signatur"
},
{
"type": "ListItem",
"element_id": "6e9dc7d49fe15e842fbd7373af8d020a",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 1
},
"text": "ndividuals. —An individual desiring the changeshould sign the application. If the applicationpertains to a husband and wife filing a jointincome tax return, the names of both shouldappear in the heading and both should sign.Partnerships.—The form should be signed withthe partnership name followed by the signatureof one of the general partners and the words“General Partner.”Corporations, cooperatives, and insurancecompanies.—The form should show the name ofthe corporation, cooperative, or insurancecompany and the signature of the president, vicepresident, treasurer, assistant treasurer, or chiefaccounting officer (such as tax officer) authorizedto sign, and his or her official title. Receivers,trustees, or assignees must sign any applicationthey are required to file. For a subsidiarycorporation filing a consolidated return with itsparent, the form should be signed by an officer ofthe parent corporation.Fiduciaries.—The-form should show the nameof the estate or trust and be signed by thefiduciary, personal representative, executor,executrix, administrator, administratrix, etc.,having legal authority to sign, and his or her title.Preparer other than partner, officer, etc.—Thesignature of the individual preparing theapplication should appear in the space providedon page"
},
{
"type": "ListItem",
@ -261,7 +291,7 @@
},
{
"type": "NarrativeText",
"element_id": "828767cbc922e731b59894afba55fe10",
"element_id": "989ff7b05e9807cf0865ac828552f045",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -291,13 +321,13 @@
},
{
"type": "NarrativeText",
"element_id": "84e7e32f584e2ee9f47ba593bf86c559",
"element_id": "4a52253d27bd51d65285045e1e3e3cf1",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 1
},
"text": "Generally, applicants must complete Section A. In addition, complete the appropriate sections (B-1 through H) for which a change Is desired."
"text": "Generally, applicants must complete SectionA. In addition, complete the appropriate sections(B-1 through H) for which a change Is desired.Vinee mised evissn all palecsninte Sante ime!"
},
{
"type": "Title",
@ -311,7 +341,7 @@
},
{
"type": "Title",
"element_id": "af8bdf713f162b09567c8d1a3a2d4de7",
"element_id": "476eb0569b23e73460f08455530f0d4b",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -321,13 +351,13 @@
},
{
"type": "Title",
"element_id": "1df7107903f249d938fbf3710f50283a",
"element_id": "71b6d7f72c57641ea91dd411abdc9959",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 1
},
"text": "If the individual or firm is also authorized to represent the applicant before the IRS, receive a copy of the requested ruling, or perform any other act(s), the power of attorney must reflect such authorization(s)."
"text": "MIT RGSSIf the individual or firm is also authorized torepresent the applicant before the IRS, receivea copy of the requested ruling, or perform anyother act(s), the power of attorney must reflectsuch authorization(s)."
},
{
"type": "Title",
@ -341,7 +371,7 @@
},
{
"type": "Title",
"element_id": "242a9dba10a04654d4adef9c58ff96f6",
"element_id": "cd746731c7a892b0087828c0801c022b",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -351,7 +381,7 @@
},
{
"type": "Title",
"element_id": "58703de56debc34a1d68e6ed6f8fd067",
"element_id": "f0a757884fb918f704c1d90b762f5894",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -361,13 +391,13 @@
},
{
"type": "Title",
"element_id": "12f877f0bd47f9b761ed7e74be1afacd",
"element_id": "d1e074ec4e3a00f9e646b34b3ff94101",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 1
},
"text": "Note: /f this form is being filed in accordance with Rev. Proc. 74-11, see Section G below."
"text": "GIL SGNTY GRPNVaNUT.Note: /f this form is being filed in accordancewith Rev. Proc. 74-11, see Section G below."
},
{
"type": "Title",
@ -381,13 +411,13 @@
},
{
"type": "NarrativeText",
"element_id": "eb076cfd3d47e546c28611750afedc49",
"element_id": "6e1d51f920ee67d5cfb7a2600d4cb494",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 1
},
"text": "Place for Filing and Late Applications. Instead, attach Form 3115 to your income tax return for the year of change; do not file it separately. Also include on a separate statement accompanying the Form 3115 the period over which the section 481(a) adjustment will be taken into account and"
"text": "Disregard the instructions under Time andPlace for Filing and Late Applications. Instead,attach Form 3115 to your income tax return forthe year of change; do not file it separately. Alsoinclude on a separate statement accompanyingthe Form 3115 the period over which the section481(a) adjustment will be taken into account andthe basis for that conclusion. Identify the"
},
{
"type": "Title",
@ -401,7 +431,7 @@
},
{
"type": "NarrativeText",
"element_id": "742730130f9c14403ad272eec208a456",
"element_id": "e054f522926ec7602c8380a8d7eb3296",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -431,17 +461,17 @@
},
{
"type": "ListItem",
"element_id": "b9c2a964cd107c5155ef70e5b235a05d",
"element_id": "f27e09e405abe4f2f2a9a28fad38974d",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 2
},
"text": "(f) provides that the term “long-term contract” means any contract for the manufacturing, building, installation, or construction of property that is not completed within the tax year in which it"
"text": "(f) provides that the term “long-terncontract” means any contract for themanufacturing, building, installation, orconstruction of property that is not completedwithin the tax year in which it"
},
{
"type": "ListItem",
"element_id": "8e69cd6874d876dce416a44e695b58eb",
"element_id": "cf29164f7821b3a6775b230f5e247551",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -451,7 +481,7 @@
},
{
"type": "ListItem",
"element_id": "1b69bc9514700ed89e0af2872cbb95c8",
"element_id": "dd39fef35cb957547bd3efad8b3d6557",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -461,7 +491,7 @@
},
{
"type": "ListItem",
"element_id": "aaa7abdc10628a69ab04fcea8ecdc29d",
"element_id": "ae214de0f0455b7dc7212c1f815d65d4",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -471,33 +501,43 @@
},
{
"type": "ListItem",
"element_id": "86bbefb59cb32bc6b6ff1b92e0b76d6f",
"element_id": "0fda0b69a885bf1425cddd8675d70be1",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 2
},
"text": "calendar months to complete."
"text": "calendar monthto complete."
},
{
"type": "NarrativeText",
"element_id": "1bbe995811e9fd4c3ce1b218cb641f4e",
"element_id": "c6b3c248ee1c921f6196a7e5cd870d67",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 2
},
"text": "(1) Gives your best estimate of the percentage of the section 481(a) adjustment that would have been required if the requested change had been made for each of the 3 preceding years; and"
"text": "(1) Gives your best estimate of the percentageof the section 481(a) adjustment that would havebeen required if the requested change had beenmade for each of the 3 preceding years; andVAN C."
},
{
"type": "ListItem",
"element_id": "91057a4a80779d62b06d27fdce5da42c",
"element_id": "99618a049629ef4f50aeafc1a365ad75",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 2
},
"text": "All long-term contracts entered into after February"
"text": "AMM RIM AIAll long-term contracts entered into afte"
},
{
"type": "ListItem",
"element_id": "f7ca8476d7c8a3ac84efbd8699f97f87",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 2
},
"text": "ebruary"
},
{
"type": "ListItem",
@ -511,7 +551,7 @@
},
{
"type": "ListItem",
"element_id": "7613695d576752ab22ae7c02866cf1e3",
"element_id": "b60ab3f42291035b6184fde93a3b9230",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -521,7 +561,7 @@
},
{
"type": "ListItem",
"element_id": "a288051b2eda0f2b8d6b45647c73a1ad",
"element_id": "93bcd9d786ff021bed0fe0c9d71fc976",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -531,7 +571,7 @@
},
{
"type": "ListItem",
"element_id": "f2923844fb3e4992f1c6ddd808867d96",
"element_id": "9ff4779aaab33521b8398aeb72f613c0",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -561,7 +601,7 @@
},
{
"type": "ListItem",
"element_id": "4df00d9659b3bfaac5990114275c4bf5",
"element_id": "1e970967cee7e2aa31666b6108587f35",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -571,7 +611,7 @@
},
{
"type": "ListItem",
"element_id": "dcf589bb37d079ecce4b375abc332606",
"element_id": "070baf413b0aca84064c63f5afaf041e",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -581,23 +621,23 @@
},
{
"type": "NarrativeText",
"element_id": "2de8f0b5003bcb8c12a4dc59c8e1f740",
"element_id": "6db00b1816c20e862ee46d0de12e17fa",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 2
},
"text": "See section 5.06(2) of Rev. Proc. 84-74 for the required perjury statement that must be attached."
"text": "—_———eeeerorT eeeSee section 5.06(2) of Rev. Proc. 84-74 for therequired perjury statement that must beattached."
},
{
"type": "NarrativeText",
"element_id": "751abc8c6a0fa412c3e8c18345f57f95",
"element_id": "db1cb1f9a7219a27df1875b2cfd5475c",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 2
},
"text": "Item 13, page 2.—Insert the actual number of tax years. Use of the term “since inception” 1s not acceptable. However, “more than 6 years” Is acceptable."
"text": "TE RIG TINEME FN eke!Item 13, page 2.—Insert the actual number oftax years. Use of the term “since inception” 1s notacceptable. However, “more than 6 years” Isacceptable."
},
{
"type": "Title",
@ -641,7 +681,7 @@
},
{
"type": "Title",
"element_id": "4688916bf1d6b205af02a0e954156688",
"element_id": "6ccbf93cd42f38f04abdba8a103c8350",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -651,7 +691,7 @@
},
{
"type": "NarrativeText",
"element_id": "aaf93c2be8f4f2db87bd760783fedfa5",
"element_id": "851830b0996c633165de287a96eb0aa4",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -671,7 +711,7 @@
},
{
"type": "NarrativeText",
"element_id": "e5bed7fe04dd22cabe5e5c0362d37743",
"element_id": "54f2708b4cfb39e6586ec74244fe7f1e",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -681,17 +721,17 @@
},
{
"type": "ListItem",
"element_id": "32ebb1abcc1c601ceb9c4e3c4faba0ca",
"element_id": "ca6f93345af1b79e8253b00b046b4403",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 2
},
"text": "("
"text": "LEXCEPt TOF Lax SNENETS) GOES NOL apply LO-——("
},
{
"type": "ListItem",
"element_id": "e388a9c123531db35a336ca587dc1a78",
"element_id": "daaf36cd7c9f373f7192a7f76716cfc4",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -721,7 +761,7 @@
},
{
"type": "ListItem",
"element_id": "124f8e567bb2fc32647f9a44201e0688",
"element_id": "920fa4651462da72706415162fc8bc85",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -741,7 +781,7 @@
},
{
"type": "ListItem",
"element_id": "91621b3a2068ab97aafa195a272a663e",
"element_id": "b4537ecf064e370911fbd07081bd5bc7",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -751,17 +791,17 @@
},
{
"type": "ListItem",
"element_id": "70360b86614c25f67ca8959ac00d5389",
"element_id": "883d3cfcbe67e5a5f2ba5cf430c5129e",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 2
},
"text": "requires certain C corporations and partnerships with a C corporation as a partner to use the accrual method."
"text": "requires certain Ccorporations and partnerships with a Ccorporation as a partner to use the accrualmethod.YAN Nal find maccamalpnansan pnenapapiocna"
},
{
"type": "NarrativeText",
"element_id": "86d11953bb813a770ecd242ff97d4e43",
"element_id": "1fbc7ab18ebbfd6edfcbe19b4d5a84cd",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
@ -781,12 +821,12 @@
},
{
"type": "NarrativeText",
"element_id": "0607edfa2419dd0cdc80f457872fe238",
"element_id": "3e5744a95d40d31aed481a28b3859577",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 2
},
"text": "(2) Qualified personal service corporations. — A “qualified personal service corporation” is any corporation: (a) substantially all of the activities of which involve the performance of services in the fields of health, law,"
"text": "(2) Qualified personal service corporations. —A “qualified personal service corporation” is anycorporation: (a) substantially all of the activitiesof which involve the performance of services inthe fields of health, law, engineering,"
}
]

View File

@ -1,17 +1,17 @@
[
{
"type": "Title",
"element_id": "88591a76b54e47215c0827ae8838ec13",
"element_id": "0c4e18d78e721c8179f3946b75b17d15",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "Instructions for Form 3115 (Rev. November 1987)"
"text": "Instructions for Form 3115 (Rev. November 1987) Annlicatinn far Chance in Accounting Mathond"
},
{
"type": "NarrativeText",
"element_id": "766cf1d1243ef2cdbb0db5ad32d7f9c9",
"element_id": "41f3d9c83b2b4679195c9796134fd8f5",
"metadata": {
"data_source": {},
"filetype": "image/png",
@ -21,7 +21,7 @@
},
{
"type": "ListItem",
"element_id": "36a565493a214d3f7e7f24794c1dc7f4",
"element_id": "97968e4ba14bd2d082a70ec61ef2d9b1",
"metadata": {
"data_source": {},
"filetype": "image/png",
@ -111,7 +111,7 @@
},
{
"type": "ListItem",
"element_id": "59bc2945a7f606bd5078bac3bc1199d4",
"element_id": "f0d2beb7f43493694a91137e8e65b5f3",
"metadata": {
"data_source": {},
"filetype": "image/png",
@ -121,7 +121,7 @@
},
{
"type": "ListItem",
"element_id": "5157d731aa6a97c9b166799db2295bce",
"element_id": "13f2a282f705590fbe7b6ce15b08862a",
"metadata": {
"data_source": {},
"filetype": "image/png",
@ -141,7 +141,7 @@
},
{
"type": "ListItem",
"element_id": "34b66452ca63c465c69d849e4acf6d46",
"element_id": "9820f79275e683f5afe3f2f1283de4ca",
"metadata": {
"data_source": {},
"filetype": "image/png",
@ -161,7 +161,7 @@
},
{
"type": "ListItem",
"element_id": "b0fa5aaff0cee8574822dd8ac6537c06",
"element_id": "a98378f4a88db65dff42b7d8bd75be92",
"metadata": {
"data_source": {},
"filetype": "image/png",
@ -181,7 +181,7 @@
},
{
"type": "ListItem",
"element_id": "13f155c0754434406190f3cf49c82c3c",
"element_id": "3cb57c50002187a715e1c5048e643c65",
"metadata": {
"data_source": {},
"filetype": "image/png",
@ -201,33 +201,33 @@
},
{
"type": "ListItem",
"element_id": "178d6933ed193747b1c4aa1c048e7f94",
"element_id": "beeb50db70ce1aa76813cce98e46bd56",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "for these changes."
"text": "for these changes. Tb od Db bee Cl"
},
{
"type": "NarrativeText",
"element_id": "7685df2334a5f6c8c8099dea61a8f1b4",
"element_id": "640a100da1a3bee6f1f134c51a2c8648",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "Long-term contracts.—If you are required to change your method of accounting for long-term contracts under section 460, see Notice 87-61 (9/21/87), 1987-38 IRB 40, for the notification procedures that must be followed."
"text": "Long-term contracts.—If you are required to change your method of accounting for long-term contracts under section 460, see Notice 87-61 (9/21/87), 1987-38 IRB 40, for the notification procedures that must be followed"
},
{
"type": "Title",
"element_id": "61ed58fa51293f429f87e8cf1896c9e4",
"element_id": "a232d246e22a4f6bb8dcab62cffb2567",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "Paperwork Reduction Act Notice"
"text": "Paperwork Reduction Act Notice We ack for thic infarenatinn te marry mye the."
},
{
"type": "Title",
@ -241,27 +241,37 @@
},
{
"type": "ListItem",
"element_id": "5f8051f8010896bab02aaf784c04ae02",
"element_id": "58f1649a32eda8b8c513e51a209666a6",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "Individuals.—An individual desiring the change should sign the application. Ifthe application pertains to a husband and wife filing a joint Income tax return, the names of both should appear in the heading and both should sign Partnerships.—The form should be signed with the partnership name followed by the signature of one of the general partners and the words “General Partner.” Corporations, cooperatives, and insurance companies.—The form should show the name of the corporation, cooperative, or insurance Company and the signature of the president, vice president, treasurer, assistant treasurer, or chief accounting officer (such as tax officer) authorized tosign, and his or her official title. Receivers, trustees, or assignees must sign any application they are required to file, For a subsidiary corporation filing a consolidated return with its parent, the form should be signed by an officer of the parent corporation, Fiduciaries.—The-form should show the name of the estate or trust and be signed by the fiduciary, personal representative, executor, executrix, administrator, administratrx, etc, having legal authority to'sign, and his or her ttle. Preparer other than partner, officer, etc.—The signature of the individual preparing the application should appear in the space provided on page"
"text": "Signature Individuals.—An individual desiring the change should sign the application. Ifthe application pertains to a husband and wife filing a joint Income tax return, the names of both should appear in the heading and both should sign Partnerships.—The form should be signed with the partnership name followed by the signature of one of the general partners and the words “General Partner.” Corporations, cooperatives, and insurance companies.—The form should show the name of the corporation, cooperative, or insurance Company and the signature of the president, vice president, treasurer, assistant treasurer, or chief accounting officer (such as tax officer) authorized tosign, and his or her official title. Receivers, trustees, or assignees must sign any application they are required to file, For a subsidiary corporation filing a consolidated return with its parent, the form should be signed by an officer of the parent corporation, Fiduciaries.—The-form should show the name of the estate or trust and be signed by the fiduciary, personal representative, executor, executrix, administrator, administratrx, etc, having legal authority to'sign, and his or her ttle. Preparer other than partner, officer, etc.—The signature of the individual preparing the application should appear in the space provided on page"
},
{
"type": "ListItem",
"element_id": "586e989b479e4362ebe28a6954c1427b",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "If the individual or firm is also authorized to"
},
{
"type": "NarrativeText",
"element_id": "4660422c06dddc914ab634c5e4045dec",
"element_id": "446ccb7d96fea659d50aef8a6dd670df",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "We ask for this information to carry out the Internal Revenue laws of the United States. We need it to ensure that taxpayers are complying with these laws an¢ to allow us to figure and collect the nght amount of tax. You are required to give us this information."
"text": "We ask for this information to carry out the Internal Revenue laws of the United States. We need it to ensure that taxpayers are complying with these laws an¢ to allow us to figure and collect the right amount of tax. You are required to give us this information,"
},
{
"type": "Title",
"element_id": "a1547a4ed1611eee44b15e99120fb978",
"element_id": "226fa83297914d5195e002508d61fb1d",
"metadata": {
"data_source": {},
"filetype": "image/png",
@ -271,77 +281,77 @@
},
{
"type": "Title",
"element_id": "68a3289177b49b285e133a5267eb355f",
"element_id": "f0e951e5bcb4a6070fa6672b37822348",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "Purpose of Form"
"text": "Purpose of Form Cin bce Secon te cece cget."
},
{
"type": "NarrativeText",
"element_id": "f9b8e17da7a31507773f78959378e09c",
"element_id": "5e5451e052baf894b2bdad4132f6cd2f",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "File this form to request a change in your accounting method, including the accounting treatment of any item. if you are requesting 2 change in accounting period, use Form 1128, Application for Change in Accounting Period. For more information, see Publication 538, Accounting Periods and Methods,"
"text": "ee File this form to request a change in your accounting method, including the accounting treatment of any item. if you are requesting 2 change in accounting period, use Form 1128, Application for Change in Accounting Period. For more information, see Publication 538, Accounting Periods and Methods,"
},
{
"type": "NarrativeText",
"element_id": "b3859f2f29884b1d3ba0892e52859a99",
"element_id": "cc1701e3ce9347e344b3df80d426bd21",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "When filing Form 3115, taxpayers are reminded to determine if IRS has published a ruling or procedure dealing with the specific type of change since November 1987 (the current. revision date of Form 3115)"
"text": "Seti aes When filing Form 3115, taxpayers are reminded to determine if IRS has published a ruling or procedure dealing with the specific type of change since November 1987 (the current. revision date of Form 3115)"
},
{
"type": "NarrativeText",
"element_id": "e5a95dc10d4071983b70898a21f11175",
"element_id": "b81dc18d0f8666f9bf7400a00657dc72",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "Generally, applicants must complete Section A. In addition, complete the appropriate sections (B:1 through H) for which a change is desired."
"text": "POMS SANE OPFOR DA 29). Generally, applicants must complete Section A. In addition, complete the appropriate sections (B:1 through H) for which a change is desired. You must give alll relevant facts, including a"
},
{
"type": "Title",
"element_id": "5756fb398995bb6518a87637f24f426e",
"element_id": "c7502aa5b000d6446f3eca882518a260",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "Time and Place for Filing"
"text": "Time and Place for Filing amarall, ammlimeete maet file snete"
},
{
"type": "NarrativeText",
"element_id": "25f830e7c39c115c9937eb9d11cfb1f2",
"element_id": "8b35e7c212710b1099b675ce9394fb47",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "State whether you desire a conference in the National Office if the Service proposes to disapprove your application"
"text": "Se NB ON State whether you desire a conference in the National Office if the Service proposes to disapprove your application."
},
{
"type": "Title",
"element_id": "8b06cd6e2bf7fc15130d5d9ed7e66283",
"element_id": "0a16a0fea889be77576c0fd88575554a",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "Affiliated Groups"
"text": "Affiliated Groups Tavmayare that ara mam)"
},
{
"type": "Title",
"element_id": "242a9dba10a04654d4adef9c58ff96f6",
"element_id": "68b58298cabd9069c975b192a7183139",
"metadata": {
"data_source": {},
"filetype": "image/png",
@ -351,62 +361,62 @@
},
{
"type": "Title",
"element_id": "11c98a9cbd6a200fbc5b93fed15007ac",
"element_id": "6a8881a6e87021b2362243f7df3e4b1d",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "Uniform capitalization rules and limitation on"
"text": "Uniform capitalization rules and limitation on cash method.—If you are required to char"
},
{
"type": "Title",
"element_id": "58703de56debc34a1d68e6ed6f8fd067",
"element_id": "8daeb8b48fb666f1dd54e2af283d0c22",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "Specific Instructions Section A"
"text": "Specific Instructions Section A Neem Ea mama 1 !Taeahle inemes"
},
{
"type": "Title",
"element_id": "a4316c02df07840f1beb56609cb09735",
"element_id": "09203a0c6955f64ca8eb52cd6ea47034",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "Late Applications"
"text": "Late Applications Me coup armlimatinm te ler"
},
{
"type": "NarrativeText",
"element_id": "39458f370b98a606db29ac6dee975e07",
"element_id": "962e3f0ceb1f0b1b08a1c19adde8d962",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "Disregard the instructions under Time and Place for Filing and Late Applications. instead, attach Form 3115 to your income tax return for the year of change; do not file it separately. Also include on a separate statement accompanying the Form 3115 the period over which the section 481(2) adjustment will be taken into account and"
"text": "lethal elaine bela Disregard the instructions under Time and Place for Filing and Late Applications. instead, attach Form 3115 to your income tax return for the year of change; do not file it separately. Also include on a separate statement accompanying the Form 3115 the period over which the section 481(2) adjustment will be taken into account and the basis for that conclusion. Identify the"
},
{
"type": "Title",
"element_id": "025a65465b6fd9635316e92633b24c7e",
"element_id": "bfe98eb672d95c15a11ed3e618928b4e",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "Identifying Number"
"text": "Identifying Number Ndiuidesale Am omptisoehesal"
},
{
"type": "NarrativeText",
"element_id": "9240bfa889b87dc2fb3fa746ca4eeeb4",
"element_id": "87f8128b03a72c616ee1a1bb91e11c56",
"metadata": {
"data_source": {},
"filetype": "image/png",
"page_number": 1
},
"text": "Others.-—The employer identification number of an applicant other than an individual should be entered in this block,"
"text": "—e—e—— eee Others.-—The employer identification number of an applicant other than an individual should be entered in this block,"
}
]

View File

@ -1111,13 +1111,13 @@
},
{
"type": "FigureCaption",
"element_id": "27b45633a0f31b9e01d179d70d7dc282",
"element_id": "b5ee6af3d776b0bbd2e581a3ab2ab2e1",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 4
},
"text": "5 1 os = — 10; =o ° © —\" 205 i —~é é —ip a5 — Control -2 — & 2.5 T T T 0.0000001 + —-0.00001 0.001 O14 Current Density (A/cm2)"
"text": "Potential (Vv)nm°in°}aryT T T0.00001 0.001 olCurrent Density (A/cm2)"
},
{
"type": "UncategorizedText",
@ -1141,13 +1141,13 @@
},
{
"type": "Table",
"element_id": "9270ab0a1b3ba26a16991abcd0b45dfe",
"element_id": "e2ed41967a486766ad6a122cc3aba4d5",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 4
},
"text": "Inhibitor be (V/dec) ba (V/dec) Ecorr (V) icorr (A/cm?) Polarization Corrosion concentration (g) resistance (Q) rate (mm/year) oO 0.0335 0.0409 0.0003 24.0910 2.8163 2 1.9460 0.0596 0.0002 121.440 1.5054 4 0.0163 0.2369 0.0001 42.121 0.9476 6 0.3233 0.0540 5.39E-05 373.180 0.4318 8 0.1240 0.0556 5.46E-05 305.650 0.3772 10 0.0382 0.0086 1.24E-05 246.080 0.0919"
"text": "Inhibitorconcentration (g) bc (V/dec) ba (V/dec) Ecorr (V) icorr (A/cm2) Polarizationresistance (Ω) Corrosionrate (mm/year) 0246810 0.03351.94600.01630.32330.12400.0382 0.04090.05960.23690.05400.05560.0086 (cid:3) 0.9393(cid:3) 0.8276(cid:3) 0.8825(cid:3) 0.8027(cid:3) 0.5896(cid:3) 0.5356 0.00030.00020.00015.39E-055.46E-051.24E-05 24.0910121.44042.121373.180305.650246.080 2.81631.50540.94760.43180.37720.0919"
},
{
"type": "UncategorizedText",
@ -1471,13 +1471,13 @@
},
{
"type": "FigureCaption",
"element_id": "273fb301b173075f79b2cbdab962e2ff",
"element_id": "6959a323ee23c858c3b1411b05db6ebf",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 5
},
"text": "SEM HV: Q0KY WD: 14.89 rmrm 9EM MAO: 209 x Det: DOE Pectomsence In nanospact"
"text": "SEM HV: Q0KY WD: 14.89 rmrmDEM MAO: 209 x Det: DOE Pecforsence In nenospact"
},
{
"type": "NarrativeText",
@ -1491,13 +1491,13 @@
},
{
"type": "FigureCaption",
"element_id": "d04d110c16a4ebc184fa130f09b8d423",
"element_id": "4f8c25cf7aefbef4af474fe62bed2b33",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 5
},
"text": "Sem ny. 200 Rv"
"text": "gEoswaeSem ny. 200 RvLitt td vegas rescanFertormarce innancesacel"
},
{
"type": "NarrativeText",
@ -1511,13 +1511,13 @@
},
{
"type": "FigureCaption",
"element_id": "520d1da08c86ce165cd2843e2dc27f98",
"element_id": "035c30f23285fdae72335b94421cf564",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 5
},
"text": "SEMHV: 20.0KV WD: 15.54 mm EM ING: ACO x Dei: OSE"
"text": "°@¢Naafe«MgsSEM HY: 20.0KV ALEC CMT LPL LL A a pO OPEM ING: ACO x"
},
{
"type": "NarrativeText",
@ -1579,16 +1579,6 @@
},
"text": "Austenitic stainless steel Type 316 was used in this study with chemical composition reported in [1,2]. The chemicals used were of annular grade. The inhibitor concentrations are in the range of 2, 4, 6, 8 and 10 g [35]. The structural formula of egg shell powder is shown in Fig. 9."
},
{
"type": "FigureCaption",
"element_id": "060e14f01e484ba252e902cd5c6f94f9",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 6
},
"text": "ou H,;COCHNY OH"
},
{
"type": "UncategorizedText",
"element_id": "c07eeb615f8b0f2d544348b7f0655301",

View File

@ -791,13 +791,13 @@
},
{
"type": "Table",
"element_id": "1d8fd023cd0978f7a6500815d2ad0ef6",
"element_id": "be8fbf813482eec7fd0e2fc665b4d3bb",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 3
},
"text": "Instance size (m, n) Average number of Locations Times Vehicles Possible empty travels (8, 1500) 568.40 975.20 652.20 668,279.40 (8, 2000) 672.80 1048.00 857.20 1,195,844.80 (8, 2500) 923.40 1078.00 1082.40 1,866,175.20 (8, 3000) 977.00 1113.20 1272.80 2,705,617.00 (12, 1500) 566.00 994.00 642.00 674,191.00 (12, 2000) 732.60 1040.60 861.20 1,199,659.80 (12, 2500) 875.00 1081.00 1096.00 1,878,745.20 (12, 3000) 1119.60 1107.40 1286.20 2,711,180.40 (16, 1500) 581.80 985.40 667.80 673,585.80 (16, 2000) 778.00 1040.60 872.40 1,200,560.80 (16, 2500) 879.00 1083.20 1076.40 1,879,387.00 ) (16, 3000 1087.20 1101.60 1284.60 2,684,983.60"
"text": "Instance size (m, n) Average number of (8, 1500)(8, 2000)(8, 2500)(8, 3000)(12, 1500)(12, 2000)(12, 2500)(12, 3000)(16, 1500)(16, 2000)(16, 2500)(16, 3000) Locations Times Vehicles Possible empty travels 568.40672.80923.40977.00566.00732.60875.001119.60581.80778.00879.001087.20 975.201048.001078.001113.20994.001040.601081.001107.40985.401040.601083.201101.60 652.20857.201082.401272.80642.00861.201096.001286.20667.80872.401076.401284.60 668,279.401,195,844.801,866,175.202,705,617.00674,191.001,199,659.801,878,745.202,711,180.40673,585.801,200,560.801,879,387.002,684,983.60"
},
{
"type": "UncategorizedText",

View File

@ -591,13 +591,13 @@
},
{
"type": "FigureCaption",
"element_id": "812dcaaec927a84d57af36e20adb5ded",
"element_id": "dd23a7c381d44f4b36975adaf4d2236d",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 4
},
"text": "Efficient Data Annotation Model Customization Document Images Community Platform a >) ¥ DIA Model Hub i .) Customized Model Training] == | Layout Detection Models | ——= DIA Pipeline Sharing ~ OCR Module = { Layout Data stuctue ) = (storage Visualization VY"
"text": "Model Customization Document Images Community PlatformEfficient Data Annotation ¥ DIA Model HubCustomized Model Training) ==> | Layout Detection wot) = | DIA Pipeline Sharing }4( OCR Module ) = { Layout Data stuctue ) = (storages vsatzaion )"
},
{
"type": "NarrativeText",
@ -681,14 +681,14 @@
},
{
"type": "Table",
"element_id": "34923b77ca76e1808956ade5e766f7c2",
"element_id": "71e289a268220c21575bb55a73980b83",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 5,
"text_as_html": "<table><thead><th>Dataset</th><th>| Base Model'|</th><th>| Notes</th></thead><tr><td>PubLayNet</td><td>[38] F/M</td><td>Layouts of modern scientific documents</td></tr><tr><td>PRImA [3]</td><td>M</td><td>Layouts of scanned modern magazines and scientific reports</td></tr><tr><td>Newspaper</td><td>F</td><td>Layouts of scanned US newspapers from the 20th century</td></tr><tr><td>TableBank</td><td>F</td><td>Table region on modern scientific and business document</td></tr><tr><td>HJDataset [31]</td><td>F/M</td><td>Layouts of history Japanese documents</td></tr></table>"
},
"text": "Dataset | Base Model'| Large Model | Notes PubLayNet B8]| F/M M Layouts of modern scientific documents PRImA M - nned modern magazines and scientific reports Newspapei F - canned US newspapers from the 20th century TableBank F F Table region on modern scientific and business document HJDataset F/M - Layouts of history Japanese documents"
"text": "Dataset Base Model1 Large Model Notes PubLayNet [38]PRImA [3]Newspaper [17]TableBank [18]HJDataset [31] F / MMFFF / M M--F- Layouts of modern scientific documentsLayouts of scanned modern magazines and scientific reportsLayouts of scanned US newspapers from the 20th centuryTable region on modern scientific and business documentLayouts of history Japanese documents"
},
{
"type": "UncategorizedText",
@ -852,13 +852,13 @@
},
{
"type": "FigureCaption",
"element_id": "d21661161ae2c8dc39e96ee5c660704b",
"element_id": "2f498bdd91739a7083490999507420a5",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 6
},
"text": "- ° . 3 a a 4 a 3 oo er 2 § 8 a 8 3 3 £ 4 A g a 9 3 ¥ Coordinate g 4 5 3 + § 3 H Extra Features [O=\") [Bo] eaing i Text | | Type | | ower ° & a ¢ o [ coordinatel textblock1, 3 3 g Q 3 , textblock2 , layoutl ] 4 q ® A list of the layout elements Ff"
"text": "33§3 fectange vada8883 Coordinate83 +*Block | [Block | [Read8 Extra features Tet | [Tye | [oder[ coordinatel textblock1 |» , see383 , textblock2 , layout] ]4A list of the layout elementsThe same transformation and operation APIs"
},
{
"type": "NarrativeText",
@ -1062,14 +1062,14 @@
},
{
"type": "Table",
"element_id": "f81d4915b54758e0d4d52af3566bb813",
"element_id": "548c38f86edc295baf869abe37a0d1cf",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 8,
"text_as_html": "<table><thead><th>Operation Name</th><th></th><th>|</th><th>Description</th></thead><tr><td>block.pad(top, bottom,</td><td>right,</td><td>left) |</td><td>Enlarge the current block according to the input</td></tr><tr><td>block.scale(fx, fy)</td><td></td><td></td><td>Scale the current block given the ratio in x and y direction</td></tr><tr><td>block.shift(dx, dy)</td><td></td><td></td><td>Move the current block with the shift distances in x and y direction</td></tr><tr><td>blocki.is_in(block2)</td><td></td><td>|</td><td>Whether block] is inside of block2</td></tr><tr><td>blocki.intersect (block2)</td><td></td><td></td><td>Return the intersection region of blockl and block2. Coordinate type to be determined based on the inputs.</td></tr><tr><td>block1i.union(block2)</td><td></td><td></td><td>Return the union region of blockl and block2. Coordinate type to be determined based on the inputs.</td></tr><tr><td>blocki.relative_to(block2)</td><td></td><td></td><td>Convert the absolute coordinates of block] to relative coordinates to block2</td></tr><tr><td>blocki.condition_on(block2)</td><td></td><td></td><td>Calculate the absolute coordinates of blockl given the canvas block2s absolute coordinates</td></tr></table>"
},
"text": "Operation Name Description block.pad(top, bottom, right, left) Enlarge the current block according to the input block.scale(fx, fy) Scale the current block given the ratio ion in x and y di block.shift(dx, dy) Move the current block with the shift distances in x and y direction block1.is_in(block2) Whether block] is inside of block2 ; Return the intersection region of block and block2. block1. intersect (block2) . . . Coordinate type to be determined based on the inputs. ; Return the union region of block1 and block2. block1.union(block2) . . . Coordinate type to be determined based on the inputs. Convert the absolute coordinates of block to block1.relative_to(block2) ' ' relative coordinates to block2 . Calculate the absolute coordinates of block1 given block1.condition_on(block2) . the canvas block2s absolute coordinates block. crop_image (image) Obtain the image segments in the block region"
"text": "Operation Name Description block.pad(top, bottom, right, left) Enlarge the current block according to the input block.scale(fx, fy) block.shift(dx, dy) Scale the current block given the ratioin x and y direction Move the current block with the shiftdistances in x and y direction block1.is in(block2) Whether block1 is inside of block2 block1.intersect(block2) block1.union(block2) block1.relative to(block2) block1.condition on(block2) Return the intersection region of block1 and block2.Coordinate type to be determined based on the inputs. Return the union region of block1 and block2.Coordinate type to be determined based on the inputs. Convert the absolute coordinates of block1 torelative coordinates to block2 Calculate the absolute coordinates of block1 giventhe canvas block2s absolute coordinates Obtain the image segments in the block region"
},
{
"type": "UncategorizedText",
@ -1333,13 +1333,13 @@
},
{
"type": "FigureCaption",
"element_id": "975d6cb141cb0a0313375630ae063fa8",
"element_id": "d10d70e48ed0066bd15dd133d09f61fd",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 9
},
"text": "x09 Burpunog uayor Aeydsiq 1 vondo 10g Guypunog usyoy apir:z uondo Mode I: Showing Layout on the Original Image Mode Il: Drawing OCR'd Text at the Correspoding Position"
"text": "a ESMode I: Showing Layout on the Original ImageayeMode Il: Drawing OCR'd Text at the Correspoding Position10g Bpunog vayoy feyds1q :1 vondoxog Burpunog vay apiH z word"
},
{
"type": "NarrativeText",
@ -1423,13 +1423,13 @@
},
{
"type": "FigureCaption",
"element_id": "2680b3c7a55754a3ba2738cb3d9d5e8b",
"element_id": "0ff9ad06a304818ae83b93c6f2b16309",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 10
},
"text": "et Intra-column reading order Token Categories tie (Adress 2) tee (NE sumber Variable HEE company type Column Categories (J tite we) adaress —_ (7) section Header by e * Column reading order a a (a) Illustration of the original Japanese Maximum Allowed Height BRE B>e EER eR (b) Illustration of the recreated document with dense text structure for better OCR performance"
"text": "Column reading orderMaximum Allowed HeightZ. Shen et al.Intra-column reading ordert el 1 ili |.(a) Illustration of the original Japanese document with detected layout elements highlighted in colored boxesCe tans &iia! aaae oeRRbaeienases(b) Illustration of the recreated document with dense text structure for better OCR performanceToken CategoriesMoteAddresstetNumberVaribiecompany typeColumn Categories(J tite| Aatress(tee[7] section adr"
},
{
"type": "NarrativeText",
@ -1543,13 +1543,13 @@
},
{
"type": "FigureCaption",
"element_id": "b33b2bc3b9c416673c7f74c6a00c49d8",
"element_id": "55f2474c66877608ca9b463a7076573e",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 11
},
"text": "(spe peepee, Active Learning Layout Annotate Layout Dataset | + Annotation Toolkit ¥ a Deep Leaming Layout Model Training & Inference, ¥ ; Handy Data Structures & Post-processing El Apis for Layout Det a LAR ror tye eats) 4 Text Recognition | <—— Default ane Customized ¥ ee Layout Structure Visualization & Export | <—— | visualization & Storage The Japanese Document Helpful LayoutParser Digitization Pipeline Modules"
"text": "(spe peepee,Active Learning Layout=Annotate Layout Dataset parte4zi Deep Learning LayoutLayout Detection Model Training & Inference,Post-processin Handy Data Structures &pl 9 APIs for Layout DataText Recognition Default and Customized: r OCR Models4Visualization & Export | <——Layout StructureVisualization & StorageThe Japanese Document Helpful LayoutParserDigitization Pipeline Modules"
},
{
"type": "UncategorizedText",
@ -1723,13 +1723,13 @@
},
{
"type": "FigureCaption",
"element_id": "7d42bb6af1404a95a6e8870d5c4d07bf",
"element_id": "f58d47bde7ebddd81c4a678c918a8f1b",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 13
},
"text": "(@) Partial table at the bottom (&) Full page table (6) Partial table at the top (d) Mis-detected tet line"
"text": "(2) Partial table atthe bottom (&) Full page table (6) Partial table at the top (d) Mis-detected tet line"
},
{
"type": "NarrativeText",

View File

@ -1099,6 +1099,16 @@
},
"text": "6"
},
{
"type": "FigureCaption",
"element_id": "f58b520072d30c4805940f5c99a306c3",
"metadata": {
"data_source": {},
"filetype": "application/pdf",
"page_number": 9
},
"text": "an ¥3 te,ay."
},
{
"type": "NarrativeText",
"element_id": "d754d8d468346f652657279272a11897",

View File

@ -1 +1 @@
__version__ = "0.10.4-dev0" # pragma: no cover
__version__ = "0.10.4" # pragma: no cover

View File

@ -188,6 +188,7 @@ def partition_pdf_or_image(
infer_table_structure=infer_table_structure,
include_page_breaks=include_page_breaks,
ocr_languages=ocr_languages,
ocr_mode="individual_blocks",
metadata_last_modified=metadata_last_modified or last_modification_date,
**kwargs,
)
@ -219,6 +220,7 @@ def _partition_pdf_or_image_local(
infer_table_structure: bool = False,
include_page_breaks: bool = False,
ocr_languages: str = "eng",
ocr_mode: str = "entire_page",
model_name: Optional[str] = None,
metadata_last_modified: Optional[str] = None,
**kwargs,
@ -235,6 +237,7 @@ def _partition_pdf_or_image_local(
process_file_with_model_kwargs = {
"is_image": is_image,
"ocr_languages": ocr_languages,
"ocr_mode": ocr_mode,
"extract_tables": infer_table_structure,
"model_name": model_name,
}
@ -249,6 +252,7 @@ def _partition_pdf_or_image_local(
file,
is_image=is_image,
ocr_languages=ocr_languages,
ocr_mode=ocr_mode,
extract_tables=infer_table_structure,
model_name=model_name,
)