mirror of
https://github.com/allenai/olmocr.git
synced 2025-12-28 07:34:13 +00:00
removed mine_diffs_candidates.jsonl
This commit is contained in:
parent
e1a2074703
commit
d34a3576a2
@ -1,24 +0,0 @@
|
||||
{"pdf": "olmo2-pg4.pdf", "id": "olmo2-pg4_minediff_00", "type": "present", "threshold": 1.0, "text": "Table 1 Composition of the pretraining data for OLMo 2."}
|
||||
{"pdf": "olmo2-pg4.pdf", "id": "olmo2-pg4_minediff_01", "type": "present", "threshold": 1.0, "text": "From Dolma, we use the \u201cbaseline 1.0\u201d mix."}
|
||||
{"pdf": "olmo2-pg4.pdf", "id": "olmo2-pg4_minediff_02", "type": "present", "threshold": 1.0, "text": "Base: The OLMo 2 1124 Mix is composed of StarCoder (Li et al., 2023b; Kocetkov et al., 2022), peS2o (Soldaini and Lo, 2023), web text from DCLM (Li et al., 2024) and Wiki come from Dolma 1.7 (Soldaini et al., 2024). arXiv comes from Red-Pajama (Together AI, 2023), while OpenWebMath (Paster et al., 2023) and Algebraic Stack come from ProofPile II (Azerbayev et al., 2023)."}
|
||||
{"pdf": "mattsnotes.pdf", "id": "mattsnotes_minediff_00", "type": "present", "threshold": 1.0, "text": "Base: Code: The-Stack-V2"}
|
||||
{"pdf": "mattsnotes.pdf", "id": "mattsnotes_minediff_01", "type": "present", "threshold": 1.0, "text": "CodeText: SE, whatever we've scraped"}
|
||||
{"pdf": "mattsnotes.pdf", "id": "mattsnotes_minediff_02", "type": "present", "threshold": 1.0, "text": "WebText: HQDCLM"}
|
||||
{"pdf": "mattsnotes.pdf", "id": "mattsnotes_minediff_03", "type": "present", "threshold": 1.0, "text": "~85% Source code ~10% CodeText ~ 5% Webtext"}
|
||||
{"pdf": "mattsnotes.pdf", "id": "mattsnotes_minediff_04", "type": "present", "threshold": 1.0, "text": "~ 85% The-stack-V2 ~ 15% CodeText ~ 0% Webtext"}
|
||||
{"pdf": "multi_column_miss.pdf", "id": "multi_column_miss_minediff_00", "type": "present", "threshold": 1.0, "text": "INDUSTRY WATCH Corporate social responsibility and the tobacco industry: hope or hype?"}
|
||||
{"pdf": "multi_column_miss.pdf", "id": "multi_column_miss_minediff_01", "type": "present", "threshold": 1.0, "text": "This report first provides the context and development of CSR; then, from internal company documents, examines how PM came to its own version."}
|
||||
{"pdf": "multi_column_miss.pdf", "id": "multi_column_miss_minediff_02", "type": "present", "threshold": 1.0, "text": "This paper examines whether a tobacco company espousing CSR should be judged simply as a corporate entity along standards of business ethics or as an irretrievably negative force in the realm of public health, thereby rendering CSR an oxymoron."}
|
||||
{"pdf": "multi_column_miss.pdf", "id": "multi_column_miss_minediff_03", "type": "present", "threshold": 1.0, "text": "CORPORATE SOCIAL RESPONSIBILITY: THE CONTEXT The term \u201ccorporate social responsibility\u201d is in vogue at the moment but as a concept it is vague and means different things to different people.1"}
|
||||
{"pdf": "multi_column_miss.pdf", "id": "multi_column_miss_minediff_04", "type": "present", "threshold": 1.0, "text": "But the notion that corporations should be required to return more to society because of their impact on society was driven by pressures from the civil rights, peace, and environmental movements of the last half century."}
|
||||
{"pdf": "openstax_caculus_pg_273.pdf", "id": "openstax_caculus_pg_273_minediff_00", "type": "present", "threshold": 1.0, "text": "a. Find the velocity and acceleration functions. b. Determine the time intervals when the object is slowing down or speeding up."}
|
||||
{"pdf": "openstax_caculus_pg_273.pdf", "id": "openstax_caculus_pg_273_minediff_01", "type": "present", "threshold": 1.0, "text": "e. Determine how long the potato is in the air."}
|
||||
{"pdf": "openstax_caculus_pg_273.pdf", "id": "openstax_caculus_pg_273_minediff_02", "type": "present", "threshold": 1.0, "text": "a. Use the graph of the position function to determine the time intervals when the velocity is positive, negative, or zero."}
|
||||
{"pdf": "openstax_caculus_pg_273.pdf", "id": "openstax_caculus_pg_273_minediff_03", "type": "present", "threshold": 1.0, "text": "c. Use the graph of the velocity function to determine the time intervals when the acceleration is positive, negative, or zero."}
|
||||
{"pdf": "openstax_caculus_pg_273.pdf", "id": "openstax_caculus_pg_273_minediff_04", "type": "present", "threshold": 1.0, "text": ""}
|
||||
{"pdf": "lincoln_letter.pdf", "id": "lincoln_letter_minediff_00", "type": "present", "threshold": 1.0, "text": "January 10th 1864."}
|
||||
{"pdf": "lincoln_letter.pdf", "id": "lincoln_letter_minediff_01", "type": "present", "threshold": 1.0, "text": "Major General Hitchcock, Commissioner of Exchanges, is authorized and directed to offer Brigadier General Trimble, now a prisoner of war in Fort McHenry, in exchange for Major White, who is held as a prisoner at Richmond."}
|
||||
{"pdf": "lincoln_letter.pdf", "id": "lincoln_letter_minediff_02", "type": "present", "threshold": 1.0, "text": "Abraham Lincoln"}
|
||||
{"pdf": "lincoln_letter.pdf", "id": "lincoln_letter_minediff_03", "type": "present", "threshold": 1.0, "text": "He is also directed to send forward the offer of exchange by Henry m. Warfield, Esq. of Baltimore, under a flag of truce, and give him a pass to City Point."}
|
||||
{"pdf": "discoverworld_crazy_table4.pdf", "id": "discoverworld_crazy_table4_minediff_00", "type": "present", "threshold": 1.0, "text": "Table 4: Baseline model performance on each of the three scoring metrics (*task completion, task process, explanatory knowledge discovery*) across all 24 DISCOVERYWORLD tasks."}
|
||||
{"pdf": "discoverworld_crazy_table4.pdf", "id": "discoverworld_crazy_table4_minediff_01", "type": "present", "threshold": 1.0, "text": "Base: Table 5: Baseline model performance on each of the three scoring metrics (*task completion, task process, explanatory knowledge discovery*) across all 10 unit test tasks."}
|
||||
Loading…
x
Reference in New Issue
Block a user