Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							f79bd0d248
							
						
					 | 
					
						
						
							
							Cleanup review app
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-20 16:36:10 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							063d4f556a
							
						
					 | 
					
						
						
							
							Review page
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 23:28:37 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							449900a303
							
						
					 | 
					
						
						
							
							Tests
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 23:06:18 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							9e3b554f12
							
						
					 | 
					
						
						
							
							More html table parsing goodness
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 21:06:52 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							2944d3b6ef
							
						
					 | 
					
						
						
							
							More fixes
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 20:52:00 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							16ab1a4f37
							
						
					 | 
					
						
						
							
							Progress on more complicated header and footers
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 20:42:04 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							1e13ddef5a
							
						
					 | 
					
						
						
							
							Sorting results
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 18:57:53 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							c25e9cb084
							
						
					 | 
					
						
						
							
							Addxing some fixes
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 18:57:00 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							3005ebd67d
							
						
					 | 
					
						
						
							
							Normalization
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 18:46:07 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							8ec1ebe5ed
							
						
					 | 
					
						
						
							
							Normalization
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 18:40:03 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							cb4dfeba36
							
						
					 | 
					
						
						
							
							Fix
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 18:33:48 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							a4605e4efc
							
						
					 | 
					
						
						
							
							Fixing normalizing during table cell comparison
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 18:29:42 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							17979118ba
							
						
					 | 
					
						
						
							
							Lints
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 18:01:53 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							b307f5a116
							
						
					 | 
					
						
						
							
							More robust markdown parsing
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 18:01:02 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							53444571e9
							
						
					 | 
					
						
						
							
							Tests
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 17:53:45 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							cac5ef13a9
							
						
					 | 
					
						
						
							
							Tests for the tests
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 17:44:49 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							196654ed25
							
						
					 | 
					
						
						
							
							Merge branch 'main' of https://github.com/allenai/olmocr
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 17:32:24 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							0a3a5efe07
							
						
					 | 
					
						
						
							
							Lints
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 17:32:22 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							0afacd6ac7
							
						
					 | 
					
						
						
							
							Less duped tests
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 17:32:06 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							9855f70fee
							
						
					 | 
					
						
						
							
							Some work on table dataset
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 17:25:22 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							14e3f6e97b
							
						
					 | 
					
						
						
							
							Small edits
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 09:27:41 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							46ffbe9324
							
						
					 | 
					
						
						
							
							smolDocling support for benchmark
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 08:36:31 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								xcvil
							
						 
					 | 
					
						
						
						
						
							
						
						
							a6a0f21c8b
							
						
					 | 
					
						
						
							
							Add argparser argument for configuring SGLang server port
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-19 14:00:28 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							bc41ba92e7
							
						
					 | 
					
						
						
							
							Merge branch 'main' of https://github.com/allenai/olmocr
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-18 22:35:46 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							ad82e5526f
							
						
					 | 
					
						
						
							
							Adding url reference for tests, some mining and cleanup scripts
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-18 22:35:44 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Aman Rangapur
							
						 
					 | 
					
						
						
							
							
						
						
						
							
						
						
							f1945c1ecf
							
						
					 | 
					
						
						
							
							Merge pull request #127 from allenai/amanr/pp-doc-layout
						
						
						
						
						
						
						
						Headers and Footers pdf's 
						
						
							
						
					 | 
					
						2025-03-18 14:35:20 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							3c22cf3430
							
						
					 | 
					
						
						
							
							Lints
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-18 19:01:03 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							da05b4ca4f
							
						
					 | 
					
						
						
							
							Merge branch 'main' of https://github.com/allenai/olmocr
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-18 18:57:51 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							d620722a0e
							
						
					 | 
					
						
						
							
							Review app is much nicer now
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-18 18:57:50 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							5ec96476c9
							
						
					 | 
					
						
						
							
							Keyboard shorcuts
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-18 18:46:41 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							9df5102d34
							
						
					 | 
					
						
						
							
							review document
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-18 18:36:18 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							7f921f436a
							
						
					 | 
					
						
						
							
							review app
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-18 18:17:59 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							89b628d0bb
							
						
					 | 
					
						
						
							
							Slighty better
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-18 17:57:45 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							9344107994
							
						
					 | 
					
						
						
							
							pdf viewr
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-18 17:43:07 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							4939e41154
							
						
					 | 
					
						
						
							
							Flask based review app first attempt
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-18 16:53:36 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							f514f39819
							
						
					 | 
					
						
						
							
							Adding raw transformers implementation
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-18 09:07:48 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							93450c326d
							
						
					 | 
					
						
						
							
							Table miner
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-18 15:33:01 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								aman-17
							
						 
					 | 
					
						
						
						
						
							
						
						
							d34a3576a2
							
						
					 | 
					
						
						
							
							removed mine_diffs_candidates.jsonl
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-17 16:36:05 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								aman-17
							
						 
					 | 
					
						
						
						
						
							
						
						
							e1a2074703
							
						
					 | 
					
						
						
							
							removed pp_doc_layout script
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-17 14:24:55 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								aman-17
							
						 
					 | 
					
						
						
						
						
							
						
						
							1297c82447
							
						
					 | 
					
						
						
							
							added few examples for headers and footers
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-17 14:22:34 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								aman-17
							
						 
					 | 
					
						
						
						
						
							
						
						
							f6ea131596
							
						
					 | 
					
						
						
							
							Merge remote-tracking branch 'origin/main' into amanr/pp-doc-layout
						
						
						
						
						
						
						
						Merged amanr/pp-doc with main 
						
						
							
						
					 | 
					
						2025-03-17 13:55:55 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							b472845f33
							
						
					 | 
					
						
						
							
							Table miners
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-17 20:50:21 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								aman-17
							
						 
					 | 
					
						
						
						
						
							
						
						
							b5bd179128
							
						
					 | 
					
						
						
							
							Merge remote-tracking branch 'origin/main' into amanr/pp-doc-layout
						
						
						
						
						
						
						
						merge from main 
						
						
							
						
					 | 
					
						2025-03-17 12:35:46 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								aman-17
							
						 
					 | 
					
						
						
						
						
							
						
						
							8f356a18d4
							
						
					 | 
					
						
						
							
							added pp_doc
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-17 12:34:45 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							aee030c42b
							
						
					 | 
					
						
						
							
							Fixing sample dataset, outputting some reports for debugging. Math is good enough for now
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-17 10:59:02 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							dd725636a3
							
						
					 | 
					
						
						
							
							Bump version to v0.1.60 for release
						
						
						
						
						
						
							
 v0.1.60
						
					 | 
					
						2025-03-17 08:59:18 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							baa00825b0
							
						
					 | 
					
						
						
							
							Don't go down too low in temp
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-17 08:48:19 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							f2951f3f78
							
						
					 | 
					
						
						
							
							Lints
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-17 08:47:57 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							1e42e5ea9a
							
						
					 | 
					
						
						
							
							Faster and nicer equation cache
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-17 08:47:06 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							1f8cc59b22
							
						
					 | 
					
						
						
							
							Pipeline scales temperature automatically, increases performance ~2%
						
						
						
						
						
						
							
						
					 | 
					
						2025-03-14 22:27:51 -07:00 | 
					
					
						
						
							
							
							
						
					 |