Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							568dd48509
							
						
					 | 
					
						
						
							
							Prepping for qwen2vl full training run
						
						
						
						
						
						
					 | 
					
						2024-10-05 04:04:45 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							6065da268b
							
						
					 | 
					
						
						
							
							Hopefully working better
						
						
						
						
						
						
					 | 
					
						2024-10-04 18:06:04 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							a2ff849a78
							
						
					 | 
					
						
						
							
							checkpoint on new runner for openai batches
						
						
						
						
						
						
					 | 
					
						2024-10-04 17:32:35 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							2da901d433
							
						
					 | 
					
						
						
							
							new better runopenaibatch script
						
						
						
						
						
						
					 | 
					
						2024-10-04 16:58:38 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							35ec67c427
							
						
					 | 
					
						
						
							
							Hopefully finishing touches
						
						
						
						
						
						
					 | 
					
						2024-10-04 16:10:19 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							db36608b42
							
						
					 | 
					
						
						
							
							Fix
						
						
						
						
						
						
					 | 
					
						2024-10-04 16:05:08 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							f25cb6c261
							
						
					 | 
					
						
						
							
							Fixes
						
						
						
						
						
						
					 | 
					
						2024-10-04 15:54:00 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							4630f7b1cb
							
						
					 | 
					
						
						
							
							Bugfixes
						
						
						
						
						
						
					 | 
					
						2024-10-04 15:35:52 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							e87729a653
							
						
					 | 
					
						
						
							
							New send silver script for testing
						
						
						
						
						
						
					 | 
					
						2024-10-04 15:27:43 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							6e1094ee8a
							
						
					 | 
					
						
						
							
							Support for more evals and output formats
						
						
						
						
						
						
					 | 
					
						2024-10-03 20:19:52 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							974ddd3773
							
						
					 | 
					
						
						
							
							I'm pretty sure we only need to save on rank0
						
						
						
						
						
						
					 | 
					
						2024-10-03 11:30:44 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							8f1fa4f796
							
						
					 | 
					
						
						
							
							Running a mini config again with metric
						
						
						
						
						
						
					 | 
					
						2024-10-03 11:12:30 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							046d4a4534
							
						
					 | 
					
						
						
							
							Adding eval on start and seed params
						
						
						
						
						
						
					 | 
					
						2024-10-03 10:54:25 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							2227605bfb
							
						
					 | 
					
						
						
							
							Mini train config
						
						
						
						
						
						
					 | 
					
						2024-10-03 10:32:15 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							4505a49420
							
						
					 | 
					
						
						
							
							Pinning to normal transformers version now
						
						
						
						
						
						
					 | 
					
						2024-10-03 09:00:53 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							78e3a94173
							
						
					 | 
					
						
						
							
							Adding pluto ib
						
						
						
						
						
						
					 | 
					
						2024-10-03 15:33:17 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							0ddaf9023d
							
						
					 | 
					
						
						
							
							Getting ready to launch a new training run
						
						
						
						
						
						
					 | 
					
						2024-10-02 23:04:56 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							1686790ac8
							
						
					 | 
					
						
						
							
							Checking filtering logic
						
						
						
						
						
						
					 | 
					
						2024-10-02 22:45:40 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							b340ae5092
							
						
					 | 
					
						
						
							
							A few notes, starting to test dataloader with new structured response format
						
						
						
						
						
						
					 | 
					
						2024-10-02 22:17:15 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							8315162a25
							
						
					 | 
					
						
						
							
							Merge branch 'main' of https://github.com/allenai/pdelfin
						
						
						
						
						
						
					 | 
					
						2024-10-02 20:48:58 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							6d8e638152
							
						
					 | 
					
						
						
							
							Readme
						
						
						
						
						
						
					 | 
					
						2024-10-02 20:48:39 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
							
							
						
						
						
							
						
						
							ad1d818816
							
						
					 | 
					
						
						
							
							Update README.md
						
						
						
						
						
						
					 | 
					
						2024-10-02 13:42:43 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							68b9ee8c90
							
						
					 | 
					
						
						
							
							Small prompt fix
						
						
						
						
						
						
					 | 
					
						2024-10-02 20:19:03 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							a5c27212f0
							
						
					 | 
					
						
						
							
							Need more token output due to structured outputs
						
						
						
						
						
						
					 | 
					
						2024-10-02 19:54:54 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							d05832ebee
							
						
					 | 
					
						
						
							
							Fixes and evals for structured outputs
						
						
						
						
						
						
					 | 
					
						2024-10-02 19:51:15 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							802632c49f
							
						
					 | 
					
						
						
							
							Building openai prompt with structured output
						
						
						
						
						
						
					 | 
					
						2024-10-02 18:10:47 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							be00ccf321
							
						
					 | 
					
						
						
							
							Switching buildsilver to use new anchor code
						
						
						
						
						
						
					 | 
					
						2024-10-02 17:29:44 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							0071cbd788
							
						
					 | 
					
						
						
							
							Appears as if the report method works really well, might need one last step to detect rotated pages
						
						
						
						
						
						
					 | 
					
						2024-10-02 16:44:39 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							5703a59e50
							
						
					 | 
					
						
						
							
							Fix for voting on multiple docs in the same eval page
						
						
						
						
						
						
					 | 
					
						2024-10-02 16:31:59 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							73fb81ef6c
							
						
					 | 
					
						
						
							
							Review page size option, fixing mkdirs in convertsilver script
						
						
						
						
						
						
					 | 
					
						2024-10-02 15:53:21 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							276465aab1
							
						
					 | 
					
						
						
							
							Adding flag to allow skipping filter
						
						
						
						
						
						
					 | 
					
						2024-10-02 15:46:12 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							549e07bed0
							
						
					 | 
					
						
						
							
							filtering out stupid ads
						
						
						
						
						
						
					 | 
					
						2024-10-02 15:36:41 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							6ef8226347
							
						
					 | 
					
						
						
							
							Can spit out anchor text for a gpt engine using pypdf, showing locations of images and text
						
						
						
						
						
						
					 | 
					
						2024-10-01 23:15:53 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							e42cecf96c
							
						
					 | 
					
						
						
							
							Adding anchor code based off of pypdf that visits each text block, hopefully so we can make it output good bboxes
						
						
						
						
						
						
					 | 
					
						2024-10-01 22:10:58 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							09e8840c56
							
						
					 | 
					
						
						
							
							coherency based anchor text
						
						
						
						
						
						
					 | 
					
						2024-10-01 20:19:03 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							28fe314539
							
						
					 | 
					
						
						
							
							prepping anchor text generation code
						
						
						
						
						
						
					 | 
					
						2024-10-01 19:59:48 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							7795f65a53
							
						
					 | 
					
						
						
							
							Fixing bug where we were not showing all the worst alignments
						
						
						
						
						
						
					 | 
					
						2024-10-01 16:56:15 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							9d6e2faf95
							
						
					 | 
					
						
						
							
							Runeval is much improved now
						
						
						
						
						
						
					 | 
					
						2024-10-01 16:46:35 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							8a66ecee25
							
						
					 | 
					
						
						
							
							Script to rerun openai prompts on the same data
						
						
						
						
						
						
					 | 
					
						2024-10-01 16:25:16 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							f99f6a6729
							
						
					 | 
					
						
						
							
							Prompt utils
						
						
						
						
						
						
					 | 
					
						2024-10-01 16:02:24 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							b6543a4f65
							
						
					 | 
					
						
						
							
							Qwen checkpoint fixer script
						
						
						
						
						
						
					 | 
					
						2024-10-01 16:02:10 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							2c7323d1c4
							
						
					 | 
					
						
						
							
							Convert silver adjustments
						
						
						
						
						
						
					 | 
					
						2024-09-30 22:41:51 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							80bb0cbc23
							
						
					 | 
					
						
						
							
							Open ai to openai comparison now supported, new prompts
						
						
						
						
						
						
					 | 
					
						2024-09-30 22:08:30 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							e179453cc5
							
						
					 | 
					
						
						
							
							Fixing qwen checkpoint script
						
						
						
						
						
						
					 | 
					
						2024-09-30 20:34:06 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							963e946233
							
						
					 | 
					
						
						
							
							Convertsilver birr script can go in and out of S3 now
						
						
						
						
						
						
					 | 
					
						2024-09-30 20:06:45 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							b856b4551f
							
						
					 | 
					
						
						
							
							Fixes to convertsilver to birr script
						
						
						
						
						
						
					 | 
					
						2024-09-30 19:54:30 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							da1982acb8
							
						
					 | 
					
						
						
							
							Refactoring prompts into their own new folder
						
						
						
						
						
						
					 | 
					
						2024-09-30 18:48:17 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							d74f9a352b
							
						
					 | 
					
						
						
							
							Send silver script tries to open file first, before sending an API requests
						
						
						
						
						
						
					 | 
					
						2024-09-30 18:41:50 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							1216d9c7c9
							
						
					 | 
					
						
						
							
							retrieve silver script reports errors better
						
						
						
						
						
						
					 | 
					
						2024-09-30 18:41:33 +00:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jake Poznanski
							
						 
					 | 
					
						
						
						
						
							
						
						
							b4e9d6a2b8
							
						
					 | 
					
						
						
							
							Buildsilver script suppors reservoir sampling so it can sample 100M+ paths now efficiently
						
						
						
						
						
						
					 | 
					
						2024-09-30 18:41:18 +00:00 | 
					
					
						
						
							
							
							
						
					 |