Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							c19af6ef42
							
						
					 | 
					
						
						
							
							Update map.ts
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-19 12:27:08 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							2e5785d8d9
							
						
					 | 
					
						
						
							
							Nick: fetch sitemap timeout param
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-19 11:40:13 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							24ddcd4a6d
							
						
					 | 
					
						
						
							
							Update check-fire-engine.ts
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-18 23:53:33 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							4e8e58729a
							
						
					 | 
					
						
						
							
							Update index.ts
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-18 21:10:20 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							0ec52613e2
							
						
					 | 
					
						
						
							
							Nick:
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-18 21:10:11 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							88904a5cde
							
						
					 | 
					
						
						
							
							Nick: js-sdk 1.15.4
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-18 18:10:23 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							a7178c0c6f
							
						
					 | 
					
						
						
							
							Nick: patch js-sdk
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-18 18:08:50 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							56776da28d
							
						
					 | 
					
						
						
							
							Nick: json output to new format
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-18 17:54:04 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							092da888c6
							
						
					 | 
					
						
						
							
							Merge branch 'nsc/json-format'
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-18 17:52:06 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							f9d99ac652
							
						
					 | 
					
						
						
							
							Nick: fixed type on js sdk
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-18 17:51:56 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
							
							
						
						
						
							
						
						
							9109e78e15
							
						
					 | 
					
						
						
							
							Merge pull request #1072 from mendableai/nsc/json-format
						
						
						
						
						
						
						
						(feat/formats) Extract format renamed to json format 
						
						
							
						
					 | 
					
						2025-01-18 17:38:52 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							5d62e8264b
							
						
					 | 
					
						
						
							
							Nick:
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-18 17:37:11 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							b030a1c5da
							
						
					 | 
					
						
						
							
							Nick: extract to json in the sdks as well
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-18 17:23:21 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							34b40f6a23
							
						
					 | 
					
						
						
							
							Nick:
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-18 17:17:42 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Eric Ciarla
							
						 
					 | 
					
						
						
						
						
							
						
						
							6383bf270a
							
						
					 | 
					
						
						
							
							Fix python sdk for extract
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-18 17:12:06 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Eric Ciarla
							
						 
					 | 
					
						
						
						
						
							
						
						
							3363b2d60e
							
						
					 | 
					
						
						
							
							Make prompt not required for extract on python sdk
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-18 17:06:28 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gergő Móricz
							
						 
					 | 
					
						
						
						
						
							
						
						
							146dc47954
							
						
					 | 
					
						
						
							
							feat(sdk): check crawl/batch scrape errors
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-17 18:06:04 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gergő Móricz
							
						 
					 | 
					
						
						
						
						
							
						
						
							dbc6d07871
							
						
					 | 
					
						
						
							
							fix(queue-worker): bring done add to earlier
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-17 17:46:29 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gergő Móricz
							
						 
					 | 
					
						
						
						
						
							
						
						
							13abb2bc0e
							
						
					 | 
					
						
						
							
							fix(crawl-redis/finishCrawl): increase logging to hunt down race condition
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-17 17:23:13 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gergő Móricz
							
						 
					 | 
					
						
						
						
						
							
						
						
							078c0679aa
							
						
					 | 
					
						
						
							
							fix(crawl-status): improve finished checking
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-17 17:18:36 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gergő Móricz
							
						 
					 | 
					
						
						
						
						
							
						
						
							e6531278f6
							
						
					 | 
					
						
						
							
							feat(v1): crawl/batch scrape errors route
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-17 17:12:04 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gergő Móricz
							
						 
					 | 
					
						
						
						
						
							
						
						
							dcd3d6d98d
							
						
					 | 
					
						
						
							
							fix(kickoff): mark as finished if it errors out
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-17 17:11:19 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gergő Móricz
							
						 
					 | 
					
						
						
						
						
							
						
						
							5992c57158
							
						
					 | 
					
						
						
							
							fix(crawler): bad urls from sitemap
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-17 17:07:44 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gergő Móricz
							
						 
					 | 
					
						
						
						
						
							
						
						
							237d0dc197
							
						
					 | 
					
						
						
							
							fix(requests.http): map
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-17 16:21:57 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gergő Móricz
							
						 
					 | 
					
						
						
						
						
							
						
						
							805bfa457d
							
						
					 | 
					
						
						
							
							fix(python-sdk): add JSON parse error reporting clarity
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-17 16:16:41 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gergő Móricz
							
						 
					 | 
					
						
						
						
						
							
						
						
							d5929af010
							
						
					 | 
					
						
						
							
							fix(queue-worker/kickoff): make crawls wait for kickoff to finish (matters on big sitemapped sites)
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-17 16:04:01 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gergő Móricz
							
						 
					 | 
					
						
						
						
						
							
						
						
							23bb172592
							
						
					 | 
					
						
						
							
							fix(crawler): recognize sitemaps in robots.txt
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-17 15:45:52 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Móricz Gergő
							
						 
					 | 
					
						
						
						
						
							
						
						
							faf58dfca7
							
						
					 | 
					
						
						
							
							fix(removeUnwantedElements): post-includeTags excludeTags
						
						
						
						
						
						
						
						Fixes #700 
						
						
							
						
					 | 
					
						2025-01-17 12:41:00 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Móricz Gergő
							
						 
					 | 
					
						
						
						
						
							
						
						
							de08b37480
							
						
					 | 
					
						
						
							
							feat: adjust CI testing
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-17 11:51:46 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Móricz Gergő
							
						 
					 | 
					
						
						
						
						
							
						
						
							4a947e385f
							
						
					 | 
					
						
						
							
							fix(queue-worker): fill out time taken on failure too
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-17 11:28:37 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							8e57fdec2c
							
						
					 | 
					
						
						
							
							Update package.json
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-16 14:02:25 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							80e5acf68c
							
						
					 | 
					
						
						
							
							Nick: error details for extract
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-16 14:02:15 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gergő Móricz
							
						 
					 | 
					
						
						
						
						
							
						
						
							6c94db7ed0
							
						
					 | 
					
						
						
							
							fix(html,markdown): always get absolute links
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-16 16:56:13 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gergő Móricz
							
						 
					 | 
					
						
						
						
						
							
						
						
							e824303d87
							
						
					 | 
					
						
						
							
							feat(html): always pick largest image from srcset
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-16 16:51:33 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gergő Móricz
							
						 
					 | 
					
						
						
						
						
							
						
						
							655753cd27
							
						
					 | 
					
						
						
							
							fix(url): allow domains with ports
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-16 16:30:14 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							de14c0a45d
							
						
					 | 
					
						
						
							
							Update package.json
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-15 18:12:34 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gergő Móricz
							
						 
					 | 
					
						
						
						
						
							
						
						
							c3937996b1
							
						
					 | 
					
						
						
							
							feat(js-sdk): add further options to checkxstatus
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-15 20:16:39 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gergő Móricz
							
						 
					 | 
					
						
						
						
						
							
						
						
							cbe67d89a5
							
						
					 | 
					
						
						
							
							feat(queue-worker): proactive job cancel
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-15 19:02:20 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gergő Móricz
							
						 
					 | 
					
						
						
						
						
							
						
						
							ec039dcb8f
							
						
					 | 
					
						
						
							
							fix(blocklist): unblock
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-15 18:54:26 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gergő Móricz
							
						 
					 | 
					
						
						
						
						
							
						
						
							dde3aebac4
							
						
					 | 
					
						
						
							
							fix(v1/crawl-status): fix stuck on 0 jobs
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-15 18:51:39 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gergő Móricz
							
						 
					 | 
					
						
						
						
						
							
						
						
							ce2f6ff884
							
						
					 | 
					
						
						
							
							fix(queue-worker/billing): fix crawl overbilling
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-15 17:22:52 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							db89e365eb
							
						
					 | 
					
						
						
							
							Update check-fire-engine.ts
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-15 01:16:42 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							957eea4113
							
						
					 | 
					
						
						
							
							Nick: extract without a schema should work as expected
						
						
						
						
						
						
							
 v.1.3.0
						
					 | 
					
						2025-01-14 11:37:00 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							61e6af2b16
							
						
					 | 
					
						
						
							
							Nick: streaming callback experimental
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-14 02:13:42 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							23d3257a57
							
						
					 | 
					
						
						
							
							Merge branch 'nsc/__experimental_streamSteps'
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-14 02:00:58 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							c323c64671
							
						
					 | 
					
						
						
							
							Update extract-redis.ts
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-14 02:00:47 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							2dc87a2e1c
							
						
					 | 
					
						
						
							
							Update extraction-service.ts
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-14 01:59:52 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
							
							
						
						
						
							
						
						
							0496b793c4
							
						
					 | 
					
						
						
							
							Merge pull request #1063 from mendableai/nsc/__experimental_streamSteps
						
						
						
						
						
						
						
						__experimental_streamSteps 
						
						
							
						
					 | 
					
						2025-01-14 01:48:13 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							033e9bbf29
							
						
					 | 
					
						
						
							
							Nick: __experimental_streamSteps
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-14 01:45:50 -03:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nicolas
							
						 
					 | 
					
						
						
						
						
							
						
						
							558a7f4c08
							
						
					 | 
					
						
						
							
							Update package.json
						
						
						
						
						
						
							
						
					 | 
					
						2025-01-14 01:35:29 -03:00 | 
					
					
						
						
							
							
							
						
					 |