"example":"Payment required to access this resource."
}
}
}
}
}
},
"429":{
"description":"Too many requests",
"content":{
"application/json":{
"schema":{
"type":"object",
"properties":{
"error":{
"type":"string",
"example":"Request rate limit exceeded. Please wait and try again later."
}
}
}
}
}
},
"500":{
"description":"Server error",
"content":{
"application/json":{
"schema":{
"type":"object",
"properties":{
"error":{
"type":"string",
"example":"An unexpected error occurred on the server."
}
}
}
}
}
}
}
}
},
"/batch/scrape":{
"post":{
"summary":"Scrape multiple URLs and optionally extract information using an LLM",
"operationId":"scrapeAndExtractFromUrls",
"tags":["Scraping"],
"security":[
{
"bearerAuth":[]
}
],
"requestBody":{
"required":true,
"content":{
"application/json":{
"schema":{
"allOf":[
{
"type":"object",
"properties":{
"urls":{
"type":"array",
"items":{
"type":"string",
"format":"uri",
"description":"The URL to scrape"
}
},
"webhook":{
"type":"object",
"description":"A webhook specification object.",
"properties":{
"url":{
"type":"string",
"description":"The URL to send the webhook to. This will trigger for batch scrape started (batch_scrape.started), every page scraped (batch_scrape.page) and when the batch scrape is completed (batch_scrape.completed or batch_scrape.failed). The response will be the same as the `/scrape` endpoint."
},
"headers":{
"type":"object",
"description":"Headers to send to the webhook URL.",
"additionalProperties":{
"type":"string"
}
},
"metadata":{
"type":"object",
"description":"Custom metadata that will be included in all webhook payloads for this crawl",
"additionalProperties":true
},
"events":{
"type":"array",
"description":"Type of events that should be sent to the webhook URL. (default: all)",
"items":{
"type":"string",
"enum":["completed","page","failed","started"]
}
}
},
"required":["url"]
},
"ignoreInvalidURLs":{
"type":"boolean",
"default":false,
"description":"If invalid URLs are specified in the urls array, they will be ignored. Instead of them failing the entire request, a batch scrape using the remaining valid URLs will be created, and the invalid URLs will be returned in the invalidURLs field of the response."
"description":"URL pathname regex patterns that exclude matching URLs from the crawl. For example, if you set \"excludePaths\": [\"blog/.*\"] for the base URL firecrawl.dev, any results matching that pattern will be excluded, such as https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap."
"description":"URL pathname regex patterns that include matching URLs in the crawl. Only the paths that match the specified patterns will be included in the response. For example, if you set \"includePaths\": [\"blog/.*\"] for the base URL firecrawl.dev, only results matching that pattern will be included, such as https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap."
"description":"Maximum depth to crawl based on discovery order. The root site and sitemapped pages has a discovery depth of 0. For example, if you set it to 1, and you set ignoreSitemap, you will only crawl the entered URL and all URLs that are linked on that page."
"description":"Allows the crawler to follow internal links to sibling or parent URLs, not just child paths.\n\nfalse: Only crawls deeper (child) URLs.\n→ e.g. /features/feature-1 → /features/feature-1/tips ✅\n→ Won't follow /pricing or / ❌\n\ntrue: Crawls any internal links, including siblings and parents.\n→ e.g. /features/feature-1 → /pricing, /, etc. ✅\n\nUse true for broader internal coverage beyond nested paths.",
"description":"The URL to send the webhook to. This will trigger for crawl started (crawl.started), every page crawled (crawl.page) and when the crawl is completed (crawl.completed or crawl.failed). The response will be the same as the `/scrape` endpoint."
},
"headers":{
"type":"object",
"description":"Headers to send to the webhook URL.",
"additionalProperties":{
"type":"string"
}
},
"metadata":{
"type":"object",
"description":"Custom metadata that will be included in all webhook payloads for this crawl",
"description":"Search query to use for mapping. During the Alpha phase, the 'smart' part of the search functionality is limited to 1000 search results. However, if map finds more results, there is no limit applied."
"summary":"Extract structured data from pages using LLMs",
"operationId":"extractData",
"tags":["Extraction"],
"security":[
{
"bearerAuth":[]
}
],
"requestBody":{
"required":true,
"content":{
"application/json":{
"schema":{
"type":"object",
"properties":{
"urls":{
"type":"array",
"items":{
"type":"string",
"format":"uri",
"description":"The URLs to extract data from. URLs should be in glob format."
}
},
"prompt":{
"type":"string",
"description":"Prompt to guide the extraction process"
},
"schema":{
"type":"object",
"description":"Schema to define the structure of the extracted data. Must conform to [JSON Schema](https://json-schema.org/)."
},
"enableWebSearch":{
"type":"boolean",
"description":"When true, the extraction will use web search to find additional data",
"default":false
},
"ignoreSitemap":{
"type":"boolean",
"description":"When true, sitemap.xml files will be ignored during website scanning",
"default":false
},
"includeSubdomains":{
"type":"boolean",
"description":"When true, subdomains of the provided URLs will also be scanned",
"default":true
},
"showSources":{
"type":"boolean",
"description":"When true, the sources used to extract the data will be included in the response as `sources` key",
"default":false
},
"scrapeOptions":{
"$ref":"#/components/schemas/ScrapeOptions"
},
"ignoreInvalidURLs":{
"type":"boolean",
"default":false,
"description":"If invalid URLs are specified in the urls array, they will be ignored. Instead of them failing the entire request, an extract using the remaining valid URLs will be performed, and the invalid URLs will be returned in the invalidURLs field of the response."
}
},
"required":["urls"]
}
}
}
},
"responses":{
"200":{
"description":"Successful extraction",
"content":{
"application/json":{
"schema":{
"$ref":"#/components/schemas/ExtractResponse"
}
}
}
},
"400":{
"description":"Invalid request",
"content":{
"application/json":{
"schema":{
"type":"object",
"properties":{
"error":{
"type":"string",
"example":"Invalid input data."
}
}
}
}
}
},
"500":{
"description":"Server error",
"content":{
"application/json":{
"schema":{
"type":"object",
"properties":{
"error":{
"type":"string",
"example":"An unexpected error occurred on the server."
"description":"Excludes URLs from the search results that are invalid for other Firecrawl endpoints. This helps reduce errors if you are piping data from search into other Firecrawl API endpoints.",
"description":"Formats to include in the output.",
"default":["markdown"]
},
"onlyMainContent":{
"type":"boolean",
"description":"Only return the main content of the page excluding headers, navs, footers, etc.",
"default":true
},
"includeTags":{
"type":"array",
"items":{
"type":"string"
},
"description":"Tags to include in the output."
},
"excludeTags":{
"type":"array",
"items":{
"type":"string"
},
"description":"Tags to exclude from the output."
},
"maxAge":{
"type":"integer",
"description":"Returns a cached version of the page if it is younger than this age in milliseconds. If a cached version of the page is older than this value, the page will be scraped. If you do not need extremely fresh data, enabling this can speed up your scrapes by 500%. Defaults to 0, which disables caching.",
"default":0
},
"headers":{
"type":"object",
"description":"Headers to send with the request. Can be used to send cookies, user-agent, etc."
},
"waitFor":{
"type":"integer",
"description":"Specify a delay in milliseconds before fetching the content, allowing the page sufficient time to load.",
"default":0
},
"mobile":{
"type":"boolean",
"description":"Set to true if you want to emulate scraping from a mobile device. Useful for testing responsive pages and taking mobile screenshots.",
"default":false
},
"skipTlsVerification":{
"type":"boolean",
"description":"Skip TLS certificate verification when making requests",
"default":false
},
"timeout":{
"type":"integer",
"description":"Timeout in milliseconds for the request",
"default":30000
},
"parsePDF":{
"type":"boolean",
"description":"Controls how PDF files are processed during scraping. When true, the PDF content is extracted and converted to markdown format, with billing based on the number of pages (1 credit per page). When false, the PDF file is returned in base64 encoding with a flat rate of 1 credit total.",
"default":true
},
"jsonOptions":{
"type":"object",
"description":"JSON options object",
"properties":{
"schema":{
"type":"object",
"description":"The schema to use for the extraction (Optional). Must conform to [JSON Schema](https://json-schema.org/)."
},
"systemPrompt":{
"type":"string",
"description":"The system prompt to use for the extraction (Optional)"
},
"prompt":{
"type":"string",
"description":"The prompt to use for the extraction without a schema (Optional)"
}
}
},
"actions":{
"type":"array",
"description":"Actions to perform on the page before grabbing the content",
"items":{
"oneOf":[
{
"type":"object",
"title":"Wait",
"properties":{
"type":{
"type":"string",
"enum":["wait"],
"description":"Wait for a specified amount of milliseconds"
},
"milliseconds":{
"type":"integer",
"minimum":1,
"description":"Number of milliseconds to wait"
},
"selector":{
"type":"string",
"description":"Query selector to find the element by",
"example":"#my-element"
}
},
"required":["type"]
},
{
"type":"object",
"title":"Screenshot",
"properties":{
"type":{
"type":"string",
"enum":["screenshot"],
"description":"Take a screenshot. The links will be in the response's `actions.screenshots` array."
},
"fullPage":{
"type":"boolean",
"description":"Should the screenshot be full-page or viewport sized?",
"default":false
}
},
"required":["type"]
},
{
"type":"object",
"title":"Click",
"properties":{
"type":{
"type":"string",
"enum":["click"],
"description":"Click on an element"
},
"selector":{
"type":"string",
"description":"Query selector to find the element by",
"example":"#load-more-button"
},
"all":{
"type":"boolean",
"description":"Clicks all elements matched by the selector, not just the first one. Does not throw an error if no elements match the selector.",
"default":false
}
},
"required":["type","selector"]
},
{
"type":"object",
"title":"Write text",
"properties":{
"type":{
"type":"string",
"enum":["write"],
"description":"Write text into an input field, text area, or contenteditable element. Note: You must first focus the element using a 'click' action before writing. The text will be typed character by character to simulate keyboard input."
},
"text":{
"type":"string",
"description":"Text to type",
"example":"Hello, world!"
}
},
"required":["type","text"]
},
{
"type":"object",
"title":"Press a key",
"description":"Press a key on the page. See https://asawicki.info/nosense/doc/devices/keyboard/key_codes.html for key codes.",
"properties":{
"type":{
"type":"string",
"enum":["press"],
"description":"Press a key on the page"
},
"key":{
"type":"string",
"description":"Key to press",
"example":"Enter"
}
},
"required":["type","key"]
},
{
"type":"object",
"title":"Scroll",
"properties":{
"type":{
"type":"string",
"enum":["scroll"],
"description":"Scroll the page or a specific element"
},
"direction":{
"type":"string",
"enum":["up","down"],
"description":"Direction to scroll",
"default":"down"
},
"selector":{
"type":"string",
"description":"Query selector for the element to scroll",
"example":"#my-element"
}
},
"required":["type"]
},
{
"type":"object",
"title":"Scrape",
"properties":{
"type":{
"type":"string",
"enum":["scrape"],
"description":"Scrape the current page content, returns the url and the html."
}
},
"required":["type"]
},
{
"type":"object",
"title":"Execute JavaScript",
"properties":{
"type":{
"type":"string",
"enum":["executeJavascript"],
"description":"Execute JavaScript code on the page"
"description":"Location settings for the request. When specified, this will use an appropriate proxy if available and emulate the corresponding language and timezone settings. Defaults to 'US' if not specified.",
"properties":{
"country":{
"type":"string",
"description":"ISO 3166-1 alpha-2 country code (e.g., 'US', 'AU', 'DE', 'JP')",
"pattern":"^[A-Z]{2}$",
"default":"US"
},
"languages":{
"type":"array",
"description":"Preferred languages and locales for the request in order of priority. Defaults to the language of the specified location. See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language",
"items":{
"type":"string",
"example":"en-US"
}
}
}
},
"removeBase64Images":{
"type":"boolean",
"description":"Removes all base 64 images from the output, which may be overwhelmingly long. The image's alt text remains in the output, but the URL is replaced with a placeholder."
},
"blockAds":{
"type":"boolean",
"description":"Enables ad-blocking and cookie popup blocking.",
"default":true
},
"proxy":{
"type":"string",
"enum":["basic","stealth","auto"],
"description":"Specifies the type of proxy to use.\n\n - **basic**: Proxies for scraping sites with none to basic anti-bot solutions. Fast and usually works.\n - **stealth**: Stealth proxies for scraping sites with advanced anti-bot solutions. Slower, but more reliable on certain sites. Costs up to 5 credits per request.\n - **auto**: Firecrawl will automatically retry scraping with stealth proxies if the basic proxy fails. If the retry with stealth is successful, 5 credits will be billed for the scrape. If the first attempt with basic is successful, only the regular cost will be billed.\n\nIf you do not specify a proxy, Firecrawl will default to basic."
},
"changeTrackingOptions":{
"type":"object",
"description":"Options for change tracking (Beta). Only applicable when 'changeTracking' is included in formats. The 'markdown' format must also be specified when using change tracking.",
"properties":{
"modes":{
"type":"array",
"items":{
"type":"string",
"enum":["git-diff","json"]
},
"description":"The mode to use for change tracking. 'git-diff' provides a detailed diff, and 'json' compares extracted JSON data."
},
"schema":{
"type":"object",
"description":"Schema for JSON extraction when using 'json' mode. Defines the structure of data to extract and compare. Must conform to [JSON Schema](https://json-schema.org/)."
},
"prompt":{
"type":"string",
"description":"Prompt to use for change tracking when using 'json' mode. If not provided, the default prompt will be used."
},
"tag":{
"type":"string",
"nullable":true,
"default":null,
"description":"Tag to use for change tracking. Tags can separate change tracking history into separate \"branches\", where change tracking with a specific tagwill only compare to scrapes made in the same tag. If not provided, the default tag (null) will be used."
}
}
},
"storeInCache":{
"type":"boolean",
"description":"If true, the page will be stored in the Firecrawl index and cache. Setting this to false is useful if your scraping activity may have data protection concerns. Using some parameters associated with sensitive scraping (actions, headers) will force this parameter to be false.",
"description":"Change tracking information if `changeTracking` is in `formats`. Only present when the `changeTracking` format is requested.",
"properties":{
"previousScrapeAt":{
"type":"string",
"format":"date-time",
"nullable":true,
"description":"The timestamp of the previous scrape that the current page is being compared against. Null if no previous scrape exists."
},
"changeStatus":{
"type":"string",
"enum":["new","same","changed","removed"],
"description":"The result of the comparison between the two page versions. 'new' means this page did not exist before, 'same' means content has not changed, 'changed' means content has changed, 'removed' means the page was removed."
},
"visibility":{
"type":"string",
"enum":["visible","hidden"],
"description":"The visibility of the current page/URL. 'visible' means the URL was discovered through an organic route (links or sitemap), 'hidden' means the URL was discovered through memory from previous crawls."
},
"diff":{
"type":"string",
"nullable":true,
"description":"Git-style diff of changes when using 'git-diff' mode. Only present when the mode is set to 'git-diff'."
},
"json":{
"type":"object",
"nullable":true,
"description":"JSON comparison results when using 'json' mode. Only present when the mode is set to 'json'. This will emit a list of all the keys and their values from the `previous` and `current` scrapes based on the type defined in the `schema`. Example [here](/features/change-tracking)"
"description":"If ignoreInvalidURLs is true, this is an array containing the invalid URLs that were specified in the request. If there were no invalid URLs, this will be an empty array. If ignoreInvalidURLs is false, this field will be undefined."
"description":"If ignoreInvalidURLs is true, this is an array containing the invalid URLs that were specified in the request. If there were no invalid URLs, this will be an empty array. If ignoreInvalidURLs is false, this field will be undefined."