"description":"The URL to send the webhook to. This will trigger for batch scrape started (batch_scrape.started), every page scraped (batch_scrape.page) and when the batch scrape is completed (batch_scrape.completed or batch_scrape.failed). The response will be the same as the `/scrape` endpoint."
},
"headers":{
"type":"object",
"description":"Headers to send to the webhook URL.",
"additionalProperties":{
"type":"string"
}
},
"metadata":{
"type":"object",
"description":"Custom metadata that will be included in all webhook payloads for this crawl",
"additionalProperties":true
},
"events":{
"type":"array",
"description":"Type of events that should be sent to the webhook URL. (default: all)",
"description":"If invalid URLs are specified in the urls array, they will be ignored. Instead of them failing the entire request, a batch scrape using the remaining valid URLs will be created, and the invalid URLs will be returned in the invalidURLs field of the response."
"example":"Payment required to access this resource."
}
}
}
}
}
},
"429":{
"description":"Too many requests",
"content":{
"application/json":{
"schema":{
"type":"object",
"properties":{
"error":{
"type":"string",
"example":"Request rate limit exceeded. Please wait and try again later."
}
}
}
}
}
},
"500":{
"description":"Server error",
"content":{
"application/json":{
"schema":{
"type":"object",
"properties":{
"error":{
"type":"string",
"example":"An unexpected error occurred on the server."
}
}
}
}
}
}
}
}
},
"/crawl":{
"post":{
"summary":"Crawl multiple URLs based on options",
"operationId":"crawlUrls",
"tags":["Crawling"],
"security":[
{
"bearerAuth":[]
}
],
"requestBody":{
"required":true,
"content":{
"application/json":{
"schema":{
"type":"object",
"properties":{
"url":{
"type":"string",
"format":"uri",
"description":"The base URL to start crawling from"
},
"excludePaths":{
"type":"array",
"items":{
"type":"string"
},
"description":"URL pathname regex patterns that exclude matching URLs from the crawl. For example, if you set \"excludePaths\": [\"blog/.*\"] for the base URL firecrawl.dev, any results matching that pattern will be excluded, such as https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap."
},
"includePaths":{
"type":"array",
"items":{
"type":"string"
},
"description":"URL pathname regex patterns that include matching URLs in the crawl. Only the paths that match the specified patterns will be included in the response. For example, if you set \"includePaths\": [\"blog/.*\"] for the base URL firecrawl.dev, only results matching that pattern will be included, such as https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap."
},
"maxDepth":{
"type":"integer",
"description":"Maximum depth to crawl relative to the base URL. Basically, the max number of slashes the pathname of a scraped URL may contain.",
"default":10
},
"maxDiscoveryDepth":{
"type":"integer",
"description":"Maximum depth to crawl based on discovery order. The root site and sitemapped pages has a discovery depth of 0. For example, if you set it to 1, and you set ignoreSitemap, you will only crawl the entered URL and all URLs that are linked on that page."
},
"ignoreSitemap":{
"type":"boolean",
"description":"Ignore the website sitemap when crawling",
"default":false
},
"ignoreQueryParameters":{
"type":"boolean",
"description":"Do not re-scrape the same path with different (or none) query parameters",
"default":false
},
"limit":{
"type":"integer",
"description":"Maximum number of pages to crawl. Default limit is 10000.",
"default":10000
},
"allowBackwardLinks":{
"type":"boolean",
"description":"Enables the crawler to navigate from a specific URL to previously linked pages.",
"default":false
},
"allowExternalLinks":{
"type":"boolean",
"description":"Allows the crawler to follow links to external websites.",
"default":false
},
"delay":{
"type":"number",
"description":"Delay in seconds between scrapes. This helps respect website rate limits."
},
"webhook":{
"type":"object",
"description":"A webhook specification object.",
"properties":{
"url":{
"type":"string",
"description":"The URL to send the webhook to. This will trigger for crawl started (crawl.started), every page crawled (crawl.page) and when the crawl is completed (crawl.completed or crawl.failed). The response will be the same as the `/scrape` endpoint."
},
"headers":{
"type":"object",
"description":"Headers to send to the webhook URL.",
"additionalProperties":{
"type":"string"
}
},
"metadata":{
"type":"object",
"description":"Custom metadata that will be included in all webhook payloads for this crawl",
"additionalProperties":true
},
"events":{
"type":"array",
"description":"Type of events that should be sent to the webhook URL. (default: all)",
"items":{
"type":"string",
"enum":["completed","page","failed","started"]
}
}
},
"required":["url"]
},
"scrapeOptions":{
"$ref":"#/components/schemas/ScrapeOptions"
}
},
"required":["url"]
}
}
}
},
"responses":{
"200":{
"description":"Successful response",
"content":{
"application/json":{
"schema":{
"$ref":"#/components/schemas/CrawlResponse"
}
}
}
},
"402":{
"description":"Payment required",
"content":{
"application/json":{
"schema":{
"type":"object",
"properties":{
"error":{
"type":"string",
"example":"Payment required to access this resource."
}
}
}
}
}
},
"429":{
"description":"Too many requests",
"content":{
"application/json":{
"schema":{
"type":"object",
"properties":{
"error":{
"type":"string",
"example":"Request rate limit exceeded. Please wait and try again later."
}
}
}
}
}
},
"500":{
"description":"Server error",
"content":{
"application/json":{
"schema":{
"type":"object",
"properties":{
"error":{
"type":"string",
"example":"An unexpected error occurred on the server."
}
}
}
}
}
}
}
}
},
"/map":{
"post":{
"summary":"Map multiple URLs based on options",
"operationId":"mapUrls",
"tags":["Mapping"],
"security":[
{
"bearerAuth":[]
}
],
"requestBody":{
"required":true,
"content":{
"application/json":{
"schema":{
"type":"object",
"properties":{
"url":{
"type":"string",
"format":"uri",
"description":"The base URL to start crawling from"
},
"search":{
"type":"string",
"description":"Search query to use for mapping. During the Alpha phase, the 'smart' part of the search functionality is limited to 1000 search results. However, if map finds more results, there is no limit applied."
},
"ignoreSitemap":{
"type":"boolean",
"description":"Ignore the website sitemap when crawling.",
"default":true
},
"sitemapOnly":{
"type":"boolean",
"description":"Only return links found in the website sitemap",
"default":false
},
"includeSubdomains":{
"type":"boolean",
"description":"Include subdomains of the website",
"default":false
},
"limit":{
"type":"integer",
"description":"Maximum number of links to return",
"default":5000,
"maximum":30000
},
"timeout":{
"type":"integer",
"description":"Timeout in milliseconds. There is no timeout by default."
}
},
"required":["url"]
}
}
}
},
"responses":{
"200":{
"description":"Successful response",
"content":{
"application/json":{
"schema":{
"$ref":"#/components/schemas/MapResponse"
}
}
}
},
"402":{
"description":"Payment required",
"content":{
"application/json":{
"schema":{
"type":"object",
"properties":{
"error":{
"type":"string",
"example":"Payment required to access this resource."
}
}
}
}
}
},
"429":{
"description":"Too many requests",
"content":{
"application/json":{
"schema":{
"type":"object",
"properties":{
"error":{
"type":"string",
"example":"Request rate limit exceeded. Please wait and try again later."
}
}
}
}
}
},
"500":{
"description":"Server error",
"content":{
"application/json":{
"schema":{
"type":"object",
"properties":{
"error":{
"type":"string",
"example":"An unexpected error occurred on the server."
}
}
}
}
}
}
}
}
},
"/extract":{
"post":{
"summary":"Extract structured data from pages using LLMs",
"operationId":"extractData",
"tags":["Extraction"],
"security":[
{
"bearerAuth":[]
}
],
"requestBody":{
"required":true,
"content":{
"application/json":{
"schema":{
"type":"object",
"properties":{
"urls":{
"type":"array",
"items":{
"type":"string",
"format":"uri",
"description":"The URLs to extract data from. URLs should be in glob format."
}
},
"prompt":{
"type":"string",
"description":"Prompt to guide the extraction process"
},
"schema":{
"type":"object",
"description":"Schema to define the structure of the extracted data",
"properties":{
"property1":{
"type":"string",
"description":"Description of property1"
},
"property2":{
"type":"integer",
"description":"Description of property2"
}
},
"required":["property1","property2"]
},
"enableWebSearch":{
"type":"boolean",
"description":"When true, the extraction will use web search to find additional data",
"default":false
},
"ignoreSitemap":{
"type":"boolean",
"description":"When true, sitemap.xml files will be ignored during website scanning",
"default":false
},
"includeSubdomains":{
"type":"boolean",
"description":"When true, subdomains of the provided URLs will also be scanned",
"default":true
},
"showSources":{
"type":"boolean",
"description":"When true, the sources used to extract the data will be included in the response as `sources` key",
"default":false
},
"scrapeOptions":{
"$ref":"#/components/schemas/ScrapeOptions"
}
},
"required":["urls"]
}
}
}
},
"responses":{
"200":{
"description":"Successful extraction",
"content":{
"application/json":{
"schema":{
"$ref":"#/components/schemas/ExtractResponse"
}
}
}
},
"400":{
"description":"Invalid request",
"content":{
"application/json":{
"schema":{
"type":"object",
"properties":{
"error":{
"type":"string",
"example":"Invalid input data."
}
}
}
}
}
},
"500":{
"description":"Server error",
"content":{
"application/json":{
"schema":{
"type":"object",
"properties":{
"error":{
"type":"string",
"example":"An unexpected error occurred on the server."
"description":"Write text into an input field, text area, or contenteditable element. Note: You must first focus the element using a 'click' action before writing. The text will be typed character by character to simulate keyboard input."
"description":"Location settings for the request. When specified, this will use an appropriate proxy if available and emulate the corresponding language and timezone settings. Defaults to 'US' if not specified.",
"properties":{
"country":{
"type":"string",
"description":"ISO 3166-1 alpha-2 country code (e.g., 'US', 'AU', 'DE', 'JP')",
"pattern":"^[A-Z]{2}$",
"default":"US"
},
"languages":{
"type":"array",
"description":"Preferred languages and locales for the request in order of priority. Defaults to the language of the specified location. See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language",
"description":"Removes all base 64 images from the output, which may be overwhelmingly long. The image's alt text remains in the output, but the URL is replaced with a placeholder."
},
"blockAds":{
"type":"boolean",
"description":"Enables ad-blocking and cookie popup blocking.",
"description":"Specifies the type of proxy to use.\n\n - **basic**: Proxies for scraping sites with none to basic anti-bot solutions. Fast and usually works.\n - **stealth**: Stealth proxies for scraping sites with advanced anti-bot solutions. Slower, but more reliable on certain sites. Starting May 8th, stealth will cost 5 credits per request.\n\nIf you do not specify a proxy, Firecrawl will default to basic."
"description":"Options for change tracking (Beta). Only applicable when 'changeTracking' is included in formats. The 'markdown' format must also be specified when using change tracking.",
"description":"Displayed when using LLM Extraction. Extracted data from the page following the schema defined.",
"nullable":true
},
"warning":{
"type":"string",
"nullable":true,
"description":"Can be displayed when using LLM Extraction. Warning message will let you know any issues with the extraction."
},
"changeTracking":{
"type":"object",
"nullable":true,
"description":"Change tracking information if `changeTracking` is in `formats`. Only present when the `changeTracking` format is requested.",
"properties":{
"previousScrapeAt":{
"type":"string",
"format":"date-time",
"nullable":true,
"description":"The timestamp of the previous scrape that the current page is being compared against. Null if no previous scrape exists."
},
"changeStatus":{
"type":"string",
"enum":["new","same","changed","removed"],
"description":"The result of the comparison between the two page versions. 'new' means this page did not exist before, 'same' means content has not changed, 'changed' means content has changed, 'removed' means the page was removed."
},
"visibility":{
"type":"string",
"enum":["visible","hidden"],
"description":"The visibility of the current page/URL. 'visible' means the URL was discovered through an organic route (links or sitemap), 'hidden' means the URL was discovered through memory from previous crawls."
},
"diff":{
"type":"string",
"nullable":true,
"description":"Git-style diff of changes when using 'git-diff' mode. Only present when the mode is set to 'git-diff'."
},
"json":{
"type":"object",
"nullable":true,
"description":"JSON comparison results when using 'json' mode. Only present when the mode is set to 'json'. This will emit a list of all the keys and their values from the `previous` and `current` scrapes based on the type defined in the `schema`. Example [here](/features/change-tracking)"
}
}
}
}
}
}
},
"CrawlStatusResponseObj":{
"type":"object",
"properties":{
"status":{
"type":"string",
"description":"The current status of the crawl. Can be `scraping`, `completed`, or `failed`."
},
"total":{
"type":"integer",
"description":"The total number of pages that were attempted to be crawled."
},
"completed":{
"type":"integer",
"description":"The number of pages that have been successfully crawled."
},
"creditsUsed":{
"type":"integer",
"description":"The number of credits used for the crawl."
},
"expiresAt":{
"type":"string",
"format":"date-time",
"description":"The date and time when the crawl will expire."
},
"next":{
"type":"string",
"nullable":true,
"description":"The URL to retrieve the next 10MB of data. Returned if the crawl is not completed or if the response is larger than 10MB."
},
"data":{
"type":"array",
"description":"The data of the crawl.",
"items":{
"type":"object",
"properties":{
"markdown":{
"type":"string"
},
"html":{
"type":"string",
"nullable":true,
"description":"HTML version of the content on page if `includeHtml` is true"
},
"rawHtml":{
"type":"string",
"nullable":true,
"description":"Raw HTML content of the page if `includeRawHtml` is true"
},
"links":{
"type":"array",
"items":{
"type":"string"
},
"description":"List of links on the page if `includeLinks` is true"
},
"screenshot":{
"type":"string",
"nullable":true,
"description":"Screenshot of the page if `includeScreenshot` is true"
},
"metadata":{
"type":"object",
"properties":{
"title":{
"type":"string"
},
"description":{
"type":"string"
},
"language":{
"type":"string",
"nullable":true
},
"sourceURL":{
"type":"string",
"format":"uri"
},
"<any other metadata> ":{
"type":"string"
},
"statusCode":{
"type":"integer",
"description":"The status code of the page"
},
"error":{
"type":"string",
"nullable":true,
"description":"The error message of the page"
}
}
}
}
}
}
}
},
"CrawlErrorsResponseObj":{
"type":"object",
"properties":{
"errors":{
"type":"array",
"description":"Errored scrape jobs and error details",
"items":{
"type":"object",
"properties":{
"id":{
"type":"string"
},
"timestamp":{
"type":"string",
"nullable":true,
"description":"ISO timestamp of failure"
},
"url":{
"type":"string",
"description":"Scraped URL"
},
"error":{
"type":"string",
"description":"Error message"
}
}
}
},
"robotsBlocked":{
"type":"array",
"description":"List of URLs that were attempted in scraping but were blocked by robots.txt",
"items":{"type":"string"}
}
}
},
"BatchScrapeStatusResponseObj":{
"type":"object",
"properties":{
"status":{
"type":"string",
"description":"The current status of the batch scrape. Can be `scraping`, `completed`, or `failed`."
},
"total":{
"type":"integer",
"description":"The total number of pages that were attempted to be scraped."
},
"completed":{
"type":"integer",
"description":"The number of pages that have been successfully scraped."
},
"creditsUsed":{
"type":"integer",
"description":"The number of credits used for the batch scrape."
},
"expiresAt":{
"type":"string",
"format":"date-time",
"description":"The date and time when the batch scrape will expire."
},
"next":{
"type":"string",
"nullable":true,
"description":"The URL to retrieve the next 10MB of data. Returned if the batch scrape is not completed or if the response is larger than 10MB."
},
"data":{
"type":"array",
"description":"The data of the batch scrape.",
"items":{
"type":"object",
"properties":{
"markdown":{
"type":"string"
},
"html":{
"type":"string",
"nullable":true,
"description":"HTML version of the content on page if `includeHtml` is true"
},
"rawHtml":{
"type":"string",
"nullable":true,
"description":"Raw HTML content of the page if `includeRawHtml` is true"
},
"links":{
"type":"array",
"items":{
"type":"string"
},
"description":"List of links on the page if `includeLinks` is true"
},
"screenshot":{
"type":"string",
"nullable":true,
"description":"Screenshot of the page if `includeScreenshot` is true"
"description":"If ignoreInvalidURLs is true, this is an array containing the invalid URLs that were specified in the request. If there were no invalid URLs, this will be an empty array. If ignoreInvalidURLs is false, this field will be undefined."