use std::collections::HashMap;

use serde::{Deserialize, Serialize};

use crate::{document::Document, scrape::{ScrapeFormats, ScrapeOptions}, FirecrawlApp, FirecrawlError, API_VERSION};

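/// Output formats that can be requested for each page scraped during a crawl.
///
/// Each variant converts into the corresponding `ScrapeFormats` variant via the
/// `From` impl below; a minimal sketch (the `firecrawl` crate name and public
/// module paths are assumptions, adjust to your imports):
///
/// ```no_run
/// use firecrawl::crawl::CrawlScrapeFormats;
/// use firecrawl::scrape::ScrapeFormats;
///
/// let format: ScrapeFormats = CrawlScrapeFormats::Markdown.into();
/// ```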
#[derive(Deserialize, Serialize, Clone, Copy, Debug)]
pub enum CrawlScrapeFormats {
    /// Will result in a copy of the Markdown content of the page.
    #[serde(rename = "markdown")]
    Markdown,

    /// Will result in a copy of the filtered, content-only HTML of the page.
    #[serde(rename = "html")]
    HTML,

    /// Will result in a copy of the raw HTML of the page.
    #[serde(rename = "rawHtml")]
    RawHTML,

    /// Will result in a Vec of URLs found on the page.
    #[serde(rename = "links")]
    Links,

    /// Will result in a URL to a screenshot of the page.
    ///
    /// Cannot be used in conjunction with `CrawlScrapeFormats::ScreenshotFullPage`.
    #[serde(rename = "screenshot")]
    Screenshot,

    /// Will result in a URL to a full-page screenshot of the page.
    ///
    /// Cannot be used in conjunction with `CrawlScrapeFormats::Screenshot`.
    #[serde(rename = "screenshot@fullPage")]
    ScreenshotFullPage,
}
impl From<CrawlScrapeFormats> for ScrapeFormats {
    fn from(value: CrawlScrapeFormats) -> Self {
        match value {
            CrawlScrapeFormats::Markdown => Self::Markdown,
            CrawlScrapeFormats::HTML => Self::HTML,
            CrawlScrapeFormats::RawHTML => Self::RawHTML,
            CrawlScrapeFormats::Links => Self::Links,
            CrawlScrapeFormats::Screenshot => Self::Screenshot,
            CrawlScrapeFormats::ScreenshotFullPage => Self::ScreenshotFullPage,
        }
    }
}

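/// Options for the scraping performed on every page of a crawl.
///
/// A minimal construction sketch; the field values below are illustrative
/// assumptions, not recommended settings (the `firecrawl` crate name and
/// module path are also assumptions):
///
/// ```no_run
/// use firecrawl::crawl::{CrawlScrapeFormats, CrawlScrapeOptions};
///
/// let scrape_options = CrawlScrapeOptions {
///     formats: Some(vec![CrawlScrapeFormats::Markdown, CrawlScrapeFormats::Links]),
///     only_main_content: Some(true),
///     ..Default::default()
/// };
/// ```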
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct CrawlScrapeOptions {
    /// Formats to extract from the page. (default: `[Markdown]`)
    pub formats: Option<Vec<CrawlScrapeFormats>>,

    /// Only extract the main content of the page, excluding navigation and other miscellaneous content. (default: `true`)
    pub only_main_content: Option<bool>,

    /// HTML tags to exclusively include.
    ///
    /// For example, if you pass `div`, you will only get content from `<div>`s and their children.
    pub include_tags: Option<Vec<String>>,

    /// HTML tags to exclude.
    ///
    /// For example, if you pass `img`, you will never get image URLs in your results.
    pub exclude_tags: Option<Vec<String>>,

    /// Additional HTTP headers to use when loading the page.
    pub headers: Option<HashMap<String, String>>,

    /// Amount of time to wait after loading the page, and before grabbing the content, in milliseconds. (default: `0`)
    pub wait_for: Option<u32>,

    /// Timeout before returning an error, in milliseconds. (default: `60000`)
    pub timeout: Option<u32>,
}
impl From<CrawlScrapeOptions> for ScrapeOptions {
    fn from(value: CrawlScrapeOptions) -> Self {
        ScrapeOptions {
            formats: value.formats.map(|formats| formats.into_iter().map(|x| x.into()).collect()),
            only_main_content: value.only_main_content,
            include_tags: value.include_tags,
            exclude_tags: value.exclude_tags,
            headers: value.headers,
            wait_for: value.wait_for,
            timeout: value.timeout,
            ..Default::default()
        }
    }
}

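/// Options for a crawl job, passed to `FirecrawlApp::crawl_url` and
/// `FirecrawlApp::crawl_url_async`.
///
/// A hedged configuration sketch; every value below is an illustrative
/// assumption rather than a recommended default (crate name and module path
/// are assumptions as well):
///
/// ```no_run
/// use firecrawl::crawl::{CrawlOptions, CrawlScrapeOptions};
///
/// let options = CrawlOptions {
///     include_paths: Some(vec!["blog".to_string()]),
///     limit: Some(25),
///     scrape_options: Some(CrawlScrapeOptions::default()),
///     ..Default::default()
/// };
/// ```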
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct CrawlOptions {
    /// Options to pass through to the scraper.
    pub scrape_options: Option<CrawlScrapeOptions>,

    /// URL RegEx patterns to (exclusively) include.
    ///
    /// For example, if you specified `"blog"`, only pages that have `blog` somewhere in the URL would be crawled.
    pub include_paths: Option<Vec<String>>,

    /// URL RegEx patterns to exclude.
    ///
    /// For example, if you specified `"blog"`, pages that have `blog` somewhere in the URL would not be crawled.
    pub exclude_paths: Option<Vec<String>>,

    /// Maximum URL depth to crawl, relative to the base URL. (default: `2`)
    pub max_depth: Option<u32>,

    /// Tells the crawler to ignore the sitemap when crawling. (default: `true`)
    pub ignore_sitemap: Option<bool>,

    /// Maximum number of pages to crawl. (default: `10`)
    pub limit: Option<u32>,

    /// Allows the crawler to navigate links that are backwards in the URL hierarchy. (default: `false`)
    pub allow_backward_links: Option<bool>,

    /// Allows the crawler to follow links to external URLs. (default: `false`)
    pub allow_external_links: Option<bool>,

    /// URL to send Webhook crawl events to.
    pub webhook: Option<String>,

    /// Idempotency key to send to the crawl endpoint.
    #[serde(skip)]
    pub idempotency_key: Option<String>,

    /// When using `FirecrawlApp::crawl_url`, this is how often the status of the job should be checked, in milliseconds. (default: `2000`)
    #[serde(skip)]
    pub poll_interval: Option<u64>,
}

#[derive(Deserialize, Serialize, Debug, Default)]
#[serde(rename_all = "camelCase")]
struct CrawlRequestBody {
    url: String,

    #[serde(flatten)]
    options: CrawlOptions,
}

#[derive(Deserialize, Serialize, Debug, Default)]
#[serde(rename_all = "camelCase")]
struct CrawlResponse {
    /// This will always be `true` due to `FirecrawlApp::handle_response`.
    /// No need to expose.
    success: bool,

    /// The resulting document.
    data: Document,
}

#[derive(Deserialize, Serialize, Debug, PartialEq, Eq, Clone, Copy)]
#[serde(rename_all = "camelCase")]
pub enum CrawlStatusTypes {
    /// The crawl job is in progress.
    Scraping,

    /// The crawl job has been completed successfully.
    Completed,

    /// The crawl job has failed.
    Failed,

    /// The crawl job has been cancelled.
    Cancelled,
}

#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct CrawlStatus {
    /// The status of the crawl.
    pub status: CrawlStatusTypes,

    /// Number of pages that will be scraped in total. This number may grow as the crawler discovers new pages.
    pub total: u32,

    /// Number of pages that have been successfully scraped.
    pub completed: u32,

    /// Amount of credits used by the crawl job.
    pub credits_used: u32,

    /// Expiry time of crawl data. After this date, the crawl data will be unavailable from the API.
    pub expires_at: String, // TODO: parse into date

    /// URL to call to get the next batch of documents.
    /// Unless you are sidestepping the SDK, you do not need to deal with this.
    pub next: Option<String>,

    /// List of documents returned by the crawl.
    pub data: Vec<Document>,
}

#[derive(Deserialize, Serialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct CrawlAsyncResponse {
    success: bool,

    /// Crawl ID
    pub id: String,

    /// URL to get the status of the crawl job
    pub url: String,
}

impl FirecrawlApp {
    /// Initiates a crawl job for a URL using the Firecrawl API.
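    ///
    /// Returns as soon as the job is created. A hedged sketch of pairing it
    /// with `check_crawl_status` (the `app` value is assumed to be constructed
    /// elsewhere, and the `firecrawl` crate name is an assumption):
    ///
    /// ```no_run
    /// # async fn example(app: &firecrawl::FirecrawlApp) -> Result<(), firecrawl::FirecrawlError> {
    /// let job = app.crawl_url_async("https://example.com", None).await?;
    /// let status = app.check_crawl_status(&job.id).await?;
    /// println!("{} of {} pages scraped", status.completed, status.total);
    /// # Ok(())
    /// # }
    /// ```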
    pub async fn crawl_url_async(
        &self,
        url: impl AsRef<str>,
        options: Option<CrawlOptions>,
    ) -> Result<CrawlAsyncResponse, FirecrawlError> {
        let body = CrawlRequestBody {
            url: url.as_ref().to_string(),
            options: options.unwrap_or_default(),
        };

        let headers = self.prepare_headers(body.options.idempotency_key.as_ref());

        let response = self
            .client
            .post(&format!("{}{}/crawl", self.api_url, API_VERSION))
            .headers(headers.clone())
            .json(&body)
            .send()
            .await
            .map_err(|e| FirecrawlError::HttpError(format!("Crawling {:?}", url.as_ref()), e))?;

        self.handle_response::<CrawlAsyncResponse>(response, "start crawl job").await
    }

    /// Performs a crawl job for a URL using the Firecrawl API, waiting for the end result. This may take a long time depending on the size of the target site and your options (namely `CrawlOptions.limit`).
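    ///
    /// A hedged usage sketch (the `app` value is assumed to be constructed
    /// elsewhere, and the `firecrawl` crate name is an assumption):
    ///
    /// ```no_run
    /// # async fn example(app: &firecrawl::FirecrawlApp) -> Result<(), firecrawl::FirecrawlError> {
    /// let status = app.crawl_url("https://example.com", None).await?;
    /// println!("crawled {} documents", status.data.len());
    /// # Ok(())
    /// # }
    /// ```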
    pub async fn crawl_url(
        &self,
        url: impl AsRef<str>,
        options: impl Into<Option<CrawlOptions>>,
    ) -> Result<CrawlStatus, FirecrawlError> {
        let options = options.into();
        let poll_interval = options.as_ref().and_then(|x| x.poll_interval).unwrap_or(2000);
        let res = self.crawl_url_async(url, options).await?;

        self.monitor_job_status(&res.id, poll_interval).await
    }

    /// Fetches the next page of crawl results from a pagination URL returned by the API.
    async fn check_crawl_status_next(&self, next: impl AsRef<str>) -> Result<CrawlStatus, FirecrawlError> {
        let response = self
            .client
            .get(next.as_ref())
            .headers(self.prepare_headers(None))
            .send()
            .await
            .map_err(|e| FirecrawlError::HttpError(format!("Paginating crawl using URL {:?}", next.as_ref()), e))?;

        self.handle_response(response, format!("Paginating crawl using URL {:?}", next.as_ref())).await
    }

    /// Checks for the status of a crawl, based on the crawl's ID. To be used in conjunction with `FirecrawlApp::crawl_url_async`.
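    ///
    /// A hedged polling sketch (the `app` and `id` values are assumptions, as
    /// is the `firecrawl` crate name):
    ///
    /// ```no_run
    /// # async fn example(app: &firecrawl::FirecrawlApp, id: &str) -> Result<(), firecrawl::FirecrawlError> {
    /// let status = app.check_crawl_status(id).await?;
    /// println!("{} of {} pages scraped", status.completed, status.total);
    /// # Ok(())
    /// # }
    /// ```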
    pub async fn check_crawl_status(&self, id: impl AsRef<str>) -> Result<CrawlStatus, FirecrawlError> {
        let response = self
            .client
            .get(&format!(
                "{}{}/crawl/{}",
                self.api_url, API_VERSION, id.as_ref()
            ))
            .headers(self.prepare_headers(None))
            .send()
            .await
            .map_err(|e| FirecrawlError::HttpError(format!("Checking status of crawl {}", id.as_ref()), e))?;

        let mut status: CrawlStatus = self.handle_response(response, format!("Checking status of crawl {}", id.as_ref())).await?;

        // Completed crawl data is paginated; follow `next` links until all documents are collected.
        if status.status == CrawlStatusTypes::Completed {
            while let Some(next) = status.next {
                let new_status = self.check_crawl_status_next(next).await?;
                status.data.extend_from_slice(&new_status.data);
                status.next = new_status.next;
            }
        }

        Ok(status)
    }

    /// Polls the status of a crawl job until it completes, fails, or is cancelled.
    async fn monitor_job_status(
        &self,
        id: &str,
        poll_interval: u64,
    ) -> Result<CrawlStatus, FirecrawlError> {
        loop {
            let status_data = self.check_crawl_status(id).await?;

            match status_data.status {
                CrawlStatusTypes::Completed => {
                    break Ok(status_data);
                }
                CrawlStatusTypes::Scraping => {
                    tokio::time::sleep(tokio::time::Duration::from_millis(poll_interval)).await;
                }
                CrawlStatusTypes::Failed => {
                    break Err(FirecrawlError::CrawlJobFailed(
                        "Crawl job failed.".to_string(),
                        status_data,
                    ));
                }
                CrawlStatusTypes::Cancelled => {
                    break Err(FirecrawlError::CrawlJobFailed(
                        "Crawl job was cancelled.".to_string(),
                        status_data,
                    ));
                }
            }
        }
    }
}