use serde::{Deserialize, Serialize}; use serde_json::Value; #[serde_with::skip_serializing_none] #[derive(Deserialize, Serialize, Debug, Default, Clone)] #[serde(rename_all = "camelCase")] pub struct DocumentMetadata { // firecrawl specific #[serde(rename = "sourceURL")] pub source_url: String, pub status_code: u16, pub error: Option, // basic meta tags pub title: Option, pub description: Option, pub language: Option, pub keywords: Option, pub robots: Option, // og: namespace pub og_title: Option, pub og_description: Option, pub og_url: Option, pub og_image: Option, pub og_audio: Option, pub og_determiner: Option, pub og_locale: Option, pub og_locale_alternate: Option>, pub og_site_name: Option, pub og_video: Option, // article: namespace pub article_section: Option, pub article_tag: Option, pub published_time: Option, pub modified_time: Option, // dc./dcterms. namespace pub dcterms_keywords: Option, pub dc_description: Option, pub dc_subject: Option, pub dcterms_subject: Option, pub dcterms_audience: Option, pub dc_type: Option, pub dcterms_type: Option, pub dc_date: Option, pub dc_date_created: Option, pub dcterms_created: Option, } #[serde_with::skip_serializing_none] #[derive(Deserialize, Serialize, Debug, Default, Clone)] #[serde(rename_all = "camelCase")] pub struct Document { /// A list of the links on the page, present if `ScrapeFormats::Markdown` is present in `ScrapeOptions.formats`. (default) pub markdown: Option, /// The HTML of the page, present if `ScrapeFormats::HTML` is present in `ScrapeOptions.formats`. /// /// This contains HTML that has non-content tags removed. If you need the original HTML, use `ScrapeFormats::RawHTML`. pub html: Option, /// The raw HTML of the page, present if `ScrapeFormats::RawHTML` is present in `ScrapeOptions.formats`. /// /// This contains the original, untouched HTML on the page. If you only need human-readable content, use `ScrapeFormats::HTML`. pub raw_html: Option, /// The URL to the screenshot of the page, present if `ScrapeFormats::Screenshot` or `ScrapeFormats::ScreenshotFullPage` is present in `ScrapeOptions.formats`. pub screenshot: Option, /// A list of the links on the page, present if `ScrapeFormats::Links` is present in `ScrapeOptions.formats`. pub links: Option>, /// The extracted data from the page, present if `ScrapeFormats::Extract` is present in `ScrapeOptions.formats`. /// If `ScrapeOptions.extract.schema` is `Some`, this `Value` is guaranteed to match the provided schema. pub extract: Option, /// The metadata from the page. pub metadata: DocumentMetadata, /// Can be present if `ScrapeFormats::Extract` is present in `ScrapeOptions.formats`. /// The warning message will contain any errors encountered during the extraction. pub warning: Option, }