firecrawl/apps/api/src/lib/entities.ts

103 lines
2.3 KiB
TypeScript
Raw Normal View History

2024-04-15 17:01:47 -04:00
/**
 * Progress report made of two numeric counters (`current`, `total`), a
 * free-form `status` string, and optional details about the current document.
 *
 * NOTE(review): the unit of `current`/`total` and the set of valid `status`
 * values are not defined in this file — confirm against the producers.
 */
export interface Progress {
  current: number;
  total: number;
  status: string;
  /** Optional open-ended metadata bag; `sourceURL` is its only named key. */
  metadata?: {
    sourceURL?: string;
    [key: string]: any;
  };
  currentDocumentUrl?: string;
  currentDocument?: Document;
}
2024-04-17 18:24:46 -07:00
/**
 * Optional boolean flags applied per page. Every field may be omitted.
 *
 * NOTE(review): the effective defaults when a flag is omitted are decided by
 * the consumers of this type, not here — confirm before relying on one.
 */
export type PageOptions = {
  onlyMainContent?: boolean;
  fallback?: boolean;
  fetchPageContent?: boolean;
};
2024-04-23 15:44:11 -07:00
/**
 * Extraction settings. `mode` is required and must be either `"markdown"` or
 * `"llm-extraction"`; the prompt and schema are optional.
 */
export type ExtractorOptions = {
  mode: "markdown" | "llm-extraction";
  extractionPrompt?: string;
  /** Open-ended key/value object; its shape is not constrained by this type. */
  extractionSchema?: Record<string, any>;
};
2024-04-23 15:44:11 -07:00
/**
 * Optional search parameters: a numeric `limit` plus several string-valued
 * settings. Every field may be omitted.
 *
 * NOTE(review): the accepted formats of `tbs`, `filter`, `lang`, `country`
 * and `location` are not defined in this file — confirm against the search
 * implementation that consumes them.
 */
export type SearchOptions = {
  limit?: number;
  tbs?: string;
  filter?: string;
  lang?: string;
  country?: string;
  location?: string;
};
2024-04-17 18:24:46 -07:00
/**
 * Configuration for a scrape job: the target `urls`, the `mode` to run in
 * (`"single_urls"`, `"sitemap"` or `"crawl"`), and optional crawler, page,
 * extractor and concurrency settings.
 */
export type WebScraperOptions = {
  urls: string[];
  mode: "single_urls" | "sitemap" | "crawl";
  /**
   * Optional crawler settings.
   * NOTE(review): how `maxCrawledLinks` and `limit` differ is not visible in
   * this file — confirm against the crawler implementation.
   */
  crawlerOptions?: {
    returnOnlyUrls?: boolean;
    includes?: string[];
    excludes?: string[];
    maxCrawledLinks?: number;
    limit?: number;
    generateImgAltText?: boolean;
    replaceAllPathsWithAbsolutePaths?: boolean;
  };
  pageOptions?: PageOptions;
  extractorOptions?: ExtractorOptions;
  concurrentRequests?: number;
};
2024-04-20 11:59:42 -07:00
/** Object shape carrying a single required `url` string. */
export interface DocumentUrl {
  url: string;
}
2024-04-15 17:01:47 -04:00
/**
 * A document with required `content` and `metadata`, plus optional
 * renderings (`markdown`, `html`), extraction output, timestamps and
 * bookkeeping fields.
 */
export class Document {
  id?: string;
  url?: string; // Used only in /search for now
  content: string;
  markdown?: string;
  html?: string;
  llm_extraction?: Record<string, any>;
  createdAt?: Date;
  updatedAt?: Date;
  type?: string;
  metadata: {
    sourceURL?: string;
    [key: string]: any;
  };
  childrenLinks?: string[];
  provider?: string;

  /**
   * Builds a document from a partial set of fields.
   *
   * Defaults applied when a field is missing or falsy: `createdAt` and
   * `updatedAt` become the current time, `type` becomes `"unknown"`,
   * `metadata` becomes `{ sourceURL: "" }`, and `markdown` becomes `""`.
   *
   * @param data Field values to copy; `content` is required.
   * @throws {Error} "Missing required fields" when `data.content` is falsy
   *   (absent or the empty string).
   */
  constructor(data: Partial<Document>) {
    if (!data.content) {
      throw new Error("Missing required fields");
    }
    this.content = data.content;
    this.createdAt = data.createdAt || new Date();
    this.updatedAt = data.updatedAt || new Date();
    this.type = data.type || "unknown";
    this.metadata = data.metadata || { sourceURL: "" };
    this.markdown = data.markdown || "";
    this.childrenLinks = data.childrenLinks || undefined;
    this.provider = data.provider || undefined;

    // Copy the remaining declared fields so they are not lost on
    // construction. They are assigned only when supplied (and after the
    // fields above) so documents built without them keep the same own
    // properties, in the same order.
    if (data.id !== undefined) {
      this.id = data.id;
    }
    if (data.url !== undefined) {
      this.url = data.url;
    }
    if (data.html !== undefined) {
      this.html = data.html;
    }
    if (data.llm_extraction !== undefined) {
      this.llm_extraction = data.llm_extraction;
    }
  }
}
2024-04-24 10:11:01 -07:00
/**
 * A single search result holding a `url`, `title` and `description`.
 */
export class SearchResult {
  /**
   * @param url Stored as the public `url` field.
   * @param title Stored as the public `title` field.
   * @param description Stored as the public `description` field.
   */
  constructor(
    public url: string,
    public title: string,
    public description: string,
  ) {}

  /** @returns A one-line `SearchResult(url=…, title=…, description=…)` rendering. */
  toString(): string {
    const { url, title, description } = this;
    return `SearchResult(url=${url}, title=${title}, description=${description})`;
  }
}