// firecrawl/apps/api/src/scraper/scrapeURL/lib/fetch.ts

import { Logger } from "winston";
import { z, ZodError } from "zod";
import { v4 as uuid } from "uuid";
import * as Sentry from "@sentry/node";

/**
 * Options accepted by `robustFetch`.
 *
 * @typeParam Schema - zod schema type used to validate the decoded response.
 */
export type RobustFetchParams<Schema extends z.Schema<any>> = {
  /** Target URL, handed to `fetch` as-is. */
  url: string;
  /** Logger that receives debug entries for failed and retried attempts. */
  logger: Logger;
  /** HTTP method. Optional because `robustFetch` falls back to `"GET"`. */
  method?: "GET" | "POST" | "DELETE" | "PUT";
  /**
   * Request payload. A `FormData` instance is sent untouched; any other
   * defined value is JSON-stringified.
   */
  body?: any;
  /** Extra request headers; these are applied after the default content type. */
  headers?: Record<string, string>;
  /** When given, the JSON-decoded response is parsed through this schema. */
  schema?: Schema;
  /**
   * NOTE(review): declared here but not read by `robustFetch` in this file;
   * the response is always JSON-parsed. Confirm intended semantics before
   * relying on it.
   */
  dontParseResponse?: boolean;
  /** When true, `robustFetch` resolves to `null` without decoding the body. */
  ignoreResponse?: boolean;
  /** When true, a failed request resolves to `null` instead of throwing. */
  ignoreFailure?: boolean;
  /** Correlation id attached to log entries and error causes. */
  requestId?: string;
  /** Total number of attempts (the first request included). */
  tryCount?: number;
  /** Delay in milliseconds to wait before a retry. */
  tryCooldown?: number;
};

/**
 * Sends an HTTP request and resolves to the JSON-decoded (and, when `schema`
 * is given, schema-parsed) response body, retrying failed attempts.
 *
 * Request encoding: a `FormData` body is passed to `fetch` untouched; any
 * other defined body is JSON-stringified and sent with a
 * `Content-Type: application/json` header, which caller-supplied `headers`
 * may override.
 *
 * Failure handling:
 * - If `fetch` rejects, or the response body cannot be read, the attempt is a
 *   failed request. With `ignoreFailure` set this resolves to `null`
 *   immediately (no retry); otherwise the error is reported to Sentry and the
 *   request is retried while `tryCount > 1`.
 * - A response status of 300 or above is retried while `tryCount > 1`,
 *   regardless of `ignoreFailure`.
 * - Every retry first waits `tryCooldown` milliseconds when it is provided.
 * - Malformed JSON and schema failures are never retried.
 *
 * @param params - See `RobustFetchParams`. `method` defaults to `"GET"`,
 *   `tryCount` to `1`, and `requestId` to a freshly generated uuid.
 * @returns The decoded body, or `null` (cast to `Output`) when
 *   `ignoreResponse` is set or `ignoreFailure` swallowed a failed request.
 * @throws Error "Request failed" when the request or body read fails and no
 *   attempts remain.
 * @throws Error "Request sent failure status" when the final attempt answers
 *   with a status of 300 or above.
 * @throws Error "Request sent malformed JSON" when the body is not valid JSON.
 * @throws Error "Response does not match provided schema" /
 *   "Parsing response with provided schema failed" when `schema.parse` throws.
 *   Every thrown error carries its diagnostic context in `cause`.
 */
export async function robustFetch<
  Schema extends z.Schema<any>,
  Output = z.infer<Schema>,
>({
  url,
  logger,
  method = "GET",
  body,
  headers,
  schema,
  ignoreResponse = false,
  ignoreFailure = false,
  requestId = uuid(),
  tryCount = 1,
  tryCooldown,
}: RobustFetchParams<Schema>): Promise<Output> {
  const params = {
    url,
    logger,
    method,
    body,
    headers,
    schema,
    ignoreResponse,
    ignoreFailure,
    tryCount,
    tryCooldown,
  };

  // Single retry path shared by every retryable failure, so the cooldown is
  // applied consistently and the same requestId follows all attempts.
  const retry = async (): Promise<Output> => {
    if (tryCooldown !== undefined) {
      await new Promise((resolve) =>
        setTimeout(() => resolve(null), tryCooldown),
      );
    }
    return await robustFetch<Schema, Output>({
      ...params,
      requestId,
      tryCount: tryCount - 1,
    });
  };

  const isFormData = body instanceof FormData;
  const isJsonBody = !isFormData && body !== undefined;

  let request: Response;
  let responseText = "";
  try {
    request = await fetch(url, {
      method,
      headers: {
        // No explicit content type for FormData: fetch has to generate the
        // multipart boundary itself.
        ...(isJsonBody ? { "Content-Type": "application/json" } : {}),
        ...(headers !== undefined ? headers : {}),
      },
      ...(isFormData
        ? { body }
        : isJsonBody
          ? { body: JSON.stringify(body) }
          : {}),
    });

    // Reading the body can reject too (e.g. the connection drops mid-stream),
    // so it is covered by the same failure handling as the request itself.
    if (ignoreResponse !== true) {
      responseText = await request.text();
    }
  } catch (error) {
    if (ignoreFailure) {
      return null as Output;
    }

    Sentry.captureException(error);
    if (tryCount > 1) {
      logger.debug(
        "Request failed, trying " + (tryCount - 1) + " more times",
        { params, error, requestId },
      );
      return await retry();
    }

    logger.debug("Request failed", { params, error, requestId });
    throw new Error("Request failed", {
      cause: {
        params,
        requestId,
        error,
      },
    });
  }

  if (ignoreResponse === true) {
    // The body is intentionally not read; cancel it so the underlying
    // connection is released instead of lingering until garbage collection.
    void request.body?.cancel().catch(() => undefined);
    return null as Output;
  }

  const response = {
    status: request.status,
    headers: request.headers,
    body: responseText,
  };

  if (request.status >= 300) {
    if (tryCount > 1) {
      logger.debug(
        "Request sent failure status, trying " + (tryCount - 1) + " more times",
        { params, request, response, requestId },
      );
      return await retry();
    }

    logger.debug("Request sent failure status", {
      params,
      request,
      response,
      requestId,
    });
    throw new Error("Request sent failure status", {
      cause: {
        params,
        request,
        response,
        requestId,
      },
    });
  }

  let data: Output;
  try {
    data = JSON.parse(response.body);
  } catch (error) {
    // Keep the parser error so the failure can be diagnosed from the cause.
    logger.debug("Request sent malformed JSON", {
      params,
      request,
      response,
      requestId,
      error,
    });
    throw new Error("Request sent malformed JSON", {
      cause: {
        params,
        request,
        response,
        requestId,
        error,
      },
    });
  }

  if (schema) {
    try {
      data = schema.parse(data);
    } catch (error) {
      // A ZodError means the payload has the wrong shape; anything else means
      // the schema itself threw (e.g. inside a transform or refinement).
      const message =
        error instanceof ZodError
          ? "Response does not match provided schema"
          : "Parsing response with provided schema failed";
      logger.debug(message, {
        params,
        request,
        response,
        requestId,
        error,
        schema,
      });
      throw new Error(message, {
        cause: {
          params,
          request,
          response,
          requestId,
          error,
          schema,
        },
      });
    }
  }

  return data;
}