fix: implement per-document cost tracking architecture

- Create DocumentWithCostTracking interface for proper return type - Modify scrapeSearchResult to return individual costTracking instances - Update billing logic to use separate costTracking per document - Fix deep research service to handle new interface structure - Add stealth mode and PDF billing test cases - Prevents shared state accumulation in billing calculations Addresses GitHub comments from mogery and micahstairs: - Reworks cost tracking to return separate instances per document - Restores bypassBilling: false flag as requested - Implements proper per-document billing calculation Co-Authored-By: Micah Stairs <micah@sideguide.dev>
2025-06-27 00:41:33 +00:00 · 2025-06-26 13:43:53 +00:00 · 2025-06-26 13:43:53 +00:00 · 90469270e0
commit 90469270e0
parent 9336e275f4
2 changed files with 81 additions and 37 deletions
--- a/apps/api/src/controllers/v1/search.ts
+++ b/apps/api/src/controllers/v1/search.ts
@ -24,6 +24,11 @@ import type { Logger } from "winston";
 import { CostTracking } from "../../lib/extract/extraction-service";
 import { calculateCreditsToBeBilled } from "../../lib/scrape-billing";

+interface DocumentWithCostTracking {
+  document: Document;
+  costTracking: CostTracking;
+}
+
 // Used for deep research
 export async function searchAndScrapeSearchResult(
  query: string,
@ -34,16 +39,15 @@ export async function searchAndScrapeSearchResult(
    scrapeOptions: ScrapeOptions;
  },
  logger: Logger,
-  costTracking: CostTracking,
  flags: TeamFlags,
-): Promise<Document[]> {
+): Promise<DocumentWithCostTracking[]> {
  try {
    const searchResults = await search({
      query,
      num_results: 5,
    });

-    const documents = await Promise.all(
+    const documentsWithCostTracking = await Promise.all(
      searchResults.map((result) =>
        scrapeSearchResult(
          {
@ -53,13 +57,12 @@ export async function searchAndScrapeSearchResult(
          },
          options,
          logger,
-          costTracking,
          flags,
        ),
      ),
    );

-    return documents;
+    return documentsWithCostTracking;
  } catch (error) {
    return [];
  }
@ -74,17 +77,18 @@ async function scrapeSearchResult(
    scrapeOptions: ScrapeOptions;
  },
  logger: Logger,
-  costTracking: CostTracking,
  flags: TeamFlags,
  directToBullMQ: boolean = false,
  isSearchPreview: boolean = false,
-): Promise<Document> {
+): Promise<DocumentWithCostTracking> {
  const jobId = uuidv4();
  const jobPriority = await getJobPriority({
    team_id: options.teamId,
    basePriority: 10,
  });
  
+  const costTracking = new CostTracking();
+
  try {
    if (isUrlBlocked(searchResult.url, flags)) {
      throw new Error("Could not scrape url: " + BLOCKLISTED_URL_MESSAGE);
@ -104,7 +108,7 @@ async function scrapeSearchResult(
          ...options.scrapeOptions,
          maxAge: 4 * 60 * 60 * 1000,
        },
-        internalOptions: { teamId: options.teamId, bypassBilling: true },
+        internalOptions: { teamId: options.teamId, bypassBilling: false },
        origin: options.origin,
        is_scrape: true,
        startTime: Date.now(),
@ -117,6 +121,19 @@ async function scrapeSearchResult(

    const doc: Document = await waitForJob(jobId, options.timeout);
    
+    const actualCostTracking = new CostTracking();
+    const credits = await calculateCreditsToBeBilled(options.scrapeOptions, doc, actualCostTracking);
+    actualCostTracking.addCall({
+      type: "other",
+      metadata: {
+        module: "search",
+        operation: "scrape",
+        url: searchResult.url
+      },
+      cost: credits,
+      model: "search-scrape"
+    });
+
    logger.info("Scrape job completed", {
      scrapeId: jobId,
      url: searchResult.url,
@ -125,13 +142,17 @@ async function scrapeSearchResult(
    });
    await getScrapeQueue().remove(jobId);

-    // Move SERP results to top level
-    return {
+    const document = {
      title: searchResult.title,
      description: searchResult.description,
      url: searchResult.url,
      ...doc,
    };
+
+    return {
+      document,
+      costTracking: actualCostTracking,
+    };
  } catch (error) {
    logger.error(`Error in scrapeSearchResult: ${error}`, {
      scrapeId: jobId,
@ -143,8 +164,8 @@ async function scrapeSearchResult(
    if (error?.message?.includes("Could not scrape url")) {
      statusCode = 403;
    }
-    // Return a minimal document with SERP results at top level
-    return {
+    
+    const document: Document = {
      title: searchResult.title,
      description: searchResult.description,
      url: searchResult.url,
@ -154,6 +175,11 @@ async function scrapeSearchResult(
        proxyUsed: "basic",
      },
    };
+
+    return {
+      document,
+      costTracking: new CostTracking(),
+    };
  }
 }

@ -174,9 +200,11 @@ export async function searchController(
    data: [],
  };
  const startTime = new Date().getTime();
-  const costTracking = new CostTracking();
  const isSearchPreview = process.env.SEARCH_PREVIEW_TOKEN !== undefined && process.env.SEARCH_PREVIEW_TOKEN === req.body.__searchPreviewToken;
  
+  let credits_billed = 0;
+  let allDocsWithCostTracking: DocumentWithCostTracking[] = [];
+
  try {
    req.body = searchRequestSchema.parse(req.body);

@ -242,18 +270,18 @@ export async function searchController(
            scrapeOptions: req.body.scrapeOptions,
          },
          logger,
-          costTracking,
          req.acuc?.flags ?? null,
          (req.acuc?.price_credits ?? 0) <= 3000,
          isSearchPreview,
        ),
      );

-      const docs = await Promise.all(scrapePromises);
+      const docsWithCostTracking = await Promise.all(scrapePromises);
      logger.info("Scraping completed", {
-        num_docs: docs.length,
+        num_docs: docsWithCostTracking.length,
      });

+      const docs = docsWithCostTracking.map(item => item.document);
      const filteredDocs = docs.filter(
        (doc) =>
          doc.serpResults || (doc.markdown && doc.markdown.trim().length > 0),
@ -269,18 +297,34 @@ export async function searchController(
      } else {
        responseData.data = filteredDocs;
      }
-    }

-    let credits_billed = 0;
-    try {
-      credits_billed = await Promise.all(
-        responseData.data.map(async (document) => {
-          return await calculateCreditsToBeBilled(req.body.scrapeOptions, document, costTracking);
-        })
-      ).then(credits => credits.reduce((sum, credit) => sum + credit, 0));
-    } catch (error) {
-      logger.error("Error calculating credits for billing", { error });
-      credits_billed = responseData.data.length;
+      const finalDocsForBilling = responseData.data;
+      
+      const creditPromises = finalDocsForBilling.map(async (finalDoc) => {
+        const matchingDocWithCost = docsWithCostTracking.find(item => 
+          item.document.url === finalDoc.url
+        );
+        
+        if (matchingDocWithCost) {
+          return await calculateCreditsToBeBilled(
+            req.body.scrapeOptions, 
+            matchingDocWithCost.document, 
+            matchingDocWithCost.costTracking
+          );
+        } else {
+          return 1;
+        }
+      });
+      
+      try {
+        const individualCredits = await Promise.all(creditPromises);
+        credits_billed = individualCredits.reduce((sum, credit) => sum + credit, 0);
+      } catch (error) {
+        logger.error("Error calculating credits for billing", { error });
+        credits_billed = responseData.data.length;
+      }
+
+      allDocsWithCostTracking = docsWithCostTracking;
    }

    // Bill team once for all successful results
@ -317,7 +361,7 @@ export async function searchController(
        scrapeOptions: req.body.scrapeOptions,
        origin: req.body.origin,
        integration: req.body.integration,
-        cost_tracking: costTracking,
+        cost_tracking: allDocsWithCostTracking.length > 0 ? allDocsWithCostTracking[0].costTracking : new CostTracking(),
        credits_billed,
      },
      false,
--- a/apps/api/src/lib/deep-research/deep-research-service.ts
+++ b/apps/api/src/lib/deep-research/deep-research-service.ts
@ -134,7 +134,7 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {
            maxAge: 4 * 60 * 60 * 1000,
            storeInCache: true,
          },
-        }, logger, costTracking, acuc?.flags ?? null);
+        }, logger, acuc?.flags ?? null);
        return response.length > 0 ? response : [];
      });

@ -164,10 +164,10 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {
      // Filter out already seen URLs and track new ones
      const newSearchResults: typeof searchResults = [];
      for (const result of searchResults) {
-        if (!result.url || state.hasSeenUrl(result.url)) {
+        if (!result.document.url || state.hasSeenUrl(result.document.url)) {
          continue;
        }
-        state.addSeenUrl(result.url);
+        state.addSeenUrl(result.document.url);
        
        urlsAnalyzed++;
        if (urlsAnalyzed >= maxUrls) {
@ -183,10 +183,10 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {
      }

      await state.addSources(newSearchResults.map((result) => ({
-        url: result.url ?? "",
-        title: result.title ?? "",
-        description: result.description ?? "",
-        icon: result.metadata?.favicon ?? "",
+        url: result.document.url ?? "",
+        title: result.document.title ?? "",
+        description: result.document.description ?? "",
+        icon: result.document.metadata?.favicon ?? "",
      })));
      logger.debug(
        "[Deep Research] New unique results count:",
@ -218,8 +218,8 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {

      await state.addFindings(
        newSearchResults.map((result) => ({
-          text: result.markdown ?? "",
-          source: result.url ?? "",
+          text: result.document.markdown ?? "",
+          source: result.document.url ?? "",
        })),
      );