fix/check files on crawl

This commit is contained in:
rafaelsideguide 2024-10-14 15:44:45 -03:00
parent e40036caf7
commit 180801225b

View File

@@ -136,6 +136,10 @@ export class WebCrawler {
 return false;
 }
+if (this.isFile(link)) {
+return false;
+}
 return true;
 })
 .slice(0, limit);
@@ -478,7 +482,14 @@ export class WebCrawler {
 ".webp",
 ".inc"
 ];
-return fileExtensions.some((ext) => url.toLowerCase().endsWith(ext));
+try {
+const urlWithoutQuery = url.split('?')[0].toLowerCase();
+return fileExtensions.some((ext) => urlWithoutQuery.endsWith(ext));
+} catch (error) {
+Logger.error(`Error processing URL in isFile: ${error}`);
+return false;
+}
 }
 private isSocialMediaOrEmail(url: string): boolean {