Merge pull request #485 from mendableai/bugfix/issue-435

[Bug] Issue with crawl going beyond Limit
This commit is contained in:
Nicolas 2024-07-31 18:10:29 -04:00 committed by GitHub
commit 72eebb0aea
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 4 additions and 3 deletions

View File

@@ -164,7 +164,7 @@ export class WebCrawler {
concurrencyLimit,
inProgress
);
if (
urls.length === 0 &&
this.filterLinks([this.initialUrl], limit, this.maxCrawledDepth).length > 0
@@ -420,9 +420,10 @@ export class WebCrawler {
".woff",
".ttf",
".woff2",
".webp"
".webp",
".inc"
];
return fileExtensions.some((ext) => url.endsWith(ext));
return fileExtensions.some((ext) => url.toLowerCase().endsWith(ext));
}
private isSocialMediaOrEmail(url: string): boolean {