Merge pull request #485 from mendableai/bugfix/issue-435
[Bug] Issue with crawl going beyond Limit
This commit is contained in:
commit
72eebb0aea
|
@@ -164,7 +164,7 @@ export class WebCrawler {
|
||||||
concurrencyLimit,
|
concurrencyLimit,
|
||||||
inProgress
|
inProgress
|
||||||
);
|
);
|
||||||
|
|
||||||
if (
|
if (
|
||||||
urls.length === 0 &&
|
urls.length === 0 &&
|
||||||
this.filterLinks([this.initialUrl], limit, this.maxCrawledDepth).length > 0
|
this.filterLinks([this.initialUrl], limit, this.maxCrawledDepth).length > 0
|
||||||
|
@@ -420,9 +420,10 @@ export class WebCrawler {
|
||||||
".woff",
|
".woff",
|
||||||
".ttf",
|
".ttf",
|
||||||
".woff2",
|
".woff2",
|
||||||
".webp"
|
".webp",
|
||||||
|
".inc"
|
||||||
];
|
];
|
||||||
return fileExtensions.some((ext) => url.endsWith(ext));
|
return fileExtensions.some((ext) => url.toLowerCase().endsWith(ext));
|
||||||
}
|
}
|
||||||
|
|
||||||
private isSocialMediaOrEmail(url: string): boolean {
|
private isSocialMediaOrEmail(url: string): boolean {
|
||||||
|
|
Loading…
Reference in New Issue