From 8e3c2b28550aafcdf4627724940a3b951672c496 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 22 Aug 2024 23:30:19 +0200 Subject: [PATCH] fix(crawler): verify URL --- apps/api/src/scraper/WebScraper/crawler.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts index 67f1c22..92b9ae4 100644 --- a/apps/api/src/scraper/WebScraper/crawler.ts +++ b/apps/api/src/scraper/WebScraper/crawler.ts @@ -108,7 +108,12 @@ export class WebCrawler { // Normalize the initial URL and the link to account for www and non-www versions const normalizedInitialUrl = new URL(this.initialUrl); - const normalizedLink = new URL(link); + let normalizedLink; + try { + normalizedLink = new URL(link); + } catch (_) { + return false; + } const initialHostname = normalizedInitialUrl.hostname.replace(/^www\./, ''); const linkHostname = normalizedLink.hostname.replace(/^www\./, '');