Merge pull request #273 from phoehnel/dev/thorw-error-on-empty-results

Add an error message if the returned search HTML does not contain the required elements
This commit is contained in:
Towfiq I. 2025-02-23 22:23:04 +06:00 committed by GitHub
commit 56d8b660c5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 25 additions and 5 deletions

View File

@ -16,6 +16,13 @@ const proxy:ScraperSettings = {
const $ = cheerio.load(content);
let lastPosition = 0;
const hasValidContent = $('body').find('#main')
if (hasValidContent.length == 0) {
const msg = '[ERROR] Scraped search results from proxy do not adhere to expected format. Unable to parse results';
console.log(msg);
throw new Error(msg);
}
const mainContent = $('body').find('#main');
const children = $(mainContent).find('h3');

View File

@ -127,11 +127,15 @@ export const scrapeKeywordFromGoogle = async (keyword:KeywordType, settings:Sett
refreshedResults.error = scraperError || 'Unknown Error';
if (settings.scraper_type === 'proxy' && error && error.response && error.response.statusText) {
refreshedResults.error = `[${error.response.status}] ${error.response.statusText}`;
} else if (settings.scraper_type === 'proxy' && error) {
refreshedResults.error = error;
}
console.log('[ERROR] Scraping Keyword : ', keyword.keyword, '. Error: ', error && error.response && error.response.statusText);
console.log('[ERROR] Scraping Keyword : ', keyword.keyword);
if (!(error && error.response && error.response.statusText)) {
console.log('[ERROR_MESSAGE]: ', error);
} else {
console.log('[ERROR_MESSAGE]: ', error && error.response && error.response.statusText);
}
}
@ -148,9 +152,17 @@ export const extractScrapedResult = (content: string, device: string): SearchRes
const extractedResult = [];
const $ = cheerio.load(content);
const hasValidContent = [...$('body').find('#search'), ...$('body').find('#rso')];
if (hasValidContent.length == 0) {
const msg = '[ERROR] Scraped search results do not adhere to expected format. Unable to parse results';
console.log(msg);
throw new Error(msg);
}
const hasNumberofResult = $('body').find('#search > div > div');
const searchResultItems = hasNumberofResult.find('h3');
let lastPosition = 0;
console.log('Scraped search results contain ', searchResultItems.length, ' desktop results.');
for (let i = 0; i < searchResultItems.length; i += 1) {
if (searchResultItems[i]) {
@ -161,11 +173,12 @@ export const extractScrapedResult = (content: string, device: string): SearchRes
extractedResult.push({ title, url, position: lastPosition });
}
}
}
}
// Mobile Scraper
if (extractedResult.length === 0 && device === 'mobile') {
// Mobile Scraper
if (extractedResult.length === 0 && device === 'mobile') {
const items = $('body').find('#rso > div');
console.log('Scraped search results contain ', items.length, ' mobile results.');
for (let i = 0; i < items.length; i += 1) {
const item = $(items[i]);
const linkDom = item.find('a[role="presentation"]');
@ -181,7 +194,7 @@ export const extractScrapedResult = (content: string, device: string): SearchRes
}
}
return extractedResult;
return extractedResult;
};
/**