mirror of
https://github.com/towfiqi/serpbear
synced 2025-06-26 18:15:54 +00:00
Merge pull request #273 from phoehnel/dev/thorw-error-on-empty-results
Add an error message if the returned search HTML does not contain the required elements
This commit is contained in:
commit
56d8b660c5
@ -16,6 +16,13 @@ const proxy:ScraperSettings = {
|
||||
|
||||
const $ = cheerio.load(content);
|
||||
let lastPosition = 0;
|
||||
const hasValidContent = $('body').find('#main')
|
||||
if (hasValidContent.length == 0) {
|
||||
const msg = '[ERROR] Scraped search results from proxy do not adhere to expected format. Unable to parse results';
|
||||
console.log(msg);
|
||||
throw new Error(msg);
|
||||
}
|
||||
|
||||
const mainContent = $('body').find('#main');
|
||||
const children = $(mainContent).find('h3');
|
||||
|
||||
|
@ -127,11 +127,15 @@ export const scrapeKeywordFromGoogle = async (keyword:KeywordType, settings:Sett
|
||||
refreshedResults.error = scraperError || 'Unknown Error';
|
||||
if (settings.scraper_type === 'proxy' && error && error.response && error.response.statusText) {
|
||||
refreshedResults.error = `[${error.response.status}] ${error.response.statusText}`;
|
||||
} else if (settings.scraper_type === 'proxy' && error) {
|
||||
refreshedResults.error = error;
|
||||
}
|
||||
|
||||
console.log('[ERROR] Scraping Keyword : ', keyword.keyword, '. Error: ', error && error.response && error.response.statusText);
|
||||
console.log('[ERROR] Scraping Keyword : ', keyword.keyword);
|
||||
if (!(error && error.response && error.response.statusText)) {
|
||||
console.log('[ERROR_MESSAGE]: ', error);
|
||||
} else {
|
||||
console.log('[ERROR_MESSAGE]: ', error && error.response && error.response.statusText);
|
||||
}
|
||||
}
|
||||
|
||||
@ -148,9 +152,17 @@ export const extractScrapedResult = (content: string, device: string): SearchRes
|
||||
const extractedResult = [];
|
||||
|
||||
const $ = cheerio.load(content);
|
||||
const hasValidContent = [...$('body').find('#search'), ...$('body').find('#rso')];
|
||||
if (hasValidContent.length == 0) {
|
||||
const msg = '[ERROR] Scraped search results do not adhere to expected format. Unable to parse results';
|
||||
console.log(msg);
|
||||
throw new Error(msg);
|
||||
}
|
||||
|
||||
const hasNumberofResult = $('body').find('#search > div > div');
|
||||
const searchResultItems = hasNumberofResult.find('h3');
|
||||
let lastPosition = 0;
|
||||
console.log('Scraped search results contain ', searchResultItems.length, ' desktop results.');
|
||||
|
||||
for (let i = 0; i < searchResultItems.length; i += 1) {
|
||||
if (searchResultItems[i]) {
|
||||
@ -161,11 +173,12 @@ export const extractScrapedResult = (content: string, device: string): SearchRes
|
||||
extractedResult.push({ title, url, position: lastPosition });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Mobile Scraper
|
||||
if (extractedResult.length === 0 && device === 'mobile') {
|
||||
// Mobile Scraper
|
||||
if (extractedResult.length === 0 && device === 'mobile') {
|
||||
const items = $('body').find('#rso > div');
|
||||
console.log('Scraped search results contain ', items.length, ' mobile results.');
|
||||
for (let i = 0; i < items.length; i += 1) {
|
||||
const item = $(items[i]);
|
||||
const linkDom = item.find('a[role="presentation"]');
|
||||
@ -181,7 +194,7 @@ export const extractScrapedResult = (content: string, device: string): SearchRes
|
||||
}
|
||||
}
|
||||
|
||||
return extractedResult;
|
||||
return extractedResult;
|
||||
};
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user