serpbear/utils/refresh.ts

137 lines
5.7 KiB
TypeScript

import { performance } from 'perf_hooks';
import { setTimeout as sleep } from 'timers/promises';
import { RefreshResult, removeFromRetryQueue, retryScrape, scrapeKeywordFromGoogle } from './scraper';
import parseKeywords from './parseKeywords';
import Keyword from '../database/models/keyword';
/**
 * Refreshes the SERP position of the given keywords and persists the results.
 * When `settings.scraper_type` is 'scrapingant', 'serpapi' or 'searchapi', all keywords are
 * scraped concurrently through refreshParallel. Otherwise they are scraped one at a time,
 * pausing `settings.scrape_delay` milliseconds after each keyword when a positive delay is set.
 * @param {Keyword[]} rawkeyword - Keyword model instances to scrape. A missing or empty list yields [].
 * @param {SettingsType} settings - The App Settings that contain the Scraper settings
 * @returns {Promise<KeywordType[]>} The processed keywords in input order. In the parallel path,
 * keywords for which no scrape result came back are omitted.
 */
const refreshAndUpdateKeywords = async (rawkeyword:Keyword[], settings:SettingsType): Promise<KeywordType[]> => {
   // Guard first: mapping a missing list would throw before the empty check could run.
   if (!rawkeyword || rawkeyword.length === 0) { return []; }

   const start = performance.now();
   const updatedKeywords: KeywordType[] = [];

   if (['scrapingant', 'serpapi', 'searchapi'].includes(settings.scraper_type)) {
      // Only the parallel scraper consumes the plain keyword objects, so build them here.
      const keywords: KeywordType[] = rawkeyword.map((el) => el.get({ plain: true }));
      const refreshedResults = await refreshParallel(keywords, settings);
      for (const keyword of rawkeyword) {
         // Results may contain falsy entries, hence the truthiness check before reading ID.
         const refreshedkeywordData = refreshedResults.find((k) => k && k.ID === keyword.ID);
         if (refreshedkeywordData) {
            const updatedkeyword = await updateKeywordPosition(keyword, refreshedkeywordData, settings);
            updatedKeywords.push(updatedkeyword);
         }
      }
   } else {
      // Parse the delay once; a non-numeric or non-positive value means "no delay".
      const delay = settings.scrape_delay ? parseInt(settings.scrape_delay, 10) : 0;
      const shouldDelay = Number.isFinite(delay) && delay > 0;
      for (const keyword of rawkeyword) {
         console.log('START SCRAPE: ', keyword.keyword);
         const updatedkeyword = await refreshAndUpdateKeyword(keyword, settings);
         updatedKeywords.push(updatedkeyword);
         if (shouldDelay) {
            await sleep(delay);
         }
      }
   }

   const end = performance.now();
   console.log(`time taken: ${end - start}ms`);
   return updatedKeywords;
};
/**
 * Scrapes the SERP for a single keyword and, when the scrape yields data, stores the new position.
 * @param {Keyword} keyword - Keyword model instance to scrape
 * @param {SettingsType} settings - The App Settings that contain the Scraper settings
 * @returns {Promise<KeywordType>} The result of updateKeywordPosition, or the keyword's plain
 * representation unchanged when the scraper returned nothing usable.
 */
const refreshAndUpdateKeyword = async (keyword: Keyword, settings: SettingsType): Promise<KeywordType> => {
   const plainKeyword = keyword.get({ plain: true });
   const scraped = await scrapeKeywordFromGoogle(plainKeyword, settings);

   // Nothing scraped: hand back the keyword as it currently is.
   if (!scraped) {
      return plainKeyword;
   }

   const persisted = await updateKeywordPosition(keyword, scraped, settings);
   return persisted;
};
/**
 * Applies freshly scraped data to a keyword and writes the new SERP position to the DB.
 * Records the position in the keyword history under today's `Y-M-D` key, then either queues the
 * keyword for a retry (scrape error and `settings.scrape_retry` set) or removes it from the retry queue.
 * @param {Keyword} keywordRaw - Keyword model instance to update
 * @param {RefreshResult} udpatedkeyword - Scraped data for that keyword
 * @param {SettingsType} settings - The App Settings that contain the Scraper settings
 * @returns {Promise<KeywordType>} The parsed keyword merged with the new values, or the parsed
 * keyword alone when there is no scraped data or the DB update fails.
 */
export const updateKeywordPosition = async (keywordRaw:Keyword, udpatedkeyword: RefreshResult, settings: SettingsType): Promise<KeywordType> => {
   const [keyword] = parseKeywords([keywordRaw.get({ plain: true })]);

   // Without scraped data (or a parsed keyword) there is nothing to persist.
   if (!udpatedkeyword || !keyword) {
      return keyword;
   }

   const { position, url, result, error } = udpatedkeyword;
   const now = new Date();
   const timestamp = now.toJSON();

   // History is keyed by local calendar day, with month and day not zero-padded.
   const dayKey = [now.getFullYear(), now.getMonth() + 1, now.getDate()].join('-');
   const { history } = keyword;
   history[dayKey] = position;

   // A failed scrape keeps the previous lastUpdated and stores a serialized error record.
   let lastUpdated = timestamp;
   let lastUpdateError = 'false';
   if (error) {
      lastUpdated = keyword.lastUpdated;
      lastUpdateError = JSON.stringify({ date: timestamp, error: `${error}`, scraper: settings.scraper_type });
   }

   const updatedVal = {
      position,
      updating: false,
      url,
      lastResult: result,
      history,
      lastUpdated,
      lastUpdateError,
   };

   // Retry queue bookkeeping happens before the DB write.
   if (!error || !settings?.scrape_retry) {
      await removeFromRetryQueue(keyword.ID);
   } else {
      await retryScrape(keyword.ID);
   }

   try {
      // The DB row stores lastResult (when it is an array) and history as JSON strings.
      const dbValues = {
         ...updatedVal,
         lastResult: Array.isArray(result) ? JSON.stringify(result) : result,
         history: JSON.stringify(history),
      };
      await keywordRaw.update(dbValues);
      console.log('[SUCCESS] Updating the Keyword: ', keyword.keyword);
      return { ...keyword, ...updatedVal, lastUpdateError: JSON.parse(updatedVal.lastUpdateError) };
   } catch (err) {
      console.log('[ERROR] Updating SERP for Keyword', keyword.keyword, err);
      return keyword;
   }
};
/**
 * Scrape Google Keyword Search Results for all given keywords concurrently.
 * Every keyword is scraped independently: a scrape that rejects is logged and left out of the
 * returned list, so one failure no longer discards the results of the scrapes that succeeded.
 * @param {KeywordType[]} keywords - Keywords to scrape
 * @param {SettingsType} settings - The App Settings that contain the Scraper settings
 * @returns {Promise<RefreshResult[]>} Results of the scrapes that settled successfully, in input order.
 */
const refreshParallel = async (keywords:KeywordType[], settings:SettingsType) : Promise<RefreshResult[]> => {
   // allSettled waits for every scrape instead of bailing out on the first rejection.
   const settled = await Promise.allSettled(keywords.map((keyword) => scrapeKeywordFromGoogle(keyword, settings)));

   const refreshed: RefreshResult[] = [];
   for (const outcome of settled) {
      if (outcome.status === 'fulfilled') {
         refreshed.push(outcome.value);
      } else {
         console.log(outcome.reason);
      }
   }

   console.log('ALL DONE!!!');
   return refreshed;
};
// Default export: the batch refresh entry point (refreshAndUpdateKeywords).
export default refreshAndUpdateKeywords;