import { performance } from 'perf_hooks';
import { setTimeout as sleep } from 'timers/promises';
import { RefreshResult, removeFromRetryQueue, retryScrape, scrapeKeywordFromGoogle } from './scraper';
import parseKeywords from './parseKeywords';
import Keyword from '../database/models/keyword';

/**
 * Refreshes the SERP position of the given keywords and persists the results.
 *
 * When `settings.scraper_type` is one of `scrapingant`, `serpapi` or `searchapi`,
 * all keywords are scraped in parallel; otherwise they are scraped one by one,
 * waiting `settings.scrape_delay` between keywords when it is set and not `'0'`.
 * @param {Keyword[]} rawkeyword - Keyword model instances to scrape
 * @param {SettingsType} settings - The App Settings that contain the Scraper settings
 * @returns {Promise<KeywordType[]>} The keywords that were processed, as plain objects.
 * Returns an empty array when no keywords are given.
 */
const refreshAndUpdateKeywords = async (rawkeyword: Keyword[], settings: SettingsType): Promise<KeywordType[]> => {
  // Guard first: touching `rawkeyword` before this check would throw on null/undefined input.
  if (!rawkeyword || rawkeyword.length === 0) {
    return [];
  }

  const keywords: KeywordType[] = rawkeyword.map((el) => el.get({ plain: true }));
  const start = performance.now();
  const updatedKeywords: KeywordType[] = [];

  if (['scrapingant', 'serpapi', 'searchapi'].includes(settings.scraper_type)) {
    const refreshedResults = await refreshParallel(keywords, settings);
    for (const keyword of rawkeyword) {
      // Failed scrapes may be falsy entries, hence the truthiness check before reading ID.
      const refreshedkeywordData = refreshedResults.find((k) => k && k.ID === keyword.ID);
      if (refreshedkeywordData) {
        const updatedkeyword = await updateKeywordPosition(keyword, refreshedkeywordData, settings);
        updatedKeywords.push(updatedkeyword);
      }
    }
  } else {
    // The delay setting does not change during the run, so evaluate it once.
    const shouldDelay = Boolean(settings.scrape_delay && settings.scrape_delay !== '0');
    const delayMs = shouldDelay ? parseInt(settings.scrape_delay, 10) : 0;

    for (const keyword of rawkeyword) {
      console.log('START SCRAPE: ', keyword.keyword);
      const updatedkeyword = await refreshAndUpdateKeyword(keyword, settings);
      updatedKeywords.push(updatedkeyword);
      if (shouldDelay) {
        await sleep(delayMs);
      }
    }
  }

  const end = performance.now();
  console.log(`time taken: ${end - start}ms`);
  return updatedKeywords;
};

/**
 * Scrapes the SERP for a single keyword and updates its position in the DB.
 * @param {Keyword} keyword - Keyword model instance to scrape
 * @param {SettingsType} settings - The App Settings that contain the Scraper settings
 * @returns {Promise<KeywordType>} The updated keyword, or the current plain keyword
 * unchanged when the scraper returned no data.
 */
const refreshAndUpdateKeyword = async (keyword: Keyword, settings: SettingsType): Promise<KeywordType> => {
  const currentkeyword = keyword.get({ plain: true });
  const refreshedkeywordData = await scrapeKeywordFromGoogle(currentkeyword, settings);
  if (!refreshedkeywordData) {
    return currentkeyword;
  }
  return updateKeywordPosition(keyword, refreshedkeywordData, settings);
};

/**
 * Processes the scraped data for the given keyword and updates the keyword SERP position in DB.
 *
 * Records the new position in the keyword history under today's date key, calls
 * `retryScrape` when the scrape reported an error and `settings.scrape_retry` is set
 * (otherwise `removeFromRetryQueue`), and then persists the new values.
 * @param {Keyword} keywordRaw - Keyword model instance to update
 * @param {RefreshResult} udpatedkeyword - Scraped data for that keyword
 * @param {SettingsType} settings - The App Settings that contain the Scraper settings
 * @returns {Promise<KeywordType>} The parsed keyword merged with the new values, or the
 * parsed keyword as-is when there was nothing to apply or the DB update failed.
 */
export const updateKeywordPosition = async (keywordRaw: Keyword, udpatedkeyword: RefreshResult, settings: SettingsType): Promise<KeywordType> => {
  const keywordParsed = parseKeywords([keywordRaw.get({ plain: true })]);
  const keyword = keywordParsed[0];
  let updated = keyword;

  if (udpatedkeyword && keyword) {
    const newPos = udpatedkeyword.position;
    const { history } = keyword;
    const theDate = new Date();
    // History is keyed by local date as YYYY-M-D (month and day are not zero-padded).
    const dateKey = `${theDate.getFullYear()}-${theDate.getMonth() + 1}-${theDate.getDate()}`;
    history[dateKey] = newPos;

    const updatedVal = {
      position: newPos,
      updating: false,
      url: udpatedkeyword.url,
      lastResult: udpatedkeyword.result,
      history,
      // On a failed scrape keep the previous timestamp and store the error details instead.
      lastUpdated: udpatedkeyword.error ? keyword.lastUpdated : theDate.toJSON(),
      lastUpdateError: udpatedkeyword.error
        ? JSON.stringify({ date: theDate.toJSON(), error: `${udpatedkeyword.error}`, scraper: settings.scraper_type })
        : 'false',
    };

    // If failed, add to the retry queue; otherwise make sure it is no longer queued.
    if (udpatedkeyword.error && settings?.scrape_retry) {
      await retryScrape(keyword.ID);
    } else {
      await removeFromRetryQueue(keyword.ID);
    }

    // Update the keyword position in the database. Array results and the history
    // are passed to the model as JSON strings.
    try {
      await keywordRaw.update({
        ...updatedVal,
        lastResult: Array.isArray(udpatedkeyword.result) ? JSON.stringify(udpatedkeyword.result) : udpatedkeyword.result,
        history: JSON.stringify(history),
      });
      console.log('[SUCCESS] Updating the Keyword: ', keyword.keyword);
      // Callers get lastUpdateError back in parsed form (object, or `false` when there was no error).
      updated = { ...keyword, ...updatedVal, lastUpdateError: JSON.parse(updatedVal.lastUpdateError) };
    } catch (error) {
      console.log('[ERROR] Updating SERP for Keyword', keyword.keyword, error);
    }
  }

  return updated;
};

/**
 * Scrapes the Google search result for all given keywords in parallel.
 *
 * A scrape that rejects is logged and left out of the result, so one failing
 * keyword no longer discards the results of the whole batch.
 * @param {KeywordType[]} keywords - Keywords to scrape
 * @param {SettingsType} settings - The App Settings that contain the Scraper settings
 * @returns {Promise<RefreshResult[]>} The results of every scrape that settled successfully.
 */
const refreshParallel = async (keywords: KeywordType[], settings: SettingsType): Promise<RefreshResult[]> => {
  const outcomes = await Promise.allSettled(keywords.map((keyword) => scrapeKeywordFromGoogle(keyword, settings)));
  console.log('ALL DONE!!!');

  const results: RefreshResult[] = [];
  for (const outcome of outcomes) {
    if (outcome.status === 'fulfilled') {
      results.push(outcome.value);
    } else {
      console.log(outcome.reason);
    }
  }
  return results;
};

export default refreshAndUpdateKeywords;