From 1d6b2be95aa133b7998f5cf098f15aa32f5badd2 Mon Sep 17 00:00:00 2001 From: towfiqi Date: Thu, 2 Nov 2023 21:51:06 +0600 Subject: [PATCH] feat: Refresh All feature now shows update real-time --- pages/api/cron.ts | 2 +- pages/api/insight.ts | 5 +- pages/api/keywords.ts | 2 +- pages/api/refresh.ts | 57 +---------------------- utils/refresh.ts | 105 +++++++++++++++++++++++++++++++++++++----- utils/scraper.ts | 6 +-- 6 files changed, 103 insertions(+), 74 deletions(-) diff --git a/pages/api/cron.ts b/pages/api/cron.ts index 985db09..504df01 100644 --- a/pages/api/cron.ts +++ b/pages/api/cron.ts @@ -3,7 +3,7 @@ import db from '../../database/database'; import Keyword from '../../database/models/keyword'; import { getAppSettings } from './settings'; import verifyUser from '../../utils/verifyUser'; -import { refreshAndUpdateKeywords } from './refresh'; +import refreshAndUpdateKeywords from '../../utils/refresh'; type CRONRefreshRes = { started: boolean diff --git a/pages/api/insight.ts b/pages/api/insight.ts index 9e2f2d3..497f4a2 100644 --- a/pages/api/insight.ts +++ b/pages/api/insight.ts @@ -37,7 +37,10 @@ const getDomainSearchConsoleInsight = async (req: NextApiRequest, res: NextApiRe // First try and read the Local SC Domain Data file. const localSCData = await readLocalSCData(domainname); - if (localSCData && localSCData.stats && localSCData.stats.length) { + const oldFetchedDate = localSCData.lastFetched; + const fetchTimeDiff = new Date().getTime() - (oldFetchedDate ? new Date(oldFetchedDate as string).getTime() : 0); + + if (localSCData && localSCData.stats && localSCData.stats.length && fetchTimeDiff <= 86400000) { const response = getInsightFromSCData(localSCData); return res.status(200).json({ data: response }); } diff --git a/pages/api/keywords.ts b/pages/api/keywords.ts index 901bc7e..10be1cc 100644 --- a/pages/api/keywords.ts +++ b/pages/api/keywords.ts @@ -2,11 +2,11 @@ import type { NextApiRequest, NextApiResponse } from 'next'; import { Op } from 'sequelize'; import db from '../../database/database'; import Keyword from '../../database/models/keyword'; -import { refreshAndUpdateKeywords } from './refresh'; import { getAppSettings } from './settings'; import verifyUser from '../../utils/verifyUser'; import parseKeywords from '../../utils/parseKeywords'; import { integrateKeywordSCData, readLocalSCData } from '../../utils/searchConsole'; +import refreshAndUpdateKeywords from '../../utils/refresh'; type KeywordsGetResponse = { keywords?: KeywordType[], diff --git a/pages/api/refresh.ts b/pages/api/refresh.ts index 10d462b..fd63d7a 100644 --- a/pages/api/refresh.ts +++ b/pages/api/refresh.ts @@ -2,11 +2,10 @@ import type { NextApiRequest, NextApiResponse } from 'next'; import { Op } from 'sequelize'; import db from '../../database/database'; import Keyword from '../../database/models/keyword'; -import refreshKeywords from '../../utils/refresh'; +import refreshAndUpdateKeywords from '../../utils/refresh'; import { getAppSettings } from './settings'; import verifyUser from '../../utils/verifyUser'; import parseKeywords from '../../utils/parseKeywords'; -import { removeFromRetryQueue, retryScrape } from '../../utils/scraper'; type KeywordsRefreshRes = { keywords?: KeywordType[] @@ -63,57 +62,3 @@ const refresTheKeywords = async (req: NextApiRequest, res: NextApiResponse { - const formattedKeywords = initKeywords.map((el) => el.get({ plain: true })); - const refreshed: any = await refreshKeywords(formattedKeywords, settings); - // const fetchKeywords = await refreshKeywords(initialKeywords.map( k=> k.keyword )); - const updatedKeywords: KeywordType[] = []; - - for (const keywordRaw of initKeywords) { - const keywordPrased = parseKeywords([keywordRaw.get({ plain: true })]); - const keyword = keywordPrased[0]; - const udpatedkeyword = refreshed.find((item:any) => item.ID && item.ID === keyword.ID); - - if (udpatedkeyword && keyword) { - const newPos = udpatedkeyword.position; - const newPosition = newPos !== false ? newPos : keyword.position; - const { history } = keyword; - const theDate = new Date(); - history[`${theDate.getFullYear()}-${theDate.getMonth() + 1}-${theDate.getDate()}`] = newPosition; - - const updatedVal = { - position: newPosition, - updating: false, - url: udpatedkeyword.url, - lastResult: udpatedkeyword.result, - history, - lastUpdated: udpatedkeyword.error ? keyword.lastUpdated : theDate.toJSON(), - lastUpdateError: udpatedkeyword.error - ? JSON.stringify({ date: theDate.toJSON(), error: `${udpatedkeyword.error}`, scraper: settings.scraper_type }) - : 'false', - }; - updatedKeywords.push({ ...keyword, ...{ ...updatedVal, lastUpdateError: JSON.parse(updatedVal.lastUpdateError) } }); - - // If failed, Add to Retry Queue Cron - if (udpatedkeyword.error) { - await retryScrape(keyword.ID); - } else { - await removeFromRetryQueue(keyword.ID); - } - - // Update the Keyword Position in Database - try { - await keywordRaw.update({ - ...updatedVal, - lastResult: Array.isArray(udpatedkeyword.result) ? JSON.stringify(udpatedkeyword.result) : udpatedkeyword.result, - history: JSON.stringify(history), - }); - console.log('[SUCCESS] Updating the Keyword: ', keyword.keyword); - } catch (error) { - console.log('[ERROR] Updating SERP for Keyword', keyword.keyword, error); - } - } - } - return updatedKeywords; -}; diff --git a/utils/refresh.ts b/utils/refresh.ts index e4eef78..df8a20f 100644 --- a/utils/refresh.ts +++ b/utils/refresh.ts @@ -1,27 +1,38 @@ import { performance } from 'perf_hooks'; import { setTimeout as sleep } from 'timers/promises'; -import { RefreshResult, scrapeKeywordFromGoogle } from './scraper'; +import { RefreshResult, removeFromRetryQueue, retryScrape, scrapeKeywordFromGoogle } from './scraper'; +import parseKeywords from './parseKeywords'; +import Keyword from '../database/models/keyword'; /** * Refreshes the Keywords position by Scraping Google Search Result by * Determining whether the keywords should be scraped in Parallel or not - * @param {KeywordType[]} keywords - Keywords to scrape + * @param {Keyword[]} rawkeyword - Keywords to scrape * @param {SettingsType} settings - The App Settings that contain the Scraper settings * @returns {Promise} */ -const refreshKeywords = async (keywords:KeywordType[], settings:SettingsType): Promise => { - if (!keywords || keywords.length === 0) { return []; } +const refreshAndUpdateKeywords = async (rawkeyword:Keyword[], settings:SettingsType): Promise => { + const keywords:KeywordType[] = rawkeyword.map((el) => el.get({ plain: true })); + if (!rawkeyword || rawkeyword.length === 0) { return []; } const start = performance.now(); - - let refreshedResults: RefreshResult[] = []; + const updatedKeywords: KeywordType[] = []; if (['scrapingant', 'serpapi'].includes(settings.scraper_type)) { - refreshedResults = await refreshParallel(keywords, settings); + const refreshedResults = await refreshParallel(keywords, settings); + if (refreshedResults.length > 0) { + for (const keyword of rawkeyword) { + const refreshedkeywordData = refreshedResults.find((k) => k && k.ID === keyword.id); + if (refreshedkeywordData) { + const updatedkeyword = await updateKeywordPosition(keyword, refreshedkeywordData, settings); + updatedKeywords.push(updatedkeyword); + } + } + } } else { - for (const keyword of keywords) { + for (const keyword of rawkeyword) { console.log('START SCRAPE: ', keyword.keyword); - const refreshedkeywordData = await scrapeKeywordFromGoogle(keyword, settings); - refreshedResults.push(refreshedkeywordData); + const updatedkeyword = await refreshAndUpdateKeyword(keyword, settings); + updatedKeywords.push(updatedkeyword); if (keywords.length > 0 && settings.scrape_delay && settings.scrape_delay !== '0') { await sleep(parseInt(settings.scrape_delay, 10)); } @@ -30,7 +41,77 @@ const refreshKeywords = async (keywords:KeywordType[], settings:SettingsType): P const end = performance.now(); console.log(`time taken: ${end - start}ms`); - return refreshedResults; + return updatedKeywords; +}; + +/** + * Scrape Serp for given keyword and update the position in DB. + * @param {Keyword} keyword - Keywords to scrape + * @param {SettingsType} settings - The App Settings that contain the Scraper settings + * @returns {Promise} + */ +const refreshAndUpdateKeyword = async (keyword: Keyword, settings: SettingsType): Promise => { + const currentkeyword = keyword.get({ plain: true }); + const refreshedkeywordData = await scrapeKeywordFromGoogle(currentkeyword, settings); + const updatedkeyword = refreshedkeywordData ? await updateKeywordPosition(keyword, refreshedkeywordData, settings) : currentkeyword; + return updatedkeyword; +}; + +/** + * Processes the scraped data for the given keyword and updates the keyword serp position in DB. + * @param {Keyword} keywordRaw - Keywords to Update + * @param {RefreshResult} udpatedkeyword - scraped Data for that Keyword + * @param {SettingsType} settings - The App Settings that contain the Scraper settings + * @returns {Promise} + */ +export const updateKeywordPosition = async (keywordRaw:Keyword, udpatedkeyword: RefreshResult, settings: SettingsType): Promise => { + const keywordPrased = parseKeywords([keywordRaw.get({ plain: true })]); + const keyword = keywordPrased[0]; + // const udpatedkeyword = refreshed; + let updated = keyword; + + if (udpatedkeyword && keyword) { + const newPos = udpatedkeyword.position; + const newPosition = newPos !== 0 ? newPos : keyword.position; + const { history } = keyword; + const theDate = new Date(); + const dateKey = `${theDate.getFullYear()}-${theDate.getMonth() + 1}-${theDate.getDate()}`; + history[dateKey] = newPosition; + + const updatedVal = { + position: newPosition, + updating: false, + url: udpatedkeyword.url, + lastResult: udpatedkeyword.result, + history, + lastUpdated: udpatedkeyword.error ? keyword.lastUpdated : theDate.toJSON(), + lastUpdateError: udpatedkeyword.error + ? JSON.stringify({ date: theDate.toJSON(), error: `${udpatedkeyword.error}`, scraper: settings.scraper_type }) + : 'false', + }; + + // If failed, Add to Retry Queue Cron + if (udpatedkeyword.error) { + await retryScrape(keyword.ID); + } else { + await removeFromRetryQueue(keyword.ID); + } + + // Update the Keyword Position in Database + try { + await keywordRaw.update({ + ...updatedVal, + lastResult: Array.isArray(udpatedkeyword.result) ? JSON.stringify(udpatedkeyword.result) : udpatedkeyword.result, + history: JSON.stringify(history), + }); + console.log('[SUCCESS] Updating the Keyword: ', keyword.keyword); + updated = { ...keyword, ...updatedVal, lastUpdateError: JSON.parse(updatedVal.lastUpdateError) }; + } catch (error) { + console.log('[ERROR] Updating SERP for Keyword', keyword.keyword, error); + } + } + + return updated; }; /** @@ -53,4 +134,4 @@ const refreshParallel = async (keywords:KeywordType[], settings:SettingsType) : }); }; -export default refreshKeywords; +export default refreshAndUpdateKeywords; diff --git a/utils/scraper.ts b/utils/scraper.ts index d090be6..c89c882 100644 --- a/utils/scraper.ts +++ b/utils/scraper.ts @@ -12,14 +12,14 @@ type SearchResult = { } type SERPObject = { - postion:number|boolean, + postion:number, url:string } export type RefreshResult = false | { ID: number, keyword: string, - position:number | boolean, + position:number, url: string, result: SearchResult[], error?: boolean | string @@ -192,7 +192,7 @@ export const extractScrapedResult = (content: string, device: string): SearchRes * @returns {SERPObject} */ export const getSerp = (domain:string, result:SearchResult[]) : SERPObject => { - if (result.length === 0 || !domain) { return { postion: false, url: '' }; } + if (result.length === 0 || !domain) { return { postion: 0, url: '' }; } const foundItem = result.find((item) => { const itemDomain = item.url.replace('www.', '').match(/^(?:https?:)?(?:\/\/)?([^/?]+)/i); return itemDomain && itemDomain.includes(domain.replace('www.', ''));