13 Commits

Author   SHA1        Message  Date
towfiqi  d7279512cf  chore(release): 2.0.6  2024-11-15 10:43:12 +06:00
towfiqi  4fef1a9abc  fix: Ensures Docker build uses matching npm package versions from package.json  2024-11-14 23:11:09 +06:00
towfiqi  aeed1f8559  fix: Resolves broken Docker build due to croner package version mismatch. (closes #247)  2024-11-14 23:08:54 +06:00
towfiqi  12eac2b012  fix: Resolves Google Ads search volume data loading issue.  2024-11-14 18:31:40 +06:00
towfiqi  649f412303  fix: Resolves broken Proxy Scraper functionality. (closes #248)  2024-11-14 18:30:56 +06:00
towfiqi  a2edabbdf9  chore: Upgrades vulnerable dependencies.  2024-11-14 18:28:43 +06:00
towfiqi  3690e97fe7  chore(release): 2.0.5  2024-11-12 16:29:07 +06:00
towfiqi  17fb2c40cc  fix: Resolves "Add Domain" UI confusion.  2024-11-12 16:27:32 +06:00
towfiqi  faa3519a29  fix: Fixes misaligned Keywords table UI content.  2024-11-12 16:23:44 +06:00
towfiqi  bc02c929ba  fix: Resolves broken Scrapingrobot scraper on new installs. (closes #243)  2024-11-12 16:20:48 +06:00
towfiqi  d9d7c6347e  fix: Fixes broken scrape result issue for keywords with special characters. (closes #221)  2024-11-12 09:18:40 +06:00
towfiqi  c5714c00ae  chore(release): 2.0.4  2024-11-10 12:54:43 +06:00
towfiqi  1bef7587cc  fix: Fixes Docker build issue.  2024-11-10 12:54:32 +06:00
19 changed files with 1992 additions and 1065 deletions

CHANGELOG.md

@@ -2,6 +2,33 @@
 All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
+### [2.0.6](https://github.com/towfiqi/serpbear/compare/v2.0.5...v2.0.6) (2024-11-15)
+### Bug Fixes
+* Ensures Docker build uses matching npm package versions from package.json ([4fef1a9](https://github.com/towfiqi/serpbear/commit/4fef1a9abc737da67ab1ea0c4efce8194890545e))
+* Resolves broken Docker build due to croner package version mismatch. ([aeed1f8](https://github.com/towfiqi/serpbear/commit/aeed1f8559e044bf658d930a22fa91f38cfedc6b)), closes [#247](https://github.com/towfiqi/serpbear/issues/247)
+* Resolves broken Proxy Scraper functionality. ([649f412](https://github.com/towfiqi/serpbear/commit/649f412303dd50127b3736740962863f735f76eb)), closes [#248](https://github.com/towfiqi/serpbear/issues/248)
+* Resolves Google Ads search volume data loading issue. ([12eac2b](https://github.com/towfiqi/serpbear/commit/12eac2b01235e9eae06882d6a2c50c793b890661))
+### [2.0.5](https://github.com/towfiqi/serpbear/compare/v2.0.4...v2.0.5) (2024-11-12)
+### Bug Fixes
+* Fixes broken scrape result issue for keywords with special characters. ([d9d7c63](https://github.com/towfiqi/serpbear/commit/d9d7c6347e99e35f1e48f20b1e8f999612d69a72)), closes [#221](https://github.com/towfiqi/serpbear/issues/221)
+* Fixes misaligned Keywords table UI content. ([faa3519](https://github.com/towfiqi/serpbear/commit/faa3519a29fc61ef8bd2ce9275a6674f1c7946e0))
+* Resolves "Add Domain" UI confusion. ([17fb2c4](https://github.com/towfiqi/serpbear/commit/17fb2c40cc6b57ab2fe6aeb940dececd1a83411f))
+* Resolves broken Scrapingrobot scraper on new installs. ([bc02c92](https://github.com/towfiqi/serpbear/commit/bc02c929ba7efd6b4b6a09495af7310c155a26bd)), closes [#243](https://github.com/towfiqi/serpbear/issues/243)
+### [2.0.4](https://github.com/towfiqi/serpbear/compare/v2.0.3...v2.0.4) (2024-11-10)
+### Bug Fixes
+* Fixes Docker build issue. ([1bef758](https://github.com/towfiqi/serpbear/commit/1bef7587cccada6df48cfa3d208bf123a5d00c30))
 ### [2.0.3](https://github.com/towfiqi/serpbear/compare/v2.0.2...v2.0.3) (2024-11-10)

Dockerfile

@@ -1,5 +1,6 @@
-FROM node:lts-alpine AS deps
+FROM node:22.11.0-alpine3.20 AS deps
+ENV NPM_VERSION=10.3.0
+RUN npm install -g npm@"${NPM_VERSION}"
 WORKDIR /app
 COPY package.json ./
@@ -7,8 +8,10 @@ RUN npm install
 COPY . .
-FROM node:lts-alpine AS builder
+FROM node:22.11.0-alpine3.20 AS builder
 WORKDIR /app
+ENV NPM_VERSION=10.3.0
+RUN npm install -g npm@"${NPM_VERSION}"
 COPY --from=deps /app ./
 RUN rm -rf /app/data
 RUN rm -rf /app/__tests__
@@ -16,9 +19,11 @@ RUN rm -rf /app/__mocks__
 RUN npm run build
-FROM node:lts-alpine AS runner
+FROM node:22.11.0-alpine3.20 AS runner
 WORKDIR /app
-ENV NODE_ENV production
+ENV NPM_VERSION=10.3.0
+RUN npm install -g npm@"${NPM_VERSION}"
+ENV NODE_ENV=production
 RUN addgroup --system --gid 1001 nodejs
 RUN adduser --system --uid 1001 nextjs
 RUN set -xe && mkdir -p /app/data && chown nextjs:nodejs /app/data
@@ -35,7 +40,7 @@ COPY --from=builder --chown=nextjs:nodejs /app/.sequelizerc ./.sequelizerc
 COPY --from=builder --chown=nextjs:nodejs /app/entrypoint.sh ./entrypoint.sh
 RUN rm package.json
 RUN npm init -y
-RUN npm i cryptr dotenv croner @googleapis/searchconsole sequelize-cli @isaacs/ttlcache
+RUN npm i cryptr@6.0.3 dotenv@16.0.3 croner@9.0.0 @googleapis/searchconsole@1.0.5 sequelize-cli@6.6.2 @isaacs/ttlcache@1.4.1
 RUN npm i -g concurrently
 USER nextjs
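The pinned `RUN npm i` line is the substance of 4fef1a9: the runner stage previously installed whatever versions were latest, which could drift ahead of what the app code expects. A minimal sketch of the idea, assuming it runs from the repo root — the helper itself is illustrative, not part of the repo:

```js
// Derive exact "pkg@version" specs from package.json so the runner stage
// installs the same versions the app was written against. `runtimeDeps`
// mirrors the packages listed in the pinned RUN npm i line above.
const { dependencies } = require('./package.json');

const runtimeDeps = ['cryptr', 'dotenv', 'croner', '@googleapis/searchconsole', '@isaacs/ttlcache'];
const specs = runtimeDeps.map((name) => `${name}@${dependencies[name].replace(/^[~^]/, '')}`);
console.log(`npm i ${specs.join(' ')}`);
// -> npm i cryptr@6.0.3 dotenv@16.0.3 croner@9.0.0 @googleapis/searchconsole@1.0.5 @isaacs/ttlcache@1.4.1
```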


@@ -1,22 +1,26 @@
 import { render } from '@testing-library/react';
-import { rest } from 'msw';
+import { http } from 'msw';
 import * as React from 'react';
 import { QueryClient, QueryClientProvider } from 'react-query';
 export const handlers = [
-  rest.get(
+  http.get(
     '*/react-query',
-    (req, res, ctx) => {
-      return res(
-        ctx.status(200),
-        ctx.json({
+    ({ request, params }) => {
+      return new Response(
+        JSON.stringify({
          name: 'mocked-react-query',
        }),
+        {
+          status: 200,
+          headers: {
+            'Content-Type': 'application/json',
+          },
+        },
       );
     },
   ),
 ];
 const createTestQueryClient = () => new QueryClient({
   defaultOptions: {
     queries: {
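For context on the hunk above: msw 2.x removed the `rest` namespace in favor of `http`, and resolvers now return Fetch-API `Response` objects instead of composing `res(ctx.status(), ctx.json())`. The hand-built `Response` works; msw v2 also ships an `HttpResponse` helper that does the same in one line — a minimal equivalent sketch:

```js
// Same mock as above, written with msw 2.x's HttpResponse.json() shorthand.
import { http, HttpResponse } from 'msw';

export const handlers = [
  http.get('*/react-query', () => HttpResponse.json({ name: 'mocked-react-query' })),
];
```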


@@ -36,7 +36,7 @@ const AddDomain = ({ closeModal, domains = [] }: AddDomainProps) => {
     }
   });
   if (invalidDomains.length > 0) {
-    setNewDomainError(`Please Insert Valid Domain URL. Invalid URLs: ${invalidDomains.join(', ')}`);
+    setNewDomainError(`Please Insert Valid Website URL. ${invalidDomains.length > 1 ? `Invalid URLs: ${invalidDomains.join(', ')}` : ''}`);
   } else if (domainsTobeAdded.length > 0) {
     console.log('domainsTobeAdded :', domainsTobeAdded);
     addMutate(domainsTobeAdded);
@@ -51,11 +51,11 @@ const AddDomain = ({ closeModal, domains = [] }: AddDomainProps) => {
   return (
     <Modal closeModal={() => { closeModal(false); }} title={'Add New Domain'}>
       <div data-testid="adddomain_modal">
-        <h4 className='text-sm mt-4'>Domain URL</h4>
+        <h4 className='text-sm mt-4 pb-2'>Website URL(s)</h4>
         <textarea
          className={`w-full h-40 border rounded border-gray-200 p-4 outline-none
          focus:border-indigo-300 ${newDomainError ? ' border-red-400 focus:border-red-400' : ''}`}
-         placeholder="Type or Paste URLs here. Insert Each URL in a New line."
+         placeholder={'Type or Paste URLs here. Insert Each URL in a New line. eg: \nhttps://mysite.com/ \nhttps://anothersite.com/ '}
          value={newDomain}
          autoFocus={true}
          onChange={handleDomainInput}>


@@ -97,7 +97,7 @@ const Keyword = (props: KeywordProps) => {
     className={`keyword relative py-5 px-4 text-gray-600 border-b-[1px] border-gray-200 lg:py-4 lg:px-6 lg:border-0
     lg:flex lg:justify-between lg:items-center ${selected ? ' bg-indigo-50 keyword--selected' : ''} ${lastItem ? 'border-b-0' : ''}`}>
-    <div className=' w-3/4 font-semibold cursor-pointer lg:flex-1 lg:shrink-0 lg:basis-20 lg:w-auto lg:flex lg:items-center'>
+    <div className=' w-3/4 font-semibold cursor-pointer lg:flex-1 lg:shrink-0 lg:basis-28 lg:w-auto lg:flex lg:items-center'>
     <button
       className={`p-0 mr-2 leading-[0px] inline-block rounded-sm pt-0 px-[1px] pb-[3px] border
       ${selected ? ' bg-blue-700 border-blue-700 text-white' : 'text-transparent'}`}


@@ -66,7 +66,10 @@ const KeywordFilters = (props: KeywordFilterProps) => {
   const optionObject:{label:string, value:string}[] = [];
   if (!isConsole) {
-    const allCountries = keywords.reduce((acc: string[], keyword) => [...acc, keyword.country], []).filter((t) => t && t.trim() !== '');
+    const allCountries = Array.from(keywords as KeywordType[])
+      .map((keyword) => keyword.country)
+      .reduce<string[]>((acc, country) => [...acc, country], [])
+      .filter((t) => t && t.trim() !== '');
     [...new Set(allCountries)].forEach((c) => optionObject.push({ label: countries[c][0], value: c }));
   } else {
     Object.keys(countries).forEach((countryISO:string) => {
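A side note on the new chain above: the `.reduce` pass is an identity step once `.map` has run, so a condensed equivalent — using the component's own `keywords`, `countries`, and `optionObject` values — would be:

```js
// Collect each keyword's country code, drop empties, de-duplicate, and map
// to select options; same result as the map/reduce/filter chain above.
const allCountries = keywords
  .map((keyword) => keyword.country)
  .filter((c) => c && c.trim() !== '');
[...new Set(allCountries)].forEach((c) => optionObject.push({ label: countries[c][0], value: c }));
```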

cron.js

@@ -1,7 +1,8 @@
+/* eslint-disable no-new */
 const Cryptr = require('cryptr');
-const { promises } = require('fs');
-const Cron = require('croner');
+const { readFile } = require('fs');
+const { Cron } = require('croner');
 require('dotenv').config({ path: './.env.local' });
 const getAppSettings = async () => {
@@ -71,7 +72,7 @@ const runAppCronJobs = () => {
   const scrape_interval = settings.scrape_interval || 'daily';
   if (scrape_interval !== 'never') {
     const scrapeCronTime = generateCronTime(scrape_interval);
-    Cron(scrapeCronTime, () => {
+    new Cron(scrapeCronTime, () => {
       // console.log('### Running Keyword Position Cron Job!');
       const fetchOpts = { method: 'POST', headers: { Authorization: `Bearer ${process.env.APIKEY}` } };
       fetch(`${process.env.NEXT_PUBLIC_APP_URL}/api/cron`, fetchOpts)
@@ -89,7 +90,7 @@ const runAppCronJobs = () => {
   if (notif_interval) {
     const cronTime = generateCronTime(notif_interval === 'daily' ? 'daily_morning' : notif_interval);
     if (cronTime) {
-      Cron(cronTime, () => {
+      new Cron(cronTime, () => {
         // console.log('### Sending Notification Email...');
         const fetchOpts = { method: 'POST', headers: { Authorization: `Bearer ${process.env.APIKEY}` } };
         fetch(`${process.env.NEXT_PUBLIC_APP_URL}/api/notify`, fetchOpts)
@@ -106,7 +107,7 @@ const runAppCronJobs = () => {
   // Run Failed scraping CRON (Every Hour)
   const failedCronTime = generateCronTime('hourly');
-  Cron(failedCronTime, () => {
+  new Cron(failedCronTime, () => {
     // console.log('### Retrying Failed Scrapes...');
     readFile(`${process.cwd()}/data/failed_queue.json`, { encoding: 'utf-8' }, (err, data) => {
@@ -135,7 +136,7 @@ const runAppCronJobs = () => {
   // Run Google Search Console Scraper Daily
   if (process.env.SEARCH_CONSOLE_PRIVATE_KEY && process.env.SEARCH_CONSOLE_CLIENT_EMAIL) {
     const searchConsoleCRONTime = generateCronTime('daily');
-    Cron(searchConsoleCRONTime, () => {
+    new Cron(searchConsoleCRONTime, () => {
       const fetchOpts = { method: 'POST', headers: { Authorization: `Bearer ${process.env.APIKEY}` } };
       fetch(`${process.env.NEXT_PUBLIC_APP_URL}/api/searchconsole`, fetchOpts)
         .then((res) => res.json())
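The cron.js changes track croner's v5-to-v9 API: the class is now a named export and is instantiated with `new`, which is also why the `/* eslint-disable no-new */` comment appears at the top of the file. A minimal usage sketch in the same CommonJS style:

```js
// croner v9: named { Cron } export, instantiated with `new`.
const { Cron } = require('croner');

const job = new Cron('0 * * * *', () => {
  console.log('hourly tick');
});
// The returned instance exposes controls such as job.nextRun() and job.stop().
```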

package-lock.json (generated)

File diff suppressed because it is too large.

package.json

@@ -1,6 +1,6 @@
 {
   "name": "serpbear",
-  "version": "2.0.3",
+  "version": "2.0.6",
   "private": true,
   "scripts": {
     "dev": "next dev",
@@ -18,16 +18,16 @@
     "release": "standard-version"
   },
   "dependencies": {
-    "@googleapis/searchconsole": "^1.0.0",
+    "@googleapis/searchconsole": "^1.0.5",
     "@isaacs/ttlcache": "^1.4.1",
     "@types/react-transition-group": "^4.4.5",
-    "axios": "^1.1.3",
+    "axios": "^1.7.7",
     "axios-retry": "^3.3.1",
     "chart.js": "^3.9.1",
-    "cheerio": "^1.0.0-rc.12",
+    "cheerio": "^1.0.0",
     "concurrently": "^7.6.0",
     "cookies": "^0.8.0",
-    "croner": "^5.3.5",
+    "croner": "^9.0.0",
     "cryptr": "^6.0.3",
     "dayjs": "^1.11.5",
     "dotenv": "^16.0.3",
@@ -35,7 +35,6 @@
     "https-proxy-agent": "^5.0.1",
     "isomorphic-fetch": "^3.0.0",
     "jsonwebtoken": "^9.0.2",
-    "msw": "^0.49.0",
     "next": "^12.3.4",
     "nodemailer": "^6.9.9",
     "react": "18.2.0",
@@ -50,11 +49,11 @@
     "reflect-metadata": "^0.1.13",
     "sequelize": "^6.34.0",
     "sequelize-typescript": "^2.1.6",
-    "sqlite3": "^5.1.6",
-    "umzug": "^3.6.1"
+    "sqlite3": "^5.1.7",
+    "umzug": "^3.8.2"
   },
   "devDependencies": {
-    "@testing-library/jest-dom": "^6.1.4",
+    "@testing-library/jest-dom": "^6.6.3",
     "@testing-library/react": "^14.0.0",
     "@types/cookies": "^0.7.7",
     "@types/cryptr": "^4.0.1",
@@ -74,15 +73,16 @@
     "jest": "^29.7.0",
     "jest-environment-jsdom": "^29.7.0",
     "jest-fetch-mock": "^3.0.3",
+    "msw": "^2.6.4",
     "next-router-mock": "^0.9.10",
-    "postcss": "^8.4.31",
+    "postcss": "^8.4.49",
     "prettier": "^2.7.1",
     "resize-observer-polyfill": "^1.5.1",
-    "sass": "^1.55.0",
+    "sass": "^1.80.7",
     "sequelize-cli": "^6.6.2",
     "standard-version": "^9.5.0",
     "stylelint-config-standard": "^29.0.0",
-    "tailwindcss": "^3.1.8",
-    "typescript": "4.8.4"
+    "tailwindcss": "^3.4.14",
+    "typescript": "^4.8.4"
   }
 }
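The croner jump from `^5.3.5` to `^9.0.0` is what had broken the Docker build (#247): a bare `npm i croner` in the image resolves to the latest major release, which the `^5.3.5`-era code was never written for. Illustrated with the node-semver package — an assumption for demonstration only; it is not a dependency of this repo:

```js
// Caret ranges only admit same-major releases, so an unpinned install of the
// latest version can land far outside what the code expects.
const semver = require('semver');

console.log(semver.satisfies('9.0.0', '^5.3.5')); // false — v9 is outside ^5.3.5
console.log(semver.maxSatisfying(['5.7.1', '9.0.0'], '^5.3.5')); // '5.7.1'
```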


@@ -36,14 +36,13 @@ const updatekeywordVolume = async (req: NextApiRequest, res: NextApiResponse<Key
     keywordsToSend = parseKeywords(allKeywords.map((e) => e.get({ plain: true })));
   }
   if (domain) {
-    // const allDomain = domain === 'all';
-    // const allKeywords:Keyword[] = allDomain ? await Keyword.findAll() : await Keyword.findAll(allDomain ? {} : { where: { domain } });
-    // keywordsToSend = parseKeywords(allKeywords.map((e) => e.get({ plain: true })));
+    const allDomain = domain === 'all';
+    const allKeywords:Keyword[] = allDomain ? await Keyword.findAll() : await Keyword.findAll(allDomain ? {} : { where: { domain } });
+    keywordsToSend = parseKeywords(allKeywords.map((e) => e.get({ plain: true })));
   }
   if (keywordsToSend.length > 0) {
     const keywordsVolumeData = await getKeywordsVolume(keywordsToSend);
-    // console.log('keywordsVolumeData :', keywordsVolumeData);
     if (keywordsVolumeData.error) {
       return res.status(400).json({ keywords: [], error: keywordsVolumeData.error });
     }


@@ -20,11 +20,10 @@ const hasdata:ScraperSettings = {
   scrapeURL: (keyword, settings) => {
     const country = keyword.country || 'US';
     const countryName = countries[country][0];
-    const location = keyword.city && countryName ? `&location=${encodeURI(`${keyword.city},${countryName}`)}` : '';
-    return `https://api.scrape-it.cloud/scrape/google/serp?q=${encodeURI(keyword.keyword)}${location}&num=100&gl=${country.toLowerCase()}&deviceType=${keyword.device}`;
+    const location = keyword.city && countryName ? `&location=${encodeURIComponent(`${keyword.city},${countryName}`)}` : '';
+    return `https://api.scrape-it.cloud/scrape/google/serp?q=${encodeURIComponent(keyword.keyword)}${location}&num=100&gl=${country.toLowerCase()}&deviceType=${keyword.device}`;
   },
   resultObjectKey: 'organicResults',
   serpExtractor: (content) => {
     const extractedResult = [];
     const results: HasDataResult[] = (typeof content === 'string') ? JSON.parse(content) : content as HasDataResult[];
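This hunk and the scraper hunks below all make the same change for the special-characters fix tracked as #221: `encodeURI` leaves URI-reserved characters such as `&` and `+` unescaped, so keywords containing them corrupted the `q=` parameter, while `encodeURIComponent` escapes everything unsafe inside a query value. The difference in one snippet:

```js
// encodeURI is meant for whole URLs and keeps reserved characters intact;
// encodeURIComponent is meant for individual query values and escapes them.
const kw = 'black & decker c++';
console.log(encodeURI(kw));          // black%20&%20decker%20c++  -> & and + leak into the query
console.log(encodeURIComponent(kw)); // black%20%26%20decker%20c%2B%2B
```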


@@ -1,4 +1,4 @@
-import cheerio from 'cheerio';
+import * as cheerio from 'cheerio';
 const proxy:ScraperSettings = {
   id: 'proxy',
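cheerio 1.0 (bumped from 1.0.0-rc.12 in package.json) no longer provides a default export, hence the namespace import here and in the scraper utility at the end of this diff. Usage is otherwise unchanged — a quick sketch:

```js
import * as cheerio from 'cheerio';

// load() still returns the familiar $ API; only the import form changed in 1.0.
const $ = cheerio.load('<div id="serp"><a href="https://example.com/">hit</a></div>');
console.log($('#serp a').attr('href')); // https://example.com/
```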


@@ -20,8 +20,8 @@ const searchapi:ScraperSettings = {
   scrapeURL: (keyword) => {
     const country = keyword.country || 'US';
     const countryName = countries[country][0];
-    const location = keyword.city && countryName ? `&location=${encodeURI(`${keyword.city},${countryName}`)}` : '';
-    return `https://www.searchapi.io/api/v1/search?engine=google&q=${encodeURI(keyword.keyword)}&num=100&gl=${country}&device=${keyword.device}${location}`;
+    const location = keyword.city && countryName ? `&location=${encodeURIComponent(`${keyword.city},${countryName}`)}` : '';
+    return `https://www.searchapi.io/api/v1/search?engine=google&q=${encodeURIComponent(keyword.keyword)}&num=100&gl=${country}&device=${keyword.device}${location}`;
   },
   resultObjectKey: 'organic_results',
   serpExtractor: (content) => {


@@ -19,8 +19,8 @@ const serpapi:ScraperSettings = {
   },
   scrapeURL: (keyword, settings) => {
     const countryName = countries[keyword.country || 'US'][0];
-    const location = keyword.city && keyword.country ? `&location=${encodeURI(`${keyword.city},${countryName}`)}` : '';
-    return `https://serpapi.com/search?q=${encodeURI(keyword.keyword)}&num=100&gl=${keyword.country}&device=${keyword.device}${location}&api_key=${settings.scaping_api}`;
+    const location = keyword.city && keyword.country ? `&location=${encodeURIComponent(`${keyword.city},${countryName}`)}` : '';
+    return `https://serpapi.com/search?q=${encodeURIComponent(keyword.keyword)}&num=100&gl=${keyword.country}&device=${keyword.device}${location}&api_key=${settings.scaping_api}`;
   },
   resultObjectKey: 'organic_results',
   serpExtractor: (content) => {


@@ -12,7 +12,8 @@ const serper:ScraperSettings = {
   scrapeURL: (keyword, settings, countryData) => {
     const country = keyword.country || 'US';
     const lang = countryData[country][2];
-    return `https://google.serper.dev/search?q=${encodeURI(keyword.keyword)}&gl=${country}&hl=${lang}&num=100&apiKey=${settings.scaping_api}`;
+    console.log('Serper URL :', `https://google.serper.dev/search?q=${encodeURIComponent(keyword.keyword)}&gl=${country}&hl=${lang}&num=100&apiKey=${settings.scaping_api}`);
+    return `https://google.serper.dev/search?q=${encodeURIComponent(keyword.keyword)}&gl=${country}&hl=${lang}&num=100&apiKey=${settings.scaping_api}`;
   },
   resultObjectKey: 'organic',
   serpExtractor: (content) => {


@@ -20,7 +20,7 @@ const serply:ScraperSettings = {
   },
   scrapeURL: (keyword) => {
     const country = scraperCountries.includes(keyword.country.toUpperCase()) ? keyword.country : 'US';
-    return `https://api.serply.io/v1/search/q=${encodeURI(keyword.keyword)}&num=100&hl=${country}`;
+    return `https://api.serply.io/v1/search/q=${encodeURIComponent(keyword.keyword)}&num=100&hl=${country}`;
   },
   resultObjectKey: 'result',
   serpExtractor: (content) => {


@@ -15,10 +15,10 @@ const spaceSerp:ScraperSettings = {
   scrapeURL: (keyword, settings, countryData) => {
     const country = keyword.country || 'US';
     const countryName = countries[country][0];
-    const location = keyword.city ? `&location=${encodeURI(`${keyword.city},${countryName}`)}` : '';
+    const location = keyword.city ? `&location=${encodeURIComponent(`${keyword.city},${countryName}`)}` : '';
     const device = keyword.device === 'mobile' ? '&device=mobile' : '';
     const lang = countryData[country][2];
-    return `https://api.spaceserp.com/google/search?apiKey=${settings.scaping_api}&q=${encodeURI(keyword.keyword)}&pageSize=100&gl=${country}&hl=${lang}${location}${device}&resultBlocks=`;
+    return `https://api.spaceserp.com/google/search?apiKey=${settings.scaping_api}&q=${encodeURIComponent(keyword.keyword)}&pageSize=100&gl=${country}&hl=${lang}${location}${device}&resultBlocks=`;
   },
   resultObjectKey: 'organic_results',
   serpExtractor: (content) => {


@@ -15,11 +15,11 @@ const valueSerp:ScraperSettings = {
   scrapeURL: (keyword, settings, countryData) => {
     const country = keyword.country || 'US';
     const countryName = countries[country][0];
-    const location = keyword.city ? `&location=${encodeURI(`${keyword.city},${countryName}`)}` : '';
+    const location = keyword.city ? `&location=${encodeURIComponent(`${keyword.city},${countryName}`)}` : '';
     const device = keyword.device === 'mobile' ? '&device=mobile' : '';
     const lang = countryData[country][2];
-    console.log(`https://api.valueserp.com/search?api_key=${settings.scaping_api}&q=${encodeURI(keyword.keyword)}&gl=${country}&hl=${lang}${device}${location}&num=100&output=json&include_answer_box=false&include_advertiser_info=false`);
-    return `https://api.valueserp.com/search?api_key=${settings.scaping_api}&q=${encodeURI(keyword.keyword)}&gl=${country}&hl=${lang}${device}${location}&num=100&output=json&include_answer_box=false&include_advertiser_info=false`;
+    console.log(`https://api.valueserp.com/search?api_key=${settings.scaping_api}&q=${encodeURIComponent(keyword.keyword)}&gl=${country}&hl=${lang}${device}${location}&num=100&output=json&include_answer_box=false&include_advertiser_info=false`);
+    return `https://api.valueserp.com/search?api_key=${settings.scaping_api}&q=${encodeURIComponent(keyword.keyword)}&gl=${country}&hl=${lang}${device}${location}&num=100&output=json&include_answer_box=false&include_advertiser_info=false`;
   },
   resultObjectKey: 'organic_results',
   serpExtractor: (content) => {


@@ -1,5 +1,5 @@
 import axios, { AxiosResponse, CreateAxiosDefaults } from 'axios';
-import cheerio from 'cheerio';
+import * as cheerio from 'cheerio';
 import { readFile, writeFile } from 'fs/promises';
 import HttpsProxyAgent from 'https-proxy-agent';
 import countries from './countries';
@@ -124,7 +124,7 @@ export const scrapeKeywordFromGoogle = async (keyword:KeywordType, settings:Sett
       throw new Error(res);
     }
   } catch (error:any) {
-    refreshedResults.error = scraperError;
+    refreshedResults.error = scraperError || 'Unknown Error';
     if (settings.scraper_type === 'proxy' && error && error.response && error.response.statusText) {
       refreshedResults.error = `[${error.response.status}] ${error.response.statusText}`;
     }