mirror of
https://github.com/towfiqi/serpbear
synced 2025-06-26 18:15:54 +00:00
added support for serplyio
This commit is contained in:
parent
a45237b230
commit
f51380442b
@ -35,6 +35,7 @@ The App uses third party website scrapers like ScrapingAnt, ScrapingRobot or You
|
|||||||
| SerpWatch.io | $29/mo | 7500/mo | Yes |
|
| SerpWatch.io | $29/mo | 7500/mo | Yes |
|
||||||
| Serpwatcher.com | $49/mo| 3000/mo | No |
|
| Serpwatcher.com | $49/mo| 3000/mo | No |
|
||||||
| whatsmyserp.com | $49/mo| 30,000/mo| No |
|
| whatsmyserp.com | $49/mo| 30,000/mo| No |
|
||||||
|
| serply.io | $49/mo | 5000/mo | Yes |
|
||||||
|
|
||||||
(*) Free up to a limit. If you are using ScrapingAnt, you can perform 10,000 lookups per month for free.
|
(*) Free up to a limit. If you are using ScrapingAnt, you can perform 10,000 lookups per month for free.
|
||||||
|
|
||||||
|
@ -105,6 +105,7 @@ const Settings = ({ closeSettings }:SettingsProps) => {
|
|||||||
{ label: 'Proxy', value: 'proxy' },
|
{ label: 'Proxy', value: 'proxy' },
|
||||||
{ label: 'ScrapingAnt.com', value: 'scrapingant' },
|
{ label: 'ScrapingAnt.com', value: 'scrapingant' },
|
||||||
{ label: 'ScrapingRobot.com', value: 'scrapingrobot' },
|
{ label: 'ScrapingRobot.com', value: 'scrapingrobot' },
|
||||||
|
{ label: 'serply.io', value: 'serply' },
|
||||||
];
|
];
|
||||||
|
|
||||||
const tabStyle = 'inline-block px-4 py-1 rounded-full mr-3 cursor-pointer text-sm';
|
const tabStyle = 'inline-block px-4 py-1 rounded-full mr-3 cursor-pointer text-sm';
|
||||||
@ -150,7 +151,7 @@ const Settings = ({ closeSettings }:SettingsProps) => {
|
|||||||
minWidth={270}
|
minWidth={270}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
{['scrapingant', 'scrapingrobot'].includes(settings.scraper_type) && (
|
{['scrapingant', 'scrapingrobot', 'serply'].includes(settings.scraper_type) && (
|
||||||
<div className="settings__section__input mr-3">
|
<div className="settings__section__input mr-3">
|
||||||
<label className={labelStyle}>Scraper API Key or Token</label>
|
<label className={labelStyle}>Scraper API Key or Token</label>
|
||||||
<input
|
<input
|
||||||
|
@ -26,6 +26,12 @@ export type RefreshResult = false | {
|
|||||||
error?: boolean
|
error?: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * One entry of the JSON result list handled by the `serply` branch of
 * `extractScrapedResult`.
 *
 * - `title`: result title, copied to `title` of the extracted search result.
 * - `link`: result URL, copied to `url` of the extracted search result.
 * - `realPosition`: copied to `position` of the extracted search result.
 */
interface SerplyResult {
|
||||||
|
title: string,
|
||||||
|
link: string,
|
||||||
|
realPosition: number,
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a SERP Scraper client promise based on the app settings.
|
* Creates a SERP Scraper client promise based on the app settings.
|
||||||
* @param {KeywordType} keyword - the keyword to get the SERP for.
|
* @param {KeywordType} keyword - the keyword to get the SERP for.
|
||||||
@ -34,7 +40,7 @@ export type RefreshResult = false | {
|
|||||||
*/
|
*/
|
||||||
export const getScraperClient = (keyword:KeywordType, settings:SettingsType): Promise<AxiosResponse|Response> | false => {
|
export const getScraperClient = (keyword:KeywordType, settings:SettingsType): Promise<AxiosResponse|Response> | false => {
|
||||||
let apiURL = ''; let client: Promise<AxiosResponse|Response> | false = false;
|
let apiURL = ''; let client: Promise<AxiosResponse|Response> | false = false;
|
||||||
const headers = {
|
const headers: any = {
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246',
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246',
|
||||||
Accept: 'application/json; charset=utf8;',
|
Accept: 'application/json; charset=utf8;',
|
||||||
@ -59,6 +65,20 @@ export const getScraperClient = (keyword:KeywordType, settings:SettingsType): Pr
|
|||||||
apiURL = `https://api.scrapingrobot.com/?url=https%3A%2F%2Fwww.google.com%2Fsearch%3Fnum%3D100%26hl%3D${lang}%26q%3D${encodeURI(keyword.keyword)}&token=${settings.scaping_api}&proxyCountry=${country}&render=false${keyword.device === 'mobile' ? '&mobile=true' : ''}`;
|
apiURL = `https://api.scrapingrobot.com/?url=https%3A%2F%2Fwww.google.com%2Fsearch%3Fnum%3D100%26hl%3D${lang}%26q%3D${encodeURI(keyword.keyword)}&token=${settings.scaping_api}&proxyCountry=${country}&render=false${keyword.device === 'mobile' ? '&mobile=true' : ''}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Serply.io docs https://docs.serply.io/api
|
||||||
|
if (settings && settings.scraper_type === 'serply' && settings.scaping_api) {
|
||||||
|
const scraperCountries = ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'];
|
||||||
|
const country = scraperCountries.includes(keyword.country.toUpperCase()) ? keyword.country : 'US';
|
||||||
|
if (keyword.device === 'mobile') {
|
||||||
|
headers['X-User-Agent'] = 'mobile';
|
||||||
|
} else {
|
||||||
|
headers['X-User-Agent'] = 'desktop';
|
||||||
|
}
|
||||||
|
headers['X-Proxy-Location'] = country;
|
||||||
|
headers['X-Api-Key'] = settings.scaping_api
|
||||||
|
apiURL = `https://api.serply.io/v1/search/q=${encodeURI(keyword.keyword)}&num=100&hl=${country}`;
|
||||||
|
}
|
||||||
|
|
||||||
if (settings && settings.scraper_type === 'proxy' && settings.proxy) {
|
if (settings && settings.scraper_type === 'proxy' && settings.proxy) {
|
||||||
const axiosConfig: CreateAxiosDefaults = {};
|
const axiosConfig: CreateAxiosDefaults = {};
|
||||||
axiosConfig.headers = headers;
|
axiosConfig.headers = headers;
|
||||||
@ -76,6 +96,7 @@ export const getScraperClient = (keyword:KeywordType, settings:SettingsType): Pr
|
|||||||
const axiosClient = axios.create(axiosConfig);
|
const axiosClient = axios.create(axiosConfig);
|
||||||
client = axiosClient.get(`https://www.google.com/search?num=100&q=${encodeURI(keyword.keyword)}`);
|
client = axiosClient.get(`https://www.google.com/search?num=100&q=${encodeURI(keyword.keyword)}`);
|
||||||
} else {
|
} else {
|
||||||
|
console.log(`calling ${apiURL}`);
|
||||||
client = fetch(apiURL, { method: 'GET', headers }).then((res) => res.json());
|
client = fetch(apiURL, { method: 'GET', headers }).then((res) => res.json());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -103,9 +124,9 @@ export const scrapeKeywordFromGoogle = async (keyword:KeywordType, settings:Sett
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
const res:any = await scraperClient;
|
const res:any = await scraperClient;
|
||||||
if (res && (res.data || res.html)) {
|
if (res && (res.data || res.html || res.results)) {
|
||||||
// writeFile('result.txt', res.data, { encoding: 'utf-8' });
|
// writeFile('result.txt', res.data, { encoding: 'utf-8' });
|
||||||
const extracted = extractScrapedResult(res.data || res.html, settings.scraper_type);
|
const extracted = extractScrapedResult(res.data || res.html || res.results, settings.scraper_type);
|
||||||
const serp = getSerp(keyword.domain, extracted);
|
const serp = getSerp(keyword.domain, extracted);
|
||||||
refreshedResults = { ID: keyword.ID, keyword: keyword.keyword, position: serp.postion, url: serp.url, result: extracted, error: false };
|
refreshedResults = { ID: keyword.ID, keyword: keyword.keyword, position: serp.postion, url: serp.url, result: extracted, error: false };
|
||||||
console.log('SERP: ', keyword.keyword, serp.postion, serp.url);
|
console.log('SERP: ', keyword.keyword, serp.postion, serp.url);
|
||||||
@ -123,10 +144,25 @@ export const scrapeKeywordFromGoogle = async (keyword:KeywordType, settings:Sett
|
|||||||
* @param {string} scraper_type - the type of scraper (Proxy or Scraper)
|
* @param {string} scraper_type - the type of scraper (Proxy or Scraper)
|
||||||
* @returns {SearchResult[]}
|
* @returns {SearchResult[]}
|
||||||
*/
|
*/
|
||||||
export const extractScrapedResult = (content:string, scraper_type:string): SearchResult[] => {
|
export const extractScrapedResult = (content: string, scraper_type:string): SearchResult[] => {
|
||||||
const extractedResult = [];
|
const extractedResult = [];
|
||||||
const $ = cheerio.load(content);
|
|
||||||
|
|
||||||
|
if (scraper_type === 'serply') {
|
||||||
|
// results already in json
|
||||||
|
const results: SerplyResult[] = (typeof content === 'string') ? JSON.parse(content) : content as SerplyResult[];
|
||||||
|
for (const result of results) {
|
||||||
|
if (result.title && result.link) {
|
||||||
|
extractedResult.push({
|
||||||
|
title: result.title,
|
||||||
|
url: result.link,
|
||||||
|
position: result.realPosition,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return extractedResult;
|
||||||
|
}
|
||||||
|
const $ = cheerio.load(content);
|
||||||
const hasNumberofResult = $('body').find('#search > div > div');
|
const hasNumberofResult = $('body').find('#search > div > div');
|
||||||
const searchResult = hasNumberofResult.children();
|
const searchResult = hasNumberofResult.children();
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user