mirror of
https://github.com/towfiqi/serpbear
synced 2025-06-26 18:15:54 +00:00
Compare commits
17 Commits
| SHA1 |
|---|
| 57182f17f6 |
| d6da18fb01 |
| dd6a801ffd |
| e1799fb2f3 |
| c8ee418822 |
| e7ab7d2db2 |
| efb565ba00 |
| a11b0f223c |
| e6136db742 |
| d01b65db04 |
| f51380442b |
| 691055811c |
| 6d7cfec953 |
| 8c8064f222 |
| 3d1c690076 |
| 1ed298f633 |
| 38dc164514 |
CHANGELOG.md (31 changed lines)
@@ -2,6 +2,37 @@

All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.

### [0.1.5](https://github.com/towfiqi/serpbear/compare/v0.1.4...v0.1.5) (2022-12-03)

### Features

* keyword not in first 100 now shows >100 ([e1799fb](https://github.com/towfiqi/serpbear/commit/e1799fb2f35ab8c0f65eb90e66dcda10b8cb6f16))

### Bug Fixes

* domains with - were not loading the keywords. ([efb565b](https://github.com/towfiqi/serpbear/commit/efb565ba0086d1b3e69ea71456a892ca254856f7)), closes [#11](https://github.com/towfiqi/serpbear/issues/11)
* failed scrape messes up lastResult data in db ([dd6a801](https://github.com/towfiqi/serpbear/commit/dd6a801ffda3eacda957dd20d2c97fb6197fbdc2))
* First search result items were being skipped. ([d6da18f](https://github.com/towfiqi/serpbear/commit/d6da18fb0135e23dd869d1fb500e12ee2e782bfa)), closes [#13](https://github.com/towfiqi/serpbear/issues/13)
* removes empty spaces when adding domain. ([a11b0f2](https://github.com/towfiqi/serpbear/commit/a11b0f223c0647537ab23564df1d2f0b29eef4ae))

### [0.1.4](https://github.com/towfiqi/serpbear/compare/v0.1.3...v0.1.4) (2022-12-01)

### Features

* Failed scrape now shows error details in UI. ([8c8064f](https://github.com/towfiqi/serpbear/commit/8c8064f222ea8177b26b6dd28866d1f421faca39))

### Bug Fixes

* Domains with www weren't loading keywords. ([3d1c690](https://github.com/towfiqi/serpbear/commit/3d1c690076a03598f0ac3f3663d905479d945897)), closes [#8](https://github.com/towfiqi/serpbear/issues/8)
* Emails were sending serps of previous day. ([6910558](https://github.com/towfiqi/serpbear/commit/691055811c2ae70ce1b878346300048c1e23f2eb))
* Fixes Broken ScrapingRobot Integration. ([1ed298f](https://github.com/towfiqi/serpbear/commit/1ed298f633a9ae5b402b431f1e50b35ffd44a6dc))
* scraper fails if matched domain has www ([38dc164](https://github.com/towfiqi/serpbear/commit/38dc164514b066b2007f2f3b2ae68005621963cc)), closes [#6](https://github.com/towfiqi/serpbear/issues/6) [#7](https://github.com/towfiqi/serpbear/issues/7)
* scraper fails when result has domain w/o www ([6d7cfec](https://github.com/towfiqi/serpbear/commit/6d7cfec95304fa7a61beaab07f7cd6af215255c3))

### [0.1.3](https://github.com/towfiqi/serpbear/compare/v0.1.2...v0.1.3) (2022-12-01)
@@ -35,6 +35,7 @@ The App uses third party website scrapers like ScrapingAnt, ScrapingRobot or You
 | SerpWatch.io | $29/mo | 7500/mo | Yes |
 | Serpwatcher.com | $49/mo| 3000/mo | No |
 | whatsmyserp.com | $49/mo| 30,000/mo| No |
+| serply.io | $49/mo | 5000/mo | Yes |

 (*) Free upto a limit. If you are using ScrapingAnt you can lookup 10,000 times per month for free.
@@ -13,10 +13,10 @@ const AddDomain = ({ closeModal }: AddDomainProps) => {

    const addDomain = () => {
       // console.log('ADD NEW DOMAIN', newDomain);
-      if (/^[a-zA-Z0-9][a-zA-Z0-9-]{0,61}[a-zA-Z0-9](?:\.[a-zA-Z]{2,})+$/.test(newDomain)) {
+      if (/^[a-zA-Z0-9][a-zA-Z0-9-]{0,61}[a-zA-Z0-9](?:\.[a-zA-Z]{2,})+$/.test(newDomain.trim())) {
          setNewDomainError(false);
          // TODO: Domain Action
-         addMutate(newDomain);
+         addMutate(newDomain.trim());
       } else {
          setNewDomainError(true);
       }
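Net effect of this hunk: both the regex validation and the add mutation now run on the trimmed input, so a domain pasted with stray spaces no longer fails. A minimal sketch of that path (the `isValidDomain` helper name is illustrative, not from the repo):

```ts
// Illustrative sketch of the trimmed validation shown in the hunk above.
const isValidDomain = (value: string): boolean =>
   /^[a-zA-Z0-9][a-zA-Z0-9-]{0,61}[a-zA-Z0-9](?:\.[a-zA-Z]{2,})+$/.test(value.trim());

console.log(isValidDomain(' example.com ')); // true — leading/trailing spaces are ignored
console.log(isValidDomain('not a domain'));  // false
```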
@@ -21,7 +21,7 @@ type KeywordProps = {
 const Keyword = (props: KeywordProps) => {
    const { keywordData, refreshkeyword, favoriteKeyword, removeKeyword, selectKeyword, selected, showKeywordDetails, manageTags, lastItem } = props;
    const {
-      keyword, domain, ID, position, url = '', lastUpdated, country, sticky, history = {}, updating = false, lastUpdateError = 'false',
+      keyword, domain, ID, position, url = '', lastUpdated, country, sticky, history = {}, updating = false, lastUpdateError = false,
    } = keywordData;
    const [showOptions, setShowOptions] = useState(false);
    const [showPositionError, setPositionError] = useState(false);
@@ -50,7 +50,7 @@ const Keyword = (props: KeywordProps) => {

    const renderPosition = () => {
       if (position === 0) {
-         return <span title='Not in Top 100'>{'-'}</span>;
+         return <span className='text-gray-400' title='Not in Top 100'>{'>100'}</span>;
       }
       if (updating) {
          return <span title='Updating Keyword Position'><Icon type="loading" /></span>;
@@ -77,7 +77,7 @@ const Keyword = (props: KeywordProps) => {
          <span className={`fflag fflag-${country} w-[18px] h-[12px] mr-2`} title={countries[country][0]} />{keyword}
       </a>
       {sticky && <button className='ml-2 relative top-[2px]' title='Favorite'><Icon type="star-filled" size={16} color="#fbd346" /></button>}
-      {lastUpdateError !== 'false'
+      {lastUpdateError && lastUpdateError.date
          && <button className='ml-2 relative top-[2px]' onClick={() => setPositionError(true)}>
             <Icon type="error" size={18} color="#FF3672" />
          </button>
@@ -133,16 +133,19 @@ const Keyword = (props: KeywordProps) => {
          </ul>
       )}
    </div>
-   {lastUpdateError !== 'false' && showPositionError
-      && <div className=' absolute mt-[-70px] p-2 bg-white z-30 border border-red-200 rounded w-[220px] left-4 shadow-sm text-xs'>
+   {lastUpdateError && lastUpdateError.date && showPositionError && (
+      <div className=' absolute mt-[-70px] p-2 bg-white z-30 border border-red-200 rounded w-[220px] left-4 shadow-sm text-xs lg:bottom-12'>
         Error Updating Keyword position (Tried <TimeAgo
-            title={dayjs(lastUpdateError).format('DD-MMM-YYYY, hh:mm:ss A')}
-            date={lastUpdateError} />)
+            title={dayjs(lastUpdateError.date).format('DD-MMM-YYYY, hh:mm:ss A')}
+            date={lastUpdateError.date} />)
         <i className='absolute top-0 right-0 ml-2 p-2 font-semibold not-italic cursor-pointer' onClick={() => setPositionError(false)}>
            <Icon type="close" size={16} color="#999" />
         </i>
+        <div className=' border-t-[1px] border-red-100 mt-2 pt-1'>
+           {lastUpdateError.scraper && <strong className='capitalize'>{lastUpdateError.scraper}: </strong>}{lastUpdateError.error}
+        </div>
      </div>
-   }
+   )}
 </div>
 );
};
@@ -105,6 +105,7 @@ const Settings = ({ closeSettings }:SettingsProps) => {
    { label: 'Proxy', value: 'proxy' },
    { label: 'ScrapingAnt.com', value: 'scrapingant' },
    { label: 'ScrapingRobot.com', value: 'scrapingrobot' },
+   { label: 'serply.io', value: 'serply' },
 ];

 const tabStyle = 'inline-block px-4 py-1 rounded-full mr-3 cursor-pointer text-sm';
@@ -150,7 +151,7 @@ const Settings = ({ closeSettings }:SettingsProps) => {
       minWidth={270}
    />
 </div>
-{['scrapingant', 'scrapingrobot'].includes(settings.scraper_type) && (
+{['scrapingant', 'scrapingrobot', 'serply'].includes(settings.scraper_type) && (
    <div className="settings__section__input mr-3">
       <label className={labelStyle}>Scraper API Key or Token</label>
       <input
cron.js (5 changed lines)
@@ -50,6 +50,9 @@ const generateCronTime = (interval) => {
    if (interval === 'daily') {
       cronTime = '0 0 0 * * *';
    }
+   if (interval === 'daily_morning') {
+      cronTime = '0 0 0 7 * *';
+   }
    if (interval === 'weekly') {
       cronTime = '0 0 0 */7 * *';
    }
@@ -103,7 +106,7 @@ const runAppCronJobs = () => {
    getAppSettings().then((settings) => {
       const notif_interval = (!settings.notification_interval || settings.notification_interval === 'never') ? false : settings.notification_interval;
       if (notif_interval) {
-         const cronTime = generateCronTime(notif_interval);
+         const cronTime = generateCronTime(notif_interval === 'daily' ? 'daily_morning' : notif_interval);
          if (cronTime) {
             cron.schedule(cronTime, () => {
                // console.log('### Sending Notification Email...');
package-lock.json (generated, 4 changed lines)
@@ -1,12 +1,12 @@
 {
    "name": "serpbear",
-   "version": "0.1.3",
+   "version": "0.1.5",
    "lockfileVersion": 2,
    "requires": true,
    "packages": {
       "": {
          "name": "serpbear",
-         "version": "0.1.3",
+         "version": "0.1.5",
          "dependencies": {
             "@testing-library/react": "^13.4.0",
             "@types/react-transition-group": "^4.4.5",
package.json

@@ -1,6 +1,6 @@
 {
    "name": "serpbear",
-   "version": "0.1.3",
+   "version": "0.1.5",
    "private": true,
    "scripts": {
       "dev": "next dev",
@@ -61,8 +61,8 @@ export const addDomain = async (req: NextApiRequest, res: NextApiResponse<Domain
    }
    const { domain } = req.body || {};
    const domainData = {
-      domain,
-      slug: domain.replaceAll('.', '-'),
+      domain: domain.trim(),
+      slug: domain.trim().replaceAll('-', '_').replaceAll('.', '-'),
       lastUpdated: new Date().toJSON(),
       added: new Date().toJSON(),
    };
@@ -44,7 +44,8 @@ const getKeywords = async (req: NextApiRequest, res: NextApiResponse<KeywordsGet
    if (!req.query.domain && typeof req.query.domain !== 'string') {
       return res.status(400).json({ error: 'Domain is Required!' });
    }
-   const domain = (req.query.domain as string).replace('-', '.');
+   const domain = (req.query.domain as string).replaceAll('-', '.').replaceAll('_', '-');

    try {
       const allKeywords:Keyword[] = await Keyword.findAll({ where: { domain } });
       const keywords: KeywordType[] = parseKeywords(allKeywords.map((e) => e.get({ plain: true })));
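Taken together, the two API hunks above form a reversible slug encoding: real hyphens in a domain are stored as underscores so that dots can be stored as hyphens, which is why domains containing `-` now load their keywords. A small sketch of the round trip (helper names are illustrative, not from the repo):

```ts
// Encode: used when adding a domain; Decode: used when resolving the slug back to a domain.
const toSlug = (domain: string): string => domain.trim().replaceAll('-', '_').replaceAll('.', '-');
const fromSlug = (slug: string): string => slug.replaceAll('-', '.').replaceAll('_', '-');

console.log(toSlug('my-site.co.uk'));    // "my_site-co-uk"
console.log(fromSlug('my_site-co-uk'));  // "my-site.co.uk"
```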
@@ -79,18 +79,21 @@ export const refreshAndUpdateKeywords = async (initKeywords:Keyword[], settings:
       const newPos = udpatedkeyword.position;
       const newPosition = newPos !== false ? newPos : keyword.position;
       const { history } = keyword;
-      const currentDate = new Date();
-      history[`${currentDate.getFullYear()}-${currentDate.getMonth() + 1}-${currentDate.getDate()}`] = newPosition;
+      const theDate = new Date();
+      history[`${theDate.getFullYear()}-${theDate.getMonth() + 1}-${theDate.getDate()}`] = newPosition;

       const updatedVal = {
          position: newPosition,
          updating: false,
          url: udpatedkeyword.url,
          lastResult: udpatedkeyword.result,
          history,
-         lastUpdated: udpatedkeyword.error ? keyword.lastUpdated : new Date().toJSON(),
-         lastUpdateError: udpatedkeyword.error ? new Date().toJSON() : 'false',
+         lastUpdated: udpatedkeyword.error ? keyword.lastUpdated : theDate.toJSON(),
+         lastUpdateError: udpatedkeyword.error
+            ? JSON.stringify({ date: theDate.toJSON(), error: `${udpatedkeyword.error}`, scraper: settings.scraper_type })
+            : 'false',
       };
-      updatedKeywords.push({ ...keyword, ...updatedVal });
+      updatedKeywords.push({ ...keyword, ...{ ...updatedVal, lastUpdateError: JSON.parse(updatedVal.lastUpdateError) } });

       // If failed, Add to Retry Queue Cron
       if (udpatedkeyword.error) {
@@ -103,7 +106,7 @@ export const refreshAndUpdateKeywords = async (initKeywords:Keyword[], settings:
       try {
          await keywordRaw.update({
             ...updatedVal,
-            lastResult: JSON.stringify(udpatedkeyword.result),
+            lastResult: Array.isArray(udpatedkeyword.result) ? JSON.stringify(udpatedkeyword.result) : udpatedkeyword.result,
             history: JSON.stringify(history),
          });
          console.log('[SUCCESS] Updating the Keyword: ', keyword.keyword);
types.d.ts (vendored, 2 changed lines)
@@ -31,7 +31,7 @@ type KeywordType = {
    url: string,
    tags: string[],
    updating: boolean,
-   lastUpdateError: string
+   lastUpdateError: {date: string, error: string, scraper: string} | false
 }

 type KeywordLastResult = {
@@ -11,6 +11,7 @@ const parseKeywords = (allKeywords: Keyword[]) : KeywordType[] => {
       history: JSON.parse(keywrd.history),
       tags: JSON.parse(keywrd.tags),
       lastResult: JSON.parse(keywrd.lastResult),
+      lastUpdateError: keywrd.lastUpdateError !== 'false' && keywrd.lastUpdateError.includes('{') ? JSON.parse(keywrd.lastUpdateError) : false,
    }));
    return parsedItems;
 };
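The refresh, types.d.ts, and parseKeywords changes above move `lastUpdateError` from a bare date string to a JSON-encoded object in the database that is parsed back into `{ date, error, scraper } | false` for the UI. A small sketch of that storage convention, mirroring the logic in the hunks (the helper names are assumptions, not repo functions):

```ts
type LastUpdateError = { date: string, error: string, scraper: string } | false;

// Stored in the DB column as a string: either the literal 'false' or a JSON object.
const encodeLastUpdateError = (error: string | false, scraper: string): string => (
   error ? JSON.stringify({ date: new Date().toJSON(), error: `${error}`, scraper }) : 'false'
);

const decodeLastUpdateError = (stored: string): LastUpdateError => (
   stored !== 'false' && stored.includes('{') ? JSON.parse(stored) : false
);
```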
@@ -20,10 +20,16 @@ type SERPObject = {
 export type RefreshResult = false | {
    ID: number,
    keyword: string,
-   position:number|boolean,
+   position:number | boolean,
    url: string,
    result: SearchResult[],
-   error?: boolean
+   error?: boolean | string
 }

+interface SerplyResult {
+   title: string,
+   link: string,
+   realPosition: number,
+}
+
 /**
@@ -34,7 +40,7 @@ export type RefreshResult = false | {
  */
 export const getScraperClient = (keyword:KeywordType, settings:SettingsType): Promise<AxiosResponse|Response> | false => {
    let apiURL = ''; let client: Promise<AxiosResponse|Response> | false = false;
-   const headers = {
+   const headers: any = {
       'Content-Type': 'application/json',
       'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246',
       Accept: 'application/json; charset=utf8;',
@@ -56,7 +62,21 @@ export const getScraperClient = (keyword:KeywordType, settings:SettingsType): Pr
    if (settings && settings.scraper_type === 'scrapingrobot' && settings.scaping_api) {
       const country = keyword.country || 'US';
       const lang = countries[country][2];
-      apiURL = `https://api.scrapingrobot.com/?url=https%3A%2F%2Fwww.google.com%2Fsearch%3Fnum%3D100%26hl%3D${lang}%26q%3D${encodeURI(keyword.keyword)}&token=${settings.scaping_api}&proxyCountry=${country}&render=false${keyword.device === 'mobile' ? '&mobile=true' : ''}`;
+      apiURL = `https://api.scrapingrobot.com/?token=${settings.scaping_api}&proxyCountry=${country}&render=false${keyword.device === 'mobile' ? '&mobile=true' : ''}&url=https%3A%2F%2Fwww.google.com%2Fsearch%3Fnum%3D100%26hl%3D${lang}%26q%3D${encodeURI(keyword.keyword)}`;
    }

+   // Serply.io docs https://docs.serply.io/api
+   if (settings && settings.scraper_type === 'serply' && settings.scaping_api) {
+      const scraperCountries = ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'];
+      const country = scraperCountries.includes(keyword.country.toUpperCase()) ? keyword.country : 'US';
+      if (keyword.device === 'mobile') {
+         headers['X-User-Agent'] = 'mobile';
+      } else {
+         headers['X-User-Agent'] = 'desktop';
+      }
+      headers['X-Proxy-Location'] = country;
+      headers['X-Api-Key'] = settings.scaping_api;
+      apiURL = `https://api.serply.io/v1/search/q=${encodeURI(keyword.keyword)}&num=100&hl=${country}`;
+   }
+
    if (settings && settings.scraper_type === 'proxy' && settings.proxy) {
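For context, the new `serply` branch amounts to a header-authenticated GET request. A standalone sketch of the call it assembles, using only the URL and headers shown in the hunk above (the API key and keyword values are placeholders):

```ts
// Sketch of the request built by the new 'serply' branch; values are placeholders.
const apiKey = 'YOUR_SERPLY_API_KEY';
const keyword = 'best coffee grinder';
const country = 'US';

const headers: Record<string, string> = {
   'Content-Type': 'application/json',
   'X-User-Agent': 'desktop',       // 'mobile' when tracking mobile rankings
   'X-Proxy-Location': country,
   'X-Api-Key': apiKey,
};

fetch(`https://api.serply.io/v1/search/q=${encodeURI(keyword)}&num=100&hl=${country}`, { method: 'GET', headers })
   .then((res) => res.json())
   .then((data) => console.log(data));
```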
@@ -76,7 +96,7 @@ export const getScraperClient = (keyword:KeywordType, settings:SettingsType): Pr
       const axiosClient = axios.create(axiosConfig);
       client = axiosClient.get(`https://www.google.com/search?num=100&q=${encodeURI(keyword.keyword)}`);
    } else {
-      client = fetch(apiURL, { method: 'GET', headers }).then((res) => res.json());
+      client = fetch(apiURL, { method: 'GET', headers });
    }

    return client;
@@ -100,18 +120,27 @@ export const scrapeKeywordFromGoogle = async (keyword:KeywordType, settings:Sett
    const scraperClient = getScraperClient(keyword, settings);

    if (!scraperClient) { return false; }

+   let res:any = null; let scraperError:any = null;
    try {
-      const res:any = await scraperClient;
-      if (res && (res.data || res.html)) {
-         // writeFile('result.txt', res.data, { encoding: 'utf-8' });
-         const extracted = extractScrapedResult(res.data || res.html, settings.scraper_type);
+      if (settings && settings.scraper_type === 'proxy' && settings.proxy) {
+         res = await scraperClient;
+      } else {
+         res = await scraperClient.then((result:any) => result.json());
+      }
+
+      if (res && (res.data || res.html || res.result || res.results)) {
+         const extracted = extractScrapedResult(res.data || res.html || res.result || res.results, settings.scraper_type);
+         // await writeFile('result.txt', JSON.stringify(extracted), { encoding: 'utf-8' }).catch((err) => { console.log(err); });
          const serp = getSerp(keyword.domain, extracted);
          refreshedResults = { ID: keyword.ID, keyword: keyword.keyword, position: serp.postion, url: serp.url, result: extracted, error: false };
          console.log('SERP: ', keyword.keyword, serp.postion, serp.url);
       } else {
+         scraperError = res.detail || res.error || 'Unknown Error';
          throw new Error(res);
       }
    } catch (error:any) {
-      console.log('#### SCRAPE ERROR: ', keyword.keyword, error?.code, error?.response?.status, error?.response?.data, error);
+      console.log('#### SCRAPE ERROR: ', keyword.keyword, '. Error: ', scraperError);
+      refreshedResults.error = scraperError;
    }

    return refreshedResults;
@@ -123,30 +152,48 @@ export const scrapeKeywordFromGoogle = async (keyword:KeywordType, settings:Sett
  * @param {string} scraper_type - the type of scraper (Proxy or Scraper)
  * @returns {SearchResult[]}
  */
-export const extractScrapedResult = (content:string, scraper_type:string): SearchResult[] => {
+export const extractScrapedResult = (content: string, scraper_type:string): SearchResult[] => {
    const extractedResult = [];
-   const $ = cheerio.load(content);

+   const $ = cheerio.load(content);
    const hasNumberofResult = $('body').find('#search > div > div');
    const searchResult = hasNumberofResult.children();
+   let lastPosition = 0;

    if (scraper_type === 'proxy') {
       const mainContent = $('body').find('#main');
       const children = $(mainContent).find('h3');

-      for (let index = 1; index < children.length; index += 1) {
+      for (let index = 0; index < children.length; index += 1) {
          const title = $(children[index]).text();
          const url = $(children[index]).closest('a').attr('href');
          const cleanedURL = url ? url.replace('/url?q=', '').replace(/&sa=.*/, '') : '';
-         extractedResult.push({ title, url: cleanedURL, position: index });
+         if (title && url) {
+            lastPosition += 1;
+            extractedResult.push({ title, url: cleanedURL, position: lastPosition });
+         }
       }
+   } else if (scraper_type === 'serply') {
+      // results already in json
+      const results: SerplyResult[] = (typeof content === 'string') ? JSON.parse(content) : content as SerplyResult[];
+      for (const result of results) {
+         if (result.title && result.link) {
+            extractedResult.push({
+               title: result.title,
+               url: result.link,
+               position: result.realPosition,
+            });
+         }
+      }
    } else {
-      for (let i = 1; i < searchResult.length; i += 1) {
+      for (let i = 0; i < searchResult.length; i += 1) {
         if (searchResult[i]) {
            const title = $(searchResult[i]).find('h3').html();
            const url = $(searchResult[i]).find('a').attr('href');
            // console.log(i, url?.slice(0, 40), title?.slice(0, 40));
            if (title && url) {
-              extractedResult.push({ title, url, position: i });
+              lastPosition += 1;
+              extractedResult.push({ title, url, position: lastPosition });
            }
         }
      }
@@ -164,8 +211,8 @@ export const extractScrapedResult = (content:string, scraper_type:string): Searc
 export const getSerp = (domain:string, result:SearchResult[]) : SERPObject => {
    if (result.length === 0 || !domain) { return { postion: false, url: '' }; }
    const foundItem = result.find((item) => {
-      const itemDomain = item.url.match(/^(?:https?:)?(?:\/\/)?([^/?]+)/i);
-      return itemDomain && itemDomain.includes(domain);
+      const itemDomain = item.url.replace('www.', '').match(/^(?:https?:)?(?:\/\/)?([^/?]+)/i);
+      return itemDomain && itemDomain.includes(domain.replace('www.', ''));
    });
    return { postion: foundItem ? foundItem.position : 0, url: foundItem && foundItem.url ? foundItem.url : '' };
 };
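The `getSerp` change above makes the domain match insensitive to a leading `www.` on either side: the match array returned by the regex contains the captured host, and `includes()` checks it against the stripped domain. A small standalone sketch of that check (the helper name is illustrative, not from the repo):

```ts
// Mirrors the updated matching logic: strip 'www.' from both sides before comparing hosts.
const matchesDomain = (url: string, domain: string): boolean => {
   const itemDomain = url.replace('www.', '').match(/^(?:https?:)?(?:\/\/)?([^/?]+)/i);
   return !!itemDomain && itemDomain.includes(domain.replace('www.', ''));
};

console.log(matchesDomain('https://www.example.com/page', 'example.com')); // true
console.log(matchesDomain('https://other.com/page', 'example.com'));       // false
```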