Nick:
This commit is contained in:
parent
c7bfe4ffe8
commit
477c3257dc
|
@ -144,6 +144,15 @@ export async function crawlController(req: Request, res: Response) {
|
|||
: await crawler.tryGetSitemap();
|
||||
|
||||
if (sitemap !== null) {
|
||||
|
||||
let jobPriority = 20;
|
||||
// If the sitemap has more than 1000 entries, we need to get the job priority,
|
||||
// otherwise we can use the default priority of 20
|
||||
if(sitemap.length > 1000){
|
||||
// set base to 21
|
||||
jobPriority = await getJobPriority({plan, team_id, basePriority: 21})
|
||||
}
|
||||
|
||||
const jobs = sitemap.map((x) => {
|
||||
const url = x.url;
|
||||
const uuid = uuidv4();
|
||||
|
@ -161,7 +170,7 @@ export async function crawlController(req: Request, res: Response) {
|
|||
},
|
||||
opts: {
|
||||
jobId: uuid,
|
||||
priority: 20,
|
||||
priority: jobPriority,
|
||||
},
|
||||
};
|
||||
});
|
||||
|
|
|
@ -2,7 +2,7 @@ import { Request, Response } from "express";
|
|||
import { WebScraperDataProvider } from "../scraper/WebScraper";
|
||||
import { billTeam, checkTeamCredits } from "../services/billing/credit_billing";
|
||||
import { authenticateUser } from "./auth";
|
||||
import { RateLimiterMode } from "../types";
|
||||
import { PlanType, RateLimiterMode } from "../types";
|
||||
import { logJob } from "../services/logging/log_job";
|
||||
import { PageOptions, SearchOptions } from "../lib/entities";
|
||||
import { search } from "../search";
|
||||
|
@ -10,6 +10,7 @@ import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
|
|||
import { v4 as uuidv4 } from "uuid";
|
||||
import { Logger } from "../lib/logger";
|
||||
import { getScrapeQueue, scrapeQueueEvents } from "../services/queue-service";
|
||||
import { getJobPriority } from "../lib/job-priority";
|
||||
|
||||
export async function searchHelper(
|
||||
jobId: string,
|
||||
|
@ -18,6 +19,7 @@ export async function searchHelper(
|
|||
crawlerOptions: any,
|
||||
pageOptions: PageOptions,
|
||||
searchOptions: SearchOptions,
|
||||
plan: PlanType
|
||||
): Promise<{
|
||||
success: boolean;
|
||||
error?: string;
|
||||
|
@ -74,6 +76,8 @@ export async function searchHelper(
|
|||
return { success: true, error: "No search results found", returnCode: 200 };
|
||||
}
|
||||
|
||||
const jobPriority = await getJobPriority({plan, team_id, basePriority: 20});
|
||||
|
||||
// filter out social media links
|
||||
|
||||
const jobDatas = res.map(x => {
|
||||
|
@ -90,7 +94,7 @@ export async function searchHelper(
|
|||
},
|
||||
opts: {
|
||||
jobId: uuid,
|
||||
priority: 20,
|
||||
priority: jobPriority,
|
||||
}
|
||||
};
|
||||
})
|
||||
|
@ -124,7 +128,7 @@ export async function searchHelper(
|
|||
export async function searchController(req: Request, res: Response) {
|
||||
try {
|
||||
// make sure to authenticate user first, Bearer <token>
|
||||
const { success, team_id, error, status } = await authenticateUser(
|
||||
const { success, team_id, error, status, plan } = await authenticateUser(
|
||||
req,
|
||||
res,
|
||||
RateLimiterMode.Search
|
||||
|
@ -165,6 +169,7 @@ export async function searchController(req: Request, res: Response) {
|
|||
crawlerOptions,
|
||||
pageOptions,
|
||||
searchOptions,
|
||||
plan
|
||||
);
|
||||
const endTime = new Date().getTime();
|
||||
const timeTakenInSeconds = (endTime - startTime) / 1000;
|
||||
|
|
|
@ -16,3 +16,5 @@ export async function addScrapeJob(
|
|||
});
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -218,13 +218,14 @@ async function processJob(job: Job, token: string) {
|
|||
for (const link of links) {
|
||||
if (await lockURL(job.data.crawl_id, sc, link)) {
|
||||
|
||||
// This seems to work really well
|
||||
const jobPriority = await getJobPriority({plan:sc.plan as PlanType, team_id: sc.team_id, basePriority: job.data.crawl_id ? 20 : 10})
|
||||
const jobId = uuidv4();
|
||||
|
||||
console.log("plan: ", sc.plan);
|
||||
console.log("team_id: ", sc.team_id)
|
||||
console.log("base priority: ", job.data.crawl_id ? 20 : 10)
|
||||
console.log("job priority: " , jobPriority, "\n\n\n")
|
||||
// console.log("plan: ", sc.plan);
|
||||
// console.log("team_id: ", sc.team_id)
|
||||
// console.log("base priority: ", job.data.crawl_id ? 20 : 10)
|
||||
// console.log("job priority: " , jobPriority, "\n\n\n")
|
||||
|
||||
const newJob = await addScrapeJob({
|
||||
url: link,
|
||||
|
|
Loading…
Reference in New Issue