Nicolas 2024-08-21 22:53:33 -03:00
parent c7bfe4ffe8
commit 477c3257dc
4 changed files with 25 additions and 8 deletions

View File

@@ -144,6 +144,15 @@ export async function crawlController(req: Request, res: Response) {
: await crawler.tryGetSitemap();
if (sitemap !== null) {
let jobPriority = 20;
// If the sitemap has more than 1000 URLs, fetch a plan-aware job priority
// (a hedged sketch of getJobPriority follows this hunk); otherwise keep the default of 20.
if (sitemap.length > 1000) {
// Base of 21 ranks these jobs just below the default priority of 20.
jobPriority = await getJobPriority({ plan, team_id, basePriority: 21 });
}
const jobs = sitemap.map((x) => {
const url = x.url;
const uuid = uuidv4();
@@ -161,7 +170,7 @@ export async function crawlController(req: Request, res: Response) {
},
opts: {
jobId: uuid,
priority: 20,
priority: jobPriority,
},
};
});
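For context, getJobPriority is imported from ../lib/job-priority, whose implementation is not part of this commit. What follows is a minimal sketch of what such a helper could look like, assuming a BullMQ-style queue where a larger priority number runs later; the per-plan buckets and the countActiveJobs helper are hypothetical, not taken from the repo.

// Hedged sketch only; the real getJobPriority lives in ../lib/job-priority.
// PlanType is simplified here to keep the sketch self-contained.
type PlanType = "free" | "standard" | "scale";

// Hypothetical per-plan buckets; the real thresholds are unknown.
const PLAN_BUCKETS: Record<PlanType, number> = {
  free: 25,
  standard: 100,
  scale: 400,
};

// Hypothetical helper; a real implementation would query the queue or Redis.
async function countActiveJobs(team_id: string): Promise<number> {
  return 0; // stub for the sketch
}

export async function getJobPriority(opts: {
  plan: PlanType;
  team_id: string;
  basePriority: number;
}): Promise<number> {
  const { plan, team_id, basePriority } = opts;
  const active = await countActiveJobs(team_id);
  const bucket = PLAN_BUCKETS[plan] ?? PLAN_BUCKETS.free;
  // Larger numbers mean later execution, so teams with many queued jobs
  // drift behind lighter workloads instead of starving them.
  return active <= bucket
    ? basePriority
    : basePriority + Math.ceil(active / bucket);
}

Under that reading, basePriority: 21 above simply ranks oversized sitemap crawls one step behind the default of 20 before any plan-based adjustment.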

View File

@@ -2,7 +2,7 @@ import { Request, Response } from "express";
import { WebScraperDataProvider } from "../scraper/WebScraper";
import { billTeam, checkTeamCredits } from "../services/billing/credit_billing";
import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../types";
import { PlanType, RateLimiterMode } from "../types";
import { logJob } from "../services/logging/log_job";
import { PageOptions, SearchOptions } from "../lib/entities";
import { search } from "../search";
@@ -10,6 +10,7 @@ import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
import { v4 as uuidv4 } from "uuid";
import { Logger } from "../lib/logger";
import { getScrapeQueue, scrapeQueueEvents } from "../services/queue-service";
import { getJobPriority } from "../lib/job-priority";
export async function searchHelper(
jobId: string,
@@ -18,6 +19,7 @@ export async function searchHelper(
crawlerOptions: any,
pageOptions: PageOptions,
searchOptions: SearchOptions,
plan: PlanType
): Promise<{
success: boolean;
error?: string;
@@ -74,6 +76,8 @@ export async function searchHelper(
return { success: true, error: "No search results found", returnCode: 200 };
}
const jobPriority = await getJobPriority({ plan, team_id, basePriority: 20 });
// filter out social media links
const jobDatas = res.map(x => {
@@ -90,7 +94,7 @@
},
opts: {
jobId: uuid,
priority: 20,
priority: jobPriority,
}
};
})
@@ -124,7 +128,7 @@
export async function searchController(req: Request, res: Response) {
try {
// Make sure to authenticate the user first (Bearer <token>).
const { success, team_id, error, status } = await authenticateUser(
const { success, team_id, error, status, plan } = await authenticateUser(
req,
res,
RateLimiterMode.Search
@@ -165,6 +169,7 @@ export async function searchController(req: Request, res: Response) {
crawlerOptions,
pageOptions,
searchOptions,
plan
);
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;

View File

@@ -16,3 +16,5 @@ export async function addScrapeJob(
});
}
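The two lines added to addScrapeJob are not visible in this hunk. Assuming the scrape queue is BullMQ (the search controller imports getScrapeQueue and scrapeQueueEvents), a plausible shape for threading the priority through is sketched below; the queue name, payload type, and option shape are all guesses rather than the repo's actual code.

import { Queue } from "bullmq";
import { randomUUID } from "crypto";

const scrapeQueue = new Queue("scrapeQueue"); // assumed queue name

export async function addScrapeJob(
  webScraperOptions: unknown, // the real payload type is elided in this diff
  options: { priority?: number } = {},
  jobId: string = randomUUID()
) {
  return scrapeQueue.add("scrapeJob", webScraperOptions, {
    jobId,
    priority: options.priority ?? 10, // lower numbers run sooner in BullMQ
  });
}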

View File

@@ -218,13 +218,14 @@ async function processJob(job: Job, token: string) {
for (const link of links) {
if (await lockURL(job.data.crawl_id, sc, link)) {
// This seems to work really well
const jobPriority = await getJobPriority({ plan: sc.plan as PlanType, team_id: sc.team_id, basePriority: job.data.crawl_id ? 20 : 10 });
const jobId = uuidv4();
console.log("plan: ", sc.plan);
console.log("team_id: ", sc.team_id)
console.log("base priority: ", job.data.crawl_id ? 20 : 10)
console.log("job priority: " , jobPriority, "\n\n\n")
// console.log("plan: ", sc.plan);
// console.log("team_id: ", sc.team_id)
// console.log("base priority: ", job.data.crawl_id ? 20 : 10)
// console.log("job priority: " , jobPriority, "\n\n\n")
const newJob = await addScrapeJob({
url: link,
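One note on the hunk above: none of the arguments to getJobPriority depend on link, so the lookup could be hoisted out of the for loop, saving one async round-trip per discovered URL. A sketch of that rearrangement, reusing the names from the code above:

// Hoist the loop-invariant priority lookup; equivalent unless the priority
// is meant to shift as the crawl itself enqueues more jobs.
const jobPriority = await getJobPriority({
  plan: sc.plan as PlanType,
  team_id: sc.team_id,
  basePriority: job.data.crawl_id ? 20 : 10,
});
for (const link of links) {
  if (await lockURL(job.data.crawl_id, sc, link)) {
    const newJob = await addScrapeJob({ url: link /* ...rest of the payload */ });
  }
}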