diff --git a/apps/api/src/controllers/v1/crawl.ts b/apps/api/src/controllers/v1/crawl.ts index 43f940b..9d8b4c6 100644 --- a/apps/api/src/controllers/v1/crawl.ts +++ b/apps/api/src/controllers/v1/crawl.ts @@ -81,6 +81,7 @@ export async function crawlController( origin: "api", crawl_id: id, sitemapped: true, + v1: true, }, opts: { jobId: uuid, @@ -110,6 +111,7 @@ export async function crawlController( origin: "api", crawl_id: id, webhook: req.body.webhook, + v1: true, }, { priority: 15, diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index 96a2b4e..c2e3ba3 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -215,7 +215,7 @@ async function processJob(job: Job, token: string) { }; if (job.data.mode === "crawl") { - await callWebhook(job.data.team_id, job.id as string, data, job.data.webhook); + await callWebhook(job.data.team_id, job.id as string, data, job.data.webhook, job.data.v1); } if (job.data.crawl_id) { @@ -259,6 +259,7 @@ async function processJob(job: Job, token: string) { pageOptions: sc.pageOptions, origin: job.data.origin, crawl_id: job.data.crawl_id, + v1: job.data.v1, }); await addCrawlJob(job.data.crawl_id, newJob.id); @@ -328,7 +329,7 @@ async function processJob(job: Job, token: string) { docs: fullDocs, }; - await callWebhook(job.data.team_id, job.data.crawl_id, data); + await callWebhook(job.data.team_id, job.data.crawl_id, data, job.data.webhook, job.data.v1); } } @@ -372,7 +373,7 @@ async function processJob(job: Job, token: string) { }; if (job.data.mode === "crawl" || job.data.crawl_id) { - await callWebhook(job.data.team_id, job.data.crawl_id ?? job.id as string, data); + await callWebhook(job.data.team_id, job.data.crawl_id ?? job.id as string, data, job.data.webhook, job.data.v1); } if (job.data.crawl_id) { diff --git a/apps/api/src/services/webhook.ts b/apps/api/src/services/webhook.ts index 2b00866..b60774e 100644 --- a/apps/api/src/services/webhook.ts +++ b/apps/api/src/services/webhook.ts @@ -1,7 +1,8 @@ +import { legacyDocumentConverter } from "../../src/controllers/v1/types"; import { Logger } from "../../src/lib/logger"; import { supabase_service } from "./supabase"; -export const callWebhook = async (teamId: string, jobId: string, data: any, specified?: string) => { +export const callWebhook = async (teamId: string, jobId: string, data: any, specified?: string, v1 = false) => { try { const selfHostedUrl = process.env.SELF_HOSTED_WEBHOOK_URL?.replace("{{JOB_ID}}", jobId); const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true'; @@ -30,11 +31,15 @@ export const callWebhook = async (teamId: string, jobId: string, data: any, spec let dataToSend = []; if (data.result.links && data.result.links.length !== 0) { for (let i = 0; i < data.result.links.length; i++) { - dataToSend.push({ - content: data.result.links[i].content.content, - markdown: data.result.links[i].content.markdown, - metadata: data.result.links[i].content.metadata, - }); + if (v1) { + dataToSend.push(legacyDocumentConverter(data.result.links[i].content)) + } else { + dataToSend.push({ + content: data.result.links[i].content.content, + markdown: data.result.links[i].content.markdown, + metadata: data.result.links[i].content.metadata, + }); + } } } diff --git a/apps/api/src/types.ts b/apps/api/src/types.ts index 70a8ab0..1da2c70 100644 --- a/apps/api/src/types.ts +++ b/apps/api/src/types.ts @@ -31,6 +31,7 @@ export interface WebScraperOptions { crawl_id?: string; sitemapped?: boolean; webhook?: string; + v1?: boolean; } export interface RunWebScraperParams {