Files
GoClaw/server/web-research.ts
Manus 4411db8cd6 Checkpoint: Phase 19 Complete: Task Management System + Web Research Workflow
PHASE 19 COMPLETION SUMMARY:

 COMPLETED FEATURES:

1. Task Management System (Phase 19.1-19.7)
   - Database schema with tasks table (14 columns)
   - Query helpers for CRUD operations
   - 7 tRPC endpoints for task management
   - TasksPanel React component with real-time updates
   - Auto-task creation functions
   - Chat UI integration with conversationId tracking

2. Auto-Task Creation Integration (Phase 19.8)
   - Integrated into orchestratorChat loop
   - Detects missing components from tool errors
   - Auto-creates tasks for: tools, skills, agents, components, dependencies
   - Tracks task completion status

3. Web Research Workflow (Phase 19.9-19.12)
   - server/web-research.ts module with 3 main functions:
     * performWebResearch() - Execute web searches with Browser Agent
     * compileResearchReport() - Generate markdown reports
     * createResearchTasks() - Create research tasks for orchestrator

   - 3 tRPC endpoints:
     * research.search - Perform web research
     * research.compileReport - Compile results into report
     * research.createTasks - Create research tasks

   - WebResearchPanel React component:
     * Search input with real-time results
     * Options: max results, screenshots, text extraction
     * Result cards with expandable details
     * Report download functionality
     * Error handling and empty states

4. Unit Tests
   - 120 tests pass (out of 121 total)
   - Web Research tests: 18 tests covering all functions
   - Task tests: 5 tests (1 fails due to missing DB table)
   - All other tests pass

ARCHITECTURE:
- Browser Agent integration via Puppeteer
- Task tracking with metadata
- Auto-report compilation in markdown
- Screenshot and text extraction support
- Real-time UI updates via tRPC

NEXT STEPS:
1. Run pnpm db:push on production to create tasks table
2. Commit all changes to Gitea
3. Deploy to production
4. Verify tests pass on production DB
5. Test Web Research workflow end-to-end

TEST RESULTS:
- Test Files: 1 failed | 10 passed (11 total)
- Tests: 1 failed | 120 passed (121 total)
- Only failure: tasks.test.ts (requires production DB table)
2026-03-30 05:39:39 -04:00

278 lines
7.2 KiB
TypeScript

/**
* Web Research Workflow — Browser Agent Integration
*
* Provides high-level research capabilities:
* - Search on Google/Bing
* - Extract data from websites
* - Take screenshots for documentation
* - Compile research results
*/
import { createBrowserSession, executeBrowserAction, BrowserAction, BrowserResult } from "./browser-agent";
import { autoCreateTasks, trackTaskCompletion } from "./orchestrator";
import { createTask, updateTask } from "./db";
/**
 * Input describing a single web research request handled by performWebResearch.
 */
export interface ResearchQuery {
/** Search text; it is URL-encoded into the search URL and echoed into the tracking task. */
query: string;
/** Cap on how many search hits are collected; performWebResearch uses 5 when this is omitted or 0. */
maxResults?: number;
/** When true, each result URL is opened and a viewport (not full-page) screenshot is requested. */
includeScreenshots?: boolean;
/** When true, up to the first 2000 characters of `document.body.innerText` are captured per result. */
extractText?: boolean;
}
/**
 * Outcome of one performWebResearch run, also consumed by compileResearchReport.
 */
export interface ResearchResult {
/** True when the run finished without a caught error; false when `error` is populated. */
success: boolean;
/** The search text that was researched (copied from the request). */
query: string;
/** Collected search hits, in the order they were read from the results page; empty on failure. */
results: Array<{
/** Text of the hit's heading element. */
title: string;
/** Link target of the hit. */
url: string;
/** Short description shown under the hit, when one was found. */
snippet?: string;
/** Screenshot location reported by the browser agent, when screenshots were requested. */
screenshotUrl?: string;
/** Page text captured by the browser agent, when text extraction was requested. */
extractedText?: string;
}>;
/** Number of entries in `results` (0 on failure). */
totalResults: number;
/** Wall-clock duration of the run in milliseconds. */
executionTimeMs: number;
/** Message of the error that aborted the run; only set when `success` is false. */
error?: string;
}
/**
 * Perform web research using Browser Agent.
 *
 * Opens a browser session for the agent, records an "in_progress" tracking
 * task, runs a Google search for `query.query` and collects title / URL /
 * snippet for up to `query.maxResults` hits (default 5). When screenshots or
 * text extraction are requested, each hit's own page is opened first so the
 * captured data belongs to that page rather than to the search results page.
 * The tracking task is marked "completed" or "failed" before returning.
 *
 * Failures are reported through the returned object (`success: false` plus
 * `error`) instead of being thrown.
 *
 * @param agentId - Agent the browser session and tracking task are created for.
 * @param conversationId - Conversation the tracking task is attached to.
 * @param query - Search text plus optional result limit and per-result extras.
 * @returns The collected hits, or a failed result carrying the error message.
 */
export async function performWebResearch(
  agentId: number,
  conversationId: string,
  query: ResearchQuery
): Promise<ResearchResult> {
  const startTime = Date.now();
  const results: ResearchResult["results"] = [];

  // Anything can be thrown; only Error instances are guaranteed to carry a message.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  try {
    // Create browser session
    const sessionResult = await createBrowserSession(agentId);
    if (!sessionResult.sessionId) {
      throw new Error(sessionResult.error || "Failed to create browser session");
    }
    const sessionId = sessionResult.sessionId;
    // NOTE(review): the session is never released by this function and no
    // close/teardown API is visible from this module — confirm that
    // browser-agent expires sessions on its own.

    // Create task for this research
    const task = await createTask({
      agentId,
      conversationId,
      title: `🔍 Web Research: ${query.query.substring(0, 50)}`,
      description: `Search and extract information about: ${query.query}`,
      status: "in_progress",
      priority: "high",
      metadata: {
        researchQuery: query.query,
        sessionId,
        type: "web_research",
      },
    });
    const taskId = task?.id;

    try {
      // Navigate to Google Search
      const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(query.query)}`;
      const navResult = await executeBrowserAction(sessionId, {
        type: "navigate",
        params: { url: searchUrl },
      });
      if (!navResult.success) {
        throw new Error("Failed to navigate to search results");
      }

      // Wait for results to load (best effort: the outcome is not inspected)
      await executeBrowserAction(sessionId, {
        type: "wait",
        params: { selector: "div.g", timeout: 5000 },
      });

      // The limit is spliced into a script that runs in the browser, so force
      // it to a plain positive number first; anything else falls back to 5.
      const requestedLimit = Number(query.maxResults);
      const limit =
        Number.isFinite(requestedLimit) && requestedLimit > 0 ? Math.ceil(requestedLimit) : 5;

      // Extract search results
      // NOTE(review): this script ends with `return` while the text-extraction
      // script below is a bare expression — confirm how browser-agent wraps
      // "evaluate" scripts, since only one of the two forms can be right.
      const extractResult = await executeBrowserAction(sessionId, {
        type: "evaluate",
        params: {
          script: `
            const results = [];
            const items = document.querySelectorAll('div.g');
            items.forEach((item, idx) => {
              if (idx >= ${limit}) return;
              const titleEl = item.querySelector('h3');
              const linkEl = item.querySelector('a');
              const snippetEl = item.querySelector('div.VwiC3b');
              if (titleEl && linkEl) {
                results.push({
                  title: titleEl.textContent || '',
                  url: linkEl.href || '',
                  snippet: snippetEl?.textContent || ''
                });
              }
            });
            return results;
          `,
        },
      });

      // Only iterate when the agent really handed back a list of hits.
      const hits =
        extractResult.success && Array.isArray(extractResult.data) ? extractResult.data : [];
      const needsPageVisit = Boolean(query.includeScreenshots || query.extractText);

      for (const hit of hits) {
        let screenshotUrl: string | undefined;
        let extractedText: string | undefined;

        if (needsPageVisit) {
          // Open the hit itself so the screenshot and text come from that page,
          // not from the search results page or a previously visited hit.
          const pageNav = await executeBrowserAction(sessionId, {
            type: "navigate",
            params: { url: hit.url },
          });

          if (pageNav.success) {
            // Take screenshot if requested
            if (query.includeScreenshots) {
              const screenshot = await executeBrowserAction(sessionId, {
                type: "screenshot",
                params: { fullPage: false },
              });
              screenshotUrl = screenshot.screenshotUrl;
            }

            // Extract text if requested
            if (query.extractText) {
              const textResult = await executeBrowserAction(sessionId, {
                type: "evaluate",
                params: {
                  script: `
                    document.body.innerText.substring(0, 2000)
                  `,
                },
              });
              if (typeof textResult.data === "string") {
                extractedText = textResult.data;
              }
            }
          }
        }

        results.push({
          title: hit.title,
          url: hit.url,
          snippet: hit.snippet,
          screenshotUrl,
          extractedText,
        });
      }

      // Update task status
      if (taskId) {
        await updateTask(taskId, {
          status: "completed",
          result: JSON.stringify({
            query: query.query,
            resultsCount: results.length,
            timestamp: new Date().toISOString(),
          }),
        });
      }

      return {
        success: true,
        query: query.query,
        results,
        totalResults: results.length,
        executionTimeMs: Date.now() - startTime,
      };
    } catch (error: unknown) {
      // Update task with error; a failing update must not hide the real cause.
      if (taskId) {
        try {
          await updateTask(taskId, {
            status: "failed",
            errorMessage: describeError(error),
          });
        } catch (updateError) {
          console.error(`[Web Research] Failed to mark task ${taskId} as failed:`, updateError);
        }
      }
      throw error;
    }
  } catch (error: unknown) {
    return {
      success: false,
      query: query.query,
      results: [],
      totalResults: 0,
      executionTimeMs: Date.now() - startTime,
      error: describeError(error),
    };
  }
}
/**
 * Compile research results into a markdown report.
 *
 * Layout: an H1 with `title`, a "Generated" timestamp, then one H2 section per
 * research run listing its hits (URL, optional summary, screenshot and
 * extracted text). A run that carries an `error` gets an "Error" line so a
 * failed query is not mistaken for a search that simply found nothing.
 *
 * @param results - Research runs to include, rendered in the given order.
 * @param title - Heading placed at the top of the report.
 * @returns The report as a markdown string.
 */
export async function compileResearchReport(
  results: ResearchResult[],
  title: string
): Promise<string> {
  const parts: string[] = [`# ${title}\n\n`, `Generated: ${new Date().toISOString()}\n\n`];

  for (const research of results) {
    parts.push(`## Query: ${research.query}\n`);
    parts.push(`Total Results: ${research.totalResults}\n\n`);
    if (research.error) {
      parts.push(`**Error:** ${research.error}\n\n`);
    }

    for (const result of research.results) {
      parts.push(`### ${result.title}\n`);
      parts.push(`**URL:** [${result.url}](${result.url})\n`);
      if (result.snippet) {
        parts.push(`**Summary:** ${result.snippet}\n`);
      }
      if (result.screenshotUrl) {
        parts.push(`![Screenshot](${result.screenshotUrl})\n`);
      }
      if (result.extractedText) {
        // Coerce like string interpolation would, so odd runtime values cannot throw.
        const text = String(result.extractedText);
        // A fence is only closed by a backtick run at least as long as the
        // opener, so use one longer than any run inside the text (minimum 3).
        const backtickRuns: string[] = text.match(/`+/g) ?? [];
        let longestRun = 0;
        for (const run of backtickRuns) {
          longestRun = Math.max(longestRun, run.length);
        }
        const fence = "`".repeat(Math.max(3, longestRun + 1));
        parts.push(`**Extracted Text:**\n${fence}\n${text}\n${fence}\n`);
      }
      parts.push("\n");
    }
  }

  return parts.join("");
}
/**
 * Queue one pending "web_research" task per query for the orchestrator.
 *
 * Tasks are created one after another in input order. A query whose task
 * cannot be created is logged and skipped, so the returned list may be shorter
 * than `queries`.
 *
 * @param agentId - Agent the tasks are created for.
 * @param conversationId - Conversation the tasks are attached to.
 * @param queries - Search texts to turn into tasks.
 * @returns Ids of the tasks that were created, in creation order.
 */
export async function createResearchTasks(
  agentId: number,
  conversationId: string,
  queries: string[]
): Promise<number[]> {
  // Creates a single task; resolves to its id, or undefined when creation failed.
  const tryCreate = async (searchText: string) => {
    try {
      const created = await createTask({
        agentId,
        conversationId,
        title: `🔍 Research: ${searchText.substring(0, 40)}`,
        description: `Perform web research for: ${searchText}`,
        status: "pending",
        priority: "medium",
        metadata: {
          type: "web_research",
          query: searchText,
          autoCreated: true,
        },
      });
      return created?.id;
    } catch (failure) {
      console.error(`[Web Research] Failed to create task for query "${searchText}":`, failure);
      return undefined;
    }
  };

  const createdIds: number[] = [];
  for (const searchText of queries) {
    const newId = await tryCreate(searchText);
    if (!newId) {
      continue;
    }
    createdIds.push(newId);
  }
  return createdIds;
}