import { json } from '@remix-run/node';
import type { ActionFunctionArgs } from '@remix-run/node';

/**
 * Maximum number of characters of page text returned in `mainContent`.
 * NOTE(review): the truncation was reconstructed from a damaged source; confirm
 * the limit (and whether truncation is wanted at all) against the consumer.
 */
const MAIN_CONTENT_MAX_LENGTH = 1000;

/** Only these URL schemes may be fetched on behalf of the caller. */
const ALLOWED_PROTOCOLS: readonly string[] = ['http:', 'https:'];

/** Request headers that mimic a desktop browser so sites serve their regular HTML. */
const BROWSER_HEADERS = {
  'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
  'Accept-Language': 'en-US,en;q=0.5',
};

const TITLE_PATTERN = /<title[^>]*>([^<]+)<\/title>/i;
const META_DESCRIPTION_PATTERN = /<meta[^>]*name="description"[^>]*content="([^"]*)"[^>]*>/i;
const SCRIPT_PATTERN = /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi;
const STYLE_PATTERN = /<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi;
const CODE_BLOCK_PATTERN = /<pre[^>]*>[\s\S]*?<\/pre>|<code[^>]*>[\s\S]*?<\/code>/gi;
const ANCHOR_PATTERN = /<a[^>]*href="([^"]*)"[^>]*>([^<]*)<\/a>/gi;

/** Decodes the three basic HTML entities; `&amp;` goes last so `&amp;lt;` stays `&lt;`. */
function decodeBasicEntities(text: string): string {
  return text.replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&amp;/g, '&');
}

/** Collapses an HTML document to plain text: drops scripts, styles and tags, squeezes whitespace. */
function extractMainContent(html: string): string {
  return html
    .replace(SCRIPT_PATTERN, '')
    .replace(STYLE_PATTERN, '')
    .replace(/<[^>]+>/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}

/** Returns the tag-stripped, entity-decoded text of every `<pre>` / `<code>` element. */
function extractCodeBlocks(html: string): string[] {
  const blocks = html.match(CODE_BLOCK_PATTERN) ?? [];
  return blocks.map((block) => decodeBasicEntities(block.replace(/<[^>]+>/g, '')).trim());
}

/** Returns `{ url, text }` for every simple `<a href="...">text</a>` element. */
function extractLinks(html: string): Array<{ url: string; text: string }> {
  const anchors = html.match(ANCHOR_PATTERN) ?? [];
  return anchors.map((anchor) => {
    const hrefMatch = anchor.match(/href="([^"]*)"/i);
    const textMatch = anchor.match(/>([^<]*)</i);
    return {
      url: hrefMatch?.[1] ?? '',
      text: textMatch?.[1]?.trim() ?? '',
    };
  });
}

/**
 * Remix action that fetches the page named by the `url` form field and returns
 * a structured summary of its HTML.
 *
 * Responses:
 * - 400 `{ error }` when `url` is missing, not a string, malformed, or not http(s).
 * - 500 `{ error }` when the fetch fails, the upstream status is not OK, or the
 *   response is not `text/html`.
 * - 200 `{ success: true, data }` otherwise.
 *
 * NOTE(review): the field names inside `data` (other than `sourceUrl`) were
 * reconstructed from a damaged copy of this file — confirm against the client.
 *
 * NOTE(security): the URL is caller-controlled. Only the scheme is validated
 * here; requests to internal/private hosts are not blocked (SSRF risk).
 */
export async function action({ request }: ActionFunctionArgs) {
  try {
    const formData = await request.formData();
    const url = formData.get('url');

    // FormData entries may be File objects or null; only a non-empty string is usable.
    if (typeof url !== 'string' || !url) {
      return json({ error: 'URL is required' }, { status: 400 });
    }

    let targetUrl: URL;
    try {
      targetUrl = new URL(url);
    } catch {
      return json({ error: 'URL is not valid' }, { status: 400 });
    }

    if (!ALLOWED_PROTOCOLS.includes(targetUrl.protocol)) {
      return json({ error: 'URL must use http or https' }, { status: 400 });
    }

    const response = await fetch(targetUrl, { headers: BROWSER_HEADERS });

    if (!response.ok) {
      throw new Error(`Failed to fetch URL: ${response.status} ${response.statusText}`);
    }

    const contentType = response.headers.get('content-type');
    if (!contentType?.includes('text/html')) {
      throw new Error('URL must point to an HTML page');
    }

    const html = await response.text();

    const title = html.match(TITLE_PATTERN)?.[1]?.trim() ?? 'No title found';
    const description = html.match(META_DESCRIPTION_PATTERN)?.[1]?.trim() ?? '';
    const mainContent = extractMainContent(html);

    const structuredContent = {
      title,
      description,
      mainContent: mainContent.slice(0, MAIN_CONTENT_MAX_LENGTH) + '...',
      codeBlocks: extractCodeBlocks(html),
      // Drop in-page anchors, javascript: pseudo-links and links without visible text.
      relevantLinks: extractLinks(html).filter(
        (link) =>
          link.url !== '' &&
          !link.url.startsWith('#') &&
          !link.url.startsWith('javascript:') &&
          link.text !== '',
      ),
      sourceUrl: url,
    };

    return json({ success: true, data: structuredContent });
  } catch (error) {
    console.error('Web search error:', error);
    return json(
      { error: error instanceof Error ? error.message : 'Unknown error occurred' },
      { status: 500 },
    );
  }
}