#!/usr/bin/env node
/**
 * Link Checker Script for Web Applications
 *
 * Finds all links on pages and checks for broken ones (404, 500, etc.)
 * Reports broken links with context (page URL, link text)
 */

const fs = require('fs');
const http = require('http');
const https = require('https');
const path = require('path');
const { URL } = require('url');

// Playwright MCP endpoint
const MCP_ENDPOINT = process.env.PLAYWRIGHT_MCP_URL || 'http://localhost:8931/mcp';

/**
 * Read a base-10 integer from an environment variable.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unset or not a number.
 * @returns {number} The parsed integer, or `fallback`.
 */
function readIntEnv(name, fallback) {
  const parsed = Number.parseInt(process.env[name] || '', 10);
  return Number.isNaN(parsed) ? fallback : parsed;
}

// Configuration
const config = {
  targetUrl: process.env.TARGET_URL || 'http://localhost:3000',
  maxDepth: readIntEnv('MAX_DEPTH', 2),
  timeout: readIntEnv('TIMEOUT', 5000),
  concurrency: readIntEnv('CONCURRENCY', 5),
  // Drop blank entries so an unset IGNORE_PATTERNS does not yield [''].
  ignorePatterns: (process.env.IGNORE_PATTERNS || '')
    .split(',')
    .map((pattern) => pattern.trim())
    .filter((pattern) => pattern !== ''),
  reportsDir: process.env.REPORTS_DIR || './reports',
};

// Link statuses that are reported as failures and make the process exit non-zero.
const FAILURE_STATUSES = new Set(['BROKEN', 'ERROR', 'TIMEOUT']);

// Matches an accessibility-tree link node such as `- link "Docs" [ref=e3]:`.
const LINK_LINE = /^(\s*)-\s+link\b(?:\s+"((?:[^"\\]|\\.)*)")?/;
// Matches the nested URL property of a node, such as `- /url: /docs`.
const URL_LINE = /^(\s*)-\s+\/url:\s*(.*?)\s*$/;

/**
 * Make HTTP request to Playwright MCP
 *
 * Sends a JSON-RPC 2.0 POST to MCP_ENDPOINT and resolves with the parsed
 * JSON response body.
 *
 * @param {string} method - JSON-RPC method name.
 * @param {object} params - JSON-RPC params object.
 * @returns {Promise<object>} The parsed response.
 * @throws Rejects on network errors, on timeout (`config.timeout` ms), when
 *   the body is not valid JSON, or when the response carries a JSON-RPC
 *   `error` member.
 */
async function mcpRequest(method, params) {
  return new Promise((resolve, reject) => {
    const body = JSON.stringify({
      jsonrpc: '2.0',
      id: Date.now(),
      method,
      params,
    });

    const url = new URL(MCP_ENDPOINT);
    const options = {
      hostname: url.hostname,
      port: url.port,
      // WHATWG URL objects have no `path` property; build it from its parts.
      path: url.pathname + url.search,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(body),
      },
    };

    const client = url.protocol === 'https:' ? https : http;
    const req = client.request(options, (res) => {
      let data = '';
      res.on('data', (chunk) => {
        data += chunk;
      });
      res.on('end', () => {
        let parsed;
        try {
          parsed = JSON.parse(data);
        } catch (e) {
          reject(e);
          return;
        }
        // Surface JSON-RPC failures instead of treating them as a result.
        if (parsed && parsed.error) {
          const detail = parsed.error.message || JSON.stringify(parsed.error);
          reject(new Error(`MCP error: ${detail}`));
          return;
        }
        resolve(parsed);
      });
    });

    req.on('error', reject);
    req.setTimeout(config.timeout, () => {
      // Reject first: destroy() may emit a follow-up 'error', which is then a no-op.
      reject(new Error('Timeout'));
      req.destroy();
    });

    req.write(body);
    req.end();
  });
}

/**
 * Navigate to URL using Playwright MCP
 *
 * @param {string} url - Page URL to open.
 * @returns {Promise<object>} The raw MCP response.
 */
async function navigateTo(url) {
  const result = await mcpRequest('tools/call', {
    name: 'browser_navigate',
    arguments: { url },
  });
  return result;
}

/**
 * Get page snapshot with all links
 *
 * @returns {Promise<object>} The raw MCP response of `browser_snapshot`.
 */
async function getPageSnapshot() {
  const result = await mcpRequest('tools/call', {
    name: 'browser_snapshot',
    arguments: {},
  });
  return result;
}

/**
 * Collect the textual payload of a snapshot response.
 *
 * @param {object|string} snapshot - MCP response, or already-extracted text.
 * @returns {string} Concatenated text parts, or '' when none are found.
 */
function snapshotText(snapshot) {
  if (typeof snapshot === 'string') {
    return snapshot;
  }
  const content = snapshot && snapshot.result && snapshot.result.content;
  if (!Array.isArray(content)) {
    return '';
  }
  return content
    .filter((item) => item && typeof item.text === 'string')
    .map((item) => item.text)
    .join('\n');
}

/**
 * Remove one pair of matching surrounding quotes from a value.
 *
 * @param {string} value - Raw value text.
 * @returns {string} The value without enclosing quotes.
 */
function stripQuotes(value) {
  const first = value.charAt(0);
  const isQuoted =
    value.length >= 2 && (first === '"' || first === "'") && value.endsWith(first);
  return isQuoted ? value.slice(1, -1) : value;
}

/**
 * Extract links from accessibility tree
 *
 * NOTE(review): the layout parsed here is an assumption about the Playwright
 * MCP response and should be confirmed against the server version in use:
 * text parts under `result.content[].text`, with each link rendered as a
 * `- link "text"` line followed by a more deeply indented `- /url: href`
 * line. Input that does not match yields an empty list.
 *
 * @param {object|string} snapshot - Response of `getPageSnapshot()`.
 * @returns {Array<{text: string, href: string}>} Links in document order.
 */
function extractLinks(snapshot) {
  const links = [];
  let pending = null; // Most recent link node still waiting for its URL line.

  for (const line of snapshotText(snapshot).split(/\r?\n/)) {
    const linkMatch = LINK_LINE.exec(line);
    if (linkMatch) {
      pending = {
        indent: linkMatch[1].length,
        text: (linkMatch[2] || '').replace(/\\(.)/g, '$1'),
      };
      continue;
    }
    if (pending === null) {
      continue;
    }

    const urlMatch = URL_LINE.exec(line);
    if (urlMatch && urlMatch[1].length > pending.indent) {
      const href = stripQuotes(urlMatch[2]);
      if (href !== '') {
        links.push({ text: pending.text, href });
      }
      pending = null;
      continue;
    }

    // A non-blank line at the same or shallower indent ends the link's subtree.
    const indent = /^\s*/.exec(line)[0].length;
    if (line.trim() !== '' && indent <= pending.indent) {
      pending = null;
    }
  }

  return links;
}

/**
 * Issue one request and report the response status code.
 *
 * @param {URL} parsedUrl - Absolute http(s) URL to probe.
 * @param {string} method - HTTP method, e.g. 'HEAD' or 'GET'.
 * @returns {Promise<number>} The HTTP status code.
 * @throws Rejects on network errors; on timeout the error has `timedOut === true`.
 */
function probeUrl(parsedUrl, method) {
  return new Promise((resolve, reject) => {
    const client = parsedUrl.protocol === 'https:' ? https : http;
    const options = {
      hostname: parsedUrl.hostname,
      port: parsedUrl.port,
      path: parsedUrl.pathname + parsedUrl.search,
      method,
      timeout: config.timeout,
    };

    const req = client.request(options, (res) => {
      resolve(res.statusCode);
      // Only the status matters; do not download a body on the GET fallback.
      res.destroy();
    });

    req.on('error', reject);
    req.on('timeout', () => {
      const err = new Error('Request timed out');
      err.timedOut = true;
      // Reject first: destroy() may emit a follow-up 'error', which is then a no-op.
      reject(err);
      req.destroy();
    });

    req.end();
  });
}

/**
 * Check if a URL is valid
 *
 * Never rejects; every outcome is reported through the `status` field:
 * 'SKIP' (anchor, non-HTTP scheme, ignored pattern), 'OK' (status < 400),
 * 'BROKEN' (status >= 400), 'TIMEOUT', or 'ERROR'.
 *
 * @param {string} url - Link target as found on the page (may be relative).
 * @param {string} baseUrl - URL of the page the link was found on.
 * @returns {Promise<{url: string, status: string, statusCode?: number, message?: string}>}
 */
async function checkUrl(url, baseUrl) {
  try {
    const parsedUrl = new URL(url, baseUrl);

    // Skip anchor links
    if (url.startsWith('#')) {
      return { url, status: 'SKIP', message: 'Anchor link' };
    }

    // Skip everything that cannot be fetched over HTTP (mailto:, tel:, javascript:, data:, ...)
    if (parsedUrl.protocol !== 'http:' && parsedUrl.protocol !== 'https:') {
      return { url, status: 'SKIP', message: 'Non-HTTP protocol' };
    }

    // Check ignore patterns
    if (config.ignorePatterns.some((pattern) => pattern && url.includes(pattern))) {
      return { url, status: 'SKIP', message: 'Ignored pattern' };
    }

    // Make HEAD request to check URL
    let statusCode = await probeUrl(parsedUrl, 'HEAD');
    if (statusCode === 405 || statusCode === 501) {
      // The server does not support HEAD; that says nothing about the link itself.
      statusCode = await probeUrl(parsedUrl, 'GET');
    }

    return {
      url,
      status: statusCode >= 400 ? 'BROKEN' : 'OK',
      statusCode,
    };
  } catch (err) {
    if (err && err.timedOut) {
      return { url, status: 'TIMEOUT', message: 'Request timed out' };
    }
    return { url, status: 'ERROR', message: err.message };
  }
}

/**
 * Resolve a link to an absolute URL.
 *
 * @param {string} href - Link target, absolute or relative.
 * @param {string} [base] - URL that relative targets are resolved against.
 * @returns {URL|null} The resolved URL, or null when it cannot be parsed.
 */
function resolveUrl(href, base) {
  try {
    return new URL(href, base);
  } catch (err) {
    // Unparseable links are an expected input; callers treat null as "skip".
    return null;
  }
}

/**
 * Main link checking function
 *
 * Crawls breadth-first from `config.targetUrl`, checks every link found on
 * each visited page, writes `link-check-report.json` into
 * `config.reportsDir`, and exits with code 1 when any failure was recorded
 * (0 otherwise). Pages linked from a page at depth `d` are visited only while
 * `d < config.maxDepth`; the target URL is depth 0.
 *
 * @returns {Promise<void>} Does not return normally; ends via `process.exit`.
 */
async function main() {
  console.log('=== Link Checker ===\n');
  console.log(`Target URL: ${config.targetUrl}`);
  console.log(`Max Depth: ${config.maxDepth}\n`);

  const visitedUrls = new Set();
  const brokenLinks = [];
  const allLinks = [];
  const batchSize = Math.max(1, config.concurrency);

  // Connect to Playwright MCP
  console.log('📡 Connecting to Playwright MCP...');

  // Start with target URL (normalized so that links back to it are recognized as visited)
  const parsedTarget = resolveUrl(config.targetUrl);
  const targetOrigin = parsedTarget ? parsedTarget.origin : null;
  const toVisit = [
    { url: parsedTarget ? parsedTarget.href : config.targetUrl, depth: 0 },
  ];

  while (toVisit.length > 0) {
    const { url, depth } = toVisit.shift();

    if (visitedUrls.has(url)) {
      continue;
    }
    visitedUrls.add(url);

    console.log(`🔍 Checking: ${url}`);

    try {
      // Navigate to URL
      await navigateTo(url);

      // Get page content
      const snapshot = await getPageSnapshot();
      const links = extractLinks(snapshot);

      // Check links in batches of `batchSize`; checkUrl never rejects.
      for (let start = 0; start < links.length; start += batchSize) {
        const batch = links.slice(start, start + batchSize);
        const results = await Promise.all(
          batch.map((link) => checkUrl(link.href, url))
        );

        batch.forEach((link, index) => {
          const result = results[index];
          const record = {
            sourcePage: url,
            linkText: link.text || '[no text]',
            href: link.href,
            ...result,
          };
          allLinks.push(record);

          if (FAILURE_STATUSES.has(result.status)) {
            brokenLinks.push(record);
            console.log(`  ❌ ${link.href} - ${result.statusCode || result.message}`);
          } else if (result.status === 'SKIP') {
            console.log(`  ⏭️ ${link.href} - ${result.message}`);
          } else {
            console.log(`  ✅ ${link.href}`);
          }

          // Add to visit queue if same origin and within the depth limit
          if (result.status === 'OK' && depth < config.maxDepth) {
            // Resolve against the page the link was found on, and queue the
            // absolute form so the browser can navigate to it.
            const resolved = resolveUrl(link.href, url);
            if (
              resolved !== null &&
              resolved.origin === targetOrigin &&
              !visitedUrls.has(resolved.href)
            ) {
              toVisit.push({ url: resolved.href, depth: depth + 1 });
            }
          }
        });
      }
    } catch (error) {
      console.log(`❌ Error checking ${url}: ${error.message}`);
      brokenLinks.push({
        sourcePage: url,
        href: url,
        status: 'ERROR',
        message: error.message,
      });
    }
  }

  // Generate report
  const report = {
    timestamp: new Date().toISOString(),
    config,
    summary: {
      totalLinks: allLinks.length,
      brokenLinks: brokenLinks.length,
      pagesChecked: visitedUrls.size,
    },
    allLinks,
    brokenLinks,
  };

  // The reports directory may not exist yet on a fresh checkout.
  fs.mkdirSync(config.reportsDir, { recursive: true });
  const reportPath = path.join(config.reportsDir, 'link-check-report.json');
  fs.writeFileSync(reportPath, JSON.stringify(report, null, 2));

  console.log(`\n📊 Summary:`);
  console.log(`   Pages Checked: ${visitedUrls.size}`);
  console.log(`   Total Links: ${allLinks.length}`);
  console.log(`   Broken Links: ${brokenLinks.length}`);
  console.log(`\n📄 Report saved to: ${reportPath}`);

  // Exit with error if broken links found
  process.exit(brokenLinks.length > 0 ? 1 : 0);
}

main().catch((err) => {
  console.error('Fatal error:', err);
  process.exit(1);
});