Files
API-mega-list/generate_readme.js
2025-12-09 11:15:12 -05:00

214 lines
8.6 KiB
JavaScript

/**
* Script to generate a comprehensive README.md with all Apify Actors organized by category
*/
const fs = require('fs');
// Load the actor dataset. The relative path is resolved against the current
// working directory, so the script must be run from the directory that
// contains apify_actors.json.
const actors = JSON.parse(fs.readFileSync('apify_actors.json', 'utf-8'));
// Everything below iterates `actors` and reads `actors.length`; fail here with
// a clear message instead of a confusing error further down.
if (!Array.isArray(actors)) {
  throw new TypeError('apify_actors.json must contain a JSON array of actors');
}
console.log(`Processing ${actors.length} actors...`);
/**
 * Convert a raw category key (e.g. `MCP_SERVERS`) into a display name and the
 * matching heading anchor used by the table of contents.
 *
 * Each underscore-separated word is title-cased, except for known acronyms,
 * which are emitted fully upper-case.
 *
 * @param {string} category - Underscore-separated category key.
 * @returns {{ readable: string, anchor: string }} `readable` is the heading
 *   text; `anchor` is the fragment (without `#`) that links to that heading.
 */
function formatCategoryName(category) {
  // Words that must stay upper-case instead of being title-cased.
  const acronyms = new Set(['AI', 'MCP', 'SEO', 'API']);

  const readable = category
    .split('_')
    .map((word) => {
      const upper = word.toUpperCase();
      if (acronyms.has(upper)) {
        return upper;
      }
      return word.charAt(0).toUpperCase() + word.slice(1).toLowerCase();
    })
    .join(' ');

  // GitHub derives heading anchors by lower-casing the heading, turning spaces
  // into hyphens and dropping punctuation (hyphens and underscores are kept).
  // Whitespace is hyphenated first so that a removed symbol between two words
  // leaves both hyphens behind ("Tips & Tricks" -> "tips--tricks").
  // Only ASCII punctuation is stripped here.
  const anchor = readable
    .toLowerCase()
    .replace(/\s+/g, '-')
    .replace(/[!"#$%&'()*+,.\/:;<=>?@\[\\\]^`{|}~]/g, '');

  return { readable, anchor };
}
// Organize actors by category.
// Category keys come straight from the input data, so the lookup tables are
// prototype-less: with a plain `{}`, a category named `constructor` or
// `__proto__` would resolve to an inherited member instead of an actor list.
const actorsByCategory = Object.create(null);
const uncategorized = [];
// Per category: username -> Set of actor names already filed there. Lets the
// duplicate check be a lookup instead of a scan over the category's list.
const seenByCategory = Object.create(null);

for (const actor of actors) {
  const categories = actor.categories || [];
  if (!(categories.length > 0)) {
    uncategorized.push(actor);
    continue;
  }

  for (const category of categories) {
    if (!actorsByCategory[category]) {
      actorsByCategory[category] = [];
      seenByCategory[category] = new Map();
    }

    // An actor is a duplicate when both its username and name were already
    // added to this category.
    const namesByUser = seenByCategory[category];
    let seenNames = namesByUser.get(actor.username);
    if (!seenNames) {
      seenNames = new Set();
      namesByUser.set(actor.username, seenNames);
    }
    if (!seenNames.has(actor.name)) {
      seenNames.add(actor.name);
      actorsByCategory[category].push(actor);
    }
  }
}

// Sort category keys with the default string comparison
const sortedCategories = Object.keys(actorsByCategory).sort();
// Build the fixed introduction of the README: title, pitch, dataset
// statistics and pointers to the companion files. Later sections are
// appended to `content`.
const formattedActorTotal = actors.length.toLocaleString();
const categoryTotal = sortedCategories.length;

const introParts = [
  `# API-mega-list\n\n`,
  `This GitHub repo is a powerhouse collection of APIs you can start using immediately to build everything from simple automations to full-scale applications. One of the most valuable API lists on GitHub—period. 💪\n\n`,
  `## 📦 Apify Actors Collection\n\n`,
  `This repository includes a comprehensive collection of **${formattedActorTotal} Apify Actors** (APIs) - ready-to-use web scraping and automation tools from the Apify platform.\n\n`,
  `### What are Apify Actors?\n\n`,
  `Apify Actors are pre-built web scraping and automation tools that can extract data from websites, automate workflows, and integrate with AI applications. Each actor is a ready-to-use API that you can run via the Apify platform.\n\n`,
  `### 📊 Statistics\n\n`,
  `- **Total APIs**: ${formattedActorTotal}\n`,
  `- **Categories**: ${categoryTotal}\n`,
  `- **All links include affiliate tracking** (\`?fpr=p2hrc6\`)\n\n`,
  `### 📁 Additional Files\n\n`,
  `- **[APIFY_ACTORS.md](APIFY_ACTORS.md)** - Complete markdown list of all actors organized by category (~8.3 MB)\n`,
  `- **[apify_actors.json](apify_actors.json)** - Full JSON dataset with all actor details (~11.6 MB)\n`,
  `- **[apify_actors_simple.txt](apify_actors_simple.txt)** - Simple text format with names and URLs (~996 KB)\n\n`,
  `---\n\n`,
  `## 📚 Complete API List by Category\n\n`,
];

let content = introParts.join('');
// Add table of contents: one link per category (in sorted order), followed by
// an "Uncategorized" entry when any actor has no category.
content += `### Table of Contents\n\n`;
for (const category of sortedCategories) {
  const count = actorsByCategory[category].length;
  // A single call yields both the link label and its anchor.
  const { readable, anchor } = formatCategoryName(category);
  content += `- [${readable}](#${anchor}) (${count.toLocaleString()} APIs)\n`;
}
if (uncategorized.length > 0) {
  content += `- [Uncategorized](#uncategorized) (${uncategorized.length.toLocaleString()} APIs)\n`;
}
content += `\n`;
/**
 * Shorten a description for use in a single-line list entry.
 *
 * Runs of whitespace (including newlines) are collapsed to one space so the
 * text cannot break out of its Markdown list item. Text longer than
 * `maxLength` is cut and suffixed with `...`, preferring the last space
 * before the limit when that space lies in the final 20% of the window.
 *
 * @param {string} description - Raw description; falsy values yield `''`.
 * @param {number} [maxLength=200] - Maximum length before the ellipsis.
 * @returns {string} The normalized, possibly truncated description.
 */
function truncateDescription(description, maxLength = 200) {
  const text = String(description || '').replace(/\s+/g, ' ').trim();
  if (text.length <= maxLength) {
    return text;
  }

  let cutPoint = maxLength;
  const lastSpace = text.lastIndexOf(' ', maxLength);
  if (lastSpace > maxLength * 0.8) {
    // Only use the word boundary if it's not too far back.
    cutPoint = lastSpace;
  } else {
    // Hard cut: do not split a surrogate pair (e.g. an emoji), which would
    // leave a lone surrogate in the output.
    const lastUnit = text.charCodeAt(cutPoint - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      cutPoint -= 1;
    }
  }
  return `${text.substring(0, cutPoint).trim()}...`;
}

/**
 * Render one actor as a Markdown list item (with trailing newline).
 *
 * @param {object} actor - Actor record; reads `title`, `name`,
 *   `affiliate_url`, `url` and `description`.
 * @returns {string} `- **[title](url)** - description\n`, or without the
 *   description part when there is none.
 */
function formatActorEntry(actor) {
  const title = actor.title || actor.name || 'Unknown';
  const affiliateUrl = actor.affiliate_url || actor.url || '';
  const shortDescription = truncateDescription(actor.description);
  const link = `- **[${title}](${affiliateUrl})**`;
  return shortDescription ? `${link} - ${shortDescription}\n` : `${link}\n`;
}

/**
 * Render a full README section: horizontal rule, heading, actor count and
 * the actors sorted by title (falling back to name).
 *
 * @param {string} heading - Section heading text.
 * @param {object[]} sectionActors - Actors to list; not modified.
 * @returns {string} Markdown for the section, ending with a blank line.
 */
function renderActorSection(heading, sectionActors) {
  // Sort a copy so the caller's array keeps its original order.
  const sortedActors = [...sectionActors].sort((a, b) =>
    (a.title || a.name || '').localeCompare(b.title || b.name || '')
  );

  let section = `---\n\n`;
  section += `## ${heading}\n\n`;
  section += `*${sectionActors.length.toLocaleString()} APIs*\n\n`;
  section += sortedActors.map(formatActorEntry).join('');
  section += `\n`;
  return section;
}

// Write categorized actors
for (const category of sortedCategories) {
  const { readable } = formatCategoryName(category);
  content += renderActorSection(readable, actorsByCategory[category]);
}

// Write uncategorized actors
if (uncategorized.length > 0) {
  content += renderActorSection('Uncategorized', uncategorized);
}
// Closing sections (usage, notes, generation stamp), then write the file and
// print a short summary.
const [generatedOn] = new Date().toISOString().split('T');

const closingParts = [
  `---\n\n`,
  `## 🚀 Usage\n\n`,
  `All links in this collection include affiliate tracking. When you click on any actor link, you'll be taken to the Apify platform where you can:\n\n`,
  `- View actor documentation\n`,
  `- Run actors via API\n`,
  `- Schedule automated runs\n`,
  `- Integrate with your applications\n\n`,
  `## 📝 Notes\n\n`,
  `- All APIs are sorted alphabetically within their categories\n`,
  `- Descriptions are truncated to 200 characters for readability\n`,
  `- For full descriptions and details, visit the individual API pages\n`,
  `- This list is automatically generated from the Apify Store API\n\n`,
  `---\n\n`,
  `*Last updated: ${generatedOn}*\n`,
  `*Total APIs: ${actors.length.toLocaleString()}*\n`,
];
content += closingParts.join('');

// Write to README.md
fs.writeFileSync('README.md', content, 'utf-8');

const summaryLines = [
  `✅ README.md generated successfully!`,
  ` - ${sortedCategories.length} categories`,
  ` - ${actors.length.toLocaleString()} total APIs`,
  ` - ${uncategorized.length.toLocaleString()} uncategorized APIs`,
];
for (const line of summaryLines) {
  console.log(line);
}