This commit is contained in:
cporter202
2025-12-09 12:33:33 -05:00
4 changed files with 0 additions and 363997 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,201 +0,0 @@
"""
Script to fetch all Apify Actors from the API and compile them into a list
with affiliate links.
"""
import requests
import json
import time
from typing import List, Dict
# Apify Store listing endpoint; fetch_all_actors() pages through it using
# the "limit" and "offset" query parameters.
API_BASE_URL = "https://api.apify.com/v2/store"
# Affiliate tracking query string, including its leading "?".
AFFILIATE_PARAM = "?fpr=p2hrc6"
def _build_actor_info(actor: Dict, affiliate_query: str) -> Dict:
    """Reduce one raw Store API item to the fields this script exports.

    ``or`` fallbacks are used instead of ``dict.get`` defaults so that a JSON
    ``null`` is treated the same as a missing key.
    """
    raw_name = actor.get("name")
    username = actor.get("username") or ""
    url = actor.get("url") or ""

    # Construct the URL from username and name if the API did not provide one.
    # Only real values are used, so a placeholder name never ends up in a link.
    if not url and username and raw_name:
        url = f"https://apify.com/{username}/{raw_name}"

    affiliate_url = ""
    if url:
        # Respect query parameters the URL may already carry.
        separator = "&" if "?" in url else "?"
        affiliate_url = f"{url}{separator}{affiliate_query}"

    return {
        "name": raw_name or "Unknown",
        "username": username,
        "title": actor.get("title") or raw_name or "Unknown",
        "description": actor.get("description") or "",
        "url": url,
        "affiliate_url": affiliate_url,
        "stats": actor.get("stats") or {},
        "categories": actor.get("categories") or [],
        "createdAt": actor.get("createdAt") or "",
        "modifiedAt": actor.get("modifiedAt") or "",
    }


def fetch_all_actors(
    limit: int = 100,
    max_retries: int = 5,
    timeout: float = 30.0,
) -> List[Dict]:
    """
    Fetch all actors from Apify Store API with pagination.

    Args:
        limit: Number of actors to fetch per request (max 100)
        max_retries: How many times in a row a failing request for the same
            page is retried before the fetch is abandoned
        timeout: Seconds to wait for each HTTP response before treating the
            request as failed

    Returns:
        List of all actors with their details. If the fetch is abandoned
        (retries exhausted or an unexpected error), the actors collected up
        to that point are returned.
    """
    all_actors = []
    offset = 0
    consecutive_failures = 0
    # AFFILIATE_PARAM carries its own leading "?"; the separator is chosen
    # per URL, so only the bare "key=value" part is needed here.
    affiliate_query = AFFILIATE_PARAM.lstrip("?")

    print("Starting to fetch Apify Actors...")

    while True:
        try:
            # Make API request with pagination. Without a timeout a stalled
            # connection would block the script forever.
            response = requests.get(
                API_BASE_URL,
                params={"limit": limit, "offset": offset},
                timeout=timeout,
            )
            response.raise_for_status()
            data = response.json()
            consecutive_failures = 0

            # Extract actors from response
            page = data.get("data") or {}
            actors = page.get("items") or []
            if not actors:
                print(f"No more actors found at offset {offset}")
                break

            # Process each actor
            all_actors.extend(
                _build_actor_info(actor, affiliate_query) for actor in actors
            )
            print(f"Fetched {len(all_actors)} actors so far... (offset: {offset})")

            # Check if we've reached the end. When the response carries no
            # total, keep paging until an empty page is returned instead of
            # stopping after the first page.
            total_count = page.get("total")
            if total_count is not None and offset + len(actors) >= total_count:
                print(f"Reached end. Total actors: {total_count}")
                break

            # Advance by what was actually received so a short page never
            # causes records to be skipped.
            offset += len(actors)

            # Be respectful with API rate limits
            time.sleep(0.5)
        except requests.exceptions.RequestException as e:
            consecutive_failures += 1
            print(f"Error fetching actors at offset {offset}: {e}")
            if consecutive_failures > max_retries:
                # A permanent failure (e.g. a 4xx response) must not loop forever.
                print(f"Giving up after {max_retries} retries.")
                break
            print("Retrying in 5 seconds...")
            time.sleep(5)
            continue
        except Exception as e:
            print(f"Unexpected error: {e}")
            break

    print(f"\nTotal actors fetched: {len(all_actors)}")
    return all_actors
def save_to_json(actors: List[Dict], filename: str = "apify_actors.json"):
    """Write the actor records to ``filename`` as indented UTF-8 JSON.

    Args:
        actors: Actor records to serialize
        filename: Destination path; an existing file is overwritten
    """
    # Non-ASCII characters are written as-is rather than as \\u escapes.
    encoder = json.JSONEncoder(indent=2, ensure_ascii=False)
    with open(filename, "w", encoding="utf-8") as out_file:
        out_file.writelines(encoder.iterencode(actors))
    actor_count = len(actors)
    print(f"Saved {actor_count} actors to {filename}")
def _markdown_title(actor: Dict) -> str:
    """Return the text shown (and sorted on) for an actor: title, else name."""
    return actor.get("title") or actor.get("name") or "Unknown"


def _markdown_actor_line(actor: Dict) -> str:
    """Render one actor as a markdown bullet, without the trailing newline."""
    # Fall back to the plain URL when the affiliate URL is missing OR empty.
    link = actor.get("affiliate_url") or actor.get("url") or ""
    # Collapse newlines/runs of whitespace: a blank line inside a
    # description would otherwise terminate the markdown list item.
    description = " ".join((actor.get("description") or "").split())
    bullet = f"- **[{_markdown_title(actor)}]({link})**"
    return f"{bullet} - {description}" if description else bullet


def generate_markdown_list(actors: List[Dict], filename: str = "APIFY_ACTORS.md"):
    """Generate a markdown file with all actors and affiliate links.

    Actors are grouped under one ``##`` heading per category (an actor with
    several categories appears under each of them), categories are written in
    sorted order, and actors without any category are listed last under
    "Uncategorized". Within a section actors are sorted by displayed title.

    Args:
        actors: Actor records as produced by fetch_all_actors()
        filename: Destination path; an existing file is overwritten
    """
    # Group by category if available
    actors_by_category: Dict[str, List[Dict]] = {}
    uncategorized: List[Dict] = []
    for actor in actors:
        categories = actor.get("categories") or []
        if not categories:
            uncategorized.append(actor)
            continue
        for category in categories:
            actors_by_category.setdefault(category, []).append(actor)

    # Categorized sections first (alphabetical), uncategorized actors last.
    sections = [
        (category, actors_by_category[category])
        for category in sorted(actors_by_category)
    ]
    if uncategorized:
        sections.append(("Uncategorized", uncategorized))

    with open(filename, "w", encoding="utf-8") as f:
        f.write("# Apify Actors List\n\n")
        f.write(f"Complete list of {len(actors)} Apify Actors (APIs) available on the Apify platform.\n\n")
        f.write("---\n\n")

        for heading, members in sections:
            f.write(f"## {heading}\n\n")
            # Sorting on the displayed title keeps the key a string even when
            # the stored title is None, which would otherwise raise TypeError.
            for actor in sorted(members, key=_markdown_title):
                f.write(_markdown_actor_line(actor) + "\n")
            f.write("\n")

        f.write("---\n\n")
        f.write(f"*Total: {len(actors)} Actors*\n")
        f.write(f"*Last updated: {time.strftime('%Y-%m-%d %H:%M:%S')}*\n")

    print(f"Generated markdown list: {filename}")
def generate_simple_list(actors: List[Dict], filename: str = "apify_actors_simple.txt"):
    """Generate a simple text file with just names and affiliate URLs.

    One ``title|url`` line is written per actor, sorted by the displayed
    title. The title falls back to the actor name, and the URL falls back to
    the plain actor URL, whenever the preferred value is missing or empty.

    Args:
        actors: Actor records as produced by fetch_all_actors()
        filename: Destination path; an existing file is overwritten
    """
    rows = []
    for actor in actors:
        title = actor.get("title") or actor.get("name") or "Unknown"
        # `dict.get(key, default)` never falls back when the key exists with
        # an empty value, so the fallback is spelled with `or` instead.
        link = actor.get("affiliate_url") or actor.get("url") or ""
        rows.append((title, link))

    # Sort on the resolved title only: it is always a string (a stored None
    # title would make the comparison raise TypeError) and the stable sort
    # keeps the input order among equal titles.
    rows.sort(key=lambda row: row[0])

    with open(filename, "w", encoding="utf-8") as f:
        f.writelines(f"{title}|{link}\n" for title, link in rows)

    print(f"Generated simple list: {filename}")
if __name__ == "__main__":
    # Script entry point: fetch everything once, then emit the three output
    # files from the same in-memory list.
    banner = "=" * 60
    print(banner)
    print("Apify Actors Fetcher")
    print(banner)
    print()

    fetched_actors = fetch_all_actors(limit=100)

    if not fetched_actors:
        print("No actors were fetched. Please check the API connection.")
    else:
        # Full dataset as JSON
        save_to_json(fetched_actors, "apify_actors.json")
        # Category-grouped markdown list
        generate_markdown_list(fetched_actors, "APIFY_ACTORS.md")
        # Flat "title|url" text list
        generate_simple_list(fetched_actors, "apify_actors_simple.txt")

        print("\n" + banner)
        print("Done! All files have been generated.")
        print(banner)

View File

@@ -1,213 +0,0 @@
/**
* Script to generate a comprehensive README.md with all Apify Actors organized by category
*/
const fs = require('fs');
// Load the actor dataset from apify_actors.json in the current working directory
const rawActorsJson = fs.readFileSync('apify_actors.json', 'utf-8');
const actors = JSON.parse(rawActorsJson);
console.log(`Processing ${actors.length} actors...`);
/**
 * Convert a raw category identifier (underscore-separated, e.g. "MCP_SERVERS")
 * into a human-readable heading and the matching in-page anchor.
 *
 * @param {string} category - Raw category identifier.
 * @returns {{readable: string, anchor: string}} `readable` is the title-cased
 *   heading text with known acronyms kept upper-case; `anchor` is `readable`
 *   lower-cased with whitespace replaced by hyphens.
 */
function formatCategoryName(category) {
  // Words that must stay fully upper-case instead of being title-cased
  const acronyms = new Set(['AI', 'MCP', 'SEO', 'API']);

  const readable = category
    .split('_')
    // Doubled, leading or trailing underscores yield empty segments; dropping
    // them keeps stray spaces out of the heading text.
    .filter(word => word.length > 0)
    .map(word => {
      const upper = word.toUpperCase();
      return acronyms.has(upper)
        ? upper
        : word.charAt(0).toUpperCase() + word.slice(1).toLowerCase();
    })
    .join(' ');

  // Create anchor (lowercase, replace spaces with hyphens).
  // GitHub automatically creates anchors from headers, so we match that format.
  const anchor = readable.toLowerCase().replace(/\s+/g, '-');

  return { readable, anchor };
}
// Organize actors by category.
// Both lookup tables are prototype-less objects so that a category whose name
// matches an Object.prototype member (e.g. "constructor") is treated as an
// ordinary key instead of resolving to an inherited value.
const actorsByCategory = Object.create(null);
const uncategorized = [];
// category -> Map(username -> Set of actor names already listed there).
// Gives a constant-time duplicate check instead of rescanning the category's
// whole array for every actor.
const seenByCategory = Object.create(null);

for (const actor of actors) {
  const categories = actor.categories || [];

  if (categories.length > 0) {
    for (const category of categories) {
      if (!actorsByCategory[category]) {
        actorsByCategory[category] = [];
        seenByCategory[category] = new Map();
      }

      // Avoid duplicates - an actor is identified by its (username, name) pair
      const namesByUser = seenByCategory[category];
      let seenNames = namesByUser.get(actor.username);
      if (!seenNames) {
        seenNames = new Set();
        namesByUser.set(actor.username, seenNames);
      }
      if (!seenNames.has(actor.name)) {
        seenNames.add(actor.name);
        actorsByCategory[category].push(actor);
      }
    }
  } else {
    uncategorized.push(actor);
  }
}

// Sort categories alphabetically
const sortedCategories = Object.keys(actorsByCategory).sort();
// Start the README: repository intro, Apify overview, statistics, and the
// pointers to the companion files, followed by the heading of the main list.
const readmeIntroParts = [
  `# API-mega-list\n\n`,
  `This GitHub repo is a powerhouse collection of APIs you can start using immediately to build everything from simple automations to full-scale applications. One of the most valuable API lists on GitHub—period. 💪\n\n`,
  `## 📦 Apify Actors Collection\n\n`,
  `This repository includes a comprehensive collection of **${actors.length.toLocaleString()} Apify Actors** (APIs) - ready-to-use web scraping and automation tools from the Apify platform.\n\n`,
  `### What are Apify Actors?\n\n`,
  `Apify Actors are pre-built web scraping and automation tools that can extract data from websites, automate workflows, and integrate with AI applications. Each actor is a ready-to-use API that you can run via the Apify platform.\n\n`,
  `### 📊 Statistics\n\n`,
  `- **Total APIs**: ${actors.length.toLocaleString()}\n`,
  `- **Categories**: ${sortedCategories.length}\n`,
  `- **All links include affiliate tracking** (\`?fpr=p2hrc6\`)\n\n`,
  `### 📁 Additional Files\n\n`,
  `- **[APIFY_ACTORS.md](APIFY_ACTORS.md)** - Complete markdown list of all actors organized by category (~8.3 MB)\n`,
  `- **[apify_actors.json](apify_actors.json)** - Full JSON dataset with all actor details (~11.6 MB)\n`,
  `- **[apify_actors_simple.txt](apify_actors_simple.txt)** - Simple text format with names and URLs (~996 KB)\n\n`,
  `---\n\n`,
  `## 📚 Complete API List by Category\n\n`,
];
// `content` accumulates the whole README; later sections append to it.
let content = readmeIntroParts.join('');
// Add table of contents: one entry per category linking to its section
// heading, plus an entry for uncategorized actors when there are any.
content += `### Table of Contents\n\n`;
for (const category of sortedCategories) {
  const count = actorsByCategory[category].length;
  // A single call yields both the link text and the anchor it points to
  const { readable, anchor } = formatCategoryName(category);
  content += `- [${readable}](#${anchor}) (${count.toLocaleString()} APIs)\n`;
}
if (uncategorized.length > 0) {
  content += `- [Uncategorized](#uncategorized) (${uncategorized.length.toLocaleString()} APIs)\n`;
}
content += `\n`;
// Descriptions longer than this many UTF-16 code units are shortened
const MAX_DESCRIPTION_LENGTH = 200;

/**
 * Shorten a description for display, preferring to cut at a word boundary.
 *
 * @param {string} description - Full description text.
 * @returns {string} The text unchanged when it fits, otherwise a trimmed
 *   prefix followed by '...'.
 */
function truncateDescription(description) {
  if (description.length <= MAX_DESCRIPTION_LENGTH) {
    return description;
  }

  // Find the last space before the max length to avoid cutting words
  let cutPoint = MAX_DESCRIPTION_LENGTH;
  const lastSpace = description.lastIndexOf(' ', MAX_DESCRIPTION_LENGTH);
  if (lastSpace > MAX_DESCRIPTION_LENGTH * 0.8) { // Only use word boundary if it's not too far back
    cutPoint = lastSpace;
  }

  // Never cut between the two halves of a surrogate pair (e.g. an emoji):
  // if the last kept code unit is a high surrogate, drop it as well.
  const lastKeptUnit = description.charCodeAt(cutPoint - 1);
  if (lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff) {
    cutPoint -= 1;
  }

  return description.substring(0, cutPoint).trim() + '...';
}

/**
 * Render one README section: separator, heading, actor count, and one bullet
 * per actor sorted by title (falling back to name).
 *
 * Note: `sectionActors` is sorted in place.
 *
 * @param {string} heading - Section heading text.
 * @param {Array<Object>} sectionActors - Actors to list in this section.
 * @returns {string} Markdown for the whole section.
 */
function renderActorSection(heading, sectionActors) {
  // Add horizontal rule before each section for better visibility
  let section = `---\n\n`;
  section += `## ${heading}\n\n`;
  section += `*${sectionActors.length.toLocaleString()} APIs*\n\n`;

  // Sort actors by title
  const sortedActors = sectionActors.sort((a, b) =>
    (a.title || a.name || '').localeCompare(b.title || b.name || '')
  );

  for (const actor of sortedActors) {
    const title = actor.title || actor.name || 'Unknown';
    const affiliateUrl = actor.affiliate_url || actor.url || '';
    // Collapse newlines and whitespace runs first: a blank line inside a
    // description would otherwise terminate the markdown list item.
    const description = (actor.description || '').replace(/\s+/g, ' ').trim();
    const shortDescription = truncateDescription(description);

    if (shortDescription) {
      section += `- **[${title}](${affiliateUrl})** - ${shortDescription}\n`;
    } else {
      section += `- **[${title}](${affiliateUrl})**\n`;
    }
  }

  section += `\n`;
  return section;
}

// Write categorized actors
for (const category of sortedCategories) {
  const { readable } = formatCategoryName(category);
  content += renderActorSection(readable, actorsByCategory[category]);
}

// Write uncategorized actors
if (uncategorized.length > 0) {
  content += renderActorSection('Uncategorized', uncategorized);
}
// Closing sections of the README: usage hints, notes, and the generation stamp.
// The date is the UTC calendar day taken from the ISO timestamp.
const [readmeGeneratedOn] = new Date().toISOString().split('T');
const readmeFooterParts = [
  `---\n\n`,
  `## 🚀 Usage\n\n`,
  `All links in this collection include affiliate tracking. When you click on any actor link, you'll be taken to the Apify platform where you can:\n\n`,
  `- View actor documentation\n`,
  `- Run actors via API\n`,
  `- Schedule automated runs\n`,
  `- Integrate with your applications\n\n`,
  `## 📝 Notes\n\n`,
  `- All APIs are sorted alphabetically within their categories\n`,
  `- Descriptions are truncated to 200 characters for readability\n`,
  `- For full descriptions and details, visit the individual API pages\n`,
  `- This list is automatically generated from the Apify Store API\n\n`,
  `---\n\n`,
  `*Last updated: ${readmeGeneratedOn}*\n`,
  `*Total APIs: ${actors.length.toLocaleString()}*\n`,
];
content += readmeFooterParts.join('');
// Persist the assembled document, then report what was generated
fs.writeFileSync('README.md', content, 'utf-8');
const generationSummary = [
  `✅ README.md generated successfully!`,
  ` - ${sortedCategories.length} categories`,
  ` - ${actors.length.toLocaleString()} total APIs`,
  ` - ${uncategorized.length.toLocaleString()} uncategorized APIs`,
];
for (const summaryLine of generationSummary) {
  console.log(summaryLine);
}