From 252fbb933b5adfbd3dee5fd6af7c7c0a67764ed4 Mon Sep 17 00:00:00 2001 From: Dusan Vystrcil Date: Fri, 20 Mar 2026 14:30:35 +0100 Subject: [PATCH 01/11] Remove mcpc dependency from apify-ultimate-scraper Replace mcpc CLI calls with zero-dependency Node.js scripts that call the Apify REST API directly. This removes the need to install @apify/mcpc. - Add fetch_actor_details.js (calls /v2/acts/ and /v2/acts/builds/) - Add search_actors.js (calls /v2/store search endpoint) - Update SKILL.md: remove mcpc prerequisite, update commands, add skip-for-simple-lookups and cost safety guidance to Step 3 - Remove mcpc error handling entry Co-Authored-By: Claude Opus 4.6 (1M context) --- skills/apify-ultimate-scraper/SKILL.md | 22 +-- .../reference/scripts/fetch_actor_details.js | 136 ++++++++++++++++++ .../reference/scripts/search_actors.js | 103 +++++++++++++ 3 files changed, 251 insertions(+), 10 deletions(-) create mode 100644 skills/apify-ultimate-scraper/reference/scripts/fetch_actor_details.js create mode 100644 skills/apify-ultimate-scraper/reference/scripts/search_actors.js diff --git a/skills/apify-ultimate-scraper/SKILL.md b/skills/apify-ultimate-scraper/SKILL.md index 5c62d30..4da4583 100644 --- a/skills/apify-ultimate-scraper/SKILL.md +++ b/skills/apify-ultimate-scraper/SKILL.md @@ -12,7 +12,6 @@ AI-driven data extraction from 55+ Actors across all major platforms. 
This skill - `.env` file with `APIFY_TOKEN` - Node.js 20.6+ (for native `--env-file` support) -- `mcpc` CLI tool: `npm install -g @apify/mcpc` ## Workflow @@ -21,7 +20,7 @@ Copy this checklist and track progress: ``` Task Progress: - [ ] Step 1: Understand user goal and select Actor -- [ ] Step 2: Fetch Actor schema via mcpc +- [ ] Step 2: Fetch Actor schema - [ ] Step 3: Ask user preferences (format, filename) - [ ] Step 4: Run the scraper script - [ ] Step 5: Summarize results and offer follow-ups @@ -149,35 +148,39 @@ For complex tasks, chain multiple Actors: If none of the Actors above match the user's request, search the Apify Store directly: ```bash -export $(grep APIFY_TOKEN .env | xargs) && mcpc --json mcp.apify.com --header "Authorization: Bearer $APIFY_TOKEN" tools-call search-actors keywords:="SEARCH_KEYWORDS" limit:=10 offset:=0 category:="" | jq -r '.content[0].text' +node ${CLAUDE_PLUGIN_ROOT}/reference/scripts/search_actors.js --query "SEARCH_KEYWORDS" ``` Replace `SEARCH_KEYWORDS` with 1-3 simple terms (e.g., "LinkedIn profiles", "Amazon products", "Twitter"). ### Step 2: Fetch Actor Schema -Fetch the Actor's input schema and details dynamically using mcpc: +Fetch the Actor's input schema and details: ```bash -export $(grep APIFY_TOKEN .env | xargs) && mcpc --json mcp.apify.com --header "Authorization: Bearer $APIFY_TOKEN" tools-call fetch-actor-details actor:="ACTOR_ID" | jq -r ".content" +node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/fetch_actor_details.js --actor "ACTOR_ID" ``` Replace `ACTOR_ID` with the selected Actor (e.g., `compass/crawler-google-places`). 
This returns: -- Actor description and README -- Required and optional input parameters -- Output fields (if available) +- Actor info (title, description, URL, categories, stats, rating) +- README summary +- Input schema (required and optional parameters) ### Step 3: Ask User Preferences -Before running, ask: +**Skip this step** for simple lookups (e.g., "what's Nike's follower count?", "find me 5 coffee shops in Prague") — just use quick answer mode and move to Step 4. + +For larger scraping tasks, ask: 1. **Output format**: - **Quick answer** - Display top few results in chat (no file saved) - **CSV** - Full export with all fields - **JSON** - Full export in JSON format 2. **Number of results**: Based on character of use case +**Cost safety**: Always set a sensible result limit in the Actor input (e.g., `maxResults`, `resultsLimit`, `maxCrawledPages`, or equivalent field from the input schema). Default to 100 results unless the user explicitly asks for more. Warn the user before running large scrapes (1000+ results) as they consume more Apify credits. + ### Step 4: Run the Script **Quick answer (display in chat, no file):** @@ -224,7 +227,6 @@ After completion, report: ## Error Handling `APIFY_TOKEN not found` - Ask user to create `.env` with `APIFY_TOKEN=your_token` -`mcpc not found` - Ask user to install `npm install -g @apify/mcpc` `Actor not found` - Check Actor ID spelling `Run FAILED` - Ask user to check Apify console link in error output `Timeout` - Reduce input size or increase `--timeout` diff --git a/skills/apify-ultimate-scraper/reference/scripts/fetch_actor_details.js b/skills/apify-ultimate-scraper/reference/scripts/fetch_actor_details.js new file mode 100644 index 0000000..d19b242 --- /dev/null +++ b/skills/apify-ultimate-scraper/reference/scripts/fetch_actor_details.js @@ -0,0 +1,136 @@ +#!/usr/bin/env node +/** + * Fetch Apify Actor details: README, input schema, and description. 
+ * + * Usage: + * node --env-file=.env scripts/fetch_actor_details.js --actor "apify/instagram-profile-scraper" + */ + +import { parseArgs } from 'node:util'; + +const USER_AGENT = 'apify-agent-skills/apify-ultimate-scraper-1.3.0'; + +function parseCliArgs() { + const options = { + actor: { type: 'string', short: 'a' }, + help: { type: 'boolean', short: 'h' }, + }; + + const { values } = parseArgs({ options, allowPositionals: false }); + + if (values.help) { + console.log(` +Fetch Apify Actor details (README, input schema, description) + +Usage: + node --env-file=.env scripts/fetch_actor_details.js --actor "ACTOR_ID" + +Options: + --actor, -a Actor ID (e.g., apify/instagram-profile-scraper) [required] + --help, -h Show this help message +`); + process.exit(0); + } + + if (!values.actor) { + console.error('Error: --actor is required'); + process.exit(1); + } + + return { actor: values.actor }; +} + +async function fetchActorInfo(token, actorId) { + const apiActorId = actorId.replace('/', '~'); + const url = `https://api.apify.com/v2/acts/${apiActorId}?token=${encodeURIComponent(token)}`; + + const response = await fetch(url, { + headers: { 'User-Agent': `${USER_AGENT}/fetch_actor_info` }, + }); + + if (response.status === 404) { + console.error(`Error: Actor '${actorId}' not found`); + process.exit(1); + } + + if (!response.ok) { + const text = await response.text(); + console.error(`Error: Failed to fetch actor info (${response.status}): ${text}`); + process.exit(1); + } + + return (await response.json()).data; +} + +async function fetchBuildDetails(token, actorId, buildId) { + const apiActorId = actorId.replace('/', '~'); + const url = `https://api.apify.com/v2/acts/${apiActorId}/builds/${buildId}?token=${encodeURIComponent(token)}`; + + const response = await fetch(url, { + headers: { 'User-Agent': `${USER_AGENT}/fetch_build` }, + }); + + if (!response.ok) { + return null; + } + + return (await response.json()).data; +} + +async function main() { + const args = 
parseCliArgs(); + + const token = process.env.APIFY_TOKEN; + if (!token) { + console.error('Error: APIFY_TOKEN not found in .env file'); + console.error('Add your token to .env: APIFY_TOKEN=your_token_here'); + console.error('Get your token: https://console.apify.com/account/integrations'); + process.exit(1); + } + + // Step 1: Get actor info (includes readmeSummary, taggedBuilds) + const actorInfo = await fetchActorInfo(token, args.actor); + + // Step 2: Get build details for input schema + const buildId = actorInfo.taggedBuilds?.latest?.buildId; + let inputSchema = null; + + if (buildId) { + const build = await fetchBuildDetails(token, args.actor, buildId); + if (build) { + const schemaRaw = build.inputSchema; + if (schemaRaw) { + inputSchema = typeof schemaRaw === 'string' ? JSON.parse(schemaRaw) : schemaRaw; + } + } + } + + // Compose output (matching mcpc fetch-actor-details structure) + const stats = actorInfo.stats || {}; + const output = { + actorId: args.actor, + title: actorInfo.title || null, + url: `https://apify.com/${args.actor}`, + description: actorInfo.description || null, + categories: actorInfo.categories || [], + isDeprecated: actorInfo.isDeprecated || false, + stats: { + totalUsers: stats.totalUsers || 0, + monthlyUsers: stats.totalUsers30Days || 0, + bookmarks: stats.bookmarkCount || 0, + }, + rating: { + average: stats.actorReviewRating || null, + count: stats.actorReviewCount || 0, + }, + readmeSummary: actorInfo.readmeSummary || null, + inputSchema: inputSchema || null, + }; + + console.log(JSON.stringify(output, null, 2)); +} + +main().catch((err) => { + console.error(`Error: ${err.message}`); + process.exit(1); +}); diff --git a/skills/apify-ultimate-scraper/reference/scripts/search_actors.js b/skills/apify-ultimate-scraper/reference/scripts/search_actors.js new file mode 100644 index 0000000..e96823b --- /dev/null +++ b/skills/apify-ultimate-scraper/reference/scripts/search_actors.js @@ -0,0 +1,103 @@ +#!/usr/bin/env node +/** + * Search 
Apify Store for Actors matching keywords. + * + * Usage: + * node --env-file=.env scripts/search_actors.js --query "instagram" + * node --env-file=.env scripts/search_actors.js --query "amazon products" --limit 5 + */ + +import { parseArgs } from 'node:util'; + +const USER_AGENT = 'apify-agent-skills/apify-ultimate-scraper-1.3.0'; + +function parseCliArgs() { + const options = { + query: { type: 'string', short: 'q' }, + limit: { type: 'string', short: 'l', default: '10' }, + help: { type: 'boolean', short: 'h' }, + }; + + const { values } = parseArgs({ options, allowPositionals: false }); + + if (values.help) { + console.log(` +Search Apify Store for Actors + +Usage: + node --env-file=.env scripts/search_actors.js --query "KEYWORDS" + +Options: + --query, -q Search keywords (e.g., "instagram", "amazon products") [required] + --limit, -l Max results to return (default: 10) + --help, -h Show this help message +`); + process.exit(0); + } + + if (!values.query) { + console.error('Error: --query is required'); + process.exit(1); + } + + return { + query: values.query, + limit: parseInt(values.limit, 10) || 10, + }; +} + +async function searchStore(query, limit) { + const params = new URLSearchParams({ search: query, limit: String(limit) }); + const url = `https://api.apify.com/v2/store?${params}`; + + const response = await fetch(url, { + headers: { 'User-Agent': `${USER_AGENT}/search_actors` }, + }); + + if (!response.ok) { + const text = await response.text(); + console.error(`Error: Store search failed (${response.status}): ${text}`); + process.exit(1); + } + + const result = await response.json(); + return result.data?.items || []; +} + +function formatResults(actors) { + if (actors.length === 0) { + console.log('No actors found.'); + return; + } + + console.log(`Found ${actors.length} actor(s):\n`); + + for (const actor of actors) { + const id = `${actor.username}/${actor.name}`; + const title = actor.title || id; + const desc = actor.description + ? 
actor.description.length > 120 + ? actor.description.slice(0, 120) + '...' + : actor.description + : 'No description'; + const runs = actor.stats?.totalRuns?.toLocaleString() || '0'; + const users = actor.stats?.totalUsers?.toLocaleString() || '0'; + + console.log(` ${id}`); + console.log(` Title: ${title}`); + console.log(` ${desc}`); + console.log(` Runs: ${runs} | Users: ${users}`); + console.log(); + } +} + +async function main() { + const args = parseCliArgs(); + const actors = await searchStore(args.query, args.limit); + formatResults(actors); +} + +main().catch((err) => { + console.error(`Error: ${err.message}`); + process.exit(1); +}); From 14b2c63cf350299983cbb5a21d84f98b443f5df4 Mon Sep 17 00:00:00 2001 From: Dusan Vystrcil Date: Fri, 20 Mar 2026 14:34:39 +0100 Subject: [PATCH 02/11] Update README for simplified repo structure Streamline README to reflect the three core skills (ultimate-scraper, actor-development, actorization) and point to awesome-skills for domain-specific marketing skills. Co-Authored-By: Claude Opus 4.6 (1M context) --- README.md | 99 ++++++++++++++----------------------------------------- 1 file changed, 25 insertions(+), 74 deletions(-) diff --git a/README.md b/README.md index 0f6f606..7d7d921 100644 --- a/README.md +++ b/README.md @@ -1,101 +1,52 @@ # Apify Agent Skills -Official Apify Agent Skills for web scraping, data extraction, and automation. Works with Claude Code, Cursor, Codex, Gemini CLI, and other AI coding assistants. +A collection of AI agent skills for web scraping, data extraction, and Actor development on the Apify platform. -## Available skills +> Looking for more specialized skills? Check out [apify/awesome-skills](https://github.com/apify/awesome-skills) — a community collection of domain-specific skills for lead generation, brand monitoring, competitor intelligence, and more. +> +> Using Cursor? See [apify/cursor-plugins](https://github.com/apify/cursor-plugins) for ready-to-install Cursor marketplace plugins. 
- -| Name | Description | Documentation | -|------|-------------|---------------| -| `apify-actor-development` | Develop, debug, and deploy Apify Actors - serverless cloud programs for web scraping, automation, and data processing | [SKILL.md](skills/apify-actor-development/SKILL.md) | -| `apify-actorization` | Convert existing projects into Apify Actors - serverless cloud programs. Actorize JavaScript/TypeScript (SDK with Actor.init/exit), Python (async context manager), or any language (CLI wrapper). Use when migrating code to Apify, wrapping CLI tools as Actors, or adding Actor SDK to existing projects. | [SKILL.md](skills/apify-actorization/SKILL.md) | -| `apify-audience-analysis` | Understand audience demographics, preferences, behavior patterns, and engagement quality across Facebook, Instagram, YouTube, and TikTok | [SKILL.md](skills/apify-audience-analysis/SKILL.md) | -| `apify-brand-reputation-monitoring` | Track reviews, ratings, sentiment, and brand mentions across Google Maps, Booking.com, TripAdvisor, Facebook, Instagram, YouTube, and TikTok | [SKILL.md](skills/apify-brand-reputation-monitoring/SKILL.md) | -| `apify-competitor-intelligence` | Analyze competitor strategies, content, pricing, ads, and market positioning across Google Maps, Booking.com, Facebook, Instagram, YouTube, and TikTok | [SKILL.md](skills/apify-competitor-intelligence/SKILL.md) | -| `apify-content-analytics` | Track engagement metrics, measure campaign ROI, and analyze content performance across Instagram, Facebook, YouTube, and TikTok | [SKILL.md](skills/apify-content-analytics/SKILL.md) | -| `apify-ecommerce` | Scrape e-commerce data for pricing intelligence, customer sentiment, product research, quality analysis, and supply chain monitoring across Amazon, Walmart, eBay, IKEA, and 50+ marketplaces | [SKILL.md](skills/apify-ecommerce/SKILL.md) | -| `apify-influencer-discovery` | Find and evaluate influencers for brand partnerships, verify authenticity, and track collaboration 
performance across Instagram, Facebook, YouTube, and TikTok | [SKILL.md](skills/apify-influencer-discovery/SKILL.md) | -| `apify-lead-generation` | Generate B2B/B2C leads by scraping Google Maps, websites, Instagram, TikTok, Facebook, LinkedIn, YouTube, and Google Search using Apify Actors | [SKILL.md](skills/apify-lead-generation/SKILL.md) | -| `apify-market-research` | Analyze market conditions, geographic opportunities, pricing, consumer behavior, and product validation across Google Maps, Facebook, Instagram, Booking.com, and TripAdvisor | [SKILL.md](skills/apify-market-research/SKILL.md) | -| `apify-trend-analysis` | Discover and track emerging trends across Google Trends, Instagram, Facebook, YouTube, and TikTok to inform content strategy | [SKILL.md](skills/apify-trend-analysis/SKILL.md) | -| `apify-ultimate-scraper` | Universal AI-powered web scraper for any platform. Scrape data from Instagram, Facebook, TikTok, YouTube, Google Maps, Google Search, Google Trends, Booking.com, and TripAdvisor for lead generation, brand monitoring, competitor analysis, influencer discovery, trend research, and more. | [SKILL.md](skills/apify-ultimate-scraper/SKILL.md) | - +## Skills -## Installation +### `apify-ultimate-scraper` -```bash -npx skills add apify/agent-skills -``` +Universal AI-powered web scraper for 55+ platforms. Scrape data from Instagram, Facebook, TikTok, YouTube, Google Maps, Amazon, Walmart, eBay, Booking.com, TripAdvisor, and more. -### Claude Code +**Use cases**: lead generation, brand monitoring, competitor analysis, influencer discovery, trend research, content analytics, audience analysis, e-commerce pricing, reviews. -```bash -# Add the marketplace -/plugin marketplace add https://github.com/apify/agent-skills +### `apify-actor-development` -# Install a skill -/plugin install apify-ultimate-scraper@apify-agent-skills -``` +Create, debug, and deploy Apify Actors from scratch in JavaScript, TypeScript, or Python. 
-### Cursor / Windsurf +### `apify-actorization` -Add to your project's `.cursor/settings.json` or use the same Claude Code plugin format. +Convert existing projects into Apify Actors — supports JS/TS (SDK), Python (async context manager), and any language (CLI wrapper). -### Codex / Gemini CLI +## Usage -Point your agent to the `agents/AGENTS.md` file which contains skill descriptions and paths: +Any AI tool that supports Markdown context can use these skills by pointing to the SKILL.md files: -```bash -# Gemini CLI uses gemini-extension.json automatically -# For Codex, reference agents/AGENTS.md in your configuration -``` +- `skills/apify-ultimate-scraper/SKILL.md` +- `skills/apify-actor-development/SKILL.md` +- `skills/apify-actorization/SKILL.md` -### Other AI tools - -Any AI tool that supports Markdown context can use the skills by pointing to: -- `agents/AGENTS.md` - auto-generated skill index -- `skills/*/SKILL.md` - individual skill documentation +For Codex and Gemini CLI, use the auto-generated index at `agents/AGENTS.md`. ## Prerequisites -1. **Apify account** - [apify.com](https://apify.com) -2. **API token** - get from [Apify Console](https://console.apify.com/account/integrations), add `APIFY_TOKEN=your_token` to `.env` -3. **Node.js 20.6+** -4. **[mcpc CLI](https://github.com/apify/mcp-cli)** - `npm install -g @apify/mcpc` - -## Output formats - -- **Quick answer** - top 5 results displayed in chat (no file saved) -- **CSV** - full export with all fields -- **JSON** - full data export +1. **Apify account** — [apify.com](https://apify.com) +2. **API token** — get from [Apify Console](https://console.apify.com/account/integrations), add `APIFY_TOKEN=your_token` to `.env` +3. **Node.js 20.6+** (for the scraper skill) ## Pricing Apify Actors use pay-per-result pricing. Check individual Actor pricing on the [Apify platform](https://apify.com). -## Contributing - -1. Fork this repository. -2. Create your skill in `skills/your-skill-name/`. -3. 
Add `SKILL.md` with proper frontmatter: - ```yaml - --- - name: your-skill-name - description: What your skill does and when to use it - --- - ``` -4. Add entry to `.claude-plugin/marketplace.json`. -5. Run `uv run scripts/generate_agents.py` to update AGENTS.md. -6. Submit a pull request. - -## Development - -```bash -# Regenerate AGENTS.md and validate marketplace.json -uv run scripts/generate_agents.py -``` - ## Support - [Apify Documentation](https://docs.apify.com) - [Apify Discord](https://discord.gg/jyEM2PRvMU) + +## License + +[Apache-2.0](LICENSE) From d753dca1ce55192d95e48b16222c4777e40b70fc Mon Sep 17 00:00:00 2001 From: Dusan Vystrcil Date: Mon, 23 Mar 2026 10:14:13 +0100 Subject: [PATCH 03/11] feat: simplify repo to 3 core skills, remove mcpc and plugin infrastructure Remove domain-specific skills (audience-analysis, brand-monitoring, etc.) that duplicated the universal scraper. Strip plugin manifests, marketplace JSON, CI workflow, and version-bumping logic. Keep only apify-ultimate-scraper, apify-actor-development, and apify-actorization. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude-plugin/marketplace.json | 281 ------------- .claude-plugin/plugin.json | 19 - .github/workflows/generate-agents.yml | 30 -- .gitignore | 3 - CLAUDE.md | 25 ++ agents/AGENTS.md | 18 - commands/create-actor.md | 232 ----------- gemini-extension.json | 6 - scripts/generate_agents.py | 317 +-------------- skills/apify-audience-analysis/SKILL.md | 121 ------ .../reference/scripts/run_actor.js | 363 ----------------- .../SKILL.md | 121 ------ .../reference/scripts/run_actor.js | 363 ----------------- skills/apify-competitor-intelligence/SKILL.md | 131 ------- .../reference/scripts/run_actor.js | 363 ----------------- skills/apify-content-analytics/SKILL.md | 120 ------ .../reference/scripts/run_actor.js | 363 ----------------- skills/apify-ecommerce/SKILL.md | 263 ------------- .../reference/scripts/package.json | 3 - .../reference/scripts/run_actor.js | 369 ------------------ skills/apify-influencer-discovery/SKILL.md | 118 ------ .../reference/scripts/run_actor.js | 363 ----------------- skills/apify-lead-generation/SKILL.md | 120 ------ .../reference/scripts/run_actor.js | 363 ----------------- skills/apify-market-research/SKILL.md | 119 ------ .../reference/scripts/run_actor.js | 363 ----------------- skills/apify-trend-analysis/SKILL.md | 122 ------ .../reference/scripts/run_actor.js | 363 ----------------- .../reference/scripts/fetch_actor_details.js | 2 +- 29 files changed, 28 insertions(+), 5416 deletions(-) delete mode 100644 .claude-plugin/marketplace.json delete mode 100644 .claude-plugin/plugin.json delete mode 100644 .github/workflows/generate-agents.yml create mode 100644 CLAUDE.md delete mode 100644 commands/create-actor.md delete mode 100644 gemini-extension.json delete mode 100644 skills/apify-audience-analysis/SKILL.md delete mode 100644 skills/apify-audience-analysis/reference/scripts/run_actor.js delete mode 100644 skills/apify-brand-reputation-monitoring/SKILL.md delete mode 100644 
skills/apify-brand-reputation-monitoring/reference/scripts/run_actor.js delete mode 100644 skills/apify-competitor-intelligence/SKILL.md delete mode 100644 skills/apify-competitor-intelligence/reference/scripts/run_actor.js delete mode 100644 skills/apify-content-analytics/SKILL.md delete mode 100644 skills/apify-content-analytics/reference/scripts/run_actor.js delete mode 100644 skills/apify-ecommerce/SKILL.md delete mode 100644 skills/apify-ecommerce/reference/scripts/package.json delete mode 100644 skills/apify-ecommerce/reference/scripts/run_actor.js delete mode 100644 skills/apify-influencer-discovery/SKILL.md delete mode 100644 skills/apify-influencer-discovery/reference/scripts/run_actor.js delete mode 100644 skills/apify-lead-generation/SKILL.md delete mode 100644 skills/apify-lead-generation/reference/scripts/run_actor.js delete mode 100644 skills/apify-market-research/SKILL.md delete mode 100644 skills/apify-market-research/reference/scripts/run_actor.js delete mode 100644 skills/apify-trend-analysis/SKILL.md delete mode 100644 skills/apify-trend-analysis/reference/scripts/run_actor.js diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json deleted file mode 100644 index fea1730..0000000 --- a/.claude-plugin/marketplace.json +++ /dev/null @@ -1,281 +0,0 @@ -{ - "name": "apify-agent-skills", - "owner": { - "name": "Apify", - "email": "support@apify.com" - }, - "metadata": { - "description": "Official Apify Agent Skills for web scraping, data extraction, and automation", - "version": "1.6.1" - }, - "plugins": [ - { - "name": "apify-lead-generation", - "source": "./skills/apify-lead-generation", - "skills": "./", - "description": "Generate B2B/B2C leads by scraping Google Maps, websites, Instagram, TikTok, Facebook, LinkedIn, YouTube, and Google Search using Apify Actors", - "keywords": [ - "leads", - "sales", - "prospecting", - "b2b", - "b2c", - "scraping", - "apify", - "google-maps", - "instagram", - "tiktok", - "facebook", - 
"linkedin", - "youtube" - ], - "category": "data-extraction", - "version": "1.1.12" - }, - { - "name": "apify-brand-reputation-monitoring", - "source": "./skills/apify-brand-reputation-monitoring", - "skills": "./", - "description": "Track reviews, ratings, sentiment, and brand mentions across Google Maps, Booking.com, TripAdvisor, Facebook, Instagram, YouTube, and TikTok", - "keywords": [ - "reputation", - "reviews", - "sentiment", - "monitoring", - "brand", - "ratings", - "google-maps", - "booking", - "tripadvisor", - "facebook", - "instagram", - "youtube", - "tiktok" - ], - "category": "data-extraction", - "version": "1.0.2" - }, - { - "name": "apify-competitor-intelligence", - "source": "./skills/apify-competitor-intelligence", - "skills": "./", - "description": "Analyze competitor strategies, content, pricing, ads, and market positioning across Google Maps, Booking.com, Facebook, Instagram, YouTube, and TikTok", - "keywords": [ - "competitor", - "intelligence", - "analysis", - "benchmarking", - "strategy", - "ads", - "google-maps", - "booking", - "facebook", - "instagram", - "youtube", - "tiktok" - ], - "category": "data-extraction", - "version": "1.1.2" - }, - { - "name": "apify-market-research", - "source": "./skills/apify-market-research", - "skills": "./", - "description": "Analyze market conditions, geographic opportunities, pricing, consumer behavior, and product validation across Google Maps, Facebook, Instagram, Booking.com, and TripAdvisor", - "keywords": [ - "market", - "research", - "analysis", - "pricing", - "geographic", - "validation", - "trends", - "google-maps", - "facebook", - "instagram", - "booking", - "tripadvisor" - ], - "category": "data-extraction", - "version": "1.0.2" - }, - { - "name": "apify-influencer-discovery", - "source": "./skills/apify-influencer-discovery", - "skills": "./", - "description": "Find and evaluate influencers for brand partnerships, verify authenticity, and track collaboration performance across Instagram, 
Facebook, YouTube, and TikTok", - "keywords": [ - "influencer", - "discovery", - "partnership", - "creator", - "collaboration", - "authenticity", - "instagram", - "facebook", - "youtube", - "tiktok" - ], - "category": "data-extraction", - "version": "1.0.1" - }, - { - "name": "apify-trend-analysis", - "source": "./skills/apify-trend-analysis", - "skills": "./", - "description": "Discover and track emerging trends across Google Trends, Instagram, Facebook, YouTube, and TikTok to inform content strategy", - "keywords": [ - "trends", - "analysis", - "hashtags", - "viral", - "discovery", - "content", - "google-trends", - "instagram", - "facebook", - "youtube", - "tiktok" - ], - "category": "data-extraction", - "version": "1.0.1" - }, - { - "name": "apify-content-analytics", - "source": "./skills/apify-content-analytics", - "skills": "./", - "description": "Track engagement metrics, measure campaign ROI, and analyze content performance across Instagram, Facebook, YouTube, and TikTok", - "keywords": [ - "analytics", - "engagement", - "performance", - "metrics", - "ROI", - "content", - "instagram", - "facebook", - "youtube", - "tiktok" - ], - "category": "data-extraction", - "version": "1.0.1" - }, - { - "name": "apify-audience-analysis", - "source": "./skills/apify-audience-analysis", - "skills": "./", - "description": "Understand audience demographics, preferences, behavior patterns, and engagement quality across Facebook, Instagram, YouTube, and TikTok", - "keywords": [ - "audience", - "demographics", - "behavior", - "engagement", - "analysis", - "followers", - "facebook", - "instagram", - "youtube", - "tiktok" - ], - "category": "data-extraction", - "version": "1.0.1" - }, - { - "name": "apify-ultimate-scraper", - "source": "./skills/apify-ultimate-scraper", - "skills": "./", - "description": "Universal AI-powered web scraper for any platform. 
Scrape data from Instagram, Facebook, TikTok, YouTube, Google Maps, Google Search, Google Trends, Booking.com, and TripAdvisor for lead generation, brand monitoring, competitor analysis, influencer discovery, trend research, and more", - "keywords": [ - "scraper", - "universal", - "instagram", - "facebook", - "tiktok", - "youtube", - "google-maps", - "leads", - "monitoring", - "competitor", - "trends", - "influencer" - ], - "category": "data-extraction", - "version": "1.4.1" - }, - { - "name": "apify-ecommerce", - "source": "./skills/apify-ecommerce", - "skills": "./", - "description": "Scrape e-commerce data for pricing intelligence, customer sentiment, product research, quality analysis, and supply chain monitoring across Amazon, Walmart, eBay, IKEA, and 50+ marketplaces", - "keywords": [ - "ecommerce", - "pricing", - "reviews", - "sentiment", - "products", - "sellers", - "amazon", - "walmart", - "ebay", - "MAP", - "competitor", - "research", - "supply-chain" - ], - "category": "data-extraction", - "version": "1.0.0" - }, - { - "name": "apify-actor-development", - "source": "./skills/apify-actor-development", - "skills": "./", - "description": "Develop, debug, and deploy Apify Actors - serverless cloud programs for web scraping, automation, and data processing", - "keywords": [ - "apify", - "actor", - "web-scraping", - "automation", - "crawlee", - "playwright", - "cheerio", - "serverless", - "development" - ], - "category": "development", - "version": "1.0.0" - }, - { - "name": "apify-actor-commands", - "description": "Commands for Apify Actor development workflow", - "version": "1.0.0", - "author": { - "name": "Apify", - "email": "support@apify.com" - }, - "source": "./", - "category": "development", - "commands": [ - "./commands/create-actor.md" - ] - }, - { - "name": "apify-actorization", - "source": "./skills/apify-actorization", - "skills": "./", - "description": "Convert existing projects into Apify Actors - serverless cloud programs. 
Actorize JavaScript/TypeScript (SDK with Actor.init/exit), Python (async context manager), or any language (CLI wrapper). Use when migrating code to Apify, wrapping CLI tools as Actors, or adding Actor SDK to existing projects.", - "keywords": [ - "actorization", - "convert", - "migrate", - "actor", - "apify", - "sdk", - "deployment", - "crawlee", - "serverless" - ], - "category": "development", - "version": "1.0.0" - } - ] -} diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json deleted file mode 100644 index 4f11558..0000000 --- a/.claude-plugin/plugin.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "name": "apify-agent-skills", - "version": "1.6.1", - "description": "Official Apify agent skills for web scraping, data extraction, and automation", - "author": { - "name": "Apify", - "email": "support@apify.com" - }, - "homepage": "https://github.com/apify/agent-skills", - "repository": "https://github.com/apify/agent-skills", - "license": "Apache-2.0", - "keywords": [ - "apify", - "scraping", - "automation", - "leads", - "data-extraction" - ] -} diff --git a/.github/workflows/generate-agents.yml b/.github/workflows/generate-agents.yml deleted file mode 100644 index 5530fde..0000000 --- a/.github/workflows/generate-agents.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: Check AGENTS.md and marketplace.json - -on: - pull_request: - paths: - - "scripts/AGENTS_TEMPLATE.md" - - "scripts/generate_agents.py" - - "**/SKILL.md" - - "agents/AGENTS.md" - - ".claude-plugin/marketplace.json" - -jobs: - validate: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Set up uv - uses: astral-sh/setup-uv@v4 - - - name: Generate AGENTS.md and validate marketplace.json - run: uv run scripts/generate_agents.py - - - name: Ensure AGENTS.md is up to date - run: | - git diff --quiet -- agents/AGENTS.md || { - echo "::error::agents/AGENTS.md is outdated. Run 'uv run scripts/generate_agents.py' and commit the changes." 
- exit 1 - } diff --git a/.gitignore b/.gitignore index 2f157f1..9663ddc 100644 --- a/.gitignore +++ b/.gitignore @@ -4,10 +4,7 @@ # DEVELOPMENT .vscode .claude -context/ -CLAUDE.md PLAN.md PLAN-*.md .mcp.json -scripts/hooks/ .idea diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..2486e15 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,25 @@ +This repository contains official Apify agent skills for web scraping, data extraction, and automation. + +## Structure + +- `skills/apify-ultimate-scraper/` - Universal web scraper skill (55+ platforms) +- `skills/apify-actor-development/` - Actor development skill (JS/TS/Python) +- `skills/apify-actorization/` - Actorization skill (convert projects to Actors) +- `agents/AGENTS.md` - Auto-generated skill index for Codex/Gemini CLI +- `scripts/generate_agents.py` - Script to regenerate AGENTS.md + +## Development + +Run `uv run scripts/generate_agents.py` to regenerate `agents/AGENTS.md`. + +## Committing Changes + +Use conventional commit messages (feat:, fix:, docs:, chore:, etc.). 
+ +## Apify MCP + +APIFY_TOKEN can be found in the `.env` file. + +You also have access to the Apify MCP server - it is useful when you need to search for Apify Actors or look up their input schemas. + +IMPORTANT: always write Apify Actor names in the format "author/Actor" (never "author~Actor") diff --git a/agents/AGENTS.md b/agents/AGENTS.md index 5de9e8b..bffa49d 100644 --- a/agents/AGENTS.md +++ b/agents/AGENTS.md @@ -5,15 +5,6 @@ You have additional SKILLs documented in directories containing a "SKILL.md" fil These skills are: - apify-actor-development -> "skills/apify-actor-development/SKILL.md" - apify-actorization -> "skills/apify-actorization/SKILL.md" - - apify-audience-analysis -> "skills/apify-audience-analysis/SKILL.md" - - apify-brand-reputation-monitoring -> "skills/apify-brand-reputation-monitoring/SKILL.md" - - apify-competitor-intelligence -> "skills/apify-competitor-intelligence/SKILL.md" - - apify-content-analytics -> "skills/apify-content-analytics/SKILL.md" - - apify-ecommerce -> "skills/apify-ecommerce/SKILL.md" - - apify-influencer-discovery -> "skills/apify-influencer-discovery/SKILL.md" - - apify-lead-generation -> "skills/apify-lead-generation/SKILL.md" - - apify-market-research -> "skills/apify-market-research/SKILL.md" - - apify-trend-analysis -> "skills/apify-trend-analysis/SKILL.md" - apify-ultimate-scraper -> "skills/apify-ultimate-scraper/SKILL.md" IMPORTANT: You MUST read the SKILL.md file whenever the description of the skills matches the user intent, or may help accomplish their task. @@ -22,15 +13,6 @@ IMPORTANT: You MUST read the SKILL.md file whenever the description of the skill apify-actor-development: `Develop, debug, and deploy Apify Actors - serverless cloud programs for web scraping, automation, and data processing. Use when creating new Actors, modifying existing ones, or troubleshooting Actor code.` apify-actorization: `Convert existing projects into Apify Actors - serverless cloud programs. 
Actorize JavaScript/TypeScript (SDK with Actor.init/exit), Python (async context manager), or any language (CLI wrapper). Use when migrating code to Apify, wrapping CLI tools as Actors, or adding Actor SDK to existing projects.` -apify-audience-analysis: `Understand audience demographics, preferences, behavior patterns, and engagement quality across Facebook, Instagram, YouTube, and TikTok.` -apify-brand-reputation-monitoring: `Track reviews, ratings, sentiment, and brand mentions across Google Maps, Booking.com, TripAdvisor, Facebook, Instagram, YouTube, and TikTok. Use when user asks to monitor brand reputation, analyze reviews, track mentions, or gather customer feedback.` -apify-competitor-intelligence: `Analyze competitor strategies, content, pricing, ads, and market positioning across Google Maps, Booking.com, Facebook, Instagram, YouTube, and TikTok.` -apify-content-analytics: `Track engagement metrics, measure campaign ROI, and analyze content performance across Instagram, Facebook, YouTube, and TikTok.` -apify-ecommerce: `Scrape e-commerce data for pricing intelligence, customer reviews, and seller discovery across Amazon, Walmart, eBay, IKEA, and 50+ marketplaces. Use when user asks to monitor prices, track competitors, analyze reviews, research products, or find sellers.` -apify-influencer-discovery: `Find and evaluate influencers for brand partnerships, verify authenticity, and track collaboration performance across Instagram, Facebook, YouTube, and TikTok.` -apify-lead-generation: `Generates B2B/B2C leads by scraping Google Maps, websites, Instagram, TikTok, Facebook, LinkedIn, YouTube, and Google Search. 
Use when user asks to find leads, prospects, businesses, build lead lists, enrich contacts, or scrape profiles for sales outreach.` -apify-market-research: `Analyze market conditions, geographic opportunities, pricing, consumer behavior, and product validation across Google Maps, Facebook, Instagram, Booking.com, and TripAdvisor.` -apify-trend-analysis: `Discover and track emerging trends across Google Trends, Instagram, Facebook, YouTube, and TikTok to inform content strategy.` apify-ultimate-scraper: `Universal AI-powered web scraper for any platform. Scrape data from Instagram, Facebook, TikTok, YouTube, Google Maps, Google Search, Google Trends, Booking.com, and TripAdvisor. Use for lead generation, brand monitoring, competitor analysis, influencer discovery, trend research, content analytics, audience analysis, or any data extraction task.` diff --git a/commands/create-actor.md b/commands/create-actor.md deleted file mode 100644 index 9f886f8..0000000 --- a/commands/create-actor.md +++ /dev/null @@ -1,232 +0,0 @@ ---- -description: Guided Apify Actor development with best practices and systematic workflow -argument-hint: Optional actor description ---- - -# Actor Development - -You are helping a developer create an Apify Actor - a serverless cloud program for web scraping, automation, and data processing. Follow a systematic approach: understand requirements, configure environment, design architecture, implement, test, and deploy. 
- -## Core Principles - -- **Ask clarifying questions**: Identify target websites, data requirements, edge cases, and constraints before implementation -- **Follow Apify best practices**: Use appropriate crawlers (Cheerio vs Playwright), implement proper error handling, respect rate limits -- **Validate early**: Check CLI installation and authentication before starting -- **Use TodoWrite**: Track all progress throughout -- **Security first**: Use `apify/log` for censoring sensitive data, validate input, handle errors gracefully - ---- - -## Phase 1: Discovery - -**Goal**: Understand what actor needs to be built - -Initial request: $ARGUMENTS - -**Actions**: -1. Create todo list with all phases -2. Ask user for clarification if needed: - - What is the actor's primary purpose? (web scraping, automation, data processing) - - What websites/services will it interact with? - - What data should it extract or what actions should it perform? - - Any specific requirements or constraints? -3. Summarize understanding and confirm with user - ---- - -## Phase 2: Environment Setup - -**Goal**: Verify Apify CLI is installed and authenticated - -**CRITICAL**: Do not proceed without proper setup - -**Actions**: -1. Check if Apify CLI is installed: `apify --help` -2. If not installed, guide user to install: - ```bash - curl -fsSL https://apify.com/install-cli.sh | bash - # Or: brew install apify-cli (Mac) - # Or: npm install -g apify-cli - ``` -3. Verify authentication: `apify info` -4. If not logged in: - - Check for APIFY_TOKEN environment variable - - If missing, ask user to generate token at https://console.apify.com/settings/integrations - - Login with: `apify login -t $APIFY_TOKEN` - ---- - -## Phase 3: Language Selection - -**Goal**: Choose programming language and template - -**Actions**: -1. 
**Ask user which language they prefer:** - - JavaScript (skills/apify-actor-development/references/actor-template-js.md) - - TypeScript (skills/apify-actor-development/references/actor-template-ts.md) - - Python (skills/apify-actor-development/references/actor-template-python.md) -2. Note: Additional packages (Crawlee, Playwright, etc.) can be installed later as needed - ---- - -## Phase 4: Requirements & Architecture Design - -**Goal**: Define input/output schemas and implementation approach - -**Actions**: -1. Clarify detailed requirements: - - What input parameters should the actor accept? - - What output format is needed? (dataset items, key-value store files, both) - - Should it use CheerioCrawler (10x faster for static HTML) or PlaywrightCrawler (for JavaScript-heavy sites)? - - Concurrency settings? (HTTP: 10-50, Browser: 1-5) - - Rate limiting and retry strategies? - - Should standby mode be enabled? -2. Design architecture: - - Input schema structure - - Output/dataset schema structure - - Key-value store schema (if needed) - - Error handling approach - - Data validation and cleaning strategy -3. Present architecture to user and get approval - ---- - -## Phase 5: Actor Creation - -**Goal**: Create actor from template and configure schemas - -**DO NOT START WITHOUT USER APPROVAL** - -**Actions**: -1. Wait for explicit user approval -2. Copy appropriate language template from `skills/apify-actor-development/references/` directory -3. Update `.actor/actor.json`: - - Set actor name and version - - **IMPORTANT**: Fill in `generatedBy` property with current model name - - Configure runtime, memory, timeout - - Set `usesStandbyMode` if applicable -4. Create/update `.actor/input_schema.json` with input parameters -5. Create/update `.actor/output_schema.json` with output structure -6. Create/update `.actor/dataset_schema.json` if using datasets -7. Create/update `.actor/key_value_store_schema.json` if using key-value store -8. 
Update todos as you progress - -**Reference documentation:** -- [skills/apify-actor-development/references/actor-json.md](skills/apify-actor-development/references/actor-json.md) -- [skills/apify-actor-development/references/input-schema.md](skills/apify-actor-development/references/input-schema.md) -- [skills/apify-actor-development/references/output-schema.md](skills/apify-actor-development/references/output-schema.md) -- [skills/apify-actor-development/references/dataset-schema.md](skills/apify-actor-development/references/dataset-schema.md) -- [skills/apify-actor-development/references/key-value-store-schema.md](skills/apify-actor-development/references/key-value-store-schema.md) - ---- - -## Phase 6: Implementation - -**Goal**: Implement actor logic following best practices - -**Actions**: -1. Implement actor code in `src/main.py`, `src/main.js`, or `src/main.ts` -2. Follow best practices: - - ✓ Use Apify SDK (`apify`) for code running on Apify platform - - ✓ Validate input early with proper error handling - - ✓ Use CheerioCrawler for static HTML (10x faster) - - ✓ Use PlaywrightCrawler only for JavaScript-heavy sites - - ✓ Use router pattern for complex crawls - - ✓ Implement retry strategies with exponential backoff - - ✓ Use proper concurrency settings - - ✓ Clean and validate data before pushing to dataset - - ✓ **Always use `apify/log` package** - censors sensitive data - - ✓ Implement readiness probe handler if using standby mode - - ✗ Don't use browser crawlers when HTTP/Cheerio works - - ✗ Don't hard code values that should be in input schema - - ✗ Don't skip input validation or error handling - - ✗ Don't overload servers - use appropriate concurrency and delays -3. Implement standby mode readiness probe if `usesStandbyMode: true` (see [skills/apify-actor-development/references/standby-mode.md](skills/apify-actor-development/references/standby-mode.md)) -4. 
Use proper logging (see [skills/apify-actor-development/references/logging.md](skills/apify-actor-development/references/logging.md)) -5. Update todos as you progress - ---- - -## Phase 7: Documentation - -**Goal**: Create comprehensive README for marketplace - -**Actions**: -1. Create README.md with: - - Clear description of what the actor does - - Input parameters with examples - - Output format with examples - - Usage instructions - - Limitations and known issues - - Example runs -2. Include code examples for common use cases -3. Mention rate limits, costs, or legal considerations if applicable - ---- - -## Phase 8: Local Testing - -**Goal**: Test actor locally before deployment - -**Actions**: -1. Install dependencies: - - JavaScript/TypeScript: `npm install` - - Python: `pip install -r requirements.txt` -2. Create test input file at `storage/key_value_stores/default/INPUT.json` with sample parameters -3. Run actor locally: `apify run` -4. Verify: - - Input is parsed correctly - - Actor completes successfully - - Output is in expected format - - Error handling works - - Logging is appropriate -5. Fix any issues found -6. Test edge cases and error scenarios - ---- - -## Phase 9: Deployment - -**Goal**: Deploy actor to Apify platform - -**DO NOT DEPLOY WITHOUT USER APPROVAL** - -**Actions**: -1. **Ask user if they want to deploy now** -2. If yes, deploy with: `apify push` -3. Actor will be deployed with name from `.actor/actor.json` -4. Provide user with: - - Deployment confirmation - - Actor URL on Apify platform - - Instructions for running on platform - ---- - -## Phase 10: Summary - -**Goal**: Document what was accomplished - -**Actions**: -1. Mark all todos complete -2. 
Summarize: - - What actor was built - - Key features and capabilities - - Input/output schemas - - Files created/modified - - Deployment status - - Suggested next steps (testing on platform, publishing to store, monitoring) - ---- - -## Additional Resources - -**MCP Tools** (if configured): -- `search-apify-docs` - Search documentation -- `fetch-apify-docs` - Get full doc pages - -**Documentation:** -- [docs.apify.com/llms.txt](https://docs.apify.com/llms.txt) - Apify quick reference -- [docs.apify.com/llms-full.txt](https://docs.apify.com/llms-full.txt) - Apify complete docs -- [crawlee.dev/llms.txt](https://crawlee.dev/llms.txt) - Crawlee quick reference -- [crawlee.dev/llms-full.txt](https://crawlee.dev/llms-full.txt) - Crawlee complete docs -- [whitepaper.actor](https://raw.githubusercontent.com/apify/actor-whitepaper/refs/heads/master/README.md) - Complete Actor specification diff --git a/gemini-extension.json b/gemini-extension.json deleted file mode 100644 index e9daa47..0000000 --- a/gemini-extension.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "name": "apify-agent-skills", - "description": "Provides access to Apify Agent Skills for web scraping, data extraction, and automation.", - "version": "1.0.0", - "contextFileName": "agents/AGENTS.md" -} diff --git a/scripts/generate_agents.py b/scripts/generate_agents.py index d463bf9..461ef4f 100644 --- a/scripts/generate_agents.py +++ b/scripts/generate_agents.py @@ -5,40 +5,19 @@ # /// """Generate AGENTS.md from AGENTS_TEMPLATE.md and SKILL.md frontmatter. -Also validates that marketplace.json is in sync with discovered skills, -updates the skills table in README.md, and handles version bumping. - -Version bumping (conventional commits): - - BREAKING CHANGE: or feat!: → major bump (1.0.0 → 2.0.0) - - feat: → minor bump (1.0.0 → 1.1.0) - - fix:, docs:, chore:, etc. 
→ patch bump (1.0.0 → 1.0.1) - Usage: - uv run scripts/generate_agents.py # Just regenerate - uv run scripts/generate_agents.py --bump "feat: X" # Bump based on commit msg + uv run scripts/generate_agents.py """ from __future__ import annotations -import argparse -import json import re -import subprocess -import sys from pathlib import Path ROOT = Path(__file__).resolve().parent.parent TEMPLATE_PATH = ROOT / "scripts" / "AGENTS_TEMPLATE.md" OUTPUT_PATH = ROOT / "agents" / "AGENTS.md" -MARKETPLACE_PATH = ROOT / ".claude-plugin" / "marketplace.json" -PLUGIN_PATH = ROOT / ".claude-plugin" / "plugin.json" -README_PATH = ROOT / "README.md" -SKILLS_DIR = ROOT / "skills" - -# Markers for the auto-generated skills table in README -README_TABLE_START = "" -README_TABLE_END = "" def load_template() -> str: @@ -97,286 +76,7 @@ def repl(match: re.Match[str]) -> str: return content -def load_marketplace() -> dict: - """Load marketplace.json and return parsed structure.""" - if not MARKETPLACE_PATH.exists(): - raise FileNotFoundError(f"marketplace.json not found at {MARKETPLACE_PATH}") - return json.loads(MARKETPLACE_PATH.read_text(encoding="utf-8")) - - -def generate_readme_table(skills: list[dict[str, str]]) -> str: - """Generate the skills table for README.md using marketplace.json names.""" - marketplace = load_marketplace() - plugins = {p["source"]: p for p in marketplace.get("plugins", [])} - - lines = [ - "| Name | Description | Documentation |", - "|------|-------------|---------------|", - ] - - for skill in skills: - source = f"./{skill['path']}" - plugin = plugins.get(source, {}) - name = plugin.get("name", skill["name"]) - description = plugin.get("description", skill["description"]) - doc_link = f"[SKILL.md]({skill['path']}/SKILL.md)" - lines.append(f"| `{name}` | {description} | {doc_link} |") - - return "\n".join(lines) - - -def update_readme(skills: list[dict[str, str]]) -> bool: - """ - Update the README.md skills table between markers. 
- Returns True if the file was updated, False if markers not found. - """ - if not README_PATH.exists(): - print(f"Warning: README.md not found at {README_PATH}", file=sys.stderr) - return False - - content = README_PATH.read_text(encoding="utf-8") - - start_idx = content.find(README_TABLE_START) - end_idx = content.find(README_TABLE_END) - - if start_idx == -1 or end_idx == -1: - print( - f"Warning: README.md markers not found. Add {README_TABLE_START} and " - f"{README_TABLE_END} to enable table generation.", - file=sys.stderr, - ) - return False - - if end_idx < start_idx: - print("Warning: README.md markers are in wrong order.", file=sys.stderr) - return False - - table = generate_readme_table(skills) - new_content = ( - content[: start_idx + len(README_TABLE_START)] - + "\n" - + table - + "\n" - + content[end_idx:] - ) - - README_PATH.write_text(new_content, encoding="utf-8") - return True - - -def validate_marketplace(skills: list[dict[str, str]]) -> list[str]: - """ - Validate marketplace.json against discovered skills. - Returns list of error messages (empty = passed). 
- """ - errors: list[str] = [] - marketplace = load_marketplace() - plugins = marketplace.get("plugins", []) - - # Build lookups (normalize paths: skill uses "skills/x", marketplace uses "./skills/x") - skill_by_source = {f"./{s['path']}": s for s in skills} - plugin_by_source = {p["source"]: p for p in plugins} - - # Check: every skill has a marketplace entry with matching name - for skill in skills: - expected_source = f"./{skill['path']}" - if expected_source not in plugin_by_source: - errors.append( - f"Skill '{skill['name']}' at '{skill['path']}' is missing from marketplace.json" - ) - elif plugin_by_source[expected_source]["name"] != skill["name"]: - errors.append( - f"Name mismatch at '{expected_source}': " - f"SKILL.md='{skill['name']}', marketplace.json='{plugin_by_source[expected_source]['name']}'" - ) - - # Check: every marketplace plugin with skills has a corresponding skill - for plugin in plugins: - # Skip plugins that don't have skills (e.g., commands-only plugins) - if "skills" not in plugin: - continue - if plugin["source"] not in skill_by_source: - errors.append( - f"Marketplace plugin '{plugin['name']}' at '{plugin['source']}' has no SKILL.md" - ) - - return errors - - -def parse_version(version: str) -> tuple[int, int, int]: - """Parse semver string to tuple.""" - match = re.match(r"(\d+)\.(\d+)\.(\d+)", version) - if not match: - return (1, 0, 0) - return (int(match.group(1)), int(match.group(2)), int(match.group(3))) - - -def format_version(version: tuple[int, int, int]) -> str: - """Format version tuple to string.""" - return f"{version[0]}.{version[1]}.{version[2]}" - - -def get_bump_type(commit_msg: str) -> str: - """ - Determine version bump type from conventional commit message. - Returns: 'major', 'minor', 'patch', or 'none' - """ - msg_lower = commit_msg.lower() - - # Major: BREAKING CHANGE or ! 
after type - if "breaking change" in msg_lower or re.match(r"^\w+!:", commit_msg): - return "major" - - # Minor: feat - if re.match(r"^feat(\(.+\))?:", commit_msg, re.IGNORECASE): - return "minor" - - # Patch: fix, docs, chore, refactor, style, test, perf, ci, build - patch_types = ["fix", "docs", "chore", "refactor", "style", "test", "perf", "ci", "build"] - for t in patch_types: - if re.match(rf"^{t}(\(.+\))?:", commit_msg, re.IGNORECASE): - return "patch" - - return "none" - - -def bump_version(version: str, bump_type: str) -> str: - """Bump version based on type.""" - major, minor, patch = parse_version(version) - - if bump_type == "major": - return format_version((major + 1, 0, 0)) - elif bump_type == "minor": - return format_version((major, minor + 1, 0)) - elif bump_type == "patch": - return format_version((major, minor, patch + 1)) - return version - - -def update_user_agent_in_skill(skill_name: str, new_version: str) -> bool: - """ - Update USER_AGENT version in skill's run_actor.py script. - Returns True if updated, False otherwise. 
- """ - script_path = SKILLS_DIR / skill_name / "reference" / "scripts" / "run_actor.py" - if not script_path.exists(): - return False - - content = script_path.read_text(encoding="utf-8") - - # Pattern: USER_AGENT = "apify-agent-skills/skill-name-X.Y.Z" - pattern = rf'(USER_AGENT\s*=\s*"apify-agent-skills/{re.escape(skill_name)}-)\d+\.\d+\.\d+"' - replacement = rf'\g<1>{new_version}"' - - new_content, count = re.subn(pattern, replacement, content) - - if count > 0: - script_path.write_text(new_content, encoding="utf-8") - print(f"Updated USER_AGENT in {script_path.relative_to(ROOT)}: {new_version}") - return True - - return False - - -def get_changed_skills() -> set[str]: - """Get list of skill names that have staged changes.""" - try: - result = subprocess.run( - ["git", "diff", "--cached", "--name-only"], - capture_output=True, - text=True, - cwd=ROOT, - ) - changed_files = result.stdout.strip().split("\n") if result.stdout.strip() else [] - except Exception: - return set() - - changed_skills = set() - for f in changed_files: - # Match skills/skill-name/... pattern - match = re.match(r"skills/([^/]+)/", f) - if match: - changed_skills.add(match.group(1)) - - return changed_skills - - -def update_versions(commit_msg: str) -> bool: - """ - Update versions based on commit message and changed files. - Returns True if any version was bumped. 
- """ - bump_type = get_bump_type(commit_msg) - if bump_type == "none": - print(f"No version bump needed for commit: {commit_msg[:50]}...") - return False - - changed_skills = get_changed_skills() - bumped = False - - # Load marketplace.json - marketplace = load_marketplace() - - # Bump individual skill versions if they changed - for plugin in marketplace.get("plugins", []): - skill_name = plugin["source"].replace("./skills/", "") - if skill_name in changed_skills: - old_version = plugin.get("version", "1.0.0") - new_version = bump_version(old_version, bump_type) - if old_version != new_version: - plugin["version"] = new_version - print(f"Bumped {skill_name}: {old_version} → {new_version} ({bump_type})") - bumped = True - # Also update USER_AGENT in skill's run_actor.py - update_user_agent_in_skill(skill_name, new_version) - - # Bump marketplace version if any skill changed - if changed_skills or bumped: - old_version = marketplace.get("metadata", {}).get("version", "1.0.0") - new_version = bump_version(old_version, bump_type) - if old_version != new_version: - if "metadata" not in marketplace: - marketplace["metadata"] = {} - marketplace["metadata"]["version"] = new_version - print(f"Bumped marketplace: {old_version} → {new_version} ({bump_type})") - bumped = True - - # Save marketplace.json - if bumped: - MARKETPLACE_PATH.write_text( - json.dumps(marketplace, indent=2) + "\n", - encoding="utf-8" - ) - - # Also bump plugin.json version - if bumped and PLUGIN_PATH.exists(): - plugin_data = json.loads(PLUGIN_PATH.read_text(encoding="utf-8")) - old_version = plugin_data.get("version", "1.0.0") - new_version = bump_version(old_version, bump_type) - if old_version != new_version: - plugin_data["version"] = new_version - PLUGIN_PATH.write_text( - json.dumps(plugin_data, indent=2) + "\n", - encoding="utf-8" - ) - print(f"Bumped plugin.json: {old_version} → {new_version}") - - return bumped - - def main() -> None: - parser = argparse.ArgumentParser(description="Generate 
AGENTS.md and manage versions") - parser.add_argument( - "--bump", - metavar="COMMIT_MSG", - help="Bump versions based on conventional commit message" - ) - args = parser.parse_args() - - # Handle version bumping if requested - if args.bump: - update_versions(args.bump) - template = load_template() skills = collect_skills() output = render(template, skills) @@ -384,19 +84,6 @@ def main() -> None: OUTPUT_PATH.write_text(output, encoding="utf-8") print(f"Wrote {OUTPUT_PATH} with {len(skills)} skills.") - # Validate marketplace.json - errors = validate_marketplace(skills) - if errors: - print("\nMarketplace.json validation errors:", file=sys.stderr) - for error in errors: - print(f" - {error}", file=sys.stderr) - sys.exit(1) - print("Marketplace.json validation passed.") - - # Update README.md skills table - if update_readme(skills): - print(f"Updated {README_PATH} skills table.") - if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/skills/apify-audience-analysis/SKILL.md b/skills/apify-audience-analysis/SKILL.md deleted file mode 100644 index 7ce31aa..0000000 --- a/skills/apify-audience-analysis/SKILL.md +++ /dev/null @@ -1,121 +0,0 @@ ---- -name: apify-audience-analysis -description: Understand audience demographics, preferences, behavior patterns, and engagement quality across Facebook, Instagram, YouTube, and TikTok. ---- - -# Audience Analysis - -Analyze and understand your audience using Apify Actors to extract follower demographics, engagement patterns, and behavior data from multiple platforms. 
- -## Prerequisites -(No need to check it upfront) - -- `.env` file with `APIFY_TOKEN` -- Node.js 20.6+ (for native `--env-file` support) -- `mcpc` CLI tool: `npm install -g @apify/mcpc` - -## Workflow - -Copy this checklist and track progress: - -``` -Task Progress: -- [ ] Step 1: Identify audience analysis type (select Actor) -- [ ] Step 2: Fetch Actor schema via mcpc -- [ ] Step 3: Ask user preferences (format, filename) -- [ ] Step 4: Run the analysis script -- [ ] Step 5: Summarize findings -``` - -### Step 1: Identify Audience Analysis Type - -Select the appropriate Actor based on analysis needs: - -| User Need | Actor ID | Best For | -|-----------|----------|----------| -| Facebook follower demographics | `apify/facebook-followers-following-scraper` | FB followers/following lists | -| Facebook engagement behavior | `apify/facebook-likes-scraper` | FB post likes analysis | -| Facebook video audience | `apify/facebook-reels-scraper` | FB Reels viewers | -| Facebook comment analysis | `apify/facebook-comments-scraper` | FB post/video comments | -| Facebook content engagement | `apify/facebook-posts-scraper` | FB post engagement metrics | -| Instagram audience sizing | `apify/instagram-profile-scraper` | IG profile demographics | -| Instagram location-based | `apify/instagram-search-scraper` | IG geo-tagged audience | -| Instagram tagged network | `apify/instagram-tagged-scraper` | IG tag network analysis | -| Instagram comprehensive | `apify/instagram-scraper` | Full IG audience data | -| Instagram API-based | `apify/instagram-api-scraper` | IG API access | -| Instagram follower counts | `apify/instagram-followers-count-scraper` | IG follower tracking | -| Instagram comment export | `apify/export-instagram-comments-posts` | IG comment bulk export | -| Instagram comment analysis | `apify/instagram-comment-scraper` | IG comment sentiment | -| YouTube viewer feedback | `streamers/youtube-comments-scraper` | YT comment analysis | -| YouTube channel audience | 
`streamers/youtube-channel-scraper` | YT channel subscribers | -| TikTok follower demographics | `clockworks/tiktok-followers-scraper` | TT follower lists | -| TikTok profile analysis | `clockworks/tiktok-profile-scraper` | TT profile demographics | -| TikTok comment analysis | `clockworks/tiktok-comments-scraper` | TT comment engagement | - -### Step 2: Fetch Actor Schema - -Fetch the Actor's input schema and details dynamically using mcpc: - -```bash -export $(grep APIFY_TOKEN .env | xargs) && mcpc --json mcp.apify.com --header "Authorization: Bearer $APIFY_TOKEN" tools-call fetch-actor-details actor:="ACTOR_ID" | jq -r ".content" -``` - -Replace `ACTOR_ID` with the selected Actor (e.g., `apify/facebook-followers-following-scraper`). - -This returns: -- Actor description and README -- Required and optional input parameters -- Output fields (if available) - -### Step 3: Ask User Preferences - -Before running, ask: -1. **Output format**: - - **Quick answer** - Display top few results in chat (no file saved) - - **CSV** - Full export with all fields - - **JSON** - Full export in JSON format -2. 
**Number of results**: Based on character of use case - -### Step 4: Run the Script - -**Quick answer (display in chat, no file):** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' -``` - -**CSV:** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' \ - --output YYYY-MM-DD_OUTPUT_FILE.csv \ - --format csv -``` - -**JSON:** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' \ - --output YYYY-MM-DD_OUTPUT_FILE.json \ - --format json -``` - -### Step 5: Summarize Findings - -After completion, report: -- Number of audience members/profiles analyzed -- File location and name -- Key demographic insights -- Suggested next steps (deeper analysis, segmentation) - - -## Error Handling - -`APIFY_TOKEN not found` - Ask user to create `.env` with `APIFY_TOKEN=your_token` -`mcpc not found` - Ask user to install `npm install -g @apify/mcpc` -`Actor not found` - Check Actor ID spelling -`Run FAILED` - Ask user to check Apify console link in error output -`Timeout` - Reduce input size or increase `--timeout` diff --git a/skills/apify-audience-analysis/reference/scripts/run_actor.js b/skills/apify-audience-analysis/reference/scripts/run_actor.js deleted file mode 100644 index 1a28392..0000000 --- a/skills/apify-audience-analysis/reference/scripts/run_actor.js +++ /dev/null @@ -1,363 +0,0 @@ -#!/usr/bin/env node -/** - * Apify Actor Runner - Runs Apify actors and exports results. 
- * - * Usage: - * # Quick answer (display in chat, no file saved) - * node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' - * - * # Export to file - * node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' --output leads.csv --format csv - */ - -import { parseArgs } from 'node:util'; -import { writeFileSync, statSync } from 'node:fs'; - -// User-Agent for tracking skill usage in Apify analytics -const USER_AGENT = 'apify-agent-skills/apify-audience-analysis-1.0.1'; - -// Parse command-line arguments -function parseCliArgs() { - const options = { - actor: { type: 'string', short: 'a' }, - input: { type: 'string', short: 'i' }, - output: { type: 'string', short: 'o' }, - format: { type: 'string', short: 'f', default: 'csv' }, - timeout: { type: 'string', short: 't', default: '600' }, - 'poll-interval': { type: 'string', default: '5' }, - help: { type: 'boolean', short: 'h' }, - }; - - const { values } = parseArgs({ options, allowPositionals: false }); - - if (values.help) { - printHelp(); - process.exit(0); - } - - if (!values.actor) { - console.error('Error: --actor is required'); - printHelp(); - process.exit(1); - } - - if (!values.input) { - console.error('Error: --input is required'); - printHelp(); - process.exit(1); - } - - return { - actor: values.actor, - input: values.input, - output: values.output, - format: values.format || 'csv', - timeout: parseInt(values.timeout, 10), - pollInterval: parseInt(values['poll-interval'], 10), - }; -} - -function printHelp() { - console.log(` -Apify Actor Runner - Run Apify actors and export results - -Usage: - node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' - -Options: - --actor, -a Actor ID (e.g., compass/crawler-google-places) [required] - --input, -i Actor input as JSON string [required] - --output, -o Output file path (optional - if not provided, displays quick answer) - --format, -f Output format: csv, json (default: csv) - --timeout, -t Max wait time in 
seconds (default: 600) - --poll-interval Seconds between status checks (default: 5) - --help, -h Show this help message - -Output Formats: - JSON (all data) --output file.json --format json - CSV (all data) --output file.csv --format csv - Quick answer (no --output) - displays top 5 in chat - -Examples: - # Quick answer - display top 5 in chat - node --env-file=.env scripts/run_actor.js \\ - --actor "compass/crawler-google-places" \\ - --input '{"searchStringsArray": ["coffee shops"], "locationQuery": "Seattle, USA"}' - - # Export all data to CSV - node --env-file=.env scripts/run_actor.js \\ - --actor "compass/crawler-google-places" \\ - --input '{"searchStringsArray": ["coffee shops"], "locationQuery": "Seattle, USA"}' \\ - --output leads.csv --format csv -`); -} - -// Start an actor run and return { runId, datasetId } -async function startActor(token, actorId, inputJson) { - // Convert "author/actor" format to "author~actor" for API compatibility - const apiActorId = actorId.replace('/', '~'); - const url = `https://api.apify.com/v2/acts/${apiActorId}/runs?token=${encodeURIComponent(token)}`; - - let data; - try { - data = JSON.parse(inputJson); - } catch (e) { - console.error(`Error: Invalid JSON input: ${e.message}`); - process.exit(1); - } - - const response = await fetch(url, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'User-Agent': `${USER_AGENT}/start_actor`, - }, - body: JSON.stringify(data), - }); - - if (response.status === 404) { - console.error(`Error: Actor '${actorId}' not found`); - process.exit(1); - } - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: API request failed (${response.status}): ${text}`); - process.exit(1); - } - - const result = await response.json(); - return { - runId: result.data.id, - datasetId: result.data.defaultDatasetId, - }; -} - -// Poll run status until complete or timeout -async function pollUntilComplete(token, runId, timeout, interval) { - const url = 
`https://api.apify.com/v2/actor-runs/${runId}?token=${encodeURIComponent(token)}`; - const startTime = Date.now(); - let lastStatus = null; - - while (true) { - const response = await fetch(url); - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to get run status: ${text}`); - process.exit(1); - } - - const result = await response.json(); - const status = result.data.status; - - // Only print when status changes - if (status !== lastStatus) { - console.log(`Status: ${status}`); - lastStatus = status; - } - - if (['SUCCEEDED', 'FAILED', 'ABORTED', 'TIMED-OUT'].includes(status)) { - return status; - } - - const elapsed = (Date.now() - startTime) / 1000; - if (elapsed > timeout) { - console.error(`Warning: Timeout after ${timeout}s, actor still running`); - return 'TIMED-OUT'; - } - - await sleep(interval * 1000); - } -} - -// Download dataset items -async function downloadResults(token, datasetId, outputPath, format) { - const url = `https://api.apify.com/v2/datasets/${datasetId}/items?token=${encodeURIComponent(token)}&format=json`; - - const response = await fetch(url, { - headers: { - 'User-Agent': `${USER_AGENT}/download_${format}`, - }, - }); - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to download results: ${text}`); - process.exit(1); - } - - const data = await response.json(); - - if (format === 'json') { - writeFileSync(outputPath, JSON.stringify(data, null, 2)); - } else { - // CSV output - if (data.length > 0) { - const fieldnames = Object.keys(data[0]); - const csvLines = [fieldnames.join(',')]; - - for (const row of data) { - const values = fieldnames.map((key) => { - let value = row[key]; - - // Truncate long text fields - if (typeof value === 'string' && value.length > 200) { - value = value.slice(0, 200) + '...'; - } else if (Array.isArray(value) || (typeof value === 'object' && value !== null)) { - value = JSON.stringify(value) || ''; - } - - // CSV escape: wrap 
in quotes if contains comma, quote, or newline - if (value === null || value === undefined) { - return ''; - } - const strValue = String(value); - if (strValue.includes(',') || strValue.includes('"') || strValue.includes('\n')) { - return `"${strValue.replace(/"/g, '""')}"`; - } - return strValue; - }); - csvLines.push(values.join(',')); - } - - writeFileSync(outputPath, csvLines.join('\n')); - } else { - writeFileSync(outputPath, ''); - } - } - - console.log(`Saved to: ${outputPath}`); -} - -// Display top 5 results in chat format -async function displayQuickAnswer(token, datasetId) { - const url = `https://api.apify.com/v2/datasets/${datasetId}/items?token=${encodeURIComponent(token)}&format=json`; - - const response = await fetch(url, { - headers: { - 'User-Agent': `${USER_AGENT}/quick_answer`, - }, - }); - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to download results: ${text}`); - process.exit(1); - } - - const data = await response.json(); - const total = data.length; - - if (total === 0) { - console.log('\nNo results found.'); - return; - } - - // Display top 5 - console.log(`\n${'='.repeat(60)}`); - console.log(`TOP 5 RESULTS (of ${total} total)`); - console.log('='.repeat(60)); - - for (let i = 0; i < Math.min(5, data.length); i++) { - const item = data[i]; - console.log(`\n--- Result ${i + 1} ---`); - - for (const [key, value] of Object.entries(item)) { - let displayValue = value; - - // Truncate long values - if (typeof value === 'string' && value.length > 100) { - displayValue = value.slice(0, 100) + '...'; - } else if (Array.isArray(value) || (typeof value === 'object' && value !== null)) { - const jsonStr = JSON.stringify(value); - displayValue = jsonStr.length > 100 ? jsonStr.slice(0, 100) + '...' 
: jsonStr; - } - - console.log(` ${key}: ${displayValue}`); - } - } - - console.log(`\n${'='.repeat(60)}`); - if (total > 5) { - console.log(`Showing 5 of ${total} results.`); - } - console.log(`Full data available at: https://console.apify.com/storage/datasets/${datasetId}`); - console.log('='.repeat(60)); -} - -// Report summary of downloaded data -function reportSummary(outputPath, format) { - const stats = statSync(outputPath); - const size = stats.size; - - let count; - try { - const content = require('fs').readFileSync(outputPath, 'utf-8'); - if (format === 'json') { - const data = JSON.parse(content); - count = Array.isArray(data) ? data.length : 1; - } else { - // CSV - count lines minus header - const lines = content.split('\n').filter((line) => line.trim()); - count = Math.max(0, lines.length - 1); - } - } catch { - count = 'unknown'; - } - - console.log(`Records: ${count}`); - console.log(`Size: ${size.toLocaleString()} bytes`); -} - -// Helper: sleep for ms -function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); -} - -// Main function -async function main() { - // Parse args first so --help works without token - const args = parseCliArgs(); - - // Check for APIFY_TOKEN - const token = process.env.APIFY_TOKEN; - if (!token) { - console.error('Error: APIFY_TOKEN not found in .env file'); - console.error(''); - console.error('Add your token to .env file:'); - console.error(' APIFY_TOKEN=your_token_here'); - console.error(''); - console.error('Get your token: https://console.apify.com/account/integrations'); - process.exit(1); - } - - // Start the actor run - console.log(`Starting actor: ${args.actor}`); - const { runId, datasetId } = await startActor(token, args.actor, args.input); - console.log(`Run ID: ${runId}`); - console.log(`Dataset ID: ${datasetId}`); - - // Poll for completion - const status = await pollUntilComplete(token, runId, args.timeout, args.pollInterval); - - if (status !== 'SUCCEEDED') { - console.error(`Error: 
Actor run ${status}`); - console.error(`Details: https://console.apify.com/actors/runs/${runId}`); - process.exit(1); - } - - // Determine output mode - if (args.output) { - // File output mode - await downloadResults(token, datasetId, args.output, args.format); - reportSummary(args.output, args.format); - } else { - // Quick answer mode - display in chat - await displayQuickAnswer(token, datasetId); - } -} - -main().catch((err) => { - console.error(`Error: ${err.message}`); - process.exit(1); -}); diff --git a/skills/apify-brand-reputation-monitoring/SKILL.md b/skills/apify-brand-reputation-monitoring/SKILL.md deleted file mode 100644 index 5fc0ee5..0000000 --- a/skills/apify-brand-reputation-monitoring/SKILL.md +++ /dev/null @@ -1,121 +0,0 @@ ---- -name: apify-brand-reputation-monitoring -description: Track reviews, ratings, sentiment, and brand mentions across Google Maps, Booking.com, TripAdvisor, Facebook, Instagram, YouTube, and TikTok. Use when user asks to monitor brand reputation, analyze reviews, track mentions, or gather customer feedback. ---- - -# Brand Reputation Monitoring - -Scrape reviews, ratings, and brand mentions from multiple platforms using Apify Actors. 
- -## Prerequisites -(No need to check it upfront) - -- `.env` file with `APIFY_TOKEN` -- Node.js 20.6+ (for native `--env-file` support) -- `mcpc` CLI tool: `npm install -g @apify/mcpc` - -## Workflow - -Copy this checklist and track progress: - -``` -Task Progress: -- [ ] Step 1: Determine data source (select Actor) -- [ ] Step 2: Fetch Actor schema via mcpc -- [ ] Step 3: Ask user preferences (format, filename) -- [ ] Step 4: Run the monitoring script -- [ ] Step 5: Summarize results -``` - -### Step 1: Determine Data Source - -Select the appropriate Actor based on user needs: - -| User Need | Actor ID | Best For | -|-----------|----------|----------| -| Google Maps reviews | `compass/crawler-google-places` | Business reviews, ratings | -| Google Maps review export | `compass/Google-Maps-Reviews-Scraper` | Dedicated review scraping | -| Booking.com hotels | `voyager/booking-scraper` | Hotel data, scores | -| Booking.com reviews | `voyager/booking-reviews-scraper` | Detailed hotel reviews | -| TripAdvisor reviews | `maxcopell/tripadvisor-reviews` | Attraction/restaurant reviews | -| Facebook reviews | `apify/facebook-reviews-scraper` | Page reviews | -| Facebook comments | `apify/facebook-comments-scraper` | Post comment monitoring | -| Facebook page metrics | `apify/facebook-pages-scraper` | Page ratings overview | -| Facebook reactions | `apify/facebook-likes-scraper` | Reaction type analysis | -| Instagram comments | `apify/instagram-comment-scraper` | Comment sentiment | -| Instagram hashtags | `apify/instagram-hashtag-scraper` | Brand hashtag monitoring | -| Instagram search | `apify/instagram-search-scraper` | Brand mention discovery | -| Instagram tagged posts | `apify/instagram-tagged-scraper` | Brand tag tracking | -| Instagram export | `apify/export-instagram-comments-posts` | Bulk comment export | -| Instagram comprehensive | `apify/instagram-scraper` | Full Instagram monitoring | -| Instagram API | `apify/instagram-api-scraper` | API-based monitoring 
| -| YouTube comments | `streamers/youtube-comments-scraper` | Video comment sentiment | -| TikTok comments | `clockworks/tiktok-comments-scraper` | TikTok sentiment | - -### Step 2: Fetch Actor Schema - -Fetch the Actor's input schema and details dynamically using mcpc: - -```bash -export $(grep APIFY_TOKEN .env | xargs) && mcpc --json mcp.apify.com --header "Authorization: Bearer $APIFY_TOKEN" tools-call fetch-actor-details actor:="ACTOR_ID" | jq -r ".content" -``` - -Replace `ACTOR_ID` with the selected Actor (e.g., `compass/crawler-google-places`). - -This returns: -- Actor description and README -- Required and optional input parameters -- Output fields (if available) - -### Step 3: Ask User Preferences - -Before running, ask: -1. **Output format**: - - **Quick answer** - Display top few results in chat (no file saved) - - **CSV** - Full export with all fields - - **JSON** - Full export in JSON format -2. **Number of results**: Based on character of use case - -### Step 4: Run the Script - -**Quick answer (display in chat, no file):** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' -``` - -**CSV:** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' \ - --output YYYY-MM-DD_OUTPUT_FILE.csv \ - --format csv -``` - -**JSON:** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' \ - --output YYYY-MM-DD_OUTPUT_FILE.json \ - --format json -``` - -### Step 5: Summarize Results - -After completion, report: -- Number of reviews/mentions found -- File location and name -- Key fields available -- Suggested next steps (sentiment analysis, filtering) - - -## Error Handling - -`APIFY_TOKEN not found` - Ask user to create `.env` with `APIFY_TOKEN=your_token` -`mcpc not found` - Ask user to install `npm install -g @apify/mcpc` -`Actor 
not found` - Check Actor ID spelling -`Run FAILED` - Ask user to check Apify console link in error output -`Timeout` - Reduce input size or increase `--timeout` diff --git a/skills/apify-brand-reputation-monitoring/reference/scripts/run_actor.js b/skills/apify-brand-reputation-monitoring/reference/scripts/run_actor.js deleted file mode 100644 index edc49c6..0000000 --- a/skills/apify-brand-reputation-monitoring/reference/scripts/run_actor.js +++ /dev/null @@ -1,363 +0,0 @@ -#!/usr/bin/env node -/** - * Apify Actor Runner - Runs Apify actors and exports results. - * - * Usage: - * # Quick answer (display in chat, no file saved) - * node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' - * - * # Export to file - * node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' --output leads.csv --format csv - */ - -import { parseArgs } from 'node:util'; -import { writeFileSync, statSync } from 'node:fs'; - -// User-Agent for tracking skill usage in Apify analytics -const USER_AGENT = 'apify-agent-skills/apify-brand-reputation-monitoring-1.1.1'; - -// Parse command-line arguments -function parseCliArgs() { - const options = { - actor: { type: 'string', short: 'a' }, - input: { type: 'string', short: 'i' }, - output: { type: 'string', short: 'o' }, - format: { type: 'string', short: 'f', default: 'csv' }, - timeout: { type: 'string', short: 't', default: '600' }, - 'poll-interval': { type: 'string', default: '5' }, - help: { type: 'boolean', short: 'h' }, - }; - - const { values } = parseArgs({ options, allowPositionals: false }); - - if (values.help) { - printHelp(); - process.exit(0); - } - - if (!values.actor) { - console.error('Error: --actor is required'); - printHelp(); - process.exit(1); - } - - if (!values.input) { - console.error('Error: --input is required'); - printHelp(); - process.exit(1); - } - - return { - actor: values.actor, - input: values.input, - output: values.output, - format: values.format || 'csv', - timeout: 
parseInt(values.timeout, 10), - pollInterval: parseInt(values['poll-interval'], 10), - }; -} - -function printHelp() { - console.log(` -Apify Actor Runner - Run Apify actors and export results - -Usage: - node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' - -Options: - --actor, -a Actor ID (e.g., compass/crawler-google-places) [required] - --input, -i Actor input as JSON string [required] - --output, -o Output file path (optional - if not provided, displays quick answer) - --format, -f Output format: csv, json (default: csv) - --timeout, -t Max wait time in seconds (default: 600) - --poll-interval Seconds between status checks (default: 5) - --help, -h Show this help message - -Output Formats: - JSON (all data) --output file.json --format json - CSV (all data) --output file.csv --format csv - Quick answer (no --output) - displays top 5 in chat - -Examples: - # Quick answer - display top 5 in chat - node --env-file=.env scripts/run_actor.js \\ - --actor "compass/crawler-google-places" \\ - --input '{"searchStringsArray": ["coffee shops"], "locationQuery": "Seattle, USA"}' - - # Export all data to CSV - node --env-file=.env scripts/run_actor.js \\ - --actor "compass/crawler-google-places" \\ - --input '{"searchStringsArray": ["coffee shops"], "locationQuery": "Seattle, USA"}' \\ - --output leads.csv --format csv -`); -} - -// Start an actor run and return { runId, datasetId } -async function startActor(token, actorId, inputJson) { - // Convert "author/actor" format to "author~actor" for API compatibility - const apiActorId = actorId.replace('/', '~'); - const url = `https://api.apify.com/v2/acts/${apiActorId}/runs?token=${encodeURIComponent(token)}`; - - let data; - try { - data = JSON.parse(inputJson); - } catch (e) { - console.error(`Error: Invalid JSON input: ${e.message}`); - process.exit(1); - } - - const response = await fetch(url, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'User-Agent': 
`${USER_AGENT}/start_actor`, - }, - body: JSON.stringify(data), - }); - - if (response.status === 404) { - console.error(`Error: Actor '${actorId}' not found`); - process.exit(1); - } - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: API request failed (${response.status}): ${text}`); - process.exit(1); - } - - const result = await response.json(); - return { - runId: result.data.id, - datasetId: result.data.defaultDatasetId, - }; -} - -// Poll run status until complete or timeout -async function pollUntilComplete(token, runId, timeout, interval) { - const url = `https://api.apify.com/v2/actor-runs/${runId}?token=${encodeURIComponent(token)}`; - const startTime = Date.now(); - let lastStatus = null; - - while (true) { - const response = await fetch(url); - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to get run status: ${text}`); - process.exit(1); - } - - const result = await response.json(); - const status = result.data.status; - - // Only print when status changes - if (status !== lastStatus) { - console.log(`Status: ${status}`); - lastStatus = status; - } - - if (['SUCCEEDED', 'FAILED', 'ABORTED', 'TIMED-OUT'].includes(status)) { - return status; - } - - const elapsed = (Date.now() - startTime) / 1000; - if (elapsed > timeout) { - console.error(`Warning: Timeout after ${timeout}s, actor still running`); - return 'TIMED-OUT'; - } - - await sleep(interval * 1000); - } -} - -// Download dataset items -async function downloadResults(token, datasetId, outputPath, format) { - const url = `https://api.apify.com/v2/datasets/${datasetId}/items?token=${encodeURIComponent(token)}&format=json`; - - const response = await fetch(url, { - headers: { - 'User-Agent': `${USER_AGENT}/download_${format}`, - }, - }); - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to download results: ${text}`); - process.exit(1); - } - - const data = await response.json(); - 
- if (format === 'json') { - writeFileSync(outputPath, JSON.stringify(data, null, 2)); - } else { - // CSV output - if (data.length > 0) { - const fieldnames = Object.keys(data[0]); - const csvLines = [fieldnames.join(',')]; - - for (const row of data) { - const values = fieldnames.map((key) => { - let value = row[key]; - - // Truncate long text fields - if (typeof value === 'string' && value.length > 200) { - value = value.slice(0, 200) + '...'; - } else if (Array.isArray(value) || (typeof value === 'object' && value !== null)) { - value = JSON.stringify(value) || ''; - } - - // CSV escape: wrap in quotes if contains comma, quote, or newline - if (value === null || value === undefined) { - return ''; - } - const strValue = String(value); - if (strValue.includes(',') || strValue.includes('"') || strValue.includes('\n')) { - return `"${strValue.replace(/"/g, '""')}"`; - } - return strValue; - }); - csvLines.push(values.join(',')); - } - - writeFileSync(outputPath, csvLines.join('\n')); - } else { - writeFileSync(outputPath, ''); - } - } - - console.log(`Saved to: ${outputPath}`); -} - -// Display top 5 results in chat format -async function displayQuickAnswer(token, datasetId) { - const url = `https://api.apify.com/v2/datasets/${datasetId}/items?token=${encodeURIComponent(token)}&format=json`; - - const response = await fetch(url, { - headers: { - 'User-Agent': `${USER_AGENT}/quick_answer`, - }, - }); - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to download results: ${text}`); - process.exit(1); - } - - const data = await response.json(); - const total = data.length; - - if (total === 0) { - console.log('\nNo results found.'); - return; - } - - // Display top 5 - console.log(`\n${'='.repeat(60)}`); - console.log(`TOP 5 RESULTS (of ${total} total)`); - console.log('='.repeat(60)); - - for (let i = 0; i < Math.min(5, data.length); i++) { - const item = data[i]; - console.log(`\n--- Result ${i + 1} ---`); - - for (const 
[key, value] of Object.entries(item)) { - let displayValue = value; - - // Truncate long values - if (typeof value === 'string' && value.length > 100) { - displayValue = value.slice(0, 100) + '...'; - } else if (Array.isArray(value) || (typeof value === 'object' && value !== null)) { - const jsonStr = JSON.stringify(value); - displayValue = jsonStr.length > 100 ? jsonStr.slice(0, 100) + '...' : jsonStr; - } - - console.log(` ${key}: ${displayValue}`); - } - } - - console.log(`\n${'='.repeat(60)}`); - if (total > 5) { - console.log(`Showing 5 of ${total} results.`); - } - console.log(`Full data available at: https://console.apify.com/storage/datasets/${datasetId}`); - console.log('='.repeat(60)); -} - -// Report summary of downloaded data -function reportSummary(outputPath, format) { - const stats = statSync(outputPath); - const size = stats.size; - - let count; - try { - const content = require('fs').readFileSync(outputPath, 'utf-8'); - if (format === 'json') { - const data = JSON.parse(content); - count = Array.isArray(data) ? 
data.length : 1; - } else { - // CSV - count lines minus header - const lines = content.split('\n').filter((line) => line.trim()); - count = Math.max(0, lines.length - 1); - } - } catch { - count = 'unknown'; - } - - console.log(`Records: ${count}`); - console.log(`Size: ${size.toLocaleString()} bytes`); -} - -// Helper: sleep for ms -function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); -} - -// Main function -async function main() { - // Parse args first so --help works without token - const args = parseCliArgs(); - - // Check for APIFY_TOKEN - const token = process.env.APIFY_TOKEN; - if (!token) { - console.error('Error: APIFY_TOKEN not found in .env file'); - console.error(''); - console.error('Add your token to .env file:'); - console.error(' APIFY_TOKEN=your_token_here'); - console.error(''); - console.error('Get your token: https://console.apify.com/account/integrations'); - process.exit(1); - } - - // Start the actor run - console.log(`Starting actor: ${args.actor}`); - const { runId, datasetId } = await startActor(token, args.actor, args.input); - console.log(`Run ID: ${runId}`); - console.log(`Dataset ID: ${datasetId}`); - - // Poll for completion - const status = await pollUntilComplete(token, runId, args.timeout, args.pollInterval); - - if (status !== 'SUCCEEDED') { - console.error(`Error: Actor run ${status}`); - console.error(`Details: https://console.apify.com/actors/runs/${runId}`); - process.exit(1); - } - - // Determine output mode - if (args.output) { - // File output mode - await downloadResults(token, datasetId, args.output, args.format); - reportSummary(args.output, args.format); - } else { - // Quick answer mode - display in chat - await displayQuickAnswer(token, datasetId); - } -} - -main().catch((err) => { - console.error(`Error: ${err.message}`); - process.exit(1); -}); diff --git a/skills/apify-competitor-intelligence/SKILL.md b/skills/apify-competitor-intelligence/SKILL.md deleted file mode 100644 index 
eb5bdc3..0000000 --- a/skills/apify-competitor-intelligence/SKILL.md +++ /dev/null @@ -1,131 +0,0 @@ ---- -name: apify-competitor-intelligence -description: Analyze competitor strategies, content, pricing, ads, and market positioning across Google Maps, Booking.com, Facebook, Instagram, YouTube, and TikTok. ---- - -# Competitor Intelligence - -Analyze competitors using Apify Actors to extract data from multiple platforms. - -## Prerequisites -(No need to check it upfront) - -- `.env` file with `APIFY_TOKEN` -- Node.js 20.6+ (for native `--env-file` support) -- `mcpc` CLI tool: `npm install -g @apify/mcpc` - -## Workflow - -Copy this checklist and track progress: - -``` -Task Progress: -- [ ] Step 1: Identify competitor analysis type (select Actor) -- [ ] Step 2: Fetch Actor schema via mcpc -- [ ] Step 3: Ask user preferences (format, filename) -- [ ] Step 4: Run the analysis script -- [ ] Step 5: Summarize findings -``` - -### Step 1: Identify Competitor Analysis Type - -Select the appropriate Actor based on analysis needs: - -| User Need | Actor ID | Best For | -|-----------|----------|----------| -| Competitor business data | `compass/crawler-google-places` | Location analysis | -| Competitor contact discovery | `poidata/google-maps-email-extractor` | Email extraction | -| Feature benchmarking | `compass/google-maps-extractor` | Detailed business data | -| Competitor review analysis | `compass/Google-Maps-Reviews-Scraper` | Review comparison | -| Hotel competitor data | `voyager/booking-scraper` | Hotel benchmarking | -| Hotel review comparison | `voyager/booking-reviews-scraper` | Review analysis | -| Competitor ad strategies | `apify/facebook-ads-scraper` | Ad creative analysis | -| Competitor page metrics | `apify/facebook-pages-scraper` | Page performance | -| Competitor content analysis | `apify/facebook-posts-scraper` | Post strategies | -| Competitor reels performance | `apify/facebook-reels-scraper` | Reels analysis | -| Competitor audience analysis | 
`apify/facebook-comments-scraper` | Comment sentiment | -| Competitor event monitoring | `apify/facebook-events-scraper` | Event tracking | -| Competitor audience overlap | `apify/facebook-followers-following-scraper` | Follower analysis | -| Competitor review benchmarking | `apify/facebook-reviews-scraper` | Review comparison | -| Competitor ad monitoring | `apify/facebook-search-scraper` | Ad discovery | -| Competitor profile metrics | `apify/instagram-profile-scraper` | Profile analysis | -| Competitor content monitoring | `apify/instagram-post-scraper` | Post tracking | -| Competitor engagement analysis | `apify/instagram-comment-scraper` | Comment analysis | -| Competitor reel performance | `apify/instagram-reel-scraper` | Reel metrics | -| Competitor growth tracking | `apify/instagram-followers-count-scraper` | Follower tracking | -| Comprehensive competitor data | `apify/instagram-scraper` | Full analysis | -| API-based competitor analysis | `apify/instagram-api-scraper` | API access | -| Competitor video analysis | `streamers/youtube-scraper` | Video metrics | -| Competitor sentiment analysis | `streamers/youtube-comments-scraper` | Comment sentiment | -| Competitor channel metrics | `streamers/youtube-channel-scraper` | Channel analysis | -| TikTok competitor analysis | `clockworks/tiktok-scraper` | TikTok data | -| Competitor video strategies | `clockworks/tiktok-video-scraper` | Video analysis | -| Competitor TikTok profiles | `clockworks/tiktok-profile-scraper` | Profile data | - -### Step 2: Fetch Actor Schema - -Fetch the Actor's input schema and details dynamically using mcpc: - -```bash -export $(grep APIFY_TOKEN .env | xargs) && mcpc --json mcp.apify.com --header "Authorization: Bearer $APIFY_TOKEN" tools-call fetch-actor-details actor:="ACTOR_ID" | jq -r ".content" -``` - -Replace `ACTOR_ID` with the selected Actor (e.g., `compass/crawler-google-places`). 
- -This returns: -- Actor description and README -- Required and optional input parameters -- Output fields (if available) - -### Step 3: Ask User Preferences - -Before running, ask: -1. **Output format**: - - **Quick answer** - Display top few results in chat (no file saved) - - **CSV** - Full export with all fields - - **JSON** - Full export in JSON format -2. **Number of results**: Based on character of use case - -### Step 4: Run the Script - -**Quick answer (display in chat, no file):** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' -``` - -**CSV:** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' \ - --output YYYY-MM-DD_OUTPUT_FILE.csv \ - --format csv -``` - -**JSON:** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' \ - --output YYYY-MM-DD_OUTPUT_FILE.json \ - --format json -``` - -### Step 5: Summarize Findings - -After completion, report: -- Number of competitors analyzed -- File location and name -- Key competitive insights -- Suggested next steps (deeper analysis, benchmarking) - - -## Error Handling - -`APIFY_TOKEN not found` - Ask user to create `.env` with `APIFY_TOKEN=your_token` -`mcpc not found` - Ask user to install `npm install -g @apify/mcpc` -`Actor not found` - Check Actor ID spelling -`Run FAILED` - Ask user to check Apify console link in error output -`Timeout` - Reduce input size or increase `--timeout` diff --git a/skills/apify-competitor-intelligence/reference/scripts/run_actor.js b/skills/apify-competitor-intelligence/reference/scripts/run_actor.js deleted file mode 100644 index 6f373dd..0000000 --- a/skills/apify-competitor-intelligence/reference/scripts/run_actor.js +++ /dev/null @@ -1,363 +0,0 @@ -#!/usr/bin/env node -/** - * Apify Actor Runner - Runs Apify actors and exports results. 
- * - * Usage: - * # Quick answer (display in chat, no file saved) - * node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' - * - * # Export to file - * node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' --output leads.csv --format csv - */ - -import { parseArgs } from 'node:util'; -import { writeFileSync, statSync } from 'node:fs'; - -// User-Agent for tracking skill usage in Apify analytics -const USER_AGENT = 'apify-agent-skills/apify-competitor-intelligence-1.0.1'; - -// Parse command-line arguments -function parseCliArgs() { - const options = { - actor: { type: 'string', short: 'a' }, - input: { type: 'string', short: 'i' }, - output: { type: 'string', short: 'o' }, - format: { type: 'string', short: 'f', default: 'csv' }, - timeout: { type: 'string', short: 't', default: '600' }, - 'poll-interval': { type: 'string', default: '5' }, - help: { type: 'boolean', short: 'h' }, - }; - - const { values } = parseArgs({ options, allowPositionals: false }); - - if (values.help) { - printHelp(); - process.exit(0); - } - - if (!values.actor) { - console.error('Error: --actor is required'); - printHelp(); - process.exit(1); - } - - if (!values.input) { - console.error('Error: --input is required'); - printHelp(); - process.exit(1); - } - - return { - actor: values.actor, - input: values.input, - output: values.output, - format: values.format || 'csv', - timeout: parseInt(values.timeout, 10), - pollInterval: parseInt(values['poll-interval'], 10), - }; -} - -function printHelp() { - console.log(` -Apify Actor Runner - Run Apify actors and export results - -Usage: - node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' - -Options: - --actor, -a Actor ID (e.g., compass/crawler-google-places) [required] - --input, -i Actor input as JSON string [required] - --output, -o Output file path (optional - if not provided, displays quick answer) - --format, -f Output format: csv, json (default: csv) - --timeout, -t Max wait time 
in seconds (default: 600) - --poll-interval Seconds between status checks (default: 5) - --help, -h Show this help message - -Output Formats: - JSON (all data) --output file.json --format json - CSV (all data) --output file.csv --format csv - Quick answer (no --output) - displays top 5 in chat - -Examples: - # Quick answer - display top 5 in chat - node --env-file=.env scripts/run_actor.js \\ - --actor "compass/crawler-google-places" \\ - --input '{"searchStringsArray": ["coffee shops"], "locationQuery": "Seattle, USA"}' - - # Export all data to CSV - node --env-file=.env scripts/run_actor.js \\ - --actor "compass/crawler-google-places" \\ - --input '{"searchStringsArray": ["coffee shops"], "locationQuery": "Seattle, USA"}' \\ - --output leads.csv --format csv -`); -} - -// Start an actor run and return { runId, datasetId } -async function startActor(token, actorId, inputJson) { - // Convert "author/actor" format to "author~actor" for API compatibility - const apiActorId = actorId.replace('/', '~'); - const url = `https://api.apify.com/v2/acts/${apiActorId}/runs?token=${encodeURIComponent(token)}`; - - let data; - try { - data = JSON.parse(inputJson); - } catch (e) { - console.error(`Error: Invalid JSON input: ${e.message}`); - process.exit(1); - } - - const response = await fetch(url, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'User-Agent': `${USER_AGENT}/start_actor`, - }, - body: JSON.stringify(data), - }); - - if (response.status === 404) { - console.error(`Error: Actor '${actorId}' not found`); - process.exit(1); - } - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: API request failed (${response.status}): ${text}`); - process.exit(1); - } - - const result = await response.json(); - return { - runId: result.data.id, - datasetId: result.data.defaultDatasetId, - }; -} - -// Poll run status until complete or timeout -async function pollUntilComplete(token, runId, timeout, interval) { - const url = 
`https://api.apify.com/v2/actor-runs/${runId}?token=${encodeURIComponent(token)}`; - const startTime = Date.now(); - let lastStatus = null; - - while (true) { - const response = await fetch(url); - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to get run status: ${text}`); - process.exit(1); - } - - const result = await response.json(); - const status = result.data.status; - - // Only print when status changes - if (status !== lastStatus) { - console.log(`Status: ${status}`); - lastStatus = status; - } - - if (['SUCCEEDED', 'FAILED', 'ABORTED', 'TIMED-OUT'].includes(status)) { - return status; - } - - const elapsed = (Date.now() - startTime) / 1000; - if (elapsed > timeout) { - console.error(`Warning: Timeout after ${timeout}s, actor still running`); - return 'TIMED-OUT'; - } - - await sleep(interval * 1000); - } -} - -// Download dataset items -async function downloadResults(token, datasetId, outputPath, format) { - const url = `https://api.apify.com/v2/datasets/${datasetId}/items?token=${encodeURIComponent(token)}&format=json`; - - const response = await fetch(url, { - headers: { - 'User-Agent': `${USER_AGENT}/download_${format}`, - }, - }); - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to download results: ${text}`); - process.exit(1); - } - - const data = await response.json(); - - if (format === 'json') { - writeFileSync(outputPath, JSON.stringify(data, null, 2)); - } else { - // CSV output - if (data.length > 0) { - const fieldnames = Object.keys(data[0]); - const csvLines = [fieldnames.join(',')]; - - for (const row of data) { - const values = fieldnames.map((key) => { - let value = row[key]; - - // Truncate long text fields - if (typeof value === 'string' && value.length > 200) { - value = value.slice(0, 200) + '...'; - } else if (Array.isArray(value) || (typeof value === 'object' && value !== null)) { - value = JSON.stringify(value) || ''; - } - - // CSV escape: wrap 
in quotes if contains comma, quote, or newline - if (value === null || value === undefined) { - return ''; - } - const strValue = String(value); - if (strValue.includes(',') || strValue.includes('"') || strValue.includes('\n')) { - return `"${strValue.replace(/"/g, '""')}"`; - } - return strValue; - }); - csvLines.push(values.join(',')); - } - - writeFileSync(outputPath, csvLines.join('\n')); - } else { - writeFileSync(outputPath, ''); - } - } - - console.log(`Saved to: ${outputPath}`); -} - -// Display top 5 results in chat format -async function displayQuickAnswer(token, datasetId) { - const url = `https://api.apify.com/v2/datasets/${datasetId}/items?token=${encodeURIComponent(token)}&format=json`; - - const response = await fetch(url, { - headers: { - 'User-Agent': `${USER_AGENT}/quick_answer`, - }, - }); - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to download results: ${text}`); - process.exit(1); - } - - const data = await response.json(); - const total = data.length; - - if (total === 0) { - console.log('\nNo results found.'); - return; - } - - // Display top 5 - console.log(`\n${'='.repeat(60)}`); - console.log(`TOP 5 RESULTS (of ${total} total)`); - console.log('='.repeat(60)); - - for (let i = 0; i < Math.min(5, data.length); i++) { - const item = data[i]; - console.log(`\n--- Result ${i + 1} ---`); - - for (const [key, value] of Object.entries(item)) { - let displayValue = value; - - // Truncate long values - if (typeof value === 'string' && value.length > 100) { - displayValue = value.slice(0, 100) + '...'; - } else if (Array.isArray(value) || (typeof value === 'object' && value !== null)) { - const jsonStr = JSON.stringify(value); - displayValue = jsonStr.length > 100 ? jsonStr.slice(0, 100) + '...' 
: jsonStr; - } - - console.log(` ${key}: ${displayValue}`); - } - } - - console.log(`\n${'='.repeat(60)}`); - if (total > 5) { - console.log(`Showing 5 of ${total} results.`); - } - console.log(`Full data available at: https://console.apify.com/storage/datasets/${datasetId}`); - console.log('='.repeat(60)); -} - -// Report summary of downloaded data -function reportSummary(outputPath, format) { - const stats = statSync(outputPath); - const size = stats.size; - - let count; - try { - const content = require('fs').readFileSync(outputPath, 'utf-8'); - if (format === 'json') { - const data = JSON.parse(content); - count = Array.isArray(data) ? data.length : 1; - } else { - // CSV - count lines minus header - const lines = content.split('\n').filter((line) => line.trim()); - count = Math.max(0, lines.length - 1); - } - } catch { - count = 'unknown'; - } - - console.log(`Records: ${count}`); - console.log(`Size: ${size.toLocaleString()} bytes`); -} - -// Helper: sleep for ms -function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); -} - -// Main function -async function main() { - // Parse args first so --help works without token - const args = parseCliArgs(); - - // Check for APIFY_TOKEN - const token = process.env.APIFY_TOKEN; - if (!token) { - console.error('Error: APIFY_TOKEN not found in .env file'); - console.error(''); - console.error('Add your token to .env file:'); - console.error(' APIFY_TOKEN=your_token_here'); - console.error(''); - console.error('Get your token: https://console.apify.com/account/integrations'); - process.exit(1); - } - - // Start the actor run - console.log(`Starting actor: ${args.actor}`); - const { runId, datasetId } = await startActor(token, args.actor, args.input); - console.log(`Run ID: ${runId}`); - console.log(`Dataset ID: ${datasetId}`); - - // Poll for completion - const status = await pollUntilComplete(token, runId, args.timeout, args.pollInterval); - - if (status !== 'SUCCEEDED') { - console.error(`Error: 
Actor run ${status}`); - console.error(`Details: https://console.apify.com/actors/runs/${runId}`); - process.exit(1); - } - - // Determine output mode - if (args.output) { - // File output mode - await downloadResults(token, datasetId, args.output, args.format); - reportSummary(args.output, args.format); - } else { - // Quick answer mode - display in chat - await displayQuickAnswer(token, datasetId); - } -} - -main().catch((err) => { - console.error(`Error: ${err.message}`); - process.exit(1); -}); diff --git a/skills/apify-content-analytics/SKILL.md b/skills/apify-content-analytics/SKILL.md deleted file mode 100644 index 021eeb5..0000000 --- a/skills/apify-content-analytics/SKILL.md +++ /dev/null @@ -1,120 +0,0 @@ ---- -name: apify-content-analytics -description: Track engagement metrics, measure campaign ROI, and analyze content performance across Instagram, Facebook, YouTube, and TikTok. ---- - -# Content Analytics - -Track and analyze content performance using Apify Actors to extract engagement metrics from multiple platforms. 
- -## Prerequisites -(No need to check it upfront) - -- `.env` file with `APIFY_TOKEN` -- Node.js 20.6+ (for native `--env-file` support) -- `mcpc` CLI tool: `npm install -g @apify/mcpc` - -## Workflow - -Copy this checklist and track progress: - -``` -Task Progress: -- [ ] Step 1: Identify content analytics type (select Actor) -- [ ] Step 2: Fetch Actor schema via mcpc -- [ ] Step 3: Ask user preferences (format, filename) -- [ ] Step 4: Run the analytics script -- [ ] Step 5: Summarize findings -``` - -### Step 1: Identify Content Analytics Type - -Select the appropriate Actor based on analytics needs: - -| User Need | Actor ID | Best For | -|-----------|----------|----------| -| Post engagement metrics | `apify/instagram-post-scraper` | Post performance | -| Reel performance | `apify/instagram-reel-scraper` | Reel analytics | -| Follower growth tracking | `apify/instagram-followers-count-scraper` | Growth metrics | -| Comment engagement | `apify/instagram-comment-scraper` | Comment analysis | -| Hashtag performance | `apify/instagram-hashtag-scraper` | Branded hashtags | -| Mention tracking | `apify/instagram-tagged-scraper` | Tag tracking | -| Comprehensive metrics | `apify/instagram-scraper` | Full data | -| API-based analytics | `apify/instagram-api-scraper` | API access | -| Facebook post performance | `apify/facebook-posts-scraper` | Post metrics | -| Reaction analysis | `apify/facebook-likes-scraper` | Engagement types | -| Facebook Reels metrics | `apify/facebook-reels-scraper` | Reels performance | -| Ad performance tracking | `apify/facebook-ads-scraper` | Ad analytics | -| Facebook comment analysis | `apify/facebook-comments-scraper` | Comment engagement | -| Page performance audit | `apify/facebook-pages-scraper` | Page metrics | -| YouTube video metrics | `streamers/youtube-scraper` | Video performance | -| YouTube Shorts analytics | `streamers/youtube-shorts-scraper` | Shorts performance | -| TikTok content metrics | `clockworks/tiktok-scraper` | 
TikTok analytics | - -### Step 2: Fetch Actor Schema - -Fetch the Actor's input schema and details dynamically using mcpc: - -```bash -export $(grep APIFY_TOKEN .env | xargs) && mcpc --json mcp.apify.com --header "Authorization: Bearer $APIFY_TOKEN" tools-call fetch-actor-details actor:="ACTOR_ID" | jq -r ".content" -``` - -Replace `ACTOR_ID` with the selected Actor (e.g., `apify/instagram-post-scraper`). - -This returns: -- Actor description and README -- Required and optional input parameters -- Output fields (if available) - -### Step 3: Ask User Preferences - -Before running, ask: -1. **Output format**: - - **Quick answer** - Display top few results in chat (no file saved) - - **CSV** - Full export with all fields - - **JSON** - Full export in JSON format -2. **Number of results**: Based on character of use case - -### Step 4: Run the Script - -**Quick answer (display in chat, no file):** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' -``` - -**CSV:** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' \ - --output YYYY-MM-DD_OUTPUT_FILE.csv \ - --format csv -``` - -**JSON:** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' \ - --output YYYY-MM-DD_OUTPUT_FILE.json \ - --format json -``` - -### Step 5: Summarize Findings - -After completion, report: -- Number of content pieces analyzed -- File location and name -- Key performance insights -- Suggested next steps (deeper analysis, content optimization) - - -## Error Handling - -`APIFY_TOKEN not found` - Ask user to create `.env` with `APIFY_TOKEN=your_token` -`mcpc not found` - Ask user to install `npm install -g @apify/mcpc` -`Actor not found` - Check Actor ID spelling -`Run FAILED` - Ask user to check Apify console link in error output -`Timeout` - Reduce input size 
or increase `--timeout` diff --git a/skills/apify-content-analytics/reference/scripts/run_actor.js b/skills/apify-content-analytics/reference/scripts/run_actor.js deleted file mode 100644 index 418bc07..0000000 --- a/skills/apify-content-analytics/reference/scripts/run_actor.js +++ /dev/null @@ -1,363 +0,0 @@ -#!/usr/bin/env node -/** - * Apify Actor Runner - Runs Apify actors and exports results. - * - * Usage: - * # Quick answer (display in chat, no file saved) - * node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' - * - * # Export to file - * node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' --output leads.csv --format csv - */ - -import { parseArgs } from 'node:util'; -import { writeFileSync, statSync } from 'node:fs'; - -// User-Agent for tracking skill usage in Apify analytics -const USER_AGENT = 'apify-agent-skills/apify-content-analytics-1.0.0'; - -// Parse command-line arguments -function parseCliArgs() { - const options = { - actor: { type: 'string', short: 'a' }, - input: { type: 'string', short: 'i' }, - output: { type: 'string', short: 'o' }, - format: { type: 'string', short: 'f', default: 'csv' }, - timeout: { type: 'string', short: 't', default: '600' }, - 'poll-interval': { type: 'string', default: '5' }, - help: { type: 'boolean', short: 'h' }, - }; - - const { values } = parseArgs({ options, allowPositionals: false }); - - if (values.help) { - printHelp(); - process.exit(0); - } - - if (!values.actor) { - console.error('Error: --actor is required'); - printHelp(); - process.exit(1); - } - - if (!values.input) { - console.error('Error: --input is required'); - printHelp(); - process.exit(1); - } - - return { - actor: values.actor, - input: values.input, - output: values.output, - format: values.format || 'csv', - timeout: parseInt(values.timeout, 10), - pollInterval: parseInt(values['poll-interval'], 10), - }; -} - -function printHelp() { - console.log(` -Apify Actor Runner - Run Apify actors and export 
results - -Usage: - node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' - -Options: - --actor, -a Actor ID (e.g., compass/crawler-google-places) [required] - --input, -i Actor input as JSON string [required] - --output, -o Output file path (optional - if not provided, displays quick answer) - --format, -f Output format: csv, json (default: csv) - --timeout, -t Max wait time in seconds (default: 600) - --poll-interval Seconds between status checks (default: 5) - --help, -h Show this help message - -Output Formats: - JSON (all data) --output file.json --format json - CSV (all data) --output file.csv --format csv - Quick answer (no --output) - displays top 5 in chat - -Examples: - # Quick answer - display top 5 in chat - node --env-file=.env scripts/run_actor.js \\ - --actor "compass/crawler-google-places" \\ - --input '{"searchStringsArray": ["coffee shops"], "locationQuery": "Seattle, USA"}' - - # Export all data to CSV - node --env-file=.env scripts/run_actor.js \\ - --actor "compass/crawler-google-places" \\ - --input '{"searchStringsArray": ["coffee shops"], "locationQuery": "Seattle, USA"}' \\ - --output leads.csv --format csv -`); -} - -// Start an actor run and return { runId, datasetId } -async function startActor(token, actorId, inputJson) { - // Convert "author/actor" format to "author~actor" for API compatibility - const apiActorId = actorId.replace('/', '~'); - const url = `https://api.apify.com/v2/acts/${apiActorId}/runs?token=${encodeURIComponent(token)}`; - - let data; - try { - data = JSON.parse(inputJson); - } catch (e) { - console.error(`Error: Invalid JSON input: ${e.message}`); - process.exit(1); - } - - const response = await fetch(url, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'User-Agent': `${USER_AGENT}/start_actor`, - }, - body: JSON.stringify(data), - }); - - if (response.status === 404) { - console.error(`Error: Actor '${actorId}' not found`); - process.exit(1); - } - - if (!response.ok) { 
- const text = await response.text(); - console.error(`Error: API request failed (${response.status}): ${text}`); - process.exit(1); - } - - const result = await response.json(); - return { - runId: result.data.id, - datasetId: result.data.defaultDatasetId, - }; -} - -// Poll run status until complete or timeout -async function pollUntilComplete(token, runId, timeout, interval) { - const url = `https://api.apify.com/v2/actor-runs/${runId}?token=${encodeURIComponent(token)}`; - const startTime = Date.now(); - let lastStatus = null; - - while (true) { - const response = await fetch(url); - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to get run status: ${text}`); - process.exit(1); - } - - const result = await response.json(); - const status = result.data.status; - - // Only print when status changes - if (status !== lastStatus) { - console.log(`Status: ${status}`); - lastStatus = status; - } - - if (['SUCCEEDED', 'FAILED', 'ABORTED', 'TIMED-OUT'].includes(status)) { - return status; - } - - const elapsed = (Date.now() - startTime) / 1000; - if (elapsed > timeout) { - console.error(`Warning: Timeout after ${timeout}s, actor still running`); - return 'TIMED-OUT'; - } - - await sleep(interval * 1000); - } -} - -// Download dataset items -async function downloadResults(token, datasetId, outputPath, format) { - const url = `https://api.apify.com/v2/datasets/${datasetId}/items?token=${encodeURIComponent(token)}&format=json`; - - const response = await fetch(url, { - headers: { - 'User-Agent': `${USER_AGENT}/download_${format}`, - }, - }); - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to download results: ${text}`); - process.exit(1); - } - - const data = await response.json(); - - if (format === 'json') { - writeFileSync(outputPath, JSON.stringify(data, null, 2)); - } else { - // CSV output - if (data.length > 0) { - const fieldnames = Object.keys(data[0]); - const csvLines = 
[fieldnames.join(',')]; - - for (const row of data) { - const values = fieldnames.map((key) => { - let value = row[key]; - - // Truncate long text fields - if (typeof value === 'string' && value.length > 200) { - value = value.slice(0, 200) + '...'; - } else if (Array.isArray(value) || (typeof value === 'object' && value !== null)) { - value = JSON.stringify(value) || ''; - } - - // CSV escape: wrap in quotes if contains comma, quote, or newline - if (value === null || value === undefined) { - return ''; - } - const strValue = String(value); - if (strValue.includes(',') || strValue.includes('"') || strValue.includes('\n')) { - return `"${strValue.replace(/"/g, '""')}"`; - } - return strValue; - }); - csvLines.push(values.join(',')); - } - - writeFileSync(outputPath, csvLines.join('\n')); - } else { - writeFileSync(outputPath, ''); - } - } - - console.log(`Saved to: ${outputPath}`); -} - -// Display top 5 results in chat format -async function displayQuickAnswer(token, datasetId) { - const url = `https://api.apify.com/v2/datasets/${datasetId}/items?token=${encodeURIComponent(token)}&format=json`; - - const response = await fetch(url, { - headers: { - 'User-Agent': `${USER_AGENT}/quick_answer`, - }, - }); - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to download results: ${text}`); - process.exit(1); - } - - const data = await response.json(); - const total = data.length; - - if (total === 0) { - console.log('\nNo results found.'); - return; - } - - // Display top 5 - console.log(`\n${'='.repeat(60)}`); - console.log(`TOP 5 RESULTS (of ${total} total)`); - console.log('='.repeat(60)); - - for (let i = 0; i < Math.min(5, data.length); i++) { - const item = data[i]; - console.log(`\n--- Result ${i + 1} ---`); - - for (const [key, value] of Object.entries(item)) { - let displayValue = value; - - // Truncate long values - if (typeof value === 'string' && value.length > 100) { - displayValue = value.slice(0, 100) + '...'; - } 
else if (Array.isArray(value) || (typeof value === 'object' && value !== null)) { - const jsonStr = JSON.stringify(value); - displayValue = jsonStr.length > 100 ? jsonStr.slice(0, 100) + '...' : jsonStr; - } - - console.log(` ${key}: ${displayValue}`); - } - } - - console.log(`\n${'='.repeat(60)}`); - if (total > 5) { - console.log(`Showing 5 of ${total} results.`); - } - console.log(`Full data available at: https://console.apify.com/storage/datasets/${datasetId}`); - console.log('='.repeat(60)); -} - -// Report summary of downloaded data -function reportSummary(outputPath, format) { - const stats = statSync(outputPath); - const size = stats.size; - - let count; - try { - const content = require('fs').readFileSync(outputPath, 'utf-8'); - if (format === 'json') { - const data = JSON.parse(content); - count = Array.isArray(data) ? data.length : 1; - } else { - // CSV - count lines minus header - const lines = content.split('\n').filter((line) => line.trim()); - count = Math.max(0, lines.length - 1); - } - } catch { - count = 'unknown'; - } - - console.log(`Records: ${count}`); - console.log(`Size: ${size.toLocaleString()} bytes`); -} - -// Helper: sleep for ms -function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); -} - -// Main function -async function main() { - // Parse args first so --help works without token - const args = parseCliArgs(); - - // Check for APIFY_TOKEN - const token = process.env.APIFY_TOKEN; - if (!token) { - console.error('Error: APIFY_TOKEN not found in .env file'); - console.error(''); - console.error('Add your token to .env file:'); - console.error(' APIFY_TOKEN=your_token_here'); - console.error(''); - console.error('Get your token: https://console.apify.com/account/integrations'); - process.exit(1); - } - - // Start the actor run - console.log(`Starting actor: ${args.actor}`); - const { runId, datasetId } = await startActor(token, args.actor, args.input); - console.log(`Run ID: ${runId}`); - console.log(`Dataset 
ID: ${datasetId}`); - - // Poll for completion - const status = await pollUntilComplete(token, runId, args.timeout, args.pollInterval); - - if (status !== 'SUCCEEDED') { - console.error(`Error: Actor run ${status}`); - console.error(`Details: https://console.apify.com/actors/runs/${runId}`); - process.exit(1); - } - - // Determine output mode - if (args.output) { - // File output mode - await downloadResults(token, datasetId, args.output, args.format); - reportSummary(args.output, args.format); - } else { - // Quick answer mode - display in chat - await displayQuickAnswer(token, datasetId); - } -} - -main().catch((err) => { - console.error(`Error: ${err.message}`); - process.exit(1); -}); diff --git a/skills/apify-ecommerce/SKILL.md b/skills/apify-ecommerce/SKILL.md deleted file mode 100644 index 9735163..0000000 --- a/skills/apify-ecommerce/SKILL.md +++ /dev/null @@ -1,263 +0,0 @@ ---- -name: apify-ecommerce -description: Scrape e-commerce data for pricing intelligence, customer reviews, and seller discovery across Amazon, Walmart, eBay, IKEA, and 50+ marketplaces. Use when user asks to monitor prices, track competitors, analyze reviews, research products, or find sellers. ---- - -# E-commerce Data Extraction - -Extract product data, prices, reviews, and seller information from any e-commerce platform using Apify's E-commerce Scraping Tool. - -## Prerequisites - -- `.env` file with `APIFY_TOKEN` (at `~/.claude/.env`) -- Node.js 20.6+ (for native `--env-file` support) - -## Workflow Selection - -| User Need | Workflow | Best For | -|-----------|----------|----------| -| Track prices, compare products | Workflow 1: Products & Pricing | Price monitoring, MAP compliance, competitor analysis. Add AI summary for insights. 
| -| Analyze reviews (sentiment or quality) | Workflow 2: Reviews | Brand perception, customer sentiment, quality issues, defect patterns | -| Find sellers across stores | Workflow 3: Sellers | Unauthorized resellers, vendor discovery via Google Shopping | - -## Progress Tracking - -``` -Task Progress: -- [ ] Step 1: Select workflow and determine data source -- [ ] Step 2: Configure Actor input -- [ ] Step 3: Ask user preferences (format, filename) -- [ ] Step 4: Run the extraction script -- [ ] Step 5: Summarize results -``` - ---- - -## Workflow 1: Products & Pricing - -**Use case:** Extract product data, prices, and stock status. Track competitor prices, detect MAP violations, benchmark products, or research markets. - -**Best for:** Pricing analysts, product managers, market researchers. - -### Input Options - -| Input Type | Field | Description | -|------------|-------|-------------| -| Product URLs | `detailsUrls` | Direct URLs to product pages (use object format) | -| Category URLs | `listingUrls` | URLs to category/search result pages | -| Keyword Search | `keyword` + `marketplaces` | Search term across selected marketplaces | - -### Example - Product URLs -```json -{ - "detailsUrls": [ - {"url": "https://www.amazon.com/dp/B09V3KXJPB"}, - {"url": "https://www.walmart.com/ip/123456789"} - ], - "additionalProperties": true -} -``` - -### Example - Keyword Search -```json -{ - "keyword": "Samsung Galaxy S24", - "marketplaces": ["www.amazon.com", "www.walmart.com"], - "additionalProperties": true, - "maxProductResults": 50 -} -``` - -### Optional: AI Summary - -Add these fields to get AI-generated insights: - -| Field | Description | -|-------|-------------| -| `fieldsToAnalyze` | Data points to analyze: `["name", "offers", "brand", "description"]` | -| `customPrompt` | Custom analysis instructions | - -**Example with AI summary:** -```json -{ - "keyword": "robot vacuum", - "marketplaces": ["www.amazon.com"], - "maxProductResults": 50, - "additionalProperties": 
true, - "fieldsToAnalyze": ["name", "offers", "brand"], - "customPrompt": "Summarize price range and identify top brands" -} -``` - -### Output Fields -- `name` - Product name -- `url` - Product URL -- `offers.price` - Current price -- `offers.priceCurrency` - Currency code (may vary by seller region) -- `brand.slogan` - Brand name (nested in object) -- `image` - Product image URL -- Additional seller/stock info when `additionalProperties: true` - -> **Note:** Currency may vary in results even for US searches, as prices reflect different seller regions. - ---- - -## Workflow 2: Customer Reviews - -**Use case:** Extract reviews for sentiment analysis, brand perception monitoring, or quality issue detection. - -**Best for:** Brand managers, customer experience teams, QA teams, product managers. - -### Input Options - -| Input Type | Field | Description | -|------------|-------|-------------| -| Product URLs | `reviewListingUrls` | Product pages to extract reviews from | -| Keyword Search | `keywordReviews` + `marketplacesReviews` | Search for product reviews by keyword | - -### Example - Extract Reviews from Product -```json -{ - "reviewListingUrls": [ - {"url": "https://www.amazon.com/dp/B09V3KXJPB"} - ], - "sortReview": "Most recent", - "additionalReviewProperties": true, - "maxReviewResults": 500 -} -``` - -### Example - Keyword Search -```json -{ - "keywordReviews": "wireless earbuds", - "marketplacesReviews": ["www.amazon.com"], - "sortReview": "Most recent", - "additionalReviewProperties": true, - "maxReviewResults": 200 -} -``` - -### Sort Options -- `Most recent` - Latest reviews first (recommended) -- `Most relevant` - Platform default relevance -- `Most helpful` - Highest voted reviews -- `Highest rated` - 5-star reviews first -- `Lowest rated` - 1-star reviews first - -> **Note:** The `sortReview: "Lowest rated"` option may not work consistently across all marketplaces. For quality analysis, collect a large sample and filter by rating in post-processing. 
- -### Quality Analysis Tips -- Set high `maxReviewResults` for statistical significance -- Look for recurring keywords: "broke", "defect", "quality", "returned" -- Filter results by rating if sorting doesn't work as expected -- Cross-reference with competitor products for benchmarking - ---- - -## Workflow 3: Seller Intelligence - -**Use case:** Find sellers across stores, discover unauthorized resellers, evaluate vendor options. - -**Best for:** Brand protection teams, procurement, supply chain managers. - -> **Note:** This workflow uses Google Shopping to find sellers across stores. Direct seller profile URLs are not reliably supported. - -### Input Configuration -```json -{ - "googleShoppingSearchKeyword": "Nike Air Max 90", - "scrapeSellersFromGoogleShopping": true, - "countryCode": "us", - "maxGoogleShoppingSellersPerProduct": 20, - "maxGoogleShoppingResults": 100 -} -``` - -### Options -| Field | Description | -|-------|-------------| -| `googleShoppingSearchKeyword` | Product name to search | -| `scrapeSellersFromGoogleShopping` | Set to `true` to extract sellers | -| `scrapeProductsFromGoogleShopping` | Set to `true` to also extract product details | -| `countryCode` | Target country (e.g., `us`, `uk`, `de`) | -| `maxGoogleShoppingSellersPerProduct` | Max sellers per product | -| `maxGoogleShoppingResults` | Total result limit | - ---- - -## Supported Marketplaces - -### Amazon (20+ regions) -`www.amazon.com`, `www.amazon.co.uk`, `www.amazon.de`, `www.amazon.fr`, `www.amazon.it`, `www.amazon.es`, `www.amazon.ca`, `www.amazon.com.au`, `www.amazon.co.jp`, `www.amazon.in`, `www.amazon.com.br`, `www.amazon.com.mx`, `www.amazon.nl`, `www.amazon.pl`, `www.amazon.se`, `www.amazon.ae`, `www.amazon.sa`, `www.amazon.sg`, `www.amazon.com.tr`, `www.amazon.eg` - -### Major US Retailers -`www.walmart.com`, `www.costco.com`, `www.costco.ca`, `www.homedepot.com` - -### European Retailers -`allegro.pl`, `allegro.cz`, `allegro.sk`, `www.alza.cz`, `www.alza.sk`, 
`www.alza.de`, `www.alza.at`, `www.alza.hu`, `www.kaufland.de`, `www.kaufland.pl`, `www.kaufland.cz`, `www.kaufland.sk`, `www.kaufland.at`, `www.kaufland.fr`, `www.kaufland.it`, `www.cdiscount.com` - -### IKEA (40+ country/language combinations) -Supports all major IKEA regional sites with multiple language options. - -### Google Shopping -Use for seller discovery across multiple stores. - ---- - -## Running the Extraction - -### Step 1: Set Skill Path -```bash -SKILL_PATH=~/.claude/skills/apify-ecommerce -``` - -### Step 2: Run Script - -**Quick answer (display in chat):** -```bash -node --env-file=~/.claude/.env $SKILL_PATH/reference/scripts/run_actor.js \ - --actor "apify/e-commerce-scraping-tool" \ - --input 'JSON_INPUT' -``` - -**CSV export:** -```bash -node --env-file=~/.claude/.env $SKILL_PATH/reference/scripts/run_actor.js \ - --actor "apify/e-commerce-scraping-tool" \ - --input 'JSON_INPUT' \ - --output YYYY-MM-DD_filename.csv \ - --format csv -``` - -**JSON export:** -```bash -node --env-file=~/.claude/.env $SKILL_PATH/reference/scripts/run_actor.js \ - --actor "apify/e-commerce-scraping-tool" \ - --input 'JSON_INPUT' \ - --output YYYY-MM-DD_filename.json \ - --format json -``` - -### Step 3: Summarize Results - -Report: -- Number of items extracted -- File location (if exported) -- Key insights based on workflow: - - **Products:** Price range, outliers, MAP violations - - **Reviews:** Average rating, sentiment trends, quality issues - - **Sellers:** Seller count, unauthorized sellers found - ---- - -## Error Handling - -| Error | Solution | -|-------|----------| -| `APIFY_TOKEN not found` | Ensure `~/.claude/.env` contains `APIFY_TOKEN=your_token` | -| `Actor not found` | Verify Actor ID: `apify/e-commerce-scraping-tool` | -| `Run FAILED` | Check Apify console link in error output | -| `Timeout` | Reduce `maxProductResults` or increase `--timeout` | -| `No results` | Verify URLs are valid and accessible | -| `Invalid marketplace` | Check marketplace 
value matches supported list exactly | diff --git a/skills/apify-ecommerce/reference/scripts/package.json b/skills/apify-ecommerce/reference/scripts/package.json deleted file mode 100644 index 3dbc1ca..0000000 --- a/skills/apify-ecommerce/reference/scripts/package.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "type": "module" -} diff --git a/skills/apify-ecommerce/reference/scripts/run_actor.js b/skills/apify-ecommerce/reference/scripts/run_actor.js deleted file mode 100644 index 9c67d2e..0000000 --- a/skills/apify-ecommerce/reference/scripts/run_actor.js +++ /dev/null @@ -1,369 +0,0 @@ -#!/usr/bin/env node -/** - * Apify Actor Runner - Runs Apify actors and exports results. - * - * Usage: - * # Quick answer (display in chat, no file saved) - * node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' - * - * # Export to file - * node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' --output data.csv --format csv - */ - -import { parseArgs } from 'node:util'; -import { writeFileSync, statSync } from 'node:fs'; - -// User-Agent for tracking skill usage in Apify analytics -const USER_AGENT = 'apify-agent-skills/apify-ecommerce-1.0.0'; - -// Parse command-line arguments -function parseCliArgs() { - const options = { - actor: { type: 'string', short: 'a' }, - input: { type: 'string', short: 'i' }, - output: { type: 'string', short: 'o' }, - format: { type: 'string', short: 'f', default: 'csv' }, - timeout: { type: 'string', short: 't', default: '600' }, - 'poll-interval': { type: 'string', default: '5' }, - help: { type: 'boolean', short: 'h' }, - }; - - const { values } = parseArgs({ options, allowPositionals: false }); - - if (values.help) { - printHelp(); - process.exit(0); - } - - if (!values.actor) { - console.error('Error: --actor is required'); - printHelp(); - process.exit(1); - } - - if (!values.input) { - console.error('Error: --input is required'); - printHelp(); - process.exit(1); - } - - return { - actor: values.actor, - input: 
values.input, - output: values.output, - format: values.format || 'csv', - timeout: parseInt(values.timeout, 10), - pollInterval: parseInt(values['poll-interval'], 10), - }; -} - -function printHelp() { - console.log(` -Apify Actor Runner - Run Apify actors and export results - -Usage: - node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' - -Options: - --actor, -a Actor ID (e.g., apify/e-commerce-scraping-tool) [required] - --input, -i Actor input as JSON string [required] - --output, -o Output file path (optional - if not provided, displays quick answer) - --format, -f Output format: csv, json (default: csv) - --timeout, -t Max wait time in seconds (default: 600) - --poll-interval Seconds between status checks (default: 5) - --help, -h Show this help message - -Output Formats: - JSON (all data) --output file.json --format json - CSV (all data) --output file.csv --format csv - Quick answer (no --output) - displays top 5 in chat - -Examples: - # Quick answer - display top 5 products - node --env-file=.env scripts/run_actor.js \\ - --actor "apify/e-commerce-scraping-tool" \\ - --input '{"keyword": "bluetooth headphones", "marketplaces": ["www.amazon.com"], "maxProductResults": 10}' - - # Export prices to CSV - node --env-file=.env scripts/run_actor.js \\ - --actor "apify/e-commerce-scraping-tool" \\ - --input '{"detailsUrls": ["https://amazon.com/dp/B09V3KXJPB"]}' \\ - --output prices.csv --format csv - - # Export reviews to JSON - node --env-file=.env scripts/run_actor.js \\ - --actor "apify/e-commerce-scraping-tool" \\ - --input '{"reviewListingUrls": ["https://amazon.com/dp/B09V3KXJPB"], "maxReviewResults": 100}' \\ - --output reviews.json --format json -`); -} - -// Start an actor run and return { runId, datasetId } -async function startActor(token, actorId, inputJson) { - // Convert "author/actor" format to "author~actor" for API compatibility - const apiActorId = actorId.replace('/', '~'); - const url = 
`https://api.apify.com/v2/acts/${apiActorId}/runs?token=${encodeURIComponent(token)}`; - - let data; - try { - data = JSON.parse(inputJson); - } catch (e) { - console.error(`Error: Invalid JSON input: ${e.message}`); - process.exit(1); - } - - const response = await fetch(url, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'User-Agent': `${USER_AGENT}/start_actor`, - }, - body: JSON.stringify(data), - }); - - if (response.status === 404) { - console.error(`Error: Actor '${actorId}' not found`); - process.exit(1); - } - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: API request failed (${response.status}): ${text}`); - process.exit(1); - } - - const result = await response.json(); - return { - runId: result.data.id, - datasetId: result.data.defaultDatasetId, - }; -} - -// Poll run status until complete or timeout -async function pollUntilComplete(token, runId, timeout, interval) { - const url = `https://api.apify.com/v2/actor-runs/${runId}?token=${encodeURIComponent(token)}`; - const startTime = Date.now(); - let lastStatus = null; - - while (true) { - const response = await fetch(url); - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to get run status: ${text}`); - process.exit(1); - } - - const result = await response.json(); - const status = result.data.status; - - // Only print when status changes - if (status !== lastStatus) { - console.log(`Status: ${status}`); - lastStatus = status; - } - - if (['SUCCEEDED', 'FAILED', 'ABORTED', 'TIMED-OUT'].includes(status)) { - return status; - } - - const elapsed = (Date.now() - startTime) / 1000; - if (elapsed > timeout) { - console.error(`Warning: Timeout after ${timeout}s, actor still running`); - return 'TIMED-OUT'; - } - - await sleep(interval * 1000); - } -} - -// Download dataset items -async function downloadResults(token, datasetId, outputPath, format) { - const url = 
`https://api.apify.com/v2/datasets/${datasetId}/items?token=${encodeURIComponent(token)}&format=json`; - - const response = await fetch(url, { - headers: { - 'User-Agent': `${USER_AGENT}/download_${format}`, - }, - }); - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to download results: ${text}`); - process.exit(1); - } - - const data = await response.json(); - - if (format === 'json') { - writeFileSync(outputPath, JSON.stringify(data, null, 2)); - } else { - // CSV output - if (data.length > 0) { - const fieldnames = Object.keys(data[0]); - const csvLines = [fieldnames.join(',')]; - - for (const row of data) { - const values = fieldnames.map((key) => { - let value = row[key]; - - // Truncate long text fields - if (typeof value === 'string' && value.length > 200) { - value = value.slice(0, 200) + '...'; - } else if (Array.isArray(value) || (typeof value === 'object' && value !== null)) { - value = JSON.stringify(value) || ''; - } - - // CSV escape: wrap in quotes if contains comma, quote, or newline - if (value === null || value === undefined) { - return ''; - } - const strValue = String(value); - if (strValue.includes(',') || strValue.includes('"') || strValue.includes('\n')) { - return `"${strValue.replace(/"/g, '""')}"`; - } - return strValue; - }); - csvLines.push(values.join(',')); - } - - writeFileSync(outputPath, csvLines.join('\n')); - } else { - writeFileSync(outputPath, ''); - } - } - - console.log(`Saved to: ${outputPath}`); -} - -// Display top 5 results in chat format -async function displayQuickAnswer(token, datasetId) { - const url = `https://api.apify.com/v2/datasets/${datasetId}/items?token=${encodeURIComponent(token)}&format=json`; - - const response = await fetch(url, { - headers: { - 'User-Agent': `${USER_AGENT}/quick_answer`, - }, - }); - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to download results: ${text}`); - process.exit(1); - } - - const data = 
await response.json(); - const total = data.length; - - if (total === 0) { - console.log('\nNo results found.'); - return; - } - - // Display top 5 - console.log(`\n${'='.repeat(60)}`); - console.log(`TOP 5 RESULTS (of ${total} total)`); - console.log('='.repeat(60)); - - for (let i = 0; i < Math.min(5, data.length); i++) { - const item = data[i]; - console.log(`\n--- Result ${i + 1} ---`); - - for (const [key, value] of Object.entries(item)) { - let displayValue = value; - - // Truncate long values - if (typeof value === 'string' && value.length > 100) { - displayValue = value.slice(0, 100) + '...'; - } else if (Array.isArray(value) || (typeof value === 'object' && value !== null)) { - const jsonStr = JSON.stringify(value); - displayValue = jsonStr.length > 100 ? jsonStr.slice(0, 100) + '...' : jsonStr; - } - - console.log(` ${key}: ${displayValue}`); - } - } - - console.log(`\n${'='.repeat(60)}`); - if (total > 5) { - console.log(`Showing 5 of ${total} results.`); - } - console.log(`Full data available at: https://console.apify.com/storage/datasets/${datasetId}`); - console.log('='.repeat(60)); -} - -// Report summary of downloaded data -function reportSummary(outputPath, format) { - const stats = statSync(outputPath); - const size = stats.size; - - let count; - try { - const content = require('fs').readFileSync(outputPath, 'utf-8'); - if (format === 'json') { - const data = JSON.parse(content); - count = Array.isArray(data) ? 
data.length : 1; - } else { - // CSV - count lines minus header - const lines = content.split('\n').filter((line) => line.trim()); - count = Math.max(0, lines.length - 1); - } - } catch { - count = 'unknown'; - } - - console.log(`Records: ${count}`); - console.log(`Size: ${size.toLocaleString()} bytes`); -} - -// Helper: sleep for ms -function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); -} - -// Main function -async function main() { - // Parse args first so --help works without token - const args = parseCliArgs(); - - // Check for APIFY_TOKEN - const token = process.env.APIFY_TOKEN; - if (!token) { - console.error('Error: APIFY_TOKEN not found in .env file'); - console.error(''); - console.error('Add your token to .env file:'); - console.error(' APIFY_TOKEN=your_token_here'); - console.error(''); - console.error('Get your token: https://console.apify.com/account/integrations'); - process.exit(1); - } - - // Start the actor run - console.log(`Starting actor: ${args.actor}`); - const { runId, datasetId } = await startActor(token, args.actor, args.input); - console.log(`Run ID: ${runId}`); - console.log(`Dataset ID: ${datasetId}`); - - // Poll for completion - const status = await pollUntilComplete(token, runId, args.timeout, args.pollInterval); - - if (status !== 'SUCCEEDED') { - console.error(`Error: Actor run ${status}`); - console.error(`Details: https://console.apify.com/actors/runs/${runId}`); - process.exit(1); - } - - // Determine output mode - if (args.output) { - // File output mode - await downloadResults(token, datasetId, args.output, args.format); - reportSummary(args.output, args.format); - } else { - // Quick answer mode - display in chat - await displayQuickAnswer(token, datasetId); - } -} - -main().catch((err) => { - console.error(`Error: ${err.message}`); - process.exit(1); -}); diff --git a/skills/apify-influencer-discovery/SKILL.md b/skills/apify-influencer-discovery/SKILL.md deleted file mode 100644 index 
12404a0..0000000 --- a/skills/apify-influencer-discovery/SKILL.md +++ /dev/null @@ -1,118 +0,0 @@ ---- -name: apify-influencer-discovery -description: Find and evaluate influencers for brand partnerships, verify authenticity, and track collaboration performance across Instagram, Facebook, YouTube, and TikTok. ---- - -# Influencer Discovery - -Discover and analyze influencers across multiple platforms using Apify Actors. - -## Prerequisites -(No need to check it upfront) - -- `.env` file with `APIFY_TOKEN` -- Node.js 20.6+ (for native `--env-file` support) -- `mcpc` CLI tool: `npm install -g @apify/mcpc` - -## Workflow - -Copy this checklist and track progress: - -``` -Task Progress: -- [ ] Step 1: Determine discovery source (select Actor) -- [ ] Step 2: Fetch Actor schema via mcpc -- [ ] Step 3: Ask user preferences (format, filename) -- [ ] Step 4: Run the discovery script -- [ ] Step 5: Summarize results -``` - -### Step 1: Determine Discovery Source - -Select the appropriate Actor based on user needs: - -| User Need | Actor ID | Best For | -|-----------|----------|----------| -| Influencer profiles | `apify/instagram-profile-scraper` | Profile metrics, bio, follower counts | -| Find by hashtag | `apify/instagram-hashtag-scraper` | Discover influencers using specific hashtags | -| Reel engagement | `apify/instagram-reel-scraper` | Analyze reel performance and engagement | -| Discovery by niche | `apify/instagram-search-scraper` | Search for influencers by keyword/niche | -| Brand mentions | `apify/instagram-tagged-scraper` | Track who tags brands/products | -| Comprehensive data | `apify/instagram-scraper` | Full profile, posts, comments analysis | -| API-based discovery | `apify/instagram-api-scraper` | Fast API-based data extraction | -| Engagement analysis | `apify/export-instagram-comments-posts` | Export comments for sentiment analysis | -| Facebook content | `apify/facebook-posts-scraper` | Analyze Facebook post performance | -| Micro-influencers | 
`apify/facebook-groups-scraper` | Find influencers in niche groups | -| Influential pages | `apify/facebook-search-scraper` | Search for influential pages | -| YouTube creators | `streamers/youtube-channel-scraper` | Channel metrics and subscriber data | -| TikTok influencers | `clockworks/tiktok-scraper` | Comprehensive TikTok data extraction | -| TikTok (free) | `clockworks/free-tiktok-scraper` | Free TikTok data extractor | -| Live streamers | `clockworks/tiktok-live-scraper` | Discover live streaming influencers | - -### Step 2: Fetch Actor Schema - -Fetch the Actor's input schema and details dynamically using mcpc: - -```bash -export $(grep APIFY_TOKEN .env | xargs) && mcpc --json mcp.apify.com --header "Authorization: Bearer $APIFY_TOKEN" tools-call fetch-actor-details actor:="ACTOR_ID" | jq -r ".content" -``` - -Replace `ACTOR_ID` with the selected Actor (e.g., `apify/instagram-profile-scraper`). - -This returns: -- Actor description and README -- Required and optional input parameters -- Output fields (if available) - -### Step 3: Ask User Preferences - -Before running, ask: -1. **Output format**: - - **Quick answer** - Display top few results in chat (no file saved) - - **CSV** - Full export with all fields - - **JSON** - Full export in JSON format -2. 
**Number of results**: Based on character of use case - -### Step 4: Run the Script - -**Quick answer (display in chat, no file):** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' -``` - -**CSV:** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' \ - --output YYYY-MM-DD_OUTPUT_FILE.csv \ - --format csv -``` - -**JSON:** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' \ - --output YYYY-MM-DD_OUTPUT_FILE.json \ - --format json -``` - -### Step 5: Summarize Results - -After completion, report: -- Number of influencers found -- File location and name -- Key metrics available (followers, engagement rate, etc.) -- Suggested next steps (filtering, outreach, deeper analysis) - - -## Error Handling - -`APIFY_TOKEN not found` - Ask user to create `.env` with `APIFY_TOKEN=your_token` -`mcpc not found` - Ask user to install `npm install -g @apify/mcpc` -`Actor not found` - Check Actor ID spelling -`Run FAILED` - Ask user to check Apify console link in error output -`Timeout` - Reduce input size or increase `--timeout` diff --git a/skills/apify-influencer-discovery/reference/scripts/run_actor.js b/skills/apify-influencer-discovery/reference/scripts/run_actor.js deleted file mode 100644 index e600ded..0000000 --- a/skills/apify-influencer-discovery/reference/scripts/run_actor.js +++ /dev/null @@ -1,363 +0,0 @@ -#!/usr/bin/env node -/** - * Apify Actor Runner - Runs Apify actors and exports results. 
- * - * Usage: - * # Quick answer (display in chat, no file saved) - * node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' - * - * # Export to file - * node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' --output leads.csv --format csv - */ - -import { parseArgs } from 'node:util'; -import { writeFileSync, statSync } from 'node:fs'; - -// User-Agent for tracking skill usage in Apify analytics -const USER_AGENT = 'apify-agent-skills/apify-influencer-discovery-1.0.0'; - -// Parse command-line arguments -function parseCliArgs() { - const options = { - actor: { type: 'string', short: 'a' }, - input: { type: 'string', short: 'i' }, - output: { type: 'string', short: 'o' }, - format: { type: 'string', short: 'f', default: 'csv' }, - timeout: { type: 'string', short: 't', default: '600' }, - 'poll-interval': { type: 'string', default: '5' }, - help: { type: 'boolean', short: 'h' }, - }; - - const { values } = parseArgs({ options, allowPositionals: false }); - - if (values.help) { - printHelp(); - process.exit(0); - } - - if (!values.actor) { - console.error('Error: --actor is required'); - printHelp(); - process.exit(1); - } - - if (!values.input) { - console.error('Error: --input is required'); - printHelp(); - process.exit(1); - } - - return { - actor: values.actor, - input: values.input, - output: values.output, - format: values.format || 'csv', - timeout: parseInt(values.timeout, 10), - pollInterval: parseInt(values['poll-interval'], 10), - }; -} - -function printHelp() { - console.log(` -Apify Actor Runner - Run Apify actors and export results - -Usage: - node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' - -Options: - --actor, -a Actor ID (e.g., compass/crawler-google-places) [required] - --input, -i Actor input as JSON string [required] - --output, -o Output file path (optional - if not provided, displays quick answer) - --format, -f Output format: csv, json (default: csv) - --timeout, -t Max wait time in 
seconds (default: 600) - --poll-interval Seconds between status checks (default: 5) - --help, -h Show this help message - -Output Formats: - JSON (all data) --output file.json --format json - CSV (all data) --output file.csv --format csv - Quick answer (no --output) - displays top 5 in chat - -Examples: - # Quick answer - display top 5 in chat - node --env-file=.env scripts/run_actor.js \\ - --actor "compass/crawler-google-places" \\ - --input '{"searchStringsArray": ["coffee shops"], "locationQuery": "Seattle, USA"}' - - # Export all data to CSV - node --env-file=.env scripts/run_actor.js \\ - --actor "compass/crawler-google-places" \\ - --input '{"searchStringsArray": ["coffee shops"], "locationQuery": "Seattle, USA"}' \\ - --output leads.csv --format csv -`); -} - -// Start an actor run and return { runId, datasetId } -async function startActor(token, actorId, inputJson) { - // Convert "author/actor" format to "author~actor" for API compatibility - const apiActorId = actorId.replace('/', '~'); - const url = `https://api.apify.com/v2/acts/${apiActorId}/runs?token=${encodeURIComponent(token)}`; - - let data; - try { - data = JSON.parse(inputJson); - } catch (e) { - console.error(`Error: Invalid JSON input: ${e.message}`); - process.exit(1); - } - - const response = await fetch(url, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'User-Agent': `${USER_AGENT}/start_actor`, - }, - body: JSON.stringify(data), - }); - - if (response.status === 404) { - console.error(`Error: Actor '${actorId}' not found`); - process.exit(1); - } - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: API request failed (${response.status}): ${text}`); - process.exit(1); - } - - const result = await response.json(); - return { - runId: result.data.id, - datasetId: result.data.defaultDatasetId, - }; -} - -// Poll run status until complete or timeout -async function pollUntilComplete(token, runId, timeout, interval) { - const url = 
`https://api.apify.com/v2/actor-runs/${runId}?token=${encodeURIComponent(token)}`; - const startTime = Date.now(); - let lastStatus = null; - - while (true) { - const response = await fetch(url); - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to get run status: ${text}`); - process.exit(1); - } - - const result = await response.json(); - const status = result.data.status; - - // Only print when status changes - if (status !== lastStatus) { - console.log(`Status: ${status}`); - lastStatus = status; - } - - if (['SUCCEEDED', 'FAILED', 'ABORTED', 'TIMED-OUT'].includes(status)) { - return status; - } - - const elapsed = (Date.now() - startTime) / 1000; - if (elapsed > timeout) { - console.error(`Warning: Timeout after ${timeout}s, actor still running`); - return 'TIMED-OUT'; - } - - await sleep(interval * 1000); - } -} - -// Download dataset items -async function downloadResults(token, datasetId, outputPath, format) { - const url = `https://api.apify.com/v2/datasets/${datasetId}/items?token=${encodeURIComponent(token)}&format=json`; - - const response = await fetch(url, { - headers: { - 'User-Agent': `${USER_AGENT}/download_${format}`, - }, - }); - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to download results: ${text}`); - process.exit(1); - } - - const data = await response.json(); - - if (format === 'json') { - writeFileSync(outputPath, JSON.stringify(data, null, 2)); - } else { - // CSV output - if (data.length > 0) { - const fieldnames = Object.keys(data[0]); - const csvLines = [fieldnames.join(',')]; - - for (const row of data) { - const values = fieldnames.map((key) => { - let value = row[key]; - - // Truncate long text fields - if (typeof value === 'string' && value.length > 200) { - value = value.slice(0, 200) + '...'; - } else if (Array.isArray(value) || (typeof value === 'object' && value !== null)) { - value = JSON.stringify(value) || ''; - } - - // CSV escape: wrap 
in quotes if contains comma, quote, or newline - if (value === null || value === undefined) { - return ''; - } - const strValue = String(value); - if (strValue.includes(',') || strValue.includes('"') || strValue.includes('\n')) { - return `"${strValue.replace(/"/g, '""')}"`; - } - return strValue; - }); - csvLines.push(values.join(',')); - } - - writeFileSync(outputPath, csvLines.join('\n')); - } else { - writeFileSync(outputPath, ''); - } - } - - console.log(`Saved to: ${outputPath}`); -} - -// Display top 5 results in chat format -async function displayQuickAnswer(token, datasetId) { - const url = `https://api.apify.com/v2/datasets/${datasetId}/items?token=${encodeURIComponent(token)}&format=json`; - - const response = await fetch(url, { - headers: { - 'User-Agent': `${USER_AGENT}/quick_answer`, - }, - }); - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to download results: ${text}`); - process.exit(1); - } - - const data = await response.json(); - const total = data.length; - - if (total === 0) { - console.log('\nNo results found.'); - return; - } - - // Display top 5 - console.log(`\n${'='.repeat(60)}`); - console.log(`TOP 5 RESULTS (of ${total} total)`); - console.log('='.repeat(60)); - - for (let i = 0; i < Math.min(5, data.length); i++) { - const item = data[i]; - console.log(`\n--- Result ${i + 1} ---`); - - for (const [key, value] of Object.entries(item)) { - let displayValue = value; - - // Truncate long values - if (typeof value === 'string' && value.length > 100) { - displayValue = value.slice(0, 100) + '...'; - } else if (Array.isArray(value) || (typeof value === 'object' && value !== null)) { - const jsonStr = JSON.stringify(value); - displayValue = jsonStr.length > 100 ? jsonStr.slice(0, 100) + '...' 
: jsonStr; - } - - console.log(` ${key}: ${displayValue}`); - } - } - - console.log(`\n${'='.repeat(60)}`); - if (total > 5) { - console.log(`Showing 5 of ${total} results.`); - } - console.log(`Full data available at: https://console.apify.com/storage/datasets/${datasetId}`); - console.log('='.repeat(60)); -} - -// Report summary of downloaded data -function reportSummary(outputPath, format) { - const stats = statSync(outputPath); - const size = stats.size; - - let count; - try { - const content = require('fs').readFileSync(outputPath, 'utf-8'); - if (format === 'json') { - const data = JSON.parse(content); - count = Array.isArray(data) ? data.length : 1; - } else { - // CSV - count lines minus header - const lines = content.split('\n').filter((line) => line.trim()); - count = Math.max(0, lines.length - 1); - } - } catch { - count = 'unknown'; - } - - console.log(`Records: ${count}`); - console.log(`Size: ${size.toLocaleString()} bytes`); -} - -// Helper: sleep for ms -function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); -} - -// Main function -async function main() { - // Parse args first so --help works without token - const args = parseCliArgs(); - - // Check for APIFY_TOKEN - const token = process.env.APIFY_TOKEN; - if (!token) { - console.error('Error: APIFY_TOKEN not found in .env file'); - console.error(''); - console.error('Add your token to .env file:'); - console.error(' APIFY_TOKEN=your_token_here'); - console.error(''); - console.error('Get your token: https://console.apify.com/account/integrations'); - process.exit(1); - } - - // Start the actor run - console.log(`Starting actor: ${args.actor}`); - const { runId, datasetId } = await startActor(token, args.actor, args.input); - console.log(`Run ID: ${runId}`); - console.log(`Dataset ID: ${datasetId}`); - - // Poll for completion - const status = await pollUntilComplete(token, runId, args.timeout, args.pollInterval); - - if (status !== 'SUCCEEDED') { - console.error(`Error: 
Actor run ${status}`); - console.error(`Details: https://console.apify.com/actors/runs/${runId}`); - process.exit(1); - } - - // Determine output mode - if (args.output) { - // File output mode - await downloadResults(token, datasetId, args.output, args.format); - reportSummary(args.output, args.format); - } else { - // Quick answer mode - display in chat - await displayQuickAnswer(token, datasetId); - } -} - -main().catch((err) => { - console.error(`Error: ${err.message}`); - process.exit(1); -}); diff --git a/skills/apify-lead-generation/SKILL.md b/skills/apify-lead-generation/SKILL.md deleted file mode 100644 index 45e1d40..0000000 --- a/skills/apify-lead-generation/SKILL.md +++ /dev/null @@ -1,120 +0,0 @@ ---- -name: apify-lead-generation -description: Generates B2B/B2C leads by scraping Google Maps, websites, Instagram, TikTok, Facebook, LinkedIn, YouTube, and Google Search. Use when user asks to find leads, prospects, businesses, build lead lists, enrich contacts, or scrape profiles for sales outreach. ---- - -# Lead Generation - -Scrape leads from multiple platforms using Apify Actors. 
- -## Prerequisites -(No need to check it upfront) - -- `.env` file with `APIFY_TOKEN` -- Node.js 20.6+ (for native `--env-file` support) -- `mcpc` CLI tool: `npm install -g @apify/mcpc` - -## Workflow - -Copy this checklist and track progress: - -``` -Task Progress: -- [ ] Step 1: Determine lead source (select Actor) -- [ ] Step 2: Fetch Actor schema via mcpc -- [ ] Step 3: Ask user preferences (format, filename) -- [ ] Step 4: Run the lead finder script -- [ ] Step 5: Summarize results -``` - -### Step 1: Determine Lead Source - -Select the appropriate Actor based on user needs: - -| User Need | Actor ID | Best For | -|-----------|----------|----------| -| Local businesses | `compass/crawler-google-places` | Restaurants, gyms, shops | -| Contact enrichment | `vdrmota/contact-info-scraper` | Emails, phones from URLs | -| Instagram profiles | `apify/instagram-profile-scraper` | Influencer discovery | -| Instagram posts/comments | `apify/instagram-scraper` | Posts, comments, hashtags, places | -| Instagram search | `apify/instagram-search-scraper` | Places, users, hashtags discovery | -| TikTok videos/hashtags | `clockworks/tiktok-scraper` | Comprehensive TikTok data extraction | -| TikTok hashtags/profiles | `clockworks/free-tiktok-scraper` | Free TikTok data extractor | -| TikTok user search | `clockworks/tiktok-user-search-scraper` | Find users by keywords | -| TikTok profiles | `clockworks/tiktok-profile-scraper` | Creator outreach | -| TikTok followers/following | `clockworks/tiktok-followers-scraper` | Audience analysis, segmentation | -| Facebook pages | `apify/facebook-pages-scraper` | Business contacts | -| Facebook page contacts | `apify/facebook-page-contact-information` | Extract emails, phones, addresses | -| Facebook groups | `apify/facebook-groups-scraper` | Buying intent signals | -| Facebook events | `apify/facebook-events-scraper` | Event networking, partnerships | -| Google Search | `apify/google-search-scraper` | Broad lead discovery | -| YouTube 
channels | `streamers/youtube-scraper` | Creator partnerships | -| Google Maps emails | `poidata/google-maps-email-extractor` | Direct email extraction | - -### Step 2: Fetch Actor Schema - -Fetch the Actor's input schema and details dynamically using mcpc: - -```bash -export $(grep APIFY_TOKEN .env | xargs) && mcpc --json mcp.apify.com --header "Authorization: Bearer $APIFY_TOKEN" tools-call fetch-actor-details actor:="ACTOR_ID" | jq -r ".content" -``` - -Replace `ACTOR_ID` with the selected Actor (e.g., `compass/crawler-google-places`). - -This returns: -- Actor description and README -- Required and optional input parameters -- Output fields (if available) - -### Step 3: Ask User Preferences - -Before running, ask: -1. **Output format**: - - **Quick answer** - Display top few results in chat (no file saved) - - **CSV** - Full export with all fields - - **JSON** - Full export in JSON format -2. **Number of results**: Based on character of use case - -### Step 4: Run the Script - -**Quick answer (display in chat, no file):** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' -``` - -**CSV:** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' \ - --output YYYY-MM-DD_OUTPUT_FILE.csv \ - --format csv -``` - -**JSON:** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' \ - --output YYYY-MM-DD_OUTPUT_FILE.json \ - --format json -``` - -### Step 5: Summarize Results - -After completion, report: -- Number of leads found -- File location and name -- Key fields available -- Suggested next steps (filtering, enrichment) - - -## Error Handling - -`APIFY_TOKEN not found` - Ask user to create `.env` with `APIFY_TOKEN=your_token` -`mcpc not found` - Ask user to install `npm install -g @apify/mcpc` -`Actor not found` - Check Actor ID 
spelling -`Run FAILED` - Ask user to check Apify console link in error output -`Timeout` - Reduce input size or increase `--timeout` diff --git a/skills/apify-lead-generation/reference/scripts/run_actor.js b/skills/apify-lead-generation/reference/scripts/run_actor.js deleted file mode 100644 index 6cd4acc..0000000 --- a/skills/apify-lead-generation/reference/scripts/run_actor.js +++ /dev/null @@ -1,363 +0,0 @@ -#!/usr/bin/env node -/** - * Apify Actor Runner - Runs Apify actors and exports results. - * - * Usage: - * # Quick answer (display in chat, no file saved) - * node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' - * - * # Export to file - * node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' --output leads.csv --format csv - */ - -import { parseArgs } from 'node:util'; -import { writeFileSync, statSync } from 'node:fs'; - -// User-Agent for tracking skill usage in Apify analytics -const USER_AGENT = 'apify-agent-skills/apify-lead-generation-1.1.11'; - -// Parse command-line arguments -function parseCliArgs() { - const options = { - actor: { type: 'string', short: 'a' }, - input: { type: 'string', short: 'i' }, - output: { type: 'string', short: 'o' }, - format: { type: 'string', short: 'f', default: 'csv' }, - timeout: { type: 'string', short: 't', default: '600' }, - 'poll-interval': { type: 'string', default: '5' }, - help: { type: 'boolean', short: 'h' }, - }; - - const { values } = parseArgs({ options, allowPositionals: false }); - - if (values.help) { - printHelp(); - process.exit(0); - } - - if (!values.actor) { - console.error('Error: --actor is required'); - printHelp(); - process.exit(1); - } - - if (!values.input) { - console.error('Error: --input is required'); - printHelp(); - process.exit(1); - } - - return { - actor: values.actor, - input: values.input, - output: values.output, - format: values.format || 'csv', - timeout: parseInt(values.timeout, 10), - pollInterval: parseInt(values['poll-interval'], 10), 
- }; -} - -function printHelp() { - console.log(` -Apify Actor Runner - Run Apify actors and export results - -Usage: - node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' - -Options: - --actor, -a Actor ID (e.g., compass/crawler-google-places) [required] - --input, -i Actor input as JSON string [required] - --output, -o Output file path (optional - if not provided, displays quick answer) - --format, -f Output format: csv, json (default: csv) - --timeout, -t Max wait time in seconds (default: 600) - --poll-interval Seconds between status checks (default: 5) - --help, -h Show this help message - -Output Formats: - JSON (all data) --output file.json --format json - CSV (all data) --output file.csv --format csv - Quick answer (no --output) - displays top 5 in chat - -Examples: - # Quick answer - display top 5 in chat - node --env-file=.env scripts/run_actor.js \\ - --actor "compass/crawler-google-places" \\ - --input '{"searchStringsArray": ["coffee shops"], "locationQuery": "Seattle, USA"}' - - # Export all data to CSV - node --env-file=.env scripts/run_actor.js \\ - --actor "compass/crawler-google-places" \\ - --input '{"searchStringsArray": ["coffee shops"], "locationQuery": "Seattle, USA"}' \\ - --output leads.csv --format csv -`); -} - -// Start an actor run and return { runId, datasetId } -async function startActor(token, actorId, inputJson) { - // Convert "author/actor" format to "author~actor" for API compatibility - const apiActorId = actorId.replace('/', '~'); - const url = `https://api.apify.com/v2/acts/${apiActorId}/runs?token=${encodeURIComponent(token)}`; - - let data; - try { - data = JSON.parse(inputJson); - } catch (e) { - console.error(`Error: Invalid JSON input: ${e.message}`); - process.exit(1); - } - - const response = await fetch(url, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'User-Agent': `${USER_AGENT}/start_actor`, - }, - body: JSON.stringify(data), - }); - - if (response.status === 404) { - 
console.error(`Error: Actor '${actorId}' not found`); - process.exit(1); - } - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: API request failed (${response.status}): ${text}`); - process.exit(1); - } - - const result = await response.json(); - return { - runId: result.data.id, - datasetId: result.data.defaultDatasetId, - }; -} - -// Poll run status until complete or timeout -async function pollUntilComplete(token, runId, timeout, interval) { - const url = `https://api.apify.com/v2/actor-runs/${runId}?token=${encodeURIComponent(token)}`; - const startTime = Date.now(); - let lastStatus = null; - - while (true) { - const response = await fetch(url); - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to get run status: ${text}`); - process.exit(1); - } - - const result = await response.json(); - const status = result.data.status; - - // Only print when status changes - if (status !== lastStatus) { - console.log(`Status: ${status}`); - lastStatus = status; - } - - if (['SUCCEEDED', 'FAILED', 'ABORTED', 'TIMED-OUT'].includes(status)) { - return status; - } - - const elapsed = (Date.now() - startTime) / 1000; - if (elapsed > timeout) { - console.error(`Warning: Timeout after ${timeout}s, actor still running`); - return 'TIMED-OUT'; - } - - await sleep(interval * 1000); - } -} - -// Download dataset items -async function downloadResults(token, datasetId, outputPath, format) { - const url = `https://api.apify.com/v2/datasets/${datasetId}/items?token=${encodeURIComponent(token)}&format=json`; - - const response = await fetch(url, { - headers: { - 'User-Agent': `${USER_AGENT}/download_${format}`, - }, - }); - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to download results: ${text}`); - process.exit(1); - } - - const data = await response.json(); - - if (format === 'json') { - writeFileSync(outputPath, JSON.stringify(data, null, 2)); - } else { - // CSV 
output - if (data.length > 0) { - const fieldnames = Object.keys(data[0]); - const csvLines = [fieldnames.join(',')]; - - for (const row of data) { - const values = fieldnames.map((key) => { - let value = row[key]; - - // Truncate long text fields - if (typeof value === 'string' && value.length > 200) { - value = value.slice(0, 200) + '...'; - } else if (Array.isArray(value) || (typeof value === 'object' && value !== null)) { - value = JSON.stringify(value) || ''; - } - - // CSV escape: wrap in quotes if contains comma, quote, or newline - if (value === null || value === undefined) { - return ''; - } - const strValue = String(value); - if (strValue.includes(',') || strValue.includes('"') || strValue.includes('\n')) { - return `"${strValue.replace(/"/g, '""')}"`; - } - return strValue; - }); - csvLines.push(values.join(',')); - } - - writeFileSync(outputPath, csvLines.join('\n')); - } else { - writeFileSync(outputPath, ''); - } - } - - console.log(`Saved to: ${outputPath}`); -} - -// Display top 5 results in chat format -async function displayQuickAnswer(token, datasetId) { - const url = `https://api.apify.com/v2/datasets/${datasetId}/items?token=${encodeURIComponent(token)}&format=json`; - - const response = await fetch(url, { - headers: { - 'User-Agent': `${USER_AGENT}/quick_answer`, - }, - }); - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to download results: ${text}`); - process.exit(1); - } - - const data = await response.json(); - const total = data.length; - - if (total === 0) { - console.log('\nNo results found.'); - return; - } - - // Display top 5 - console.log(`\n${'='.repeat(60)}`); - console.log(`TOP 5 RESULTS (of ${total} total)`); - console.log('='.repeat(60)); - - for (let i = 0; i < Math.min(5, data.length); i++) { - const item = data[i]; - console.log(`\n--- Result ${i + 1} ---`); - - for (const [key, value] of Object.entries(item)) { - let displayValue = value; - - // Truncate long values - if (typeof 
value === 'string' && value.length > 100) { - displayValue = value.slice(0, 100) + '...'; - } else if (Array.isArray(value) || (typeof value === 'object' && value !== null)) { - const jsonStr = JSON.stringify(value); - displayValue = jsonStr.length > 100 ? jsonStr.slice(0, 100) + '...' : jsonStr; - } - - console.log(` ${key}: ${displayValue}`); - } - } - - console.log(`\n${'='.repeat(60)}`); - if (total > 5) { - console.log(`Showing 5 of ${total} results.`); - } - console.log(`Full data available at: https://console.apify.com/storage/datasets/${datasetId}`); - console.log('='.repeat(60)); -} - -// Report summary of downloaded data -function reportSummary(outputPath, format) { - const stats = statSync(outputPath); - const size = stats.size; - - let count; - try { - const content = require('fs').readFileSync(outputPath, 'utf-8'); - if (format === 'json') { - const data = JSON.parse(content); - count = Array.isArray(data) ? data.length : 1; - } else { - // CSV - count lines minus header - const lines = content.split('\n').filter((line) => line.trim()); - count = Math.max(0, lines.length - 1); - } - } catch { - count = 'unknown'; - } - - console.log(`Records: ${count}`); - console.log(`Size: ${size.toLocaleString()} bytes`); -} - -// Helper: sleep for ms -function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); -} - -// Main function -async function main() { - // Parse args first so --help works without token - const args = parseCliArgs(); - - // Check for APIFY_TOKEN - const token = process.env.APIFY_TOKEN; - if (!token) { - console.error('Error: APIFY_TOKEN not found in .env file'); - console.error(''); - console.error('Add your token to .env file:'); - console.error(' APIFY_TOKEN=your_token_here'); - console.error(''); - console.error('Get your token: https://console.apify.com/account/integrations'); - process.exit(1); - } - - // Start the actor run - console.log(`Starting actor: ${args.actor}`); - const { runId, datasetId } = await 
startActor(token, args.actor, args.input); - console.log(`Run ID: ${runId}`); - console.log(`Dataset ID: ${datasetId}`); - - // Poll for completion - const status = await pollUntilComplete(token, runId, args.timeout, args.pollInterval); - - if (status !== 'SUCCEEDED') { - console.error(`Error: Actor run ${status}`); - console.error(`Details: https://console.apify.com/actors/runs/${runId}`); - process.exit(1); - } - - // Determine output mode - if (args.output) { - // File output mode - await downloadResults(token, datasetId, args.output, args.format); - reportSummary(args.output, args.format); - } else { - // Quick answer mode - display in chat - await displayQuickAnswer(token, datasetId); - } -} - -main().catch((err) => { - console.error(`Error: ${err.message}`); - process.exit(1); -}); diff --git a/skills/apify-market-research/SKILL.md b/skills/apify-market-research/SKILL.md deleted file mode 100644 index 95e926b..0000000 --- a/skills/apify-market-research/SKILL.md +++ /dev/null @@ -1,119 +0,0 @@ ---- -name: apify-market-research -description: Analyze market conditions, geographic opportunities, pricing, consumer behavior, and product validation across Google Maps, Facebook, Instagram, Booking.com, and TripAdvisor. ---- - -# Market Research - -Conduct market research using Apify Actors to extract data from multiple platforms. 
- -## Prerequisites -(No need to check it upfront) - -- `.env` file with `APIFY_TOKEN` -- Node.js 20.6+ (for native `--env-file` support) -- `mcpc` CLI tool: `npm install -g @apify/mcpc` - -## Workflow - -Copy this checklist and track progress: - -``` -Task Progress: -- [ ] Step 1: Identify market research type (select Actor) -- [ ] Step 2: Fetch Actor schema via mcpc -- [ ] Step 3: Ask user preferences (format, filename) -- [ ] Step 4: Run the analysis script -- [ ] Step 5: Summarize findings -``` - -### Step 1: Identify Market Research Type - -Select the appropriate Actor based on research needs: - -| User Need | Actor ID | Best For | -|-----------|----------|----------| -| Market density | `compass/crawler-google-places` | Location analysis | -| Geospatial analysis | `compass/google-maps-extractor` | Business mapping | -| Regional interest | `apify/google-trends-scraper` | Trend data | -| Pricing and demand | `apify/facebook-marketplace-scraper` | Market pricing | -| Event market | `apify/facebook-events-scraper` | Event analysis | -| Consumer needs | `apify/facebook-groups-scraper` | Group research | -| Market landscape | `apify/facebook-pages-scraper` | Business pages | -| Business density | `apify/facebook-page-contact-information` | Contact data | -| Cultural insights | `apify/facebook-photos-scraper` | Visual research | -| Niche targeting | `apify/instagram-hashtag-scraper` | Hashtag research | -| Hashtag stats | `apify/instagram-hashtag-stats` | Market sizing | -| Market activity | `apify/instagram-reel-scraper` | Activity analysis | -| Market intelligence | `apify/instagram-scraper` | Full data | -| Product launch research | `apify/instagram-api-scraper` | API access | -| Hospitality market | `voyager/booking-scraper` | Hotel data | -| Tourism insights | `maxcopell/tripadvisor-reviews` | Review analysis | - -### Step 2: Fetch Actor Schema - -Fetch the Actor's input schema and details dynamically using mcpc: - -```bash -export $(grep APIFY_TOKEN .env | 
xargs) && mcpc --json mcp.apify.com --header "Authorization: Bearer $APIFY_TOKEN" tools-call fetch-actor-details actor:="ACTOR_ID" | jq -r ".content" -``` - -Replace `ACTOR_ID` with the selected Actor (e.g., `compass/crawler-google-places`). - -This returns: -- Actor description and README -- Required and optional input parameters -- Output fields (if available) - -### Step 3: Ask User Preferences - -Before running, ask: -1. **Output format**: - - **Quick answer** - Display top few results in chat (no file saved) - - **CSV** - Full export with all fields - - **JSON** - Full export in JSON format -2. **Number of results**: Based on character of use case - -### Step 4: Run the Script - -**Quick answer (display in chat, no file):** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' -``` - -**CSV:** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' \ - --output YYYY-MM-DD_OUTPUT_FILE.csv \ - --format csv -``` - -**JSON:** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' \ - --output YYYY-MM-DD_OUTPUT_FILE.json \ - --format json -``` - -### Step 5: Summarize Findings - -After completion, report: -- Number of results found -- File location and name -- Key market insights -- Suggested next steps (deeper analysis, validation) - - -## Error Handling - -`APIFY_TOKEN not found` - Ask user to create `.env` with `APIFY_TOKEN=your_token` -`mcpc not found` - Ask user to install `npm install -g @apify/mcpc` -`Actor not found` - Check Actor ID spelling -`Run FAILED` - Ask user to check Apify console link in error output -`Timeout` - Reduce input size or increase `--timeout` diff --git a/skills/apify-market-research/reference/scripts/run_actor.js b/skills/apify-market-research/reference/scripts/run_actor.js deleted file mode 100644 index 
7a0a904..0000000 --- a/skills/apify-market-research/reference/scripts/run_actor.js +++ /dev/null @@ -1,363 +0,0 @@ -#!/usr/bin/env node -/** - * Apify Actor Runner - Runs Apify actors and exports results. - * - * Usage: - * # Quick answer (display in chat, no file saved) - * node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' - * - * # Export to file - * node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' --output leads.csv --format csv - */ - -import { parseArgs } from 'node:util'; -import { writeFileSync, statSync } from 'node:fs'; - -// User-Agent for tracking skill usage in Apify analytics -const USER_AGENT = 'apify-agent-skills/apify-market-research-1.0.0'; - -// Parse command-line arguments -function parseCliArgs() { - const options = { - actor: { type: 'string', short: 'a' }, - input: { type: 'string', short: 'i' }, - output: { type: 'string', short: 'o' }, - format: { type: 'string', short: 'f', default: 'csv' }, - timeout: { type: 'string', short: 't', default: '600' }, - 'poll-interval': { type: 'string', default: '5' }, - help: { type: 'boolean', short: 'h' }, - }; - - const { values } = parseArgs({ options, allowPositionals: false }); - - if (values.help) { - printHelp(); - process.exit(0); - } - - if (!values.actor) { - console.error('Error: --actor is required'); - printHelp(); - process.exit(1); - } - - if (!values.input) { - console.error('Error: --input is required'); - printHelp(); - process.exit(1); - } - - return { - actor: values.actor, - input: values.input, - output: values.output, - format: values.format || 'csv', - timeout: parseInt(values.timeout, 10), - pollInterval: parseInt(values['poll-interval'], 10), - }; -} - -function printHelp() { - console.log(` -Apify Actor Runner - Run Apify actors and export results - -Usage: - node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' - -Options: - --actor, -a Actor ID (e.g., compass/crawler-google-places) [required] - --input, -i Actor 
input as JSON string [required] - --output, -o Output file path (optional - if not provided, displays quick answer) - --format, -f Output format: csv, json (default: csv) - --timeout, -t Max wait time in seconds (default: 600) - --poll-interval Seconds between status checks (default: 5) - --help, -h Show this help message - -Output Formats: - JSON (all data) --output file.json --format json - CSV (all data) --output file.csv --format csv - Quick answer (no --output) - displays top 5 in chat - -Examples: - # Quick answer - display top 5 in chat - node --env-file=.env scripts/run_actor.js \\ - --actor "compass/crawler-google-places" \\ - --input '{"searchStringsArray": ["coffee shops"], "locationQuery": "Seattle, USA"}' - - # Export all data to CSV - node --env-file=.env scripts/run_actor.js \\ - --actor "compass/crawler-google-places" \\ - --input '{"searchStringsArray": ["coffee shops"], "locationQuery": "Seattle, USA"}' \\ - --output leads.csv --format csv -`); -} - -// Start an actor run and return { runId, datasetId } -async function startActor(token, actorId, inputJson) { - // Convert "author/actor" format to "author~actor" for API compatibility - const apiActorId = actorId.replace('/', '~'); - const url = `https://api.apify.com/v2/acts/${apiActorId}/runs?token=${encodeURIComponent(token)}`; - - let data; - try { - data = JSON.parse(inputJson); - } catch (e) { - console.error(`Error: Invalid JSON input: ${e.message}`); - process.exit(1); - } - - const response = await fetch(url, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'User-Agent': `${USER_AGENT}/start_actor`, - }, - body: JSON.stringify(data), - }); - - if (response.status === 404) { - console.error(`Error: Actor '${actorId}' not found`); - process.exit(1); - } - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: API request failed (${response.status}): ${text}`); - process.exit(1); - } - - const result = await response.json(); - return { - 
runId: result.data.id, - datasetId: result.data.defaultDatasetId, - }; -} - -// Poll run status until complete or timeout -async function pollUntilComplete(token, runId, timeout, interval) { - const url = `https://api.apify.com/v2/actor-runs/${runId}?token=${encodeURIComponent(token)}`; - const startTime = Date.now(); - let lastStatus = null; - - while (true) { - const response = await fetch(url); - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to get run status: ${text}`); - process.exit(1); - } - - const result = await response.json(); - const status = result.data.status; - - // Only print when status changes - if (status !== lastStatus) { - console.log(`Status: ${status}`); - lastStatus = status; - } - - if (['SUCCEEDED', 'FAILED', 'ABORTED', 'TIMED-OUT'].includes(status)) { - return status; - } - - const elapsed = (Date.now() - startTime) / 1000; - if (elapsed > timeout) { - console.error(`Warning: Timeout after ${timeout}s, actor still running`); - return 'TIMED-OUT'; - } - - await sleep(interval * 1000); - } -} - -// Download dataset items -async function downloadResults(token, datasetId, outputPath, format) { - const url = `https://api.apify.com/v2/datasets/${datasetId}/items?token=${encodeURIComponent(token)}&format=json`; - - const response = await fetch(url, { - headers: { - 'User-Agent': `${USER_AGENT}/download_${format}`, - }, - }); - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to download results: ${text}`); - process.exit(1); - } - - const data = await response.json(); - - if (format === 'json') { - writeFileSync(outputPath, JSON.stringify(data, null, 2)); - } else { - // CSV output - if (data.length > 0) { - const fieldnames = Object.keys(data[0]); - const csvLines = [fieldnames.join(',')]; - - for (const row of data) { - const values = fieldnames.map((key) => { - let value = row[key]; - - // Truncate long text fields - if (typeof value === 'string' && 
value.length > 200) { - value = value.slice(0, 200) + '...'; - } else if (Array.isArray(value) || (typeof value === 'object' && value !== null)) { - value = JSON.stringify(value) || ''; - } - - // CSV escape: wrap in quotes if contains comma, quote, or newline - if (value === null || value === undefined) { - return ''; - } - const strValue = String(value); - if (strValue.includes(',') || strValue.includes('"') || strValue.includes('\n')) { - return `"${strValue.replace(/"/g, '""')}"`; - } - return strValue; - }); - csvLines.push(values.join(',')); - } - - writeFileSync(outputPath, csvLines.join('\n')); - } else { - writeFileSync(outputPath, ''); - } - } - - console.log(`Saved to: ${outputPath}`); -} - -// Display top 5 results in chat format -async function displayQuickAnswer(token, datasetId) { - const url = `https://api.apify.com/v2/datasets/${datasetId}/items?token=${encodeURIComponent(token)}&format=json`; - - const response = await fetch(url, { - headers: { - 'User-Agent': `${USER_AGENT}/quick_answer`, - }, - }); - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to download results: ${text}`); - process.exit(1); - } - - const data = await response.json(); - const total = data.length; - - if (total === 0) { - console.log('\nNo results found.'); - return; - } - - // Display top 5 - console.log(`\n${'='.repeat(60)}`); - console.log(`TOP 5 RESULTS (of ${total} total)`); - console.log('='.repeat(60)); - - for (let i = 0; i < Math.min(5, data.length); i++) { - const item = data[i]; - console.log(`\n--- Result ${i + 1} ---`); - - for (const [key, value] of Object.entries(item)) { - let displayValue = value; - - // Truncate long values - if (typeof value === 'string' && value.length > 100) { - displayValue = value.slice(0, 100) + '...'; - } else if (Array.isArray(value) || (typeof value === 'object' && value !== null)) { - const jsonStr = JSON.stringify(value); - displayValue = jsonStr.length > 100 ? 
jsonStr.slice(0, 100) + '...' : jsonStr; - } - - console.log(` ${key}: ${displayValue}`); - } - } - - console.log(`\n${'='.repeat(60)}`); - if (total > 5) { - console.log(`Showing 5 of ${total} results.`); - } - console.log(`Full data available at: https://console.apify.com/storage/datasets/${datasetId}`); - console.log('='.repeat(60)); -} - -// Report summary of downloaded data -function reportSummary(outputPath, format) { - const stats = statSync(outputPath); - const size = stats.size; - - let count; - try { - const content = require('fs').readFileSync(outputPath, 'utf-8'); - if (format === 'json') { - const data = JSON.parse(content); - count = Array.isArray(data) ? data.length : 1; - } else { - // CSV - count lines minus header - const lines = content.split('\n').filter((line) => line.trim()); - count = Math.max(0, lines.length - 1); - } - } catch { - count = 'unknown'; - } - - console.log(`Records: ${count}`); - console.log(`Size: ${size.toLocaleString()} bytes`); -} - -// Helper: sleep for ms -function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); -} - -// Main function -async function main() { - // Parse args first so --help works without token - const args = parseCliArgs(); - - // Check for APIFY_TOKEN - const token = process.env.APIFY_TOKEN; - if (!token) { - console.error('Error: APIFY_TOKEN not found in .env file'); - console.error(''); - console.error('Add your token to .env file:'); - console.error(' APIFY_TOKEN=your_token_here'); - console.error(''); - console.error('Get your token: https://console.apify.com/account/integrations'); - process.exit(1); - } - - // Start the actor run - console.log(`Starting actor: ${args.actor}`); - const { runId, datasetId } = await startActor(token, args.actor, args.input); - console.log(`Run ID: ${runId}`); - console.log(`Dataset ID: ${datasetId}`); - - // Poll for completion - const status = await pollUntilComplete(token, runId, args.timeout, args.pollInterval); - - if (status !== 
'SUCCEEDED') { - console.error(`Error: Actor run ${status}`); - console.error(`Details: https://console.apify.com/actors/runs/${runId}`); - process.exit(1); - } - - // Determine output mode - if (args.output) { - // File output mode - await downloadResults(token, datasetId, args.output, args.format); - reportSummary(args.output, args.format); - } else { - // Quick answer mode - display in chat - await displayQuickAnswer(token, datasetId); - } -} - -main().catch((err) => { - console.error(`Error: ${err.message}`); - process.exit(1); -}); diff --git a/skills/apify-trend-analysis/SKILL.md b/skills/apify-trend-analysis/SKILL.md deleted file mode 100644 index 7692cde..0000000 --- a/skills/apify-trend-analysis/SKILL.md +++ /dev/null @@ -1,122 +0,0 @@ ---- -name: apify-trend-analysis -description: Discover and track emerging trends across Google Trends, Instagram, Facebook, YouTube, and TikTok to inform content strategy. ---- - -# Trend Analysis - -Discover and track emerging trends using Apify Actors to extract data from multiple platforms. 
- -## Prerequisites -(No need to check it upfront) - -- `.env` file with `APIFY_TOKEN` -- Node.js 20.6+ (for native `--env-file` support) -- `mcpc` CLI tool: `npm install -g @apify/mcpc` - -## Workflow - -Copy this checklist and track progress: - -``` -Task Progress: -- [ ] Step 1: Identify trend type (select Actor) -- [ ] Step 2: Fetch Actor schema via mcpc -- [ ] Step 3: Ask user preferences (format, filename) -- [ ] Step 4: Run the analysis script -- [ ] Step 5: Summarize findings -``` - -### Step 1: Identify Trend Type - -Select the appropriate Actor based on research needs: - -| User Need | Actor ID | Best For | -|-----------|----------|----------| -| Search trends | `apify/google-trends-scraper` | Google Trends data | -| Hashtag tracking | `apify/instagram-hashtag-scraper` | Hashtag content | -| Hashtag metrics | `apify/instagram-hashtag-stats` | Performance stats | -| Visual trends | `apify/instagram-post-scraper` | Post analysis | -| Trending discovery | `apify/instagram-search-scraper` | Search trends | -| Comprehensive tracking | `apify/instagram-scraper` | Full data | -| API-based trends | `apify/instagram-api-scraper` | API access | -| Engagement trends | `apify/export-instagram-comments-posts` | Comment tracking | -| Product trends | `apify/facebook-marketplace-scraper` | Marketplace data | -| Visual analysis | `apify/facebook-photos-scraper` | Photo trends | -| Community trends | `apify/facebook-groups-scraper` | Group monitoring | -| YouTube Shorts | `streamers/youtube-shorts-scraper` | Short-form trends | -| YouTube hashtags | `streamers/youtube-video-scraper-by-hashtag` | Hashtag videos | -| TikTok hashtags | `clockworks/tiktok-hashtag-scraper` | Hashtag content | -| Trending sounds | `clockworks/tiktok-sound-scraper` | Audio trends | -| TikTok ads | `clockworks/tiktok-ads-scraper` | Ad trends | -| Discover page | `clockworks/tiktok-discover-scraper` | Discover trends | -| Explore trends | `clockworks/tiktok-explore-scraper` | Explore content | -| 
Trending content | `clockworks/tiktok-trends-scraper` | Viral content | - -### Step 2: Fetch Actor Schema - -Fetch the Actor's input schema and details dynamically using mcpc: - -```bash -export $(grep APIFY_TOKEN .env | xargs) && mcpc --json mcp.apify.com --header "Authorization: Bearer $APIFY_TOKEN" tools-call fetch-actor-details actor:="ACTOR_ID" | jq -r ".content" -``` - -Replace `ACTOR_ID` with the selected Actor (e.g., `apify/google-trends-scraper`). - -This returns: -- Actor description and README -- Required and optional input parameters -- Output fields (if available) - -### Step 3: Ask User Preferences - -Before running, ask: -1. **Output format**: - - **Quick answer** - Display top few results in chat (no file saved) - - **CSV** - Full export with all fields - - **JSON** - Full export in JSON format -2. **Number of results**: Based on character of use case - -### Step 4: Run the Script - -**Quick answer (display in chat, no file):** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' -``` - -**CSV:** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' \ - --output YYYY-MM-DD_OUTPUT_FILE.csv \ - --format csv -``` - -**JSON:** -```bash -node --env-file=.env ${CLAUDE_PLUGIN_ROOT}/reference/scripts/run_actor.js \ - --actor "ACTOR_ID" \ - --input 'JSON_INPUT' \ - --output YYYY-MM-DD_OUTPUT_FILE.json \ - --format json -``` - -### Step 5: Summarize Findings - -After completion, report: -- Number of results found -- File location and name -- Key trend insights -- Suggested next steps (deeper analysis, content opportunities) - - -## Error Handling - -`APIFY_TOKEN not found` - Ask user to create `.env` with `APIFY_TOKEN=your_token` -`mcpc not found` - Ask user to install `npm install -g @apify/mcpc` -`Actor not found` - Check Actor ID spelling -`Run FAILED` - Ask user to check Apify console link in error 
output -`Timeout` - Reduce input size or increase `--timeout` diff --git a/skills/apify-trend-analysis/reference/scripts/run_actor.js b/skills/apify-trend-analysis/reference/scripts/run_actor.js deleted file mode 100644 index 5512427..0000000 --- a/skills/apify-trend-analysis/reference/scripts/run_actor.js +++ /dev/null @@ -1,363 +0,0 @@ -#!/usr/bin/env node -/** - * Apify Actor Runner - Runs Apify actors and exports results. - * - * Usage: - * # Quick answer (display in chat, no file saved) - * node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' - * - * # Export to file - * node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' --output leads.csv --format csv - */ - -import { parseArgs } from 'node:util'; -import { writeFileSync, statSync } from 'node:fs'; - -// User-Agent for tracking skill usage in Apify analytics -const USER_AGENT = 'apify-agent-skills/apify-trend-analysis-1.0.0'; - -// Parse command-line arguments -function parseCliArgs() { - const options = { - actor: { type: 'string', short: 'a' }, - input: { type: 'string', short: 'i' }, - output: { type: 'string', short: 'o' }, - format: { type: 'string', short: 'f', default: 'csv' }, - timeout: { type: 'string', short: 't', default: '600' }, - 'poll-interval': { type: 'string', default: '5' }, - help: { type: 'boolean', short: 'h' }, - }; - - const { values } = parseArgs({ options, allowPositionals: false }); - - if (values.help) { - printHelp(); - process.exit(0); - } - - if (!values.actor) { - console.error('Error: --actor is required'); - printHelp(); - process.exit(1); - } - - if (!values.input) { - console.error('Error: --input is required'); - printHelp(); - process.exit(1); - } - - return { - actor: values.actor, - input: values.input, - output: values.output, - format: values.format || 'csv', - timeout: parseInt(values.timeout, 10), - pollInterval: parseInt(values['poll-interval'], 10), - }; -} - -function printHelp() { - console.log(` -Apify Actor Runner - Run 
Apify actors and export results - -Usage: - node --env-file=.env scripts/run_actor.js --actor ACTOR_ID --input '{}' - -Options: - --actor, -a Actor ID (e.g., compass/crawler-google-places) [required] - --input, -i Actor input as JSON string [required] - --output, -o Output file path (optional - if not provided, displays quick answer) - --format, -f Output format: csv, json (default: csv) - --timeout, -t Max wait time in seconds (default: 600) - --poll-interval Seconds between status checks (default: 5) - --help, -h Show this help message - -Output Formats: - JSON (all data) --output file.json --format json - CSV (all data) --output file.csv --format csv - Quick answer (no --output) - displays top 5 in chat - -Examples: - # Quick answer - display top 5 in chat - node --env-file=.env scripts/run_actor.js \\ - --actor "compass/crawler-google-places" \\ - --input '{"searchStringsArray": ["coffee shops"], "locationQuery": "Seattle, USA"}' - - # Export all data to CSV - node --env-file=.env scripts/run_actor.js \\ - --actor "compass/crawler-google-places" \\ - --input '{"searchStringsArray": ["coffee shops"], "locationQuery": "Seattle, USA"}' \\ - --output leads.csv --format csv -`); -} - -// Start an actor run and return { runId, datasetId } -async function startActor(token, actorId, inputJson) { - // Convert "author/actor" format to "author~actor" for API compatibility - const apiActorId = actorId.replace('/', '~'); - const url = `https://api.apify.com/v2/acts/${apiActorId}/runs?token=${encodeURIComponent(token)}`; - - let data; - try { - data = JSON.parse(inputJson); - } catch (e) { - console.error(`Error: Invalid JSON input: ${e.message}`); - process.exit(1); - } - - const response = await fetch(url, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'User-Agent': `${USER_AGENT}/start_actor`, - }, - body: JSON.stringify(data), - }); - - if (response.status === 404) { - console.error(`Error: Actor '${actorId}' not found`); - process.exit(1); - } 
- - if (!response.ok) { - const text = await response.text(); - console.error(`Error: API request failed (${response.status}): ${text}`); - process.exit(1); - } - - const result = await response.json(); - return { - runId: result.data.id, - datasetId: result.data.defaultDatasetId, - }; -} - -// Poll run status until complete or timeout -async function pollUntilComplete(token, runId, timeout, interval) { - const url = `https://api.apify.com/v2/actor-runs/${runId}?token=${encodeURIComponent(token)}`; - const startTime = Date.now(); - let lastStatus = null; - - while (true) { - const response = await fetch(url); - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to get run status: ${text}`); - process.exit(1); - } - - const result = await response.json(); - const status = result.data.status; - - // Only print when status changes - if (status !== lastStatus) { - console.log(`Status: ${status}`); - lastStatus = status; - } - - if (['SUCCEEDED', 'FAILED', 'ABORTED', 'TIMED-OUT'].includes(status)) { - return status; - } - - const elapsed = (Date.now() - startTime) / 1000; - if (elapsed > timeout) { - console.error(`Warning: Timeout after ${timeout}s, actor still running`); - return 'TIMED-OUT'; - } - - await sleep(interval * 1000); - } -} - -// Download dataset items -async function downloadResults(token, datasetId, outputPath, format) { - const url = `https://api.apify.com/v2/datasets/${datasetId}/items?token=${encodeURIComponent(token)}&format=json`; - - const response = await fetch(url, { - headers: { - 'User-Agent': `${USER_AGENT}/download_${format}`, - }, - }); - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to download results: ${text}`); - process.exit(1); - } - - const data = await response.json(); - - if (format === 'json') { - writeFileSync(outputPath, JSON.stringify(data, null, 2)); - } else { - // CSV output - if (data.length > 0) { - const fieldnames = Object.keys(data[0]); - 
const csvLines = [fieldnames.join(',')]; - - for (const row of data) { - const values = fieldnames.map((key) => { - let value = row[key]; - - // Truncate long text fields - if (typeof value === 'string' && value.length > 200) { - value = value.slice(0, 200) + '...'; - } else if (Array.isArray(value) || (typeof value === 'object' && value !== null)) { - value = JSON.stringify(value) || ''; - } - - // CSV escape: wrap in quotes if contains comma, quote, or newline - if (value === null || value === undefined) { - return ''; - } - const strValue = String(value); - if (strValue.includes(',') || strValue.includes('"') || strValue.includes('\n')) { - return `"${strValue.replace(/"/g, '""')}"`; - } - return strValue; - }); - csvLines.push(values.join(',')); - } - - writeFileSync(outputPath, csvLines.join('\n')); - } else { - writeFileSync(outputPath, ''); - } - } - - console.log(`Saved to: ${outputPath}`); -} - -// Display top 5 results in chat format -async function displayQuickAnswer(token, datasetId) { - const url = `https://api.apify.com/v2/datasets/${datasetId}/items?token=${encodeURIComponent(token)}&format=json`; - - const response = await fetch(url, { - headers: { - 'User-Agent': `${USER_AGENT}/quick_answer`, - }, - }); - - if (!response.ok) { - const text = await response.text(); - console.error(`Error: Failed to download results: ${text}`); - process.exit(1); - } - - const data = await response.json(); - const total = data.length; - - if (total === 0) { - console.log('\nNo results found.'); - return; - } - - // Display top 5 - console.log(`\n${'='.repeat(60)}`); - console.log(`TOP 5 RESULTS (of ${total} total)`); - console.log('='.repeat(60)); - - for (let i = 0; i < Math.min(5, data.length); i++) { - const item = data[i]; - console.log(`\n--- Result ${i + 1} ---`); - - for (const [key, value] of Object.entries(item)) { - let displayValue = value; - - // Truncate long values - if (typeof value === 'string' && value.length > 100) { - displayValue = value.slice(0, 
100) + '...'; - } else if (Array.isArray(value) || (typeof value === 'object' && value !== null)) { - const jsonStr = JSON.stringify(value); - displayValue = jsonStr.length > 100 ? jsonStr.slice(0, 100) + '...' : jsonStr; - } - - console.log(` ${key}: ${displayValue}`); - } - } - - console.log(`\n${'='.repeat(60)}`); - if (total > 5) { - console.log(`Showing 5 of ${total} results.`); - } - console.log(`Full data available at: https://console.apify.com/storage/datasets/${datasetId}`); - console.log('='.repeat(60)); -} - -// Report summary of downloaded data -function reportSummary(outputPath, format) { - const stats = statSync(outputPath); - const size = stats.size; - - let count; - try { - const content = require('fs').readFileSync(outputPath, 'utf-8'); - if (format === 'json') { - const data = JSON.parse(content); - count = Array.isArray(data) ? data.length : 1; - } else { - // CSV - count lines minus header - const lines = content.split('\n').filter((line) => line.trim()); - count = Math.max(0, lines.length - 1); - } - } catch { - count = 'unknown'; - } - - console.log(`Records: ${count}`); - console.log(`Size: ${size.toLocaleString()} bytes`); -} - -// Helper: sleep for ms -function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); -} - -// Main function -async function main() { - // Parse args first so --help works without token - const args = parseCliArgs(); - - // Check for APIFY_TOKEN - const token = process.env.APIFY_TOKEN; - if (!token) { - console.error('Error: APIFY_TOKEN not found in .env file'); - console.error(''); - console.error('Add your token to .env file:'); - console.error(' APIFY_TOKEN=your_token_here'); - console.error(''); - console.error('Get your token: https://console.apify.com/account/integrations'); - process.exit(1); - } - - // Start the actor run - console.log(`Starting actor: ${args.actor}`); - const { runId, datasetId } = await startActor(token, args.actor, args.input); - console.log(`Run ID: ${runId}`); - 
console.log(`Dataset ID: ${datasetId}`); - - // Poll for completion - const status = await pollUntilComplete(token, runId, args.timeout, args.pollInterval); - - if (status !== 'SUCCEEDED') { - console.error(`Error: Actor run ${status}`); - console.error(`Details: https://console.apify.com/actors/runs/${runId}`); - process.exit(1); - } - - // Determine output mode - if (args.output) { - // File output mode - await downloadResults(token, datasetId, args.output, args.format); - reportSummary(args.output, args.format); - } else { - // Quick answer mode - display in chat - await displayQuickAnswer(token, datasetId); - } -} - -main().catch((err) => { - console.error(`Error: ${err.message}`); - process.exit(1); -}); diff --git a/skills/apify-ultimate-scraper/reference/scripts/fetch_actor_details.js b/skills/apify-ultimate-scraper/reference/scripts/fetch_actor_details.js index d19b242..3d0a1a0 100644 --- a/skills/apify-ultimate-scraper/reference/scripts/fetch_actor_details.js +++ b/skills/apify-ultimate-scraper/reference/scripts/fetch_actor_details.js @@ -105,7 +105,7 @@ async function main() { } } - // Compose output (matching mcpc fetch-actor-details structure) + // Compose output const stats = actorInfo.stats || {}; const output = { actorId: args.actor, From 55d6aceb65f6db616a251ecaddcd9f09d5d14bf9 Mon Sep 17 00:00:00 2001 From: Dusan Vystrcil Date: Mon, 23 Mar 2026 10:20:47 +0100 Subject: [PATCH 04/11] docs: restore Installation section and add Output formats to README Co-Authored-By: Claude Opus 4.6 (1M context) --- README.md | 45 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 7d7d921..799925d 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,6 @@ A collection of AI agent skills for web scraping, data extraction, and Actor development on the Apify platform. > Looking for more specialized skills? 
Check out [apify/awesome-skills](https://github.com/apify/awesome-skills) — a community collection of domain-specific skills for lead generation, brand monitoring, competitor intelligence, and more. -> -> Using Cursor? See [apify/cursor-plugins](https://github.com/apify/cursor-plugins) for ready-to-install Cursor marketplace plugins. ## Skills @@ -22,15 +20,40 @@ Create, debug, and deploy Apify Actors from scratch in JavaScript, TypeScript, o Convert existing projects into Apify Actors — supports JS/TS (SDK), Python (async context manager), and any language (CLI wrapper). -## Usage +## Installation -Any AI tool that supports Markdown context can use these skills by pointing to the SKILL.md files: +```bash +npx skills add apify/agent-skills +``` -- `skills/apify-ultimate-scraper/SKILL.md` -- `skills/apify-actor-development/SKILL.md` -- `skills/apify-actorization/SKILL.md` +### Claude Code -For Codex and Gemini CLI, use the auto-generated index at `agents/AGENTS.md`. +```bash +# Add the marketplace +/plugin marketplace add https://github.com/apify/agent-skills + +# Install a skill +/plugin install apify-ultimate-scraper@apify-agent-skills +``` + +### Cursor / Windsurf + +Add to your project's `.cursor/settings.json` or use the same Claude Code plugin format. + +### Codex / Gemini CLI + +Point your agent to the `agents/AGENTS.md` file which contains skill descriptions and paths: + +```bash +# Gemini CLI uses gemini-extension.json automatically +# For Codex, reference agents/AGENTS.md in your configuration +``` + +### Other AI tools + +Any AI tool that supports Markdown context can use the skills by pointing to: +- `agents/AGENTS.md` - auto-generated skill index +- `skills/*/SKILL.md` - individual skill documentation ## Prerequisites @@ -38,6 +61,12 @@ For Codex and Gemini CLI, use the auto-generated index at `agents/AGENTS.md`. 2. **API token** — get from [Apify Console](https://console.apify.com/account/integrations), add `APIFY_TOKEN=your_token` to `.env` 3. 
**Node.js 20.6+** (for the scraper skill) +## Output formats + +- **Quick answer** - top 5 results displayed in chat (no file saved) +- **CSV** - full export with all fields +- **JSON** - full data export + ## Pricing Apify Actors use pay-per-result pricing. Check individual Actor pricing on the [Apify platform](https://apify.com). From a384eda2a3c4a19dfb0f7dd3397ddb37cbee17c1 Mon Sep 17 00:00:00 2001 From: Dusan Vystrcil Date: Mon, 23 Mar 2026 10:22:30 +0100 Subject: [PATCH 05/11] docs: remove Output formats section from README Co-Authored-By: Claude Opus 4.6 (1M context) --- README.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/README.md b/README.md index 799925d..4cca839 100644 --- a/README.md +++ b/README.md @@ -61,12 +61,6 @@ Any AI tool that supports Markdown context can use the skills by pointing to: 2. **API token** — get from [Apify Console](https://console.apify.com/account/integrations), add `APIFY_TOKEN=your_token` to `.env` 3. **Node.js 20.6+** (for the scraper skill) -## Output formats - -- **Quick answer** - top 5 results displayed in chat (no file saved) -- **CSV** - full export with all fields -- **JSON** - full data export - ## Pricing Apify Actors use pay-per-result pricing. Check individual Actor pricing on the [Apify platform](https://apify.com). 
From a18d5a22aff325c0d23b28cc2cec8f9b6f416c3b Mon Sep 17 00:00:00 2001 From: Dusan Vystrcil Date: Mon, 23 Mar 2026 10:38:30 +0100 Subject: [PATCH 06/11] docs: improve skill names in README, add folder links and Store search info Co-Authored-By: Claude Opus 4.6 (1M context) --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 4cca839..edba278 100644 --- a/README.md +++ b/README.md @@ -6,17 +6,17 @@ A collection of AI agent skills for web scraping, data extraction, and Actor dev ## Skills -### `apify-ultimate-scraper` +### [Universal Scraper](skills/apify-ultimate-scraper/) (`apify-ultimate-scraper`) -Universal AI-powered web scraper for 55+ platforms. Scrape data from Instagram, Facebook, TikTok, YouTube, Google Maps, Amazon, Walmart, eBay, Booking.com, TripAdvisor, and more. +AI-powered web scraper for 55+ platforms. Scrape data from Instagram, Facebook, TikTok, YouTube, Google Maps, Amazon, Walmart, eBay, Booking.com, TripAdvisor, and more. Can also search the [Apify Store](https://apify.com/store) to find the right Actor for any platform not listed here. **Use cases**: lead generation, brand monitoring, competitor analysis, influencer discovery, trend research, content analytics, audience analysis, e-commerce pricing, reviews. -### `apify-actor-development` +### [Actor Development](skills/apify-actor-development/) (`apify-actor-development`) Create, debug, and deploy Apify Actors from scratch in JavaScript, TypeScript, or Python. -### `apify-actorization` +### [Actorization](skills/apify-actorization/) (`apify-actorization`) Convert existing projects into Apify Actors — supports JS/TS (SDK), Python (async context manager), and any language (CLI wrapper). 
From ebb07fcc9197306be955dd08b7204f2aaa299eab Mon Sep 17 00:00:00 2001 From: Dusan Vystrcil Date: Mon, 23 Mar 2026 10:44:35 +0100 Subject: [PATCH 07/11] docs: fix skill names - Ultimate scraper, lowercase headings Co-Authored-By: Claude Opus 4.6 (1M context) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index edba278..8fe1fa3 100644 --- a/README.md +++ b/README.md @@ -6,13 +6,13 @@ A collection of AI agent skills for web scraping, data extraction, and Actor dev ## Skills -### [Universal Scraper](skills/apify-ultimate-scraper/) (`apify-ultimate-scraper`) +### [Ultimate scraper](skills/apify-ultimate-scraper/) (`apify-ultimate-scraper`) AI-powered web scraper for 55+ platforms. Scrape data from Instagram, Facebook, TikTok, YouTube, Google Maps, Amazon, Walmart, eBay, Booking.com, TripAdvisor, and more. Can also search the [Apify Store](https://apify.com/store) to find the right Actor for any platform not listed here. **Use cases**: lead generation, brand monitoring, competitor analysis, influencer discovery, trend research, content analytics, audience analysis, e-commerce pricing, reviews. -### [Actor Development](skills/apify-actor-development/) (`apify-actor-development`) +### [Actor development](skills/apify-actor-development/) (`apify-actor-development`) Create, debug, and deploy Apify Actors from scratch in JavaScript, TypeScript, or Python. From c67b52964d952896dc3717fb8cce9b775c825153 Mon Sep 17 00:00:00 2001 From: Dusan Vystrcil Date: Mon, 23 Mar 2026 10:53:50 +0100 Subject: [PATCH 08/11] feat: restore .claude-plugin, commands, workflow, and gemini-extension Marketplace.json updated for 3 skills (ultimate-scraper, actor-development, actorization). Workflow simplified to only check AGENTS.md. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude-plugin/marketplace.json | 62 +++++++ .claude-plugin/plugin.json | 19 +++ .github/workflows/generate-agents.yml | 29 ++++ commands/create-actor.md | 232 ++++++++++++++++++++++++++ gemini-extension.json | 6 + 5 files changed, 348 insertions(+) create mode 100644 .claude-plugin/marketplace.json create mode 100644 .claude-plugin/plugin.json create mode 100644 .github/workflows/generate-agents.yml create mode 100644 commands/create-actor.md create mode 100644 gemini-extension.json diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json new file mode 100644 index 0000000..367ccdc --- /dev/null +++ b/.claude-plugin/marketplace.json @@ -0,0 +1,62 @@ +{ + "name": "apify-agent-skills", + "owner": { + "name": "Apify", + "email": "support@apify.com" + }, + "metadata": { + "description": "Official Apify Agent Skills for web scraping, data extraction, and automation", + "version": "2.0.0" + }, + "plugins": [ + { + "name": "apify-ultimate-scraper", + "source": "./skills/apify-ultimate-scraper", + "skills": "./", + "description": "Universal AI-powered web scraper for 55+ platforms. 
Scrape data from Instagram, Facebook, TikTok, YouTube, Google Maps, Google Search, Google Trends, Booking.com, TripAdvisor, Amazon, Walmart, eBay, and more for lead generation, brand monitoring, competitor analysis, influencer discovery, trend research, content analytics, audience analysis, e-commerce pricing, and reviews", + "keywords": [ + "scraping", + "web-scraper", + "data-extraction", + "apify", + "instagram", + "facebook", + "tiktok", + "youtube", + "google-maps" + ], + "category": "data-extraction", + "version": "2.0.0" + }, + { + "name": "apify-actor-development", + "source": "./skills/apify-actor-development", + "skills": "./", + "description": "Develop, debug, and deploy Apify Actors - serverless cloud programs for web scraping, automation, and data processing", + "keywords": [ + "apify", + "actor", + "development", + "deploy", + "serverless" + ], + "category": "development", + "version": "2.0.0" + }, + { + "name": "apify-actorization", + "source": "./skills/apify-actorization", + "skills": "./", + "description": "Convert existing projects into Apify Actors - serverless cloud programs. 
Actorize JavaScript/TypeScript (SDK with Actor.init/exit), Python (async context manager), or any language (CLI wrapper)", + "keywords": [ + "apify", + "actor", + "actorization", + "migration", + "convert" + ], + "category": "development", + "version": "2.0.0" + } + ] +} diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..4f11558 --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,19 @@ +{ + "name": "apify-agent-skills", + "version": "1.6.1", + "description": "Official Apify agent skills for web scraping, data extraction, and automation", + "author": { + "name": "Apify", + "email": "support@apify.com" + }, + "homepage": "https://github.com/apify/agent-skills", + "repository": "https://github.com/apify/agent-skills", + "license": "Apache-2.0", + "keywords": [ + "apify", + "scraping", + "automation", + "leads", + "data-extraction" + ] +} diff --git a/.github/workflows/generate-agents.yml b/.github/workflows/generate-agents.yml new file mode 100644 index 0000000..0d7cb0c --- /dev/null +++ b/.github/workflows/generate-agents.yml @@ -0,0 +1,29 @@ +name: Check AGENTS.md + +on: + pull_request: + paths: + - "scripts/AGENTS_TEMPLATE.md" + - "scripts/generate_agents.py" + - "**/SKILL.md" + - "agents/AGENTS.md" + +jobs: + validate: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up uv + uses: astral-sh/setup-uv@v4 + + - name: Generate AGENTS.md + run: uv run scripts/generate_agents.py + + - name: Ensure AGENTS.md is up to date + run: | + git diff --quiet -- agents/AGENTS.md || { + echo "::error::agents/AGENTS.md is outdated. Run 'uv run scripts/generate_agents.py' and commit the changes." 
+ exit 1 + } diff --git a/commands/create-actor.md b/commands/create-actor.md new file mode 100644 index 0000000..9f886f8 --- /dev/null +++ b/commands/create-actor.md @@ -0,0 +1,232 @@ +--- +description: Guided Apify Actor development with best practices and systematic workflow +argument-hint: Optional actor description +--- + +# Actor Development + +You are helping a developer create an Apify Actor - a serverless cloud program for web scraping, automation, and data processing. Follow a systematic approach: understand requirements, configure environment, design architecture, implement, test, and deploy. + +## Core Principles + +- **Ask clarifying questions**: Identify target websites, data requirements, edge cases, and constraints before implementation +- **Follow Apify best practices**: Use appropriate crawlers (Cheerio vs Playwright), implement proper error handling, respect rate limits +- **Validate early**: Check CLI installation and authentication before starting +- **Use TodoWrite**: Track all progress throughout +- **Security first**: Use `apify/log` for censoring sensitive data, validate input, handle errors gracefully + +--- + +## Phase 1: Discovery + +**Goal**: Understand what actor needs to be built + +Initial request: $ARGUMENTS + +**Actions**: +1. Create todo list with all phases +2. Ask user for clarification if needed: + - What is the actor's primary purpose? (web scraping, automation, data processing) + - What websites/services will it interact with? + - What data should it extract or what actions should it perform? + - Any specific requirements or constraints? +3. Summarize understanding and confirm with user + +--- + +## Phase 2: Environment Setup + +**Goal**: Verify Apify CLI is installed and authenticated + +**CRITICAL**: Do not proceed without proper setup + +**Actions**: +1. Check if Apify CLI is installed: `apify --help` +2. 
If not installed, guide user to install: + ```bash + curl -fsSL https://apify.com/install-cli.sh | bash + # Or: brew install apify-cli (Mac) + # Or: npm install -g apify-cli + ``` +3. Verify authentication: `apify info` +4. If not logged in: + - Check for APIFY_TOKEN environment variable + - If missing, ask user to generate token at https://console.apify.com/settings/integrations + - Login with: `apify login -t $APIFY_TOKEN` + +--- + +## Phase 3: Language Selection + +**Goal**: Choose programming language and template + +**Actions**: +1. **Ask user which language they prefer:** + - JavaScript (skills/apify-actor-development/references/actor-template-js.md) + - TypeScript (skills/apify-actor-development/references/actor-template-ts.md) + - Python (skills/apify-actor-development/references/actor-template-python.md) +2. Note: Additional packages (Crawlee, Playwright, etc.) can be installed later as needed + +--- + +## Phase 4: Requirements & Architecture Design + +**Goal**: Define input/output schemas and implementation approach + +**Actions**: +1. Clarify detailed requirements: + - What input parameters should the actor accept? + - What output format is needed? (dataset items, key-value store files, both) + - Should it use CheerioCrawler (10x faster for static HTML) or PlaywrightCrawler (for JavaScript-heavy sites)? + - Concurrency settings? (HTTP: 10-50, Browser: 1-5) + - Rate limiting and retry strategies? + - Should standby mode be enabled? +2. Design architecture: + - Input schema structure + - Output/dataset schema structure + - Key-value store schema (if needed) + - Error handling approach + - Data validation and cleaning strategy +3. Present architecture to user and get approval + +--- + +## Phase 5: Actor Creation + +**Goal**: Create actor from template and configure schemas + +**DO NOT START WITHOUT USER APPROVAL** + +**Actions**: +1. Wait for explicit user approval +2. 
Copy appropriate language template from `skills/apify-actor-development/references/` directory +3. Update `.actor/actor.json`: + - Set actor name and version + - **IMPORTANT**: Fill in `generatedBy` property with current model name + - Configure runtime, memory, timeout + - Set `usesStandbyMode` if applicable +4. Create/update `.actor/input_schema.json` with input parameters +5. Create/update `.actor/output_schema.json` with output structure +6. Create/update `.actor/dataset_schema.json` if using datasets +7. Create/update `.actor/key_value_store_schema.json` if using key-value store +8. Update todos as you progress + +**Reference documentation:** +- [skills/apify-actor-development/references/actor-json.md](skills/apify-actor-development/references/actor-json.md) +- [skills/apify-actor-development/references/input-schema.md](skills/apify-actor-development/references/input-schema.md) +- [skills/apify-actor-development/references/output-schema.md](skills/apify-actor-development/references/output-schema.md) +- [skills/apify-actor-development/references/dataset-schema.md](skills/apify-actor-development/references/dataset-schema.md) +- [skills/apify-actor-development/references/key-value-store-schema.md](skills/apify-actor-development/references/key-value-store-schema.md) + +--- + +## Phase 6: Implementation + +**Goal**: Implement actor logic following best practices + +**Actions**: +1. Implement actor code in `src/main.py`, `src/main.js`, or `src/main.ts` +2. 
Follow best practices: + - ✓ Use Apify SDK (`apify`) for code running on Apify platform + - ✓ Validate input early with proper error handling + - ✓ Use CheerioCrawler for static HTML (10x faster) + - ✓ Use PlaywrightCrawler only for JavaScript-heavy sites + - ✓ Use router pattern for complex crawls + - ✓ Implement retry strategies with exponential backoff + - ✓ Use proper concurrency settings + - ✓ Clean and validate data before pushing to dataset + - ✓ **Always use `apify/log` package** - censors sensitive data + - ✓ Implement readiness probe handler if using standby mode + - ✗ Don't use browser crawlers when HTTP/Cheerio works + - ✗ Don't hard code values that should be in input schema + - ✗ Don't skip input validation or error handling + - ✗ Don't overload servers - use appropriate concurrency and delays +3. Implement standby mode readiness probe if `usesStandbyMode: true` (see [skills/apify-actor-development/references/standby-mode.md](skills/apify-actor-development/references/standby-mode.md)) +4. Use proper logging (see [skills/apify-actor-development/references/logging.md](skills/apify-actor-development/references/logging.md)) +5. Update todos as you progress + +--- + +## Phase 7: Documentation + +**Goal**: Create comprehensive README for marketplace + +**Actions**: +1. Create README.md with: + - Clear description of what the actor does + - Input parameters with examples + - Output format with examples + - Usage instructions + - Limitations and known issues + - Example runs +2. Include code examples for common use cases +3. Mention rate limits, costs, or legal considerations if applicable + +--- + +## Phase 8: Local Testing + +**Goal**: Test actor locally before deployment + +**Actions**: +1. Install dependencies: + - JavaScript/TypeScript: `npm install` + - Python: `pip install -r requirements.txt` +2. Create test input file at `storage/key_value_stores/default/INPUT.json` with sample parameters +3. Run actor locally: `apify run` +4. 
Verify: + - Input is parsed correctly + - Actor completes successfully + - Output is in expected format + - Error handling works + - Logging is appropriate +5. Fix any issues found +6. Test edge cases and error scenarios + +--- + +## Phase 9: Deployment + +**Goal**: Deploy actor to Apify platform + +**DO NOT DEPLOY WITHOUT USER APPROVAL** + +**Actions**: +1. **Ask user if they want to deploy now** +2. If yes, deploy with: `apify push` +3. Actor will be deployed with name from `.actor/actor.json` +4. Provide user with: + - Deployment confirmation + - Actor URL on Apify platform + - Instructions for running on platform + +--- + +## Phase 10: Summary + +**Goal**: Document what was accomplished + +**Actions**: +1. Mark all todos complete +2. Summarize: + - What actor was built + - Key features and capabilities + - Input/output schemas + - Files created/modified + - Deployment status + - Suggested next steps (testing on platform, publishing to store, monitoring) + +--- + +## Additional Resources + +**MCP Tools** (if configured): +- `search-apify-docs` - Search documentation +- `fetch-apify-docs` - Get full doc pages + +**Documentation:** +- [docs.apify.com/llms.txt](https://docs.apify.com/llms.txt) - Apify quick reference +- [docs.apify.com/llms-full.txt](https://docs.apify.com/llms-full.txt) - Apify complete docs +- [crawlee.dev/llms.txt](https://crawlee.dev/llms.txt) - Crawlee quick reference +- [crawlee.dev/llms-full.txt](https://crawlee.dev/llms-full.txt) - Crawlee complete docs +- [whitepaper.actor](https://raw.githubusercontent.com/apify/actor-whitepaper/refs/heads/master/README.md) - Complete Actor specification diff --git a/gemini-extension.json b/gemini-extension.json new file mode 100644 index 0000000..e9daa47 --- /dev/null +++ b/gemini-extension.json @@ -0,0 +1,6 @@ +{ + "name": "apify-agent-skills", + "description": "Provides access to Apify Agent Skills for web scraping, data extraction, and automation.", + "version": "1.0.0", + "contextFileName": 
"agents/AGENTS.md" +} From 71f4b292b637b2956cd79746fd89695527a2b065 Mon Sep 17 00:00:00 2001 From: Dusan Vystrcil Date: Mon, 23 Mar 2026 11:01:21 +0100 Subject: [PATCH 09/11] fix: restore marketplace.json validation and revert .gitignore changes Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/generate-agents.yml | 5 +-- .gitignore | 3 ++ scripts/generate_agents.py | 46 +++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/.github/workflows/generate-agents.yml b/.github/workflows/generate-agents.yml index 0d7cb0c..5530fde 100644 --- a/.github/workflows/generate-agents.yml +++ b/.github/workflows/generate-agents.yml @@ -1,4 +1,4 @@ -name: Check AGENTS.md +name: Check AGENTS.md and marketplace.json on: pull_request: @@ -7,6 +7,7 @@ on: - "scripts/generate_agents.py" - "**/SKILL.md" - "agents/AGENTS.md" + - ".claude-plugin/marketplace.json" jobs: validate: @@ -18,7 +19,7 @@ jobs: - name: Set up uv uses: astral-sh/setup-uv@v4 - - name: Generate AGENTS.md + - name: Generate AGENTS.md and validate marketplace.json run: uv run scripts/generate_agents.py - name: Ensure AGENTS.md is up to date diff --git a/.gitignore b/.gitignore index 9663ddc..2f157f1 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,10 @@ # DEVELOPMENT .vscode .claude +context/ +CLAUDE.md PLAN.md PLAN-*.md .mcp.json +scripts/hooks/ .idea diff --git a/scripts/generate_agents.py b/scripts/generate_agents.py index 461ef4f..434c2fe 100644 --- a/scripts/generate_agents.py +++ b/scripts/generate_agents.py @@ -5,19 +5,24 @@ # /// """Generate AGENTS.md from AGENTS_TEMPLATE.md and SKILL.md frontmatter. +Also validates that marketplace.json is in sync with discovered skills. 
+ Usage: uv run scripts/generate_agents.py """ from __future__ import annotations +import json import re +import sys from pathlib import Path ROOT = Path(__file__).resolve().parent.parent TEMPLATE_PATH = ROOT / "scripts" / "AGENTS_TEMPLATE.md" OUTPUT_PATH = ROOT / "agents" / "AGENTS.md" +MARKETPLACE_PATH = ROOT / ".claude-plugin" / "marketplace.json" def load_template() -> str: @@ -76,6 +81,38 @@ def repl(match: re.Match[str]) -> str: return content +def validate_marketplace(skills: list[dict[str, str]]) -> list[str]: + """Validate marketplace.json against discovered skills. Returns error messages.""" + if not MARKETPLACE_PATH.exists(): + return [f"marketplace.json not found at {MARKETPLACE_PATH}"] + + marketplace = json.loads(MARKETPLACE_PATH.read_text(encoding="utf-8")) + plugins = marketplace.get("plugins", []) + errors: list[str] = [] + + # Every plugin with skills should have at least one SKILL.md + for plugin in plugins: + source = plugin.get("source", "").lstrip("./") + plugin_skills = [s for s in skills if s["path"].startswith(source)] + if not plugin_skills: + errors.append( + f"Plugin '{plugin['name']}' at '{source}' has no SKILL.md files" + ) + + # Every discovered skill should be covered by a plugin + for skill in skills: + found = any( + skill["path"].startswith(p.get("source", "").lstrip("./")) + for p in plugins + ) + if not found: + errors.append( + f"Skill '{skill['name']}' at '{skill['path']}' is not covered by any plugin" + ) + + return errors + + def main() -> None: template = load_template() skills = collect_skills() @@ -84,6 +121,15 @@ def main() -> None: OUTPUT_PATH.write_text(output, encoding="utf-8") print(f"Wrote {OUTPUT_PATH} with {len(skills)} skills.") + # Validate marketplace.json + errors = validate_marketplace(skills) + if errors: + print("\nMarketplace.json validation errors:", file=sys.stderr) + for error in errors: + print(f" - {error}", file=sys.stderr) + sys.exit(1) + print("Marketplace.json validation passed.") + if __name__ == 
"__main__": main() From c24bcc0944c2f241ab7e77fe40a8f949952b29e1 Mon Sep 17 00:00:00 2001 From: Dusan Vystrcil Date: Mon, 23 Mar 2026 11:03:13 +0100 Subject: [PATCH 10/11] chore: remove CLAUDE.md from tracking (gitignored) Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 25 ------------------------- 1 file changed, 25 deletions(-) delete mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index 2486e15..0000000 --- a/CLAUDE.md +++ /dev/null @@ -1,25 +0,0 @@ -This repository contains official Apify agent skills for web scraping, data extraction, and automation. - -## Structure - -- `skills/apify-ultimate-scraper/` - Universal web scraper skill (55+ platforms) -- `skills/apify-actor-development/` - Actor development skill (JS/TS/Python) -- `skills/apify-actorization/` - Actorization skill (convert projects to Actors) -- `agents/AGENTS.md` - Auto-generated skill index for Codex/Gemini CLI -- `scripts/generate_agents.py` - Script to regenerate AGENTS.md - -## Development - -Run `uv run scripts/generate_agents.py` to regenerate `agents/AGENTS.md`. - -## Committing Changes - -Use conventional commit messages (feat:, fix:, docs:, chore:, etc.). 
- -## Apify MCP - -APIFY_TOKEN could be found in `.env` file - -You've got also access to Apify MCP server - it could be useful if you need to search or reach for Apify Actors and it's input schemas - -IMPORTANT: always use name of Apify Actors in format "author/Actor" (never "author~Actor") From 7552acc1f87e9bfcd6733471e156ad277dac04c2 Mon Sep 17 00:00:00 2001 From: Dusan Vystrcil Date: Mon, 23 Mar 2026 11:13:40 +0100 Subject: [PATCH 11/11] feat: add apify-generate-output-schema skill, group skills in README Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude-plugin/marketplace.json | 15 + README.md | 16 +- agents/AGENTS.md | 2 + skills/apify-generate-output-schema/SKILL.md | 415 +++++++++++++++++++ 4 files changed, 438 insertions(+), 10 deletions(-) create mode 100644 skills/apify-generate-output-schema/SKILL.md diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 367ccdc..dc4e83d 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -57,6 +57,21 @@ ], "category": "development", "version": "2.0.0" + }, + { + "name": "apify-generate-output-schema", + "source": "./skills/apify-generate-output-schema", + "skills": "./", + "description": "Generate output schemas (dataset_schema.json, output_schema.json, key_value_store_schema.json) for an Apify Actor by analyzing its source code", + "keywords": [ + "apify", + "actor", + "schema", + "output", + "dataset" + ], + "category": "development", + "version": "2.0.0" } ] } diff --git a/README.md b/README.md index 8fe1fa3..4dac599 100644 --- a/README.md +++ b/README.md @@ -6,19 +6,15 @@ A collection of AI agent skills for web scraping, data extraction, and Actor dev ## Skills -### [Ultimate scraper](skills/apify-ultimate-scraper/) (`apify-ultimate-scraper`) +### Scraping -AI-powered web scraper for 55+ platforms. Scrape data from Instagram, Facebook, TikTok, YouTube, Google Maps, Amazon, Walmart, eBay, Booking.com, TripAdvisor, and more. 
Can also search the [Apify Store](https://apify.com/store) to find the right Actor for any platform not listed here. +- **[Ultimate scraper](skills/apify-ultimate-scraper/)** (`apify-ultimate-scraper`) — AI-powered web scraper for 55+ platforms including Instagram, Facebook, TikTok, YouTube, Google Maps, Amazon, Walmart, eBay, Booking.com, TripAdvisor, and more. Can also search the [Apify Store](https://apify.com/store) to find the right Actor for any platform not listed here. -**Use cases**: lead generation, brand monitoring, competitor analysis, influencer discovery, trend research, content analytics, audience analysis, e-commerce pricing, reviews. +### Development -### [Actor development](skills/apify-actor-development/) (`apify-actor-development`) - -Create, debug, and deploy Apify Actors from scratch in JavaScript, TypeScript, or Python. - -### [Actorization](skills/apify-actorization/) (`apify-actorization`) - -Convert existing projects into Apify Actors — supports JS/TS (SDK), Python (async context manager), and any language (CLI wrapper). +- **[Actor development](skills/apify-actor-development/)** (`apify-actor-development`) — create, debug, and deploy Apify Actors from scratch in JavaScript, TypeScript, or Python. +- **[Actorization](skills/apify-actorization/)** (`apify-actorization`) — convert existing projects into Apify Actors. Supports JS/TS (SDK), Python (async context manager), and any language (CLI wrapper). +- **[Generate output schema](skills/apify-generate-output-schema/)** (`apify-generate-output-schema`) — generate output schemas (`dataset_schema.json`, `output_schema.json`, `key_value_store_schema.json`) for an Apify Actor by analyzing its source code. 
## Installation diff --git a/agents/AGENTS.md b/agents/AGENTS.md index bffa49d..4bd726d 100644 --- a/agents/AGENTS.md +++ b/agents/AGENTS.md @@ -5,6 +5,7 @@ You have additional SKILLs documented in directories containing a "SKILL.md" fil These skills are: - apify-actor-development -> "skills/apify-actor-development/SKILL.md" - apify-actorization -> "skills/apify-actorization/SKILL.md" + - apify-generate-output-schema -> "skills/apify-generate-output-schema/SKILL.md" - apify-ultimate-scraper -> "skills/apify-ultimate-scraper/SKILL.md" IMPORTANT: You MUST read the SKILL.md file whenever the description of the skills matches the user intent, or may help accomplish their task. @@ -13,6 +14,7 @@ IMPORTANT: You MUST read the SKILL.md file whenever the description of the skill apify-actor-development: `Develop, debug, and deploy Apify Actors - serverless cloud programs for web scraping, automation, and data processing. Use when creating new Actors, modifying existing ones, or troubleshooting Actor code.` apify-actorization: `Convert existing projects into Apify Actors - serverless cloud programs. Actorize JavaScript/TypeScript (SDK with Actor.init/exit), Python (async context manager), or any language (CLI wrapper). Use when migrating code to Apify, wrapping CLI tools as Actors, or adding Actor SDK to existing projects.` +apify-generate-output-schema: `Generate output schemas (dataset_schema.json, output_schema.json, key_value_store_schema.json) for an Apify Actor by analyzing its source code. Use when creating or updating Actor output schemas.` apify-ultimate-scraper: `Universal AI-powered web scraper for any platform. Scrape data from Instagram, Facebook, TikTok, YouTube, Google Maps, Google Search, Google Trends, Booking.com, and TripAdvisor. 
Use for lead generation, brand monitoring, competitor analysis, influencer discovery, trend research, content analytics, audience analysis, or any data extraction task.` diff --git a/skills/apify-generate-output-schema/SKILL.md b/skills/apify-generate-output-schema/SKILL.md new file mode 100644 index 0000000..d305935 --- /dev/null +++ b/skills/apify-generate-output-schema/SKILL.md @@ -0,0 +1,415 @@ +--- +name: apify-generate-output-schema +description: Generate output schemas (dataset_schema.json, output_schema.json, key_value_store_schema.json) for an Apify Actor by analyzing its source code. Use when creating or updating Actor output schemas. +--- + +# Generate Actor Output Schema + +You are generating output schema files for an Apify Actor. The output schema tells Apify Console how to display run results. You will analyze the Actor's source code, create `dataset_schema.json`, `output_schema.json`, and `key_value_store_schema.json` (if the Actor uses key-value store), and update `actor.json`. 
+ +## Core Principles + +- **Analyze code first**: Read the Actor's source to understand what data it actually pushes to the dataset — never guess +- **Every field is nullable**: APIs and websites are unpredictable — always set `"nullable": true` +- **Anonymize examples**: Never use real user IDs, usernames, or personal data in examples +- **Verify against code**: If TypeScript types exist, cross-check the schema against both the type definition AND the code that produces the values +- **Reuse existing patterns**: Before generating schemas, check if other Actors in the same repository already have output schemas — match their structure, naming conventions, description style, and formatting +- **Don't reinvent the wheel**: Reuse existing type definitions, interfaces, and utilities from the codebase instead of creating duplicate definitions + +--- + +## Phase 1: Discover Actor Structure + +**Goal**: Locate the Actor and understand its output + +Initial request: $ARGUMENTS + +**Actions**: +1. Create todo list with all phases +2. Find the `.actor/` directory containing `actor.json` +3. Read `actor.json` to understand the Actor's configuration +4. Check if `dataset_schema.json`, `output_schema.json`, and `key_value_store_schema.json` already exist +5. **Search for existing schemas in the repository**: Look for other `.actor/` directories or schema files (e.g., `**/dataset_schema.json`, `**/output_schema.json`, `**/key_value_store_schema.json`) to learn the repo's conventions — match their description style, field naming, example formatting, and overall structure +6. Find all places where data is pushed to the dataset: + - **JavaScript/TypeScript**: Search for `Actor.pushData(`, `dataset.pushData(`, `Dataset.pushData(` + - **Python**: Search for `Actor.push_data(`, `dataset.push_data(`, `Dataset.push_data(` +7. 
Find all places where data is stored in the key-value store: + - **JavaScript/TypeScript**: Search for `Actor.setValue(`, `keyValueStore.setValue(`, `KeyValueStore.setValue(` + - **Python**: Search for `Actor.set_value(`, `key_value_store.set_value(`, `KeyValueStore.set_value(` +8. Find output type definitions — **reuse them directly** instead of recreating from scratch: + - **TypeScript**: Look for output type interfaces/types (e.g., in `src/types/`, `src/types/output.ts`). If an interface or type already defines the output shape, derive the schema fields from it — do not create a parallel definition + - **Python**: Look for TypedDict, dataclass, or Pydantic model definitions. Use the existing field names, types, and docstrings as the source of truth +9. Check for existing shared schema utilities or helper functions in the codebase that handle schema generation or validation — reuse them rather than creating new logic +10. If inline `storages.dataset` or `storages.keyValueStore` config exists in `actor.json`, note it for migration + +Present findings to user: list all discovered dataset output fields, key-value store keys, their types, and where they come from. 
+ +--- + +## Phase 2: Generate `dataset_schema.json` + +**Goal**: Create a complete dataset schema with field definitions and display views + +### File structure + +```json +{ + "actorSpecification": 1, + "fields": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + // ALL output fields here — every field the Actor can produce, + // not just the ones shown in the overview view + }, + "required": [], + "additionalProperties": true + }, + "views": { + "overview": { + "title": "Overview", + "description": "Most important fields at a glance", + "transformation": { + "fields": [ + // 8-12 most important field names + ] + }, + "display": { + "component": "table", + "properties": { + // Display config for each overview field + } + } + } + } +} +``` + +### Consistency with existing schemas + +If existing output schemas were found in the repository during Phase 1 (step 5), follow their conventions: +- Match the **description writing style** (sentence case vs. lowercase, period vs. no period, etc.) +- Match the **field naming convention** (camelCase vs. snake_case) — this must also match the actual keys produced by the Actor code +- Match the **example value style** (e.g., date formats, URL patterns, placeholder names) +- Match the **view structure** (number of fields in overview, display format choices) +- Match the **JSON formatting** (indentation, property ordering, spacing) — all schemas in the same repository must use identical formatting, including standalone Actors + +When the Actor code already has well-defined TypeScript interfaces or Python type classes, derive fields directly from those types rather than re-analyzing pushData/push_data calls from scratch. The type definition is the canonical source for field names and types, but still cross-check it against the code that produces the values, as required by the "Verify against code" principle.
+ +### Hard rules (no exceptions) + +| Rule | Detail | +|------|--------| +| **All fields in `properties`** | The `fields.properties` object must contain **every** field the Actor can output, not just the fields shown in the overview view. The views section selects a subset for display — the `properties` section must be the complete superset | +| `"nullable": true` | On **every** field — APIs are unpredictable | +| `"additionalProperties": true` | On the **top-level `fields` object** AND on **every nested object** within `properties`. This is the most commonly missed rule — it must appear at both levels | +| `"required": []` | Always empty array — on the **top-level `fields` object** AND on **every nested object** within `properties` | +| Anonymized examples | No real user IDs, usernames, or content | +| `"type"` required with `"nullable"` | AJV rejects `nullable` without a `type` on the same field | + +> **Warning — most common mistakes**: +> 1. Only including fields that appear in the overview view. The `fields.properties` must list ALL output fields, even if they are not in the `views` section. +> 2. Only adding `"required": []` and `"additionalProperties": true` on nested object-type properties but forgetting them on the top-level `fields` object. Both levels need them. + +> **Note**: `nullable` is not part of JSON Schema draft-07; it is an OpenAPI-style keyword that the AJV validator supports as an extension. It is intentional and correct.
+ +### Field type patterns + +**String field:** +```json +"title": { + "type": "string", + "description": "Title of the scraped item", + "nullable": true, + "example": "Example Item Title" +} +``` + +**Number field:** +```json +"viewCount": { + "type": "number", + "description": "Number of views", + "nullable": true, + "example": 15000 +} +``` + +**Boolean field:** +```json +"isVerified": { + "type": "boolean", + "description": "Whether the account is verified", + "nullable": true, + "example": true +} +``` + +**Array field:** +```json +"hashtags": { + "type": "array", + "description": "Hashtags associated with the item", + "items": { "type": "string" }, + "nullable": true, + "example": ["#example", "#demo"] +} +``` + +**Nested object field:** +```json +"authorInfo": { + "type": "object", + "description": "Information about the author", + "properties": { + "name": { "type": "string", "nullable": true }, + "url": { "type": "string", "nullable": true } + }, + "required": [], + "additionalProperties": true, + "nullable": true, + "example": { "name": "Example Author", "url": "https://example.com/author" } +} +``` + +**Enum field:** +```json +"contentType": { + "type": "string", + "description": "Type of content", + "enum": ["article", "video", "image", null], + "nullable": true, + "example": "article" +} +``` + +**Union type (e.g., TypeScript `ObjectType | string`):** +```json +"metadata": { + "type": ["object", "string"], + "description": "Structured metadata object, or error string if unavailable", + "nullable": true, + "example": { "key": "value" } +} +``` + +### Anonymized example values + +Use realistic but generic values.
Follow platform ID format conventions: + +| Field type | Example approach | +|---|---| +| IDs | Match platform format and length (e.g., 11 chars for YouTube video IDs) | +| Usernames | `"exampleuser"`, `"sampleuser123"` | +| Display names | `"Example Channel"`, `"Sample Author"` | +| URLs | Use platform's standard URL format with fake IDs | +| Dates | `"2025-01-15T12:00:00.000Z"` (ISO 8601) | +| Text content | Generic descriptive text, e.g., `"This is an example description."` | + +### Views section + +- `transformation.fields`: List 8–12 most important field names (order = column order in UI) +- `display.properties`: One entry per overview field with `label` and `format` +- Available formats: `"text"`, `"number"`, `"date"`, `"link"`, `"boolean"`, `"image"`, `"array"`, `"object"` + +Pick fields that give users the most useful at-a-glance summary of the data. + +--- + +## Phase 3: Generate `key_value_store_schema.json` (if applicable) + +**Goal**: Define key-value store collections if the Actor stores data in the key-value store + +> **Skip this phase** if no `Actor.setValue()` / `Actor.set_value()` calls were found in Phase 1 (beyond the default `INPUT` key). + +### File structure + +```json +{ + "actorKeyValueStoreSchemaVersion": 1, + "title": "<Descriptive title of what the key-value store contains>", + "description": "<One sentence describing the stored data>", + "collections": { + "<collectionName>": { + "title": "<Human-readable collection title>", + "description": "<What this collection contains>", + "keyPrefix": "<prefix->" + } + } +} +``` + +### How to identify collections + +Group the discovered `setValue` / `set_value` calls by key pattern: + +1. **Fixed keys** (e.g., `"RESULTS"`, `"summary"`) — use `"key"` (exact match) +2. **Dynamic keys with a prefix** (e.g., `"screenshot-${id}"`, `f"image-{name}"`) — use `"keyPrefix"` + +Each group becomes a collection.
+ +### Collection properties + +| Property | Required | Description | +|----------|----------|-------------| +| `title` | Yes | Shown in UI tabs | +| `description` | No | Shown in UI tooltips | +| `key` | Conditional | Exact key for single-key collections (use `key` OR `keyPrefix`, not both) | +| `keyPrefix` | Conditional | Prefix for multi-key collections (use `key` OR `keyPrefix`, not both) | +| `contentTypes` | No | Restrict allowed MIME types (e.g., `["image/jpeg"]`, `["application/json"]`) | +| `jsonSchema` | No | JSON Schema draft-07 for validating `application/json` content | + +### Examples + +**Single file output (e.g., a report):** +```json +{ + "actorKeyValueStoreSchemaVersion": 1, + "title": "Analysis Results", + "description": "Key-value store containing analysis output", + "collections": { + "report": { + "title": "Report", + "description": "Final analysis report", + "key": "REPORT", + "contentTypes": ["application/json"] + } + } +} +``` + +**Multiple files with prefix (e.g., screenshots):** +```json +{ + "actorKeyValueStoreSchemaVersion": 1, + "title": "Scraped Files", + "description": "Key-value store containing downloaded files and screenshots", + "collections": { + "screenshots": { + "title": "Screenshots", + "description": "Page screenshots captured during scraping", + "keyPrefix": "screenshot-", + "contentTypes": ["image/png", "image/jpeg"] + }, + "documents": { + "title": "Documents", + "description": "Downloaded document files", + "keyPrefix": "doc-", + "contentTypes": ["application/pdf", "text/html"] + } + } +} +``` + +--- + +## Phase 4: Generate `output_schema.json` + +**Goal**: Create the output schema that tells Apify Console where to find results + +For most Actors that push data to a dataset, this is a minimal file: + +```json +{ + "actorOutputSchemaVersion": 1, + "title": "<Descriptive title of the Actor output>", + "description": "<One sentence describing what the Actor returns>", + "properties": { + "dataset": { + "type": "string", + "title": "Results", + "description": "Dataset containing all scraped data", +
"template": "{{links.apiDefaultDatasetUrl}}/items" + } + } +} +``` + +> **Critical**: Each property entry **must** include `"type": "string"` — this is an Apify-specific convention. The Apify meta-validator rejects properties without it (and rejects `"type": "object"` — only `"string"` is valid here). + +If `key_value_store_schema.json` was generated in Phase 3, add a second property: +```json +"files": { + "type": "string", + "title": "Files", + "description": "Key-value store containing downloaded files", + "template": "{{links.apiDefaultKeyValueStoreUrl}}/keys" +} +``` + +### Available template variables + +- `{{links.apiDefaultDatasetUrl}}` — API URL of default dataset +- `{{links.apiDefaultKeyValueStoreUrl}}` — API URL of default key-value store +- `{{links.publicRunUrl}}` — Public run URL +- `{{links.consoleRunUrl}}` — Console run URL +- `{{links.apiRunUrl}}` — API run URL +- `{{links.containerRunUrl}}` — URL of webserver running inside the run +- `{{run.defaultDatasetId}}` — ID of the default dataset +- `{{run.defaultKeyValueStoreId}}` — ID of the default key-value store + +--- + +## Phase 5: Update `actor.json` + +**Goal**: Wire the schema files into the Actor configuration + +**Actions**: +1. Read the current `actor.json` +2. Add or update the `storages.dataset` reference: + ```json + "storages": { + "dataset": "./dataset_schema.json" + } + ``` +3. If `key_value_store_schema.json` was generated, add the reference: + ```json + "storages": { + "dataset": "./dataset_schema.json", + "keyValueStore": "./key_value_store_schema.json" + } + ``` +4. Add or update the `output` reference: + ```json + "output": "./output_schema.json" + ``` +5. 
If `actor.json` had inline `storages.dataset` or `storages.keyValueStore` objects (not string paths), migrate their content into the respective schema files and replace the inline objects with file path strings + +--- + +## Phase 6: Review and Validate + +**Goal**: Ensure correctness and completeness + +**Checklist**: +- [ ] **Every** output field from the source code is in `dataset_schema.json` `fields.properties` — not just the overview view fields but ALL fields the Actor can produce +- [ ] Every field has `"nullable": true` +- [ ] The **top-level `fields` object** has both `"additionalProperties": true` and `"required": []` +- [ ] Every **nested object** within `properties` also has `"additionalProperties": true` and `"required": []` +- [ ] Every field has a `"description"` and an `"example"` +- [ ] All example values are anonymized +- [ ] `"type"` is present on every field that has `"nullable"` +- [ ] Views list 8–12 most useful fields with correct display formats +- [ ] `output_schema.json` has `"type": "string"` on every property +- [ ] If key-value store is used: `key_value_store_schema.json` has collections matching all `setValue`/`set_value` calls +- [ ] If key-value store is used: each collection uses either `key` or `keyPrefix` (not both) +- [ ] `actor.json` references all generated schema files +- [ ] Schema field names match the actual keys in the code (camelCase/snake_case consistency) +- [ ] If existing schemas were found in the repo, the new schema follows their conventions (description style, example format, view structure) +- [ ] Schema fields are derived from existing type definitions (interfaces, TypedDicts, dataclasses) where available — no duplicated or divergent field definitions + +Present the generated schemas to the user for review before writing them. 
+ +--- + +## Phase 7: Summary + +**Goal**: Document what was created + +Report: +- Files created or updated +- Number of fields in the dataset schema +- Number of collections in the key-value store schema (if generated) +- Fields selected for the overview view +- Any fields that need user clarification (ambiguous types, unclear nullability) +- Suggested next steps (test locally with `apify run`, verify output tab in Console)