From e0905ef97909cd81c9a15b32df1478b252444e71 Mon Sep 17 00:00:00 2001 From: arahangua Date: Tue, 12 Aug 2025 17:10:52 +0900 Subject: [PATCH 1/7] minor hotfix: wrong statement on wan2.2 --- README.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/README.md b/README.md index 5b1d92f..d25967f 100644 --- a/README.md +++ b/README.md @@ -23,11 +23,7 @@ Ever burned through credits in minutes? Searching Reddit for that one optimizati ![Classic AI Frustration](assets/guy_freaking_out2.png) -**SCAPO** extracts **specific, actionable optimization techniques** from Reddit about AI services - not generic "write better prompts" advice, but real techniques like: -- "Use HeyGen API v1 for unlimited ElevenLabs access at $10/month" -- "GitHub Copilot has a 300 request/day limit" -- "Avoid slowing ElevenLabs speech >5% to prevent stutters" -- "Set temperature=0.7 for wan2.2 video generation" +**SCAPO** extracts **specific, actionable optimization techniques** from Reddit about AI services - not generic "write better prompts" advice, but real discussions. ## ✨ Two Approaches From 0c0facf2ee9b62d049071a36e382f6a3accde81c Mon Sep 17 00:00:00 2001 From: arahangua Date: Tue, 12 Aug 2025 17:21:11 +0900 Subject: [PATCH 2/7] fixed: left-out acknowledgements --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index d25967f..9ad7341 100644 --- a/README.md +++ b/README.md @@ -296,6 +296,8 @@ Built as part of the CZero Engine project to improve AI application development. 
- Reddit communities for sharing real experiences - OpenRouter for accessible AI APIs - Coffee ☕ for making this possible +- Ollama and LMstudio for awesome local LLM experience +- All opensource contributors in this AI space --- From 2616490b1423ccfde0f04119bb3c19b72d72f851 Mon Sep 17 00:00:00 2001 From: arahangua Date: Tue, 12 Aug 2025 20:23:33 +0900 Subject: [PATCH 3/7] checkpoint:one-pass run of 22 high-priority services --- QUICKSTART.md | 10 +- README.md | 3 + models/audio/eleven-labs/cost_optimization.md | 16 ++- models/audio/eleven-labs/metadata.json | 6 +- models/audio/eleven-labs/parameters.json | 14 ++- models/audio/eleven-labs/pitfalls.md | 16 ++- models/audio/eleven-labs/prompting.md | 10 +- models/code/cursor/cost_optimization.md | 20 ++++ .../characterai => code/cursor}/metadata.json | 8 +- models/code/cursor/parameters.json | 15 +++ models/code/cursor/pitfalls.md | 21 ++++ models/code/cursor/prompting.md | 17 +++ .../code/github-copilot/cost_optimization.md | 15 +-- models/code/github-copilot/metadata.json | 8 +- models/code/github-copilot/parameters.json | 18 +-- models/code/github-copilot/pitfalls.md | 18 ++- models/code/github-copilot/prompting.md | 8 +- .../general/leonardo-ai/cost_optimization.md | 12 ++ models/general/leonardo-ai/metadata.json | 13 +++ models/general/leonardo-ai/parameters.json | 13 +++ models/general/leonardo-ai/pitfalls.md | 8 ++ models/multimodal/characterai/parameters.json | 17 --- models/multimodal/characterai/pitfalls.md | 16 --- models/multimodal/characterai/prompting.md | 18 --- models/text/characterai/metadata.json | 13 +++ models/text/characterai/parameters.json | 14 +++ models/text/characterai/pitfalls.md | 18 +++ models/text/characterai/prompting.md | 16 +++ models/text/fliki/cost_optimization.md | 8 ++ models/text/fliki/metadata.json | 13 +++ models/text/fliki/parameters.json | 12 ++ models/text/fliki/prompting.md | 12 ++ models/text/ideogram/cost_optimization.md | 8 ++ models/text/ideogram/metadata.json | 13 +++ 
models/text/ideogram/parameters.json | 12 ++ models/text/ideogram/pitfalls.md | 8 ++ models/text/ideogram/prompting.md | 13 +++ models/text/playht/cost_optimization.md | 8 ++ models/text/playht/metadata.json | 13 +++ models/text/playht/parameters.json | 12 ++ models/text/resemble-ai/cost_optimization.md | 8 ++ models/text/resemble-ai/metadata.json | 13 +++ models/text/resemble-ai/parameters.json | 12 ++ models/video/heygen/metadata.json | 13 +++ models/video/heygen/prompting.md | 18 +++ models/video/pika/metadata.json | 13 +++ models/video/pika/pitfalls.md | 8 ++ models/video/pika/prompting.md | 10 ++ models/video/runway/cost_optimization.md | 9 +- models/video/runway/metadata.json | 6 +- models/video/runway/parameters.json | 7 +- models/video/runway/pitfalls.md | 9 +- models/video/runway/prompting.md | 10 +- src/cli.py | 107 ++++++++++++++++++ src/services/batch_llm_processor.py | 12 +- 55 files changed, 627 insertions(+), 141 deletions(-) create mode 100644 models/code/cursor/cost_optimization.md rename models/{multimodal/characterai => code/cursor}/metadata.json (52%) create mode 100644 models/code/cursor/parameters.json create mode 100644 models/code/cursor/pitfalls.md create mode 100644 models/code/cursor/prompting.md create mode 100644 models/general/leonardo-ai/cost_optimization.md create mode 100644 models/general/leonardo-ai/metadata.json create mode 100644 models/general/leonardo-ai/parameters.json create mode 100644 models/general/leonardo-ai/pitfalls.md delete mode 100644 models/multimodal/characterai/parameters.json delete mode 100644 models/multimodal/characterai/pitfalls.md delete mode 100644 models/multimodal/characterai/prompting.md create mode 100644 models/text/characterai/metadata.json create mode 100644 models/text/characterai/parameters.json create mode 100644 models/text/characterai/pitfalls.md create mode 100644 models/text/characterai/prompting.md create mode 100644 models/text/fliki/cost_optimization.md create mode 100644 
models/text/fliki/metadata.json create mode 100644 models/text/fliki/parameters.json create mode 100644 models/text/fliki/prompting.md create mode 100644 models/text/ideogram/cost_optimization.md create mode 100644 models/text/ideogram/metadata.json create mode 100644 models/text/ideogram/parameters.json create mode 100644 models/text/ideogram/pitfalls.md create mode 100644 models/text/ideogram/prompting.md create mode 100644 models/text/playht/cost_optimization.md create mode 100644 models/text/playht/metadata.json create mode 100644 models/text/playht/parameters.json create mode 100644 models/text/resemble-ai/cost_optimization.md create mode 100644 models/text/resemble-ai/metadata.json create mode 100644 models/text/resemble-ai/parameters.json create mode 100644 models/video/heygen/metadata.json create mode 100644 models/video/heygen/prompting.md create mode 100644 models/video/pika/metadata.json create mode 100644 models/video/pika/pitfalls.md create mode 100644 models/video/pika/prompting.md diff --git a/QUICKSTART.md b/QUICKSTART.md index c8eadf9..4b4ae2b 100644 --- a/QUICKSTART.md +++ b/QUICKSTART.md @@ -60,13 +60,17 @@ scapo scrape targeted --service "GitHub Copilot" --limit 20 # Or batch process by category scapo scrape batch --category video --limit 15 -scapo scrape batch --max-services 3 --priority ultra + +# Process ALL priority services one by one +scapo scrape all --priority ultra --limit 20 # Process all ultra priority services +scapo scrape all --dry-run # Preview what will be processed ``` ### Key Commands: -- `discover --update` - Find services from GitHub Awesome lists +- `discover --update` - Find services from GitHub Awesome lists - `targeted --service NAME` - Extract tips for one service -- `batch --category TYPE` - Process multiple services +- `batch --category TYPE` - Process multiple services (limited) +- `all --priority LEVEL` - Process ALL services one by one - `update-status` - See what needs updating ## 📚 Approach 2: Legacy Sources diff 
--git a/README.md b/README.md index 9ad7341..469fa71 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,9 @@ scapo scrape targeted --service "Midjourney" --limit 20 # Or batch process multiple services scapo scrape batch --category video --limit 15 + +# Process ALL priority services one by one (no limits!) +scapo scrape all --priority ultra --limit 20 ``` #### Option B: Legacy Sources diff --git a/models/audio/eleven-labs/cost_optimization.md b/models/audio/eleven-labs/cost_optimization.md index a84fb7c..9dd260b 100644 --- a/models/audio/eleven-labs/cost_optimization.md +++ b/models/audio/eleven-labs/cost_optimization.md @@ -1,11 +1,17 @@ # Eleven Labs - Cost Optimization Guide -*Last updated: 2025-08-11* +*Last updated: 2025-08-12* ## Pricing Information -- $100-$200/month for multiple hours of audio per week -- Creator plan: $22/month for 100k characters -- $22/month provides approximately 2 hours of content -- Business subscription: over $1,300/month +- $10/month or $96/year for Reader trial +- $99/month plan +- $29/month for Reader unlimited plan +- $22/month for Creator plan (100k characters) +- $1,300+/month for Business subscription + +## Money-Saving Tips + +- Free plan: 12 minutes/month via web UI, 2h30 via API +- Approximately 400,000 credits were removed; 60% of credits left should last until June 5th. 
diff --git a/models/audio/eleven-labs/metadata.json b/models/audio/eleven-labs/metadata.json index 6334f55..ffa53d6 100644 --- a/models/audio/eleven-labs/metadata.json +++ b/models/audio/eleven-labs/metadata.json @@ -1,13 +1,13 @@ { "service": "Eleven Labs", "category": "audio", - "last_updated": "2025-08-11T23:07:25.519029", - "extraction_timestamp": "2025-08-11T23:06:24.446102", + "last_updated": "2025-08-12T20:08:46.442319", + "extraction_timestamp": "2025-08-12T20:08:37.495998", "data_sources": [ "Reddit API", "Community discussions" ], - "posts_analyzed": 49, + "posts_analyzed": 79, "confidence": "medium", "version": "1.0.0" } \ No newline at end of file diff --git a/models/audio/eleven-labs/parameters.json b/models/audio/eleven-labs/parameters.json index c33ac5c..4564290 100644 --- a/models/audio/eleven-labs/parameters.json +++ b/models/audio/eleven-labs/parameters.json @@ -1,13 +1,21 @@ { "service": "Eleven Labs", - "last_updated": "2025-08-11T23:07:25.519029", + "last_updated": "2025-08-12T20:08:46.346345", "recommended_settings": { "setting_0": { - "description": "In Manim implementation: set_speech_service(ElevenLabsService(voice_name=\"Mun W\"))" + "description": "Model ID: gemini-2.0-flash" + }, + "setting_1": { + "description": "API Key: (user-provided)" + }, + "setting_2": { + "description": "Server URL: https://9df9e70d40a2.ngrok-free.app/v1/big-chief" } }, "cost_optimization": { - "pricing": "Business subscription: over $1,300/month" + "pricing": "$1,300+/month for Business subscription", + "tip_1": "Approximately 400,000 credits were removed; 60% of credits left should last until June 5th.", + "tip_2": "Free plan: 12 minutes/month via web UI, 2h30 via API" }, "sources": [ "Reddit community", diff --git a/models/audio/eleven-labs/pitfalls.md b/models/audio/eleven-labs/pitfalls.md index 8cdd8b1..ecf442a 100644 --- a/models/audio/eleven-labs/pitfalls.md +++ b/models/audio/eleven-labs/pitfalls.md @@ -1,14 +1,22 @@ # Eleven Labs - Common Pitfalls & 
Issues -*Last updated: 2025-08-11* +*Last updated: 2025-08-12* ## Technical Issues -### ⚠️ Error when accessing Eleven Labs API from self-hosted n8n (specific error message not provided) +### ⚠️ Unable to switch back to a Custom LLM after testing with built-in model gemini-2.0-flash; interface shows 'Fix the errors to proceed' even though Server URL, Model ID, and API Key are correctly filled. +**Fix**: Store API keys in environment variables or use a secrets manager. + +## Policy & Account Issues + +### ⚠️ 400,000 credits were wiped from account on the $99/month plan; subscription renewal failed due to paywall issues, leaving 60% of credits unused. +**Note**: Be aware of terms of service regarding account creation. ## Cost & Limits -### 💰 Credits don't transfer to the next month +### 💰 Credits from ElevenLabs free plan do not roll over to the next month. + +### 💰 $29/month for Reader unlimited plan -### 💰 Creator plan: $22/month for 100k characters +### 💰 $22/month for Creator plan (100k characters) diff --git a/models/audio/eleven-labs/prompting.md b/models/audio/eleven-labs/prompting.md index b84d395..76b915b 100644 --- a/models/audio/eleven-labs/prompting.md +++ b/models/audio/eleven-labs/prompting.md @@ -1,14 +1,12 @@ # Eleven Labs Prompting Guide -*Last updated: 2025-08-11* - -## Technical Tips - -- For Manim voiceovers, use ElevenLabsService with voice_name='Mun W' parameter +*Last updated: 2025-08-12* ## Recommended Settings -- In Manim implementation: set_speech_service(ElevenLabsService(voice_name="Mun W")) +- Model ID: gemini-2.0-flash +- API Key: (user-provided) +- Server URL: https://9df9e70d40a2.ngrok-free.app/v1/big-chief ## Sources diff --git a/models/code/cursor/cost_optimization.md b/models/code/cursor/cost_optimization.md new file mode 100644 index 0000000..4cb6880 --- /dev/null +++ b/models/code/cursor/cost_optimization.md @@ -0,0 +1,20 @@ +# Cursor - Cost Optimization Guide + +*Last updated: 2025-08-12* + +## Pricing Information + +- API costs 
have decreased in recent months; prompt caching further lowers costs. +- Using an API key for Anthropic's Sonnet 4 can reduce cost to about $0.73 per 1,000 tokens compared to $10–15+ per 1,000 tokens when using Cursor’s built‑in billing. +- Monthly subscription remains $20. + +## Money-Saving Tips + +- Use the free Meta Llama models available in Cursor: meta-llama/llama-3.1-405b-instruct, meta-llama/llama-3.2-90b-vision-instruct, meta-llama/llama-3.1-70b-instruct. +- Pro plan indexing limit: 100,000 files +- deepseekcoder‑v2 is cheaper than other models. + +## Alternative Access Methods + +- Enable prompt caching in Cursor to reduce API costs; the deepseekcoder‑v2 model is noted to be cheaper than other models. + diff --git a/models/multimodal/characterai/metadata.json b/models/code/cursor/metadata.json similarity index 52% rename from models/multimodal/characterai/metadata.json rename to models/code/cursor/metadata.json index 0f27d4f..f0787d7 100644 --- a/models/multimodal/characterai/metadata.json +++ b/models/code/cursor/metadata.json @@ -1,13 +1,13 @@ { - "service": "Character.AI", - "category": "multimodal", - "last_updated": "2025-08-11T22:13:29.427007", + "service": "Cursor", + "category": "code", + "last_updated": "2025-08-12T20:05:11.499882", "extraction_timestamp": null, "data_sources": [ "Reddit API", "Community discussions" ], - "posts_analyzed": 75, + "posts_analyzed": 100, "confidence": "medium", "version": "1.0.0" } \ No newline at end of file diff --git a/models/code/cursor/parameters.json b/models/code/cursor/parameters.json new file mode 100644 index 0000000..8c88a18 --- /dev/null +++ b/models/code/cursor/parameters.json @@ -0,0 +1,15 @@ +{ + "service": "Cursor", + "last_updated": "2025-08-12T20:05:11.435746", + "recommended_settings": {}, + "cost_optimization": { + "tip_0": "API costs have decreased in recent months; prompt caching further lowers costs.", + "tip_1": "Pro plan indexing limit: 100,000 files", + "pricing": "Monthly subscription 
remains $20.", + "tip_3": "deepseekcoder\u2011v2 is cheaper than other models." + }, + "sources": [ + "Reddit community", + "User reports" + ] +} \ No newline at end of file diff --git a/models/code/cursor/pitfalls.md b/models/code/cursor/pitfalls.md new file mode 100644 index 0000000..369aa8f --- /dev/null +++ b/models/code/cursor/pitfalls.md @@ -0,0 +1,21 @@ +# Cursor - Common Pitfalls & Issues + +*Last updated: 2025-08-12* + +## Technical Issues + +### ⚠️ Cursor charges usage‑based pricing even when using an API key without a subscription. +**Fix**: Store API keys in environment variables or use a secrets manager. + +### ⚠️ BYOK (Bring Your Own API Key) no longer works: error message "Agent and Edit rely on custom models that cannot be billed to". +**Fix**: Store API keys in environment variables or use a secrets manager. + +### ⚠️ Using own OpenAI API key still subject to Free Tier limitations +**Fix**: Store API keys in environment variables or use a secrets manager. + +## Cost & Limits + +### 💰 Pro plan indexing limit of 100,000 files + +### 💰 Pro plan indexing limit: 100,000 files + diff --git a/models/code/cursor/prompting.md b/models/code/cursor/prompting.md new file mode 100644 index 0000000..a42f563 --- /dev/null +++ b/models/code/cursor/prompting.md @@ -0,0 +1,17 @@ +# Cursor Prompting Guide + +*Last updated: 2025-08-12* + +## Technical Tips + +- Enable prompt caching in Cursor to reduce API costs; the deepseekcoder‑v2 model is noted to be cheaper than other models. + +## Usage Tips + +- Use the free Meta Llama models available in Cursor: meta-llama/llama-3.1-405b-instruct, meta-llama/llama-3.2-90b-vision-instruct, meta-llama/llama-3.1-70b-instruct. +- Run any AI model (e.g., GROQ or local models) in Cursor by setting up a proxy server; use the R1SONQWEN implementation for a working example. 
+ +## Sources + +- Reddit community discussions +- User-reported experiences diff --git a/models/code/github-copilot/cost_optimization.md b/models/code/github-copilot/cost_optimization.md index 8de50cd..6c882d4 100644 --- a/models/code/github-copilot/cost_optimization.md +++ b/models/code/github-copilot/cost_optimization.md @@ -1,19 +1,14 @@ # GitHub Copilot - Cost Optimization Guide -*Last updated: 2025-08-11* +*Last updated: 2025-08-12* ## Pricing Information -- GitHub Copilot Pro: $10/month with unlimited standard usage (fair use policies apply) -- More cost-effective than Cursor Pro ($20/month with only 500 premium requests) +- $10/month for the standard Copilot plan ## Money-Saving Tips -- Unclear if Azure Credits can be used to pay for GitHub Copilot -- GitHub Copilot now has a free tier -- GitHub Copilot Pro has a daily request limit of 300 requests for all models including GPT-4.1 -- GitHub Copilot with Sonnet 4 is a cheaper option but slower -- Copilot Pro is mentioned as a higher tier than the free tier -- GitHub Copilot with Sonnet 4 is described as 'cheap' compared to alternatives -- This limit equals about 4 hours of usage per day +- Copilot Pro: 300 premium requests/month (before Pro+ upgrade) +- Free tier: 2,000 completions/month, 50 chat prompts/month +- Paid tier: unlimited completions and chat prompts diff --git a/models/code/github-copilot/metadata.json b/models/code/github-copilot/metadata.json index e1e17a5..35e3889 100644 --- a/models/code/github-copilot/metadata.json +++ b/models/code/github-copilot/metadata.json @@ -1,13 +1,13 @@ { "service": "GitHub Copilot", - "category": "coding", - "last_updated": "2025-08-11T22:04:52.548712", - "extraction_timestamp": "2025-08-11T22:03:30.694771", + "category": "code", + "last_updated": "2025-08-12T20:04:22.865982", + "extraction_timestamp": "2025-08-12T20:04:20.280914", "data_sources": [ "Reddit API", "Community discussions" ], - "posts_analyzed": 15, + "posts_analyzed": 82, "confidence": "medium", 
"version": "1.0.0" } \ No newline at end of file diff --git a/models/code/github-copilot/parameters.json b/models/code/github-copilot/parameters.json index 1abf98d..313405f 100644 --- a/models/code/github-copilot/parameters.json +++ b/models/code/github-copilot/parameters.json @@ -1,19 +1,19 @@ { "service": "GitHub Copilot", - "last_updated": "2025-08-11T22:04:52.544713", + "last_updated": "2025-08-12T20:04:22.768265", "recommended_settings": { "setting_0": { - "description": "GitHub Copilot can be configured to use Sonnet 4" + "description": "github.copilot.chat.agent.autoFix" + }, + "setting_1": { + "description": "chat.tools.autoApprove" } }, "cost_optimization": { - "tip_0": "Unclear if Azure Credits can be used to pay for GitHub Copilot", - "pricing": "More cost-effective than Cursor Pro ($20/month with only 500 premium requests)", - "tip_2": "GitHub Copilot now has a free tier", - "tip_3": "GitHub Copilot Pro has a daily request limit of 300 requests for all models including GPT-4.1", - "tip_4": "Copilot Pro is mentioned as a higher tier than the free tier", - "tip_5": "GitHub Copilot with Sonnet 4 is described as 'cheap' compared to alternatives", - "tip_6": "This limit equals about 4 hours of usage per day" + "pricing": "$10/month for the standard Copilot plan", + "tip_1": "Copilot Pro: 300 premium requests/month (before Pro+ upgrade)", + "tip_2": "Free tier: 2,000 completions/month, 50 chat prompts/month", + "unlimited_option": "Paid tier: unlimited completions and chat prompts" }, "sources": [ "Reddit community", diff --git a/models/code/github-copilot/pitfalls.md b/models/code/github-copilot/pitfalls.md index d2b709f..33c7f00 100644 --- a/models/code/github-copilot/pitfalls.md +++ b/models/code/github-copilot/pitfalls.md @@ -1,23 +1,21 @@ # GitHub Copilot - Common Pitfalls & Issues -*Last updated: 2025-08-11* +*Last updated: 2025-08-12* ## Technical Issues -### ⚠️ Slower performance compared to alternatives like Claude Code +### ⚠️ Extension 
'GitHub.copilot-chat' cannot use API proposal: chatParticipantPrivate. Its package.json#enabledApiProposals-property declares: but NOT chatParticipantPrivate. -## Policy & Account Issues +### ⚠️ 300 requests per day limit on VS Code LM API for Copilot Pro -### ⚠️ Technical issue with signing up for the free tier after trial expiration (continuous redirect between pages) -**Note**: Be aware of terms of service regarding account creation. +### ⚠️ GitHub Copilot extension lost Gemini API access; only Groq and OpenRouter API keys can be added. +**Fix**: Store API keys in environment variables or use a secrets manager. ## Cost & Limits -### 💰 Daily request limit of 300 requests for all models including GPT-4.1 (equals about 4 hours of usage per day) +### 💰 300 premium requests per month limit on Copilot Pro -### 💰 GitHub Copilot Pro: $10/month with unlimited standard usage (fair use policies apply) +### 💰 Free tier limited to 2,000 completions per month and 50 chat prompts -### 💰 GitHub Copilot Pro has a daily request limit of 300 requests for all models including GPT-4.1 - -### 💰 This limit equals about 4 hours of usage per day +### 💰 Paid tier: unlimited completions and chat prompts diff --git a/models/code/github-copilot/prompting.md b/models/code/github-copilot/prompting.md index 458c909..bd10987 100644 --- a/models/code/github-copilot/prompting.md +++ b/models/code/github-copilot/prompting.md @@ -1,14 +1,16 @@ # GitHub Copilot Prompting Guide -*Last updated: 2025-08-11* +*Last updated: 2025-08-12* ## Usage Tips -- Create a copilot-instructions.md file with specific sections including: Terminology (domain-specific terms), Architecture (key files and design decisions), and Task Plan +- When Gemini API access is unavailable in the GitHub Copilot extension, add a Groq or OpenRouter API key via the extension settings to regain model selection. 
+- Create a custom VS Code extension that uses the GitHub Copilot API as an LLM provider to bypass the standard completion limits ## Recommended Settings -- GitHub Copilot can be configured to use Sonnet 4 +- github.copilot.chat.agent.autoFix +- chat.tools.autoApprove ## Sources diff --git a/models/general/leonardo-ai/cost_optimization.md b/models/general/leonardo-ai/cost_optimization.md new file mode 100644 index 0000000..84c2c8a --- /dev/null +++ b/models/general/leonardo-ai/cost_optimization.md @@ -0,0 +1,12 @@ +# Leonardo AI - Cost Optimization Guide + +*Last updated: 2025-08-12* + +## Pricing Information + +- Maestro Unlimited Plan: $60 per month for 60,000 fast tokens (2,500 tokens per 8‑second clip, costing $2.5 per clip). + +## Money-Saving Tips + +- Free plan available, free trial. + diff --git a/models/general/leonardo-ai/metadata.json b/models/general/leonardo-ai/metadata.json new file mode 100644 index 0000000..641a893 --- /dev/null +++ b/models/general/leonardo-ai/metadata.json @@ -0,0 +1,13 @@ +{ + "service": "Leonardo AI", + "category": "general", + "last_updated": "2025-08-12T20:05:49.276178", + "extraction_timestamp": "2025-08-12T20:05:47.750787", + "data_sources": [ + "Reddit API", + "Community discussions" + ], + "posts_analyzed": 45, + "confidence": "medium", + "version": "1.0.0" +} \ No newline at end of file diff --git a/models/general/leonardo-ai/parameters.json b/models/general/leonardo-ai/parameters.json new file mode 100644 index 0000000..b37aa61 --- /dev/null +++ b/models/general/leonardo-ai/parameters.json @@ -0,0 +1,13 @@ +{ + "service": "Leonardo AI", + "last_updated": "2025-08-12T20:05:49.209009", + "recommended_settings": {}, + "cost_optimization": { + "tip_0": "Free plan available, free trial.", + "pricing": "Maestro Unlimited Plan: $60 per month for 60,000 fast tokens (2,500 tokens per 8\u2011second clip, costing $2.5 per clip)." 
+ }, + "sources": [ + "Reddit community", + "User reports" + ] +} \ No newline at end of file diff --git a/models/general/leonardo-ai/pitfalls.md b/models/general/leonardo-ai/pitfalls.md new file mode 100644 index 0000000..d611e0a --- /dev/null +++ b/models/general/leonardo-ai/pitfalls.md @@ -0,0 +1,8 @@ +# Leonardo AI - Common Pitfalls & Issues + +*Last updated: 2025-08-12* + +## Cost & Limits + +### 💰 Maestro Unlimited Plan: $60 per month for 60,000 fast tokens (2,500 tokens per 8‑second clip, costing $2.5 per clip). + diff --git a/models/multimodal/characterai/parameters.json b/models/multimodal/characterai/parameters.json deleted file mode 100644 index 65fc345..0000000 --- a/models/multimodal/characterai/parameters.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "service": "Character.AI", - "last_updated": "2025-08-11T22:13:29.425767", - "recommended_settings": { - "setting_0": { - "description": "Goro setting in experimental section of Style tab" - }, - "setting_1": { - "description": "Limited chat style for users over 18" - } - }, - "cost_optimization": {}, - "sources": [ - "Reddit community", - "User reports" - ] -} \ No newline at end of file diff --git a/models/multimodal/characterai/pitfalls.md b/models/multimodal/characterai/pitfalls.md deleted file mode 100644 index 2b641e1..0000000 --- a/models/multimodal/characterai/pitfalls.md +++ /dev/null @@ -1,16 +0,0 @@ -# Character.AI - Common Pitfalls & Issues - -*Last updated: 2025-08-11* - -## Technical Issues - -### ⚠️ Image not displaying bug - -### ⚠️ Bug that stops Character.AI from replying to messages - -### ⚠️ Keyboard closing bug on mobile devices - -## Cost & Limits - -### 💰 Character.AI has a 32,000 character limit when creating AI characters - diff --git a/models/multimodal/characterai/prompting.md b/models/multimodal/characterai/prompting.md deleted file mode 100644 index f9907bd..0000000 --- a/models/multimodal/characterai/prompting.md +++ /dev/null @@ -1,18 +0,0 @@ -# Character.AI Prompting Guide - 
-*Last updated: 2025-08-11* - -## Usage Tips - -- To avoid keyboard closing bug on mobile: Use a Bluetooth keyboard -- The filtering system includes multiple layers: pre-processing filter, context-aware content classification (BERT-based), response generation filter, and user monitoring - -## Recommended Settings - -- Goro setting in experimental section of Style tab -- Limited chat style for users over 18 - -## Sources - -- Reddit community discussions -- User-reported experiences diff --git a/models/text/characterai/metadata.json b/models/text/characterai/metadata.json new file mode 100644 index 0000000..810d0fe --- /dev/null +++ b/models/text/characterai/metadata.json @@ -0,0 +1,13 @@ +{ + "service": "Character.AI", + "category": "text", + "last_updated": "2025-08-12T20:04:44.876236", + "extraction_timestamp": "2025-08-12T20:04:40.376447", + "data_sources": [ + "Reddit API", + "Community discussions" + ], + "posts_analyzed": 100, + "confidence": "medium", + "version": "1.0.0" +} \ No newline at end of file diff --git a/models/text/characterai/parameters.json b/models/text/characterai/parameters.json new file mode 100644 index 0000000..53924aa --- /dev/null +++ b/models/text/characterai/parameters.json @@ -0,0 +1,14 @@ +{ + "service": "Character.AI", + "last_updated": "2025-08-12T20:04:44.782084", + "recommended_settings": { + "setting_0": { + "description": "Enable 'Goro' in the experimental settings under the Style tab" + } + }, + "cost_optimization": {}, + "sources": [ + "Reddit community", + "User reports" + ] +} \ No newline at end of file diff --git a/models/text/characterai/pitfalls.md b/models/text/characterai/pitfalls.md new file mode 100644 index 0000000..e0b44c1 --- /dev/null +++ b/models/text/characterai/pitfalls.md @@ -0,0 +1,18 @@ +# Character.AI - Common Pitfalls & Issues + +*Last updated: 2025-08-12* + +## Technical Issues + +### ⚠️ image bug where character images do not load + +### ⚠️ keyboard bug where the on-screen keyboard closes unexpectedly 
+ +### ⚠️ bug that stops replying to your messages + +### ⚠️ disappearing characters bug where characters are unavailable for some users but available for others + +## Cost & Limits + +### 💰 Character creation character limit of 32,000 characters per AI + diff --git a/models/text/characterai/prompting.md b/models/text/characterai/prompting.md new file mode 100644 index 0000000..59e4012 --- /dev/null +++ b/models/text/characterai/prompting.md @@ -0,0 +1,16 @@ +# Character.AI Prompting Guide + +*Last updated: 2025-08-12* + +## Technical Tips + +- Use Janitor AI to jailbreak Character AI and access the NSFW version via prompts + +## Recommended Settings + +- Enable 'Goro' in the experimental settings under the Style tab + +## Sources + +- Reddit community discussions +- User-reported experiences diff --git a/models/text/fliki/cost_optimization.md b/models/text/fliki/cost_optimization.md new file mode 100644 index 0000000..0594357 --- /dev/null +++ b/models/text/fliki/cost_optimization.md @@ -0,0 +1,8 @@ +# Fliki - Cost Optimization Guide + +*Last updated: 2025-08-12* + +## Money-Saving Tips + +- 25% off premium plans for a whole year + diff --git a/models/text/fliki/metadata.json b/models/text/fliki/metadata.json new file mode 100644 index 0000000..3313b01 --- /dev/null +++ b/models/text/fliki/metadata.json @@ -0,0 +1,13 @@ +{ + "service": "Fliki", + "category": "text", + "last_updated": "2025-08-12T20:07:02.281408", + "extraction_timestamp": null, + "data_sources": [ + "Reddit API", + "Community discussions" + ], + "posts_analyzed": 23, + "confidence": "medium", + "version": "1.0.0" +} \ No newline at end of file diff --git a/models/text/fliki/parameters.json b/models/text/fliki/parameters.json new file mode 100644 index 0000000..55d84e4 --- /dev/null +++ b/models/text/fliki/parameters.json @@ -0,0 +1,12 @@ +{ + "service": "Fliki", + "last_updated": "2025-08-12T20:07:02.186904", + "recommended_settings": {}, + "cost_optimization": { + "tip_0": "25% off premium plans 
for a whole year" + }, + "sources": [ + "Reddit community", + "User reports" + ] +} \ No newline at end of file diff --git a/models/text/fliki/prompting.md b/models/text/fliki/prompting.md new file mode 100644 index 0000000..36aab56 --- /dev/null +++ b/models/text/fliki/prompting.md @@ -0,0 +1,12 @@ +# Fliki Prompting Guide + +*Last updated: 2025-08-12* + +## Usage Tips + +- Use the Fliki discount page (https://fliki.ai/pricing?via=subreddits) which includes the coupon, sign up with a new email, choose a plan and period, then proceed with subscription to activate a whole year at 25% discount. + +## Sources + +- Reddit community discussions +- User-reported experiences diff --git a/models/text/ideogram/cost_optimization.md b/models/text/ideogram/cost_optimization.md new file mode 100644 index 0000000..a4d986f --- /dev/null +++ b/models/text/ideogram/cost_optimization.md @@ -0,0 +1,8 @@ +# Ideogram - Cost Optimization Guide + +*Last updated: 2025-08-12* + +## Money-Saving Tips + +- 216 credits per day available for paid users using 3‑credit generations. 
+ diff --git a/models/text/ideogram/metadata.json b/models/text/ideogram/metadata.json new file mode 100644 index 0000000..ae3a3fa --- /dev/null +++ b/models/text/ideogram/metadata.json @@ -0,0 +1,13 @@ +{ + "service": "Ideogram", + "category": "text", + "last_updated": "2025-08-12T20:05:32.879741", + "extraction_timestamp": "2025-08-12T20:05:25.727949", + "data_sources": [ + "Reddit API", + "Community discussions" + ], + "posts_analyzed": 49, + "confidence": "medium", + "version": "1.0.0" +} \ No newline at end of file diff --git a/models/text/ideogram/parameters.json b/models/text/ideogram/parameters.json new file mode 100644 index 0000000..4def834 --- /dev/null +++ b/models/text/ideogram/parameters.json @@ -0,0 +1,12 @@ +{ + "service": "Ideogram", + "last_updated": "2025-08-12T20:05:32.795932", + "recommended_settings": {}, + "cost_optimization": { + "tip_0": "216 credits per day available for paid users using 3\u2011credit generations." + }, + "sources": [ + "Reddit community", + "User reports" + ] +} \ No newline at end of file diff --git a/models/text/ideogram/pitfalls.md b/models/text/ideogram/pitfalls.md new file mode 100644 index 0000000..873e7f5 --- /dev/null +++ b/models/text/ideogram/pitfalls.md @@ -0,0 +1,8 @@ +# Ideogram - Common Pitfalls & Issues + +*Last updated: 2025-08-12* + +## Technical Issues + +### ⚠️ Leader/another role cannot be drafted due to a bug involving ideoligion objects such as shrines and ideograms. + diff --git a/models/text/ideogram/prompting.md b/models/text/ideogram/prompting.md new file mode 100644 index 0000000..95bac25 --- /dev/null +++ b/models/text/ideogram/prompting.md @@ -0,0 +1,13 @@ +# Ideogram Prompting Guide + +*Last updated: 2025-08-12* + +## Usage Tips + +- In dev mods, use the destroy tool to delete any object directly related to your ideoligion (shrines, ideograms, etc.), then randomize symbols and regenerate all buildings to resolve the drafting issue. 
+- Use 3‑credit generations to maximize throughput: 72 batches per day, totaling 216 credits per day. + +## Sources + +- Reddit community discussions +- User-reported experiences diff --git a/models/text/playht/cost_optimization.md b/models/text/playht/cost_optimization.md new file mode 100644 index 0000000..3c4303c --- /dev/null +++ b/models/text/playht/cost_optimization.md @@ -0,0 +1,8 @@ +# Play.ht - Cost Optimization Guide + +*Last updated: 2025-08-12* + +## Pricing Information + +- $100 per month + diff --git a/models/text/playht/metadata.json b/models/text/playht/metadata.json new file mode 100644 index 0000000..9b35b00 --- /dev/null +++ b/models/text/playht/metadata.json @@ -0,0 +1,13 @@ +{ + "service": "Play.ht", + "category": "text", + "last_updated": "2025-08-12T20:09:36.214880", + "extraction_timestamp": "2025-08-12T20:09:36.064867", + "data_sources": [ + "Reddit API", + "Community discussions" + ], + "posts_analyzed": 28, + "confidence": "medium", + "version": "1.0.0" +} \ No newline at end of file diff --git a/models/text/playht/parameters.json b/models/text/playht/parameters.json new file mode 100644 index 0000000..ba866c0 --- /dev/null +++ b/models/text/playht/parameters.json @@ -0,0 +1,12 @@ +{ + "service": "Play.ht", + "last_updated": "2025-08-12T20:09:36.149442", + "recommended_settings": {}, + "cost_optimization": { + "pricing": "$100 per month" + }, + "sources": [ + "Reddit community", + "User reports" + ] +} \ No newline at end of file diff --git a/models/text/resemble-ai/cost_optimization.md b/models/text/resemble-ai/cost_optimization.md new file mode 100644 index 0000000..40bf4a8 --- /dev/null +++ b/models/text/resemble-ai/cost_optimization.md @@ -0,0 +1,8 @@ +# Resemble AI - Cost Optimization Guide + +*Last updated: 2025-08-12* + +## Pricing Information + +- basic package starts at $0.006 per second + diff --git a/models/text/resemble-ai/metadata.json b/models/text/resemble-ai/metadata.json new file mode 100644 index 0000000..96c6987 --- 
/dev/null +++ b/models/text/resemble-ai/metadata.json @@ -0,0 +1,13 @@ +{ + "service": "Resemble AI", + "category": "text", + "last_updated": "2025-08-12T20:09:01.048329", + "extraction_timestamp": "2025-08-12T20:09:00.838795", + "data_sources": [ + "Reddit API", + "Community discussions" + ], + "posts_analyzed": 15, + "confidence": "medium", + "version": "1.0.0" +} \ No newline at end of file diff --git a/models/text/resemble-ai/parameters.json b/models/text/resemble-ai/parameters.json new file mode 100644 index 0000000..92c7bbc --- /dev/null +++ b/models/text/resemble-ai/parameters.json @@ -0,0 +1,12 @@ +{ + "service": "Resemble AI", + "last_updated": "2025-08-12T20:09:00.954576", + "recommended_settings": {}, + "cost_optimization": { + "pricing": "basic package starts at $0.006 per second" + }, + "sources": [ + "Reddit community", + "User reports" + ] +} \ No newline at end of file diff --git a/models/video/heygen/metadata.json b/models/video/heygen/metadata.json new file mode 100644 index 0000000..38c02e5 --- /dev/null +++ b/models/video/heygen/metadata.json @@ -0,0 +1,13 @@ +{ + "service": "HeyGen", + "category": "video", + "last_updated": "2025-08-12T20:08:18.502912", + "extraction_timestamp": null, + "data_sources": [ + "Reddit API", + "Community discussions" + ], + "posts_analyzed": 48, + "confidence": "medium", + "version": "1.0.0" +} \ No newline at end of file diff --git a/models/video/heygen/prompting.md b/models/video/heygen/prompting.md new file mode 100644 index 0000000..3f8fe9d --- /dev/null +++ b/models/video/heygen/prompting.md @@ -0,0 +1,18 @@ +# HeyGen Prompting Guide + +*Last updated: 2025-08-12* + +## Technical Tips + +- To create a custom avatar with HeyGen, first record about 30 minutes of clean, high‑quality audio and use ElevenLabs’ Professional Voice Clone to train a custom voice model. +- After the avatar is generated, start a new video project in HeyGen and connect the custom voice model from ElevenLabs to the avatar. 
+ +## Usage Tips + +- Upload a clear, 2‑minute video of yourself to HeyGen’s avatar creation page. +- On HeyGen, click the "Create New Avatar" button, select the "Hyper‑Realistic" option, and upload the 2‑minute video to generate the avatar. + +## Sources + +- Reddit community discussions +- User-reported experiences diff --git a/models/video/pika/metadata.json b/models/video/pika/metadata.json new file mode 100644 index 0000000..d4f4cf2 --- /dev/null +++ b/models/video/pika/metadata.json @@ -0,0 +1,13 @@ +{ + "service": "Pika", + "category": "video", + "last_updated": "2025-08-12T20:07:40.706852", + "extraction_timestamp": "2025-08-12T20:07:30.860248", + "data_sources": [ + "Reddit API", + "Community discussions" + ], + "posts_analyzed": 86, + "confidence": "medium", + "version": "1.0.0" +} \ No newline at end of file diff --git a/models/video/pika/pitfalls.md b/models/video/pika/pitfalls.md new file mode 100644 index 0000000..e824d4e --- /dev/null +++ b/models/video/pika/pitfalls.md @@ -0,0 +1,8 @@ +# Pika - Common Pitfalls & Issues + +*Last updated: 2025-08-12* + +## Technical Issues + +### ⚠️ Blank window bug when using Pika backup on Intel HD 4000 GPU + diff --git a/models/video/pika/prompting.md b/models/video/pika/prompting.md new file mode 100644 index 0000000..1f51089 --- /dev/null +++ b/models/video/pika/prompting.md @@ -0,0 +1,10 @@ +# Pika Prompting Guide + +*Last updated: 2025-08-12* + +*No specific prompting tips available yet. 
Check back for updates.* + +## Sources + +- Reddit community discussions +- User-reported experiences diff --git a/models/video/runway/cost_optimization.md b/models/video/runway/cost_optimization.md index 5d9720d..147fe00 100644 --- a/models/video/runway/cost_optimization.md +++ b/models/video/runway/cost_optimization.md @@ -1,9 +1,12 @@ # Runway - Cost Optimization Guide -*Last updated: 2025-08-11* +*Last updated: 2025-08-12* ## Pricing Information -- Gen-4 References costs $0.08 (8 credits) per image generation -- Unlimited plan costs $95/month +- $0.08 (8 credits) per image generation + +## Money-Saving Tips + +- Daily image generation limit is 3 per day. diff --git a/models/video/runway/metadata.json b/models/video/runway/metadata.json index cde6d0f..2bc79dc 100644 --- a/models/video/runway/metadata.json +++ b/models/video/runway/metadata.json @@ -1,13 +1,13 @@ { "service": "Runway", "category": "video", - "last_updated": "2025-08-11T23:04:57.379218", - "extraction_timestamp": "2025-08-11T23:04:54.909165", + "last_updated": "2025-08-12T20:10:15.923183", + "extraction_timestamp": null, "data_sources": [ "Reddit API", "Community discussions" ], - "posts_analyzed": 30, + "posts_analyzed": 100, "confidence": "medium", "version": "1.0.0" } \ No newline at end of file diff --git a/models/video/runway/parameters.json b/models/video/runway/parameters.json index 696ae6e..9d887c9 100644 --- a/models/video/runway/parameters.json +++ b/models/video/runway/parameters.json @@ -1,13 +1,14 @@ { "service": "Runway", - "last_updated": "2025-08-11T23:04:57.371483", + "last_updated": "2025-08-12T20:10:15.827498", "recommended_settings": { "setting_0": { - "description": "For Gen-4 References, use up to 3 reference images per request" + "description": "Maximum of 3 reference images per request" } }, "cost_optimization": { - "pricing": "Unlimited plan costs $95/month" + "tip_0": "Daily image generation limit is 3 per day.", + "pricing": "$0.08 (8 credits) per image generation" }, 
"sources": [ "Reddit community", diff --git a/models/video/runway/pitfalls.md b/models/video/runway/pitfalls.md index 90a2902..c29c048 100644 --- a/models/video/runway/pitfalls.md +++ b/models/video/runway/pitfalls.md @@ -1,13 +1,8 @@ # Runway - Common Pitfalls & Issues -*Last updated: 2025-08-11* - -## Policy & Account Issues - -### ⚠️ Runway throttles Unlimited [$95/month] accounts -**Note**: Be aware of terms of service regarding account creation. +*Last updated: 2025-08-12* ## Cost & Limits -### 💰 Unlimited plan costs $95/month +### 💰 Daily image generation limit is 3 per day. diff --git a/models/video/runway/prompting.md b/models/video/runway/prompting.md index 3acca8f..4f3c323 100644 --- a/models/video/runway/prompting.md +++ b/models/video/runway/prompting.md @@ -1,18 +1,14 @@ # Runway Prompting Guide -*Last updated: 2025-08-11* +*Last updated: 2025-08-12* ## Usage Tips -- Use Gen-4 References in the API for image generation -- Use the API at https://useapi.net/docs/api-runwayml-v1 -- Use Python SDK v3.1 for Runway API integration -- Use up to 3 reference images per request for better results -- Use automation workaround available at https://useapi.net/docs/articles/runway-bash +- Use Python SDK v3.1 from https://github.com/runwayml/sdk-python ## Recommended Settings -- For Gen-4 References, use up to 3 reference images per request +- Maximum of 3 reference images per request ## Sources diff --git a/src/cli.py b/src/cli.py index 01edbc2..1969ea4 100644 --- a/src/cli.py +++ b/src/cli.py @@ -748,6 +748,113 @@ def update_status(): console.print(f"\n[dim]Tip: Run 'scapo scrape batch --max-services {min(3, len(status['stale_services']))}' to update stale services[/dim]") +@scrape.command(name="all") +@click.option('-l', '--limit', default=20, help='Max posts per search (default: 20)') +@click.option('-c', '--category', help='Filter by category (video, audio, code, etc)') +@click.option('-p', '--priority', + type=click.Choice(['ultra', 'critical', 'high', 'all']), 
+ default='ultra', + help='Service priority level') +@click.option('--dry-run', is_flag=True, help='Show what would be processed without running') +@click.option('--delay', default=5, help='Delay in seconds between services (default: 5)') +def scrape_all(limit: int, category: str, priority: str, dry_run: bool, delay: int): + """Process all priority services one by one.""" + show_banner() + + from src.scrapers.targeted_search_generator import TargetedSearchGenerator + from src.scrapers.intelligent_browser_scraper import IntelligentBrowserScraper + from src.services.batch_llm_processor import BatchLLMProcessor + from src.services.llm_processor import LLMProcessorFactory + from src.services.model_entry_generator import ModelEntryGenerator + from src.services.service_alias_manager import ServiceAliasManager + import asyncio + import time + from pathlib import Path + import json + + # Initialize components + generator = TargetedSearchGenerator() + alias_manager = ServiceAliasManager() + + # Get all priority services + priority_services = list(generator.priority_services) + + # Filter discovered services that match priority services + filtered_services = {} + for service_key, service_data in generator.services.items(): + display_name = service_data['display_name'].lower() + if any(priority in display_name for priority in priority_services): + # Apply category filter if specified + if category and service_data['category'] != category: + continue + # Mark with ultra priority for our priority services + service_data['priority'] = 'ultra' + filtered_services[service_key] = service_data + + # Apply priority filter + if priority != 'all': + filtered_services = {k: v for k, v in filtered_services.items() + if v.get('priority') == priority} + + services_to_process = list(filtered_services.values()) + + console.print(f"[cyan]Found {len(services_to_process)} services to process[/cyan]") + + if dry_run: + console.print("\n[yellow]DRY RUN - Services that would be 
processed:[/yellow]") + for i, service in enumerate(services_to_process, 1): + console.print(f" {i}. {service['display_name']} ({service['category']})") + console.print(f"\n[dim]Total: {len(services_to_process)} services × 5 queries × {limit} posts = {len(services_to_process) * 5 * limit} posts[/dim]") + return + + if not Confirm.ask(f"\n[yellow]Process {len(services_to_process)} services individually?[/yellow]", default=True): + console.print("[red]Cancelled[/red]") + return + + # Process each service one by one + successful = 0 + failed = 0 + + for i, service_data in enumerate(services_to_process, 1): + service_name = service_data['display_name'] + + console.print(f"\n[cyan][{i}/{len(services_to_process)}] Processing {service_name}...[/cyan]") + + try: + # Run targeted scraper for this service + from subprocess import run, PIPE + result = run( + ['uv', 'run', 'scapo', 'scrape', 'targeted', + '--service', service_name, + '--limit', str(limit), + '--max-queries', '5'], + capture_output=True, + text=True + ) + + if result.returncode == 0: + successful += 1 + console.print(f" ✅ {service_name} completed") + else: + failed += 1 + console.print(f" ❌ {service_name} failed: {result.stderr[:100]}") + + except Exception as e: + failed += 1 + console.print(f" ❌ {service_name} error: {str(e)[:100]}") + + # Delay between services (except for the last one) + if i < len(services_to_process): + console.print(f" [dim]Waiting {delay} seconds before next service...[/dim]") + time.sleep(delay) + + # Summary + console.print(f"\n[green]✨ Processing complete![/green]") + console.print(f" Successful: {successful}") + console.print(f" Failed: {failed}") + console.print(f" Total: {len(services_to_process)}") + + @scrape.command(name="status") def scrape_status(): """Show detailed scraper status with visual elements.""" diff --git a/src/services/batch_llm_processor.py b/src/services/batch_llm_processor.py index 4904698..faa602d 100644 --- a/src/services/batch_llm_processor.py +++ 
b/src/services/batch_llm_processor.py @@ -56,10 +56,16 @@ def __init__(self, model_name: str = "gpt-3.5-turbo"): def _get_dynamic_context_limit(self, model_name: str) -> Optional[int]: """Try to get context limit from OpenRouter API""" try: - # Only try if we have an API key - if os.getenv("OPENROUTER_API_KEY"): + # Try to get API key from settings or environment + from src.core.config import Settings + settings = Settings() + api_key = settings.openrouter_api_key or os.getenv("OPENROUTER_API_KEY") + + if api_key: from src.services.openrouter_context import OpenRouterContextManager - manager = OpenRouterContextManager() + manager = OpenRouterContextManager(api_key=api_key) + # Load from cache first + manager.load_cache() context = manager.get_context_length(model_name) if context: logger.info(f"Got context limit from OpenRouter: {context}") From 64ce26614f2d5f0717dd6f03aa2003eafa0387a9 Mon Sep 17 00:00:00 2001 From: arahangua Date: Tue, 12 Aug 2025 21:28:44 +0900 Subject: [PATCH 4/7] fixed: token context handling, updated readme, quickstart --- .env.example | 6 +- QUICKSTART.md | 22 ++-- README.md | 15 ++- .../code/github-copilot/cost_optimization.md | 8 +- models/code/github-copilot/metadata.json | 6 +- models/code/github-copilot/parameters.json | 16 +-- src/cli.py | 7 +- src/core/config.py | 6 +- src/scrapers/base.py | 1 - src/scrapers/intelligent_browser_scraper.py | 4 +- src/services/adaptive_processor.py | 34 ++--- src/services/batch_llm_processor.py | 53 ++------ src/services/llm_processor.py | 122 ++++++++++++++---- src/services/scraper_service.py | 5 - 14 files changed, 168 insertions(+), 137 deletions(-) diff --git a/.env.example b/.env.example index b89fc3e..e340946 100644 --- a/.env.example +++ b/.env.example @@ -11,9 +11,9 @@ LOCAL_LLM_URL=http://localhost:11434 # Ollama: http://localhost:11434, LM Studi LOCAL_LLM_MODEL=llama3 # Model name for Ollama (ignored by LM Studio) LOCAL_LLM_TYPE=ollama # Options: ollama, lmstudio -# LLM Character Limits 
-LLM_MAX_CHARS=4000 # Maximum characters to send to LLM (user-friendly limit) -LLM_CHAR_HARD_LIMIT=50000 # Absolute safety limit to prevent excessive API costs +# Local LLM Context Configuration (Important for performance!) +LOCAL_LLM_MAX_CONTEXT=4096 # Maximum context tokens for your local model (e.g., 4096, 8192, 32768) +LOCAL_LLM_OPTIMAL_CHUNK=1024 # Optimal chunk size for batching (typically 1/4 of max context) # Quality Filtering LLM_QUALITY_THRESHOLD=0.6 # Minimum quality score for practices (0.0-1.0, higher = stricter) diff --git a/QUICKSTART.md b/QUICKSTART.md index 4b4ae2b..57f1970 100644 --- a/QUICKSTART.md +++ b/QUICKSTART.md @@ -32,6 +32,9 @@ LLM_PROVIDER=local LOCAL_LLM_TYPE=ollama LOCAL_LLM_URL=http://localhost:11434 LOCAL_LLM_MODEL=model_alias +# Important: Set your model's context size! +LOCAL_LLM_MAX_CONTEXT=8192 # e.g., 4096, 8192, 32768 +LOCAL_LLM_OPTIMAL_CHUNK=2048 # Typically 1/4 of max ``` #### Option C: LM Studio (Local) 1. Install [LM Studio](https://lmstudio.ai/) @@ -42,6 +45,9 @@ LOCAL_LLM_MODEL=model_alias LLM_PROVIDER=local LOCAL_LLM_TYPE=lmstudio LOCAL_LLM_URL=http://localhost:1234 +# Important: Set your model's context size! +LOCAL_LLM_MAX_CONTEXT=8192 # Check your model's specs +LOCAL_LLM_OPTIMAL_CHUNK=2048 # Typically 1/4 of max ``` ### 3. 
Choose Your Approach @@ -115,19 +121,19 @@ models/ │ └── parameters.json # Recommended settings ``` -## ⚙️ Optimization Tips +## ⚙️ The --limit flag -### For Better Extraction: ```bash -# More posts = better tips (15-20 minimum) -scapo scrape targeted --service "HeyGen" --limit 20 +# ❌ Too few posts = no useful tips found +scapo scrape targeted --service "HeyGen" --limit 5 # ~20% success rate -# Multiple search types -scapo scrape targeted --service "Midjourney" --max-queries 10 +# ✅ Sweet spot = reliable extraction +scapo scrape targeted --service "HeyGen" --limit 20 # ~80% success rate -# Process similar services together -scapo scrape batch --category audio --limit 15 +# 🎯 Maximum insights = comprehensive coverage +scapo scrape targeted --service "HeyGen" --limit 30 # Finds rare edge cases ``` +**Why it matters:** LLMs need multiple examples to identify patterns. More posts = higher chance of finding specific pricing, bugs, and workarounds. ### Adjust Quality Threshold: ```bash diff --git a/README.md b/README.md index 469fa71..499df72 100644 --- a/README.md +++ b/README.md @@ -210,6 +210,10 @@ LLM_PROVIDER=openrouter OPENROUTER_API_KEY=your_key OPENROUTER_MODEL=your_favorite_model +# Local LLM Context (Important for Ollama/LM Studio!) 
+LOCAL_LLM_MAX_CONTEXT=8192 # Your model's context size in tokens +LOCAL_LLM_OPTIMAL_CHUNK=2048 # Optimal batch size (typically 1/4 of max) + # Extraction Quality LLM_QUALITY_THRESHOLD=0.6 # Min quality (0.0-1.0) @@ -218,10 +222,13 @@ SCRAPING_DELAY_SECONDS=2 # Be respectful MAX_POSTS_PER_SCRAPE=100 # Limit per source ``` -### Recommended Settings for Quality Extraction -- **Posts per query**: 15-20 minimum (more posts = better tips) -- **Queries per service**: 5-10 different search types -- **Batch size**: 3 services at a time for focused extraction +### Why --limit Matters (More Posts = Better Tips) ```bash +--limit 5 # ❌ Often finds nothing (too few samples) +--limit 15 # ✅ Good baseline (finds common issues) +--limit 25 # 🎯 Optimal (uncovers hidden gems & edge cases) +``` +So, a hand-wavy breakdown: with 5 posts, extraction success is ~20%; with 20+ posts, it jumps to ~80%. ## 🎨 Interactive TUI diff --git a/models/code/github-copilot/cost_optimization.md b/models/code/github-copilot/cost_optimization.md index 6c882d4..8301f6c 100644 --- a/models/code/github-copilot/cost_optimization.md +++ b/models/code/github-copilot/cost_optimization.md @@ -2,13 +2,7 @@ *Last updated: 2025-08-12* -## Pricing Information - -- $10/month for the standard Copilot plan - ## Money-Saving Tips -- Copilot Pro: 300 premium requests/month (before Pro+ upgrade) -- Free tier: 2,000 completions/month, 50 chat prompts/month -- Paid tier: unlimited completions and chat prompts +- €8.4 per month for GPT-4 to Copilot and text chatbox diff --git a/models/code/github-copilot/metadata.json b/models/code/github-copilot/metadata.json index 35e3889..9d2d659 100644 --- a/models/code/github-copilot/metadata.json +++ b/models/code/github-copilot/metadata.json @@ -1,13 +1,13 @@ { "service": "GitHub Copilot", "category": "code", - "last_updated": "2025-08-12T20:04:22.865982", - "extraction_timestamp": "2025-08-12T20:04:20.280914", + "last_updated": "2025-08-12T21:24:27.281886", + 
"extraction_timestamp": "2025-08-12T21:24:27.096753", "data_sources": [ "Reddit API", "Community discussions" ], - "posts_analyzed": 82, + "posts_analyzed": 2, "confidence": "medium", "version": "1.0.0" } \ No newline at end of file diff --git a/models/code/github-copilot/parameters.json b/models/code/github-copilot/parameters.json index 313405f..5766d0d 100644 --- a/models/code/github-copilot/parameters.json +++ b/models/code/github-copilot/parameters.json @@ -1,19 +1,9 @@ { "service": "GitHub Copilot", - "last_updated": "2025-08-12T20:04:22.768265", - "recommended_settings": { - "setting_0": { - "description": "github.copilot.chat.agent.autoFix" - }, - "setting_1": { - "description": "chat.tools.autoApprove" - } - }, + "last_updated": "2025-08-12T21:24:27.207908", + "recommended_settings": {}, "cost_optimization": { - "pricing": "$10/month for the standard Copilot plan", - "tip_1": "Copilot Pro: 300 premium requests/month (before Pro+ upgrade)", - "tip_2": "Free tier: 2,000 completions/month, 50 chat prompts/month", - "unlimited_option": "Paid tier: unlimited completions and chat prompts" + "tip_0": "\u20ac8.4 per month for GPT-4 to Copilot and text chatbox" }, "sources": [ "Reddit community", diff --git a/src/cli.py b/src/cli.py index 1969ea4..e9ef696 100644 --- a/src/cli.py +++ b/src/cli.py @@ -170,9 +170,8 @@ def scrape(): @click.option("--sources", "-s", multiple=True, help="Sources to scrape (e.g., reddit:LocalLLaMA)") @click.option("--limit", "-l", default=10, help="Maximum posts per source") -@click.option("--llm-max-chars", "-c", type=int, help="Max characters for LLM processing") @click.option("--interactive", "-i", is_flag=True, help="Interactive source selection") -def run_scraper(sources, limit, llm_max_chars, interactive): +def run_scraper(sources, limit, interactive): """Run intelligent scraper with enhanced UI.""" show_banner() @@ -223,8 +222,7 @@ async def _run(): console.print(Panel( f"[bold]Scraping Plan[/bold]\n\n" 
f"Sources:\n{source_text}\n\n" - f"Post limit: [cyan]{limit}[/cyan] per source\n" - f"LLM processing: [cyan]{'Limited' if llm_max_chars else 'Full'}[/cyan]", + f"Post limit: [cyan]{limit}[/cyan] per source", border_style="blue" )) @@ -251,7 +249,6 @@ async def _run(): result = await service.run_scrapers( sources=sources_list, max_posts_per_source=limit, - llm_max_chars=llm_max_chars, ) progress.update(task, completed=100) diff --git a/src/core/config.py b/src/core/config.py index e644e75..dbfb70c 100644 --- a/src/core/config.py +++ b/src/core/config.py @@ -49,9 +49,11 @@ class Settings(BaseSettings): local_llm_model: str = Field(default="llama3", description="Local LLM model") local_llm_type: str = Field(default="ollama", description="Local LLM type: ollama, lmstudio") llm_processing_enabled: bool = Field(default=True, description="Enable LLM processing of content") - llm_max_chars: int = Field(default=4000, description="Maximum characters to send to LLM (user-friendly)") - llm_char_hard_limit: int = Field(default=50000, description="Absolute maximum characters (safety limit)") llm_quality_threshold: float = Field(default=0.6, description="Minimum quality score for practices (0.0-1.0)") + + # Local LLM context configuration + local_llm_max_context: Optional[int] = Field(None, description="Maximum context tokens for local LLM (e.g., 4096, 8192, 32768)") + local_llm_optimal_chunk: Optional[int] = Field(None, description="Optimal chunk size for local LLM processing") @field_validator("models_dir", "scrapers_dir") @classmethod diff --git a/src/scrapers/base.py b/src/scrapers/base.py index 7bcb19c..270db4a 100644 --- a/src/scrapers/base.py +++ b/src/scrapers/base.py @@ -204,7 +204,6 @@ async def _enhance_with_llm( model=settings.local_llm_model, api_type=settings.local_llm_type, api_key=settings.openrouter_api_key, - max_chars=settings.llm_max_chars, ) enhanced_practices = initial_practices.copy() diff --git a/src/scrapers/intelligent_browser_scraper.py 
b/src/scrapers/intelligent_browser_scraper.py index 133be85..1a9b6f7 100644 --- a/src/scrapers/intelligent_browser_scraper.py +++ b/src/scrapers/intelligent_browser_scraper.py @@ -83,7 +83,7 @@ def extract_best_practices(self, posts: List[ScrapedPost]) -> Dict[str, Any]: """Required abstract method - handled by LLM processing instead.""" return {} - def _get_llm_processor(self, max_chars: Optional[int] = None): + def _get_llm_processor(self): """Get or create a cached LLM processor instance.""" if self._llm_processor is None: if settings.llm_provider == "openrouter": @@ -91,14 +91,12 @@ def _get_llm_processor(self, max_chars: Optional[int] = None): provider="openrouter", api_key=settings.openrouter_api_key, model=settings.openrouter_model, - max_chars=max_chars or settings.llm_max_chars ) else: self._llm_processor = LLMProcessorFactory.create_processor( provider="local", base_url=settings.local_llm_url, model=settings.local_llm_model, - max_chars=max_chars or settings.llm_max_chars ) return self._llm_processor diff --git a/src/services/adaptive_processor.py b/src/services/adaptive_processor.py index 5e4f694..da9ba5d 100644 --- a/src/services/adaptive_processor.py +++ b/src/services/adaptive_processor.py @@ -25,27 +25,21 @@ class LLMCapabilities: def detect_capabilities(cls, provider: str, model: str) -> 'LLMCapabilities': """Detect LLM capabilities based on provider and model.""" - # Local LLMs (limited context) + # Local LLMs - use environment variables if set if provider == "local": - if "llama" in model.lower(): - if "3" in model: - return cls(provider, model, 8192, False, 2000) - elif "2" in model: - return cls(provider, model, 4096, False, 1500) - else: - return cls(provider, model, 2048, False, 1000) - elif "mistral" in model.lower(): - return cls(provider, model, 8192, False, 2000) - elif "phi" in model.lower(): - return cls(provider, model, 4096, False, 1500) - elif "qwen" in model.lower(): - if "32k" in model.lower(): - return cls(provider, model, 32768, 
True, 8000) - else: - return cls(provider, model, 8192, True, 2000) - else: - # Conservative defaults for unknown local models - return cls(provider, model, 2048, False, 1000) + # Check for user-configured values + if settings.local_llm_max_context: + max_context = settings.local_llm_max_context + optimal_chunk = settings.local_llm_optimal_chunk or max_context // 4 + logger.info(f"Using user-configured local LLM context: {max_context} tokens, {optimal_chunk} chunk size") + return cls(provider, model, max_context, False, optimal_chunk) + + # Fallback to conservative defaults if not configured + logger.warning(f"No LOCAL_LLM_MAX_CONTEXT set for {model}. Using conservative defaults.") + logger.info("Set LOCAL_LLM_MAX_CONTEXT and LOCAL_LLM_OPTIMAL_CHUNK in .env for better performance") + + # Very conservative defaults for safety + return cls(provider, model, 2048, False, 500) # Cloud providers (larger context) elif provider == "openrouter": diff --git a/src/services/batch_llm_processor.py b/src/services/batch_llm_processor.py index faa602d..6a184df 100644 --- a/src/services/batch_llm_processor.py +++ b/src/services/batch_llm_processor.py @@ -16,21 +16,8 @@ class BatchLLMProcessor: """Processes multiple posts in a single LLM call with context window awareness""" - # Conservative token limits for different model families - CONTEXT_LIMITS = { - 'gpt-4': 8000, # 8k context (conservative for gpt-4) - 'gpt-4-32k': 30000, # 32k context - 'gpt-4-turbo': 120000, # 128k context - 'gpt-3.5-turbo': 15000, # 16k context - 'claude-3': 180000, # 200k context - 'claude-2': 90000, # 100k context - 'llama': 3500, # 4k context (conservative) - 'mistral': 30000, # 32k context - 'deepseek': 30000, # 32k context - 'glm': 120000, # GLM-4 models have 128k context - 'z-ai': 120000, # z-ai models typically have large context - 'default': 8000 # Safe default for cloud models - } + # Default context limit if we can't determine from API or env + DEFAULT_CONTEXT_LIMIT = 4096 # Conservative default 
# Reserved tokens for system prompt and response RESERVED_TOKENS = { @@ -43,10 +30,18 @@ def __init__(self, model_name: str = "gpt-3.5-turbo"): self.model_name = model_name self.encoder = self._get_encoder(model_name) - # Try to get context from OpenRouter first + # Try to get context from OpenRouter API or environment variables self.context_limit = self._get_dynamic_context_limit(model_name) if not self.context_limit: - self.context_limit = self._get_context_limit(model_name) + # For local models, check environment variable + from src.core.config import settings + if settings.llm_provider == "local" and settings.local_llm_max_context: + self.context_limit = settings.local_llm_max_context + logger.info(f"Using LOCAL_LLM_MAX_CONTEXT: {self.context_limit}") + else: + # Fall back to conservative default + self.context_limit = self.DEFAULT_CONTEXT_LIMIT + logger.warning(f"Using default context limit: {self.context_limit}. Set LOCAL_LLM_MAX_CONTEXT for better performance.") self.usable_tokens = self._calculate_usable_tokens() @@ -89,30 +84,6 @@ def _get_encoder(self, model_name: str): logger.warning(f"Could not get specific encoder for {model_name}: {e}") return tiktoken.get_encoding('cl100k_base') - def _get_context_limit(self, model_name: str) -> int: - """Get conservative context limit for the model""" - model_lower = model_name.lower() - - # Check for specific model patterns - for key in self.CONTEXT_LIMITS: - if key in model_lower: - return self.CONTEXT_LIMITS[key] - - # Check for context size indicators in model name - if '32k' in model_lower: - return 30000 - elif '16k' in model_lower: - return 15000 - elif '8k' in model_lower: - return 7500 - elif '4k' in model_lower: - return 3500 - elif '100k' in model_lower or '128k' in model_lower: - return 90000 - elif '200k' in model_lower: - return 180000 - - return self.CONTEXT_LIMITS['default'] def _calculate_usable_tokens(self) -> int: """Calculate tokens available for actual content""" diff --git 
a/src/services/llm_processor.py b/src/services/llm_processor.py index a3164d8..3ec75e7 100644 --- a/src/services/llm_processor.py +++ b/src/services/llm_processor.py @@ -8,6 +8,7 @@ from pydantic import BaseModel, Field import litellm from litellm import acompletion, RateLimitError, AuthenticationError +import tiktoken from src.core.logging import get_logger from src.core.config import settings @@ -36,9 +37,15 @@ class ProcessedPractice(BaseModel): class BaseLLMProcessor(ABC): """Base class for LLM processors using LiteLLM.""" - def __init__(self, max_chars: Optional[int] = None): - self.max_chars = max_chars or settings.llm_max_chars + # Default context limit if we can't determine from API or env + DEFAULT_CONTEXT_LIMIT = 4096 # Conservative default + + def __init__(self, model_name: str = None): + self.model_name = model_name or "gpt-3.5-turbo" self.logger = logger + self.encoder = self._get_encoder(self.model_name) + self.context_limit = self._get_context_limit() + self.usable_tokens = self._calculate_usable_tokens() @abstractmethod async def process_content(self, content: str, content_type: str) -> List[ProcessedPractice]: @@ -50,23 +57,93 @@ async def process_raw_prompt(self, prompt: str) -> str: """Process a raw prompt and return the response as a string.""" pass - def truncate_to_limit(self, text: str) -> Tuple[str, bool]: - """Truncate text to character limit. 
+ def _get_encoder(self, model_name: str): + """Get appropriate tokenizer for the model""" + try: + # Try to get model-specific encoder + if 'gpt-4' in model_name.lower(): + return tiktoken.encoding_for_model('gpt-4') + elif 'gpt-3.5' in model_name.lower(): + return tiktoken.encoding_for_model('gpt-3.5-turbo') + else: + # Default to cl100k_base for most modern models + return tiktoken.get_encoding('cl100k_base') + except Exception as e: + self.logger.warning(f"Could not get specific encoder for {model_name}: {e}") + return tiktoken.get_encoding('cl100k_base') + + def _get_context_limit(self) -> int: + """Get context limit for the model""" + # Try to get from OpenRouter API first (if available) + try: + from src.services.openrouter_context import OpenRouterContextManager + api_key = settings.openrouter_api_key + if api_key and settings.llm_provider == "openrouter": + manager = OpenRouterContextManager(api_key=api_key) + manager.load_cache() + context = manager.get_context_length(self.model_name) + if context: + self.logger.info(f"Got context limit from OpenRouter: {context}") + return context + except Exception as e: + self.logger.debug(f"Could not get context from OpenRouter: {e}") + + # For local models, check environment variable + if settings.llm_provider == "local" and settings.local_llm_max_context: + self.logger.info(f"Using LOCAL_LLM_MAX_CONTEXT: {settings.local_llm_max_context}") + return settings.local_llm_max_context + + # Fall back to conservative default + self.logger.warning(f"Using default context limit: {self.DEFAULT_CONTEXT_LIMIT}") + return self.DEFAULT_CONTEXT_LIMIT + + def _calculate_usable_tokens(self) -> int: + """Calculate tokens available for actual content""" + # Reserve tokens for system prompt and response + reserved_tokens = 1500 # Conservative reservation + usable = self.context_limit - reserved_tokens + + if usable < 1000: + self.logger.warning(f"Very limited context space: {usable} tokens") + return max(500, usable) # Minimum 500 
tokens for content + + return usable + + def count_tokens(self, text: str) -> int: + """Count tokens in text""" + try: + return len(self.encoder.encode(text)) + except Exception as e: + self.logger.warning(f"Token counting failed, using approximation: {e}") + # Fallback: approximate 1 token per 4 characters + return len(text) // 4 + + def truncate_to_token_limit(self, text: str) -> Tuple[str, bool]: + """Truncate text to fit within token limit. Returns: Tuple of (truncated_text, was_truncated) """ - # Apply hard limit first - if len(text) > settings.llm_char_hard_limit: - text = text[:settings.llm_char_hard_limit] - self.logger.warning(f"Applied hard limit of {settings.llm_char_hard_limit} chars") + token_count = self.count_tokens(text) - # Apply user-specified limit - if len(text) > self.max_chars: - self.logger.info(f"Truncating content from {len(text)} to {self.max_chars} chars") - return text[:self.max_chars] + "\n\n[Content truncated...]", True + if token_count <= self.usable_tokens: + return text, False - return text, False + # Binary search to find the right truncation point + left, right = 0, len(text) + best_fit = "" + + while left < right: + mid = (left + right) // 2 + truncated = text[:mid] + if self.count_tokens(truncated) <= self.usable_tokens: + best_fit = truncated + left = mid + 1 + else: + right = mid + + self.logger.info(f"Truncated content from {token_count} to {self.count_tokens(best_fit)} tokens") + return best_fit + "\n\n[Content truncated...]", True def create_extraction_prompt(self, content: str, content_type: str) -> str: """Create a prompt for extracting practices from content.""" @@ -107,13 +184,14 @@ def __init__(self, provider: str = None, model: str = None, api_key: str = None, - base_url: str = None, - max_chars: Optional[int] = None): - super().__init__(max_chars) - - # Set up provider-specific configuration + base_url: str = None): + # Set up provider-specific configuration first self.provider = provider or settings.llm_provider - 
self.model_name = model or (settings.openrouter_model if self.provider == "openrouter" else settings.local_llm_model) + model_name = model or (settings.openrouter_model if self.provider == "openrouter" else settings.local_llm_model) + super().__init__(model_name) + + # Store the full model name for litellm + self.model_name = model_name # Initialize smart processors self.capabilities = LLMCapabilities.detect_capabilities(self.provider, self.model_name) @@ -222,10 +300,10 @@ async def process_content(self, content: str, content_type: str) -> List[Process except Exception as e: self.logger.error(f"Smart processing failed, falling back to basic truncation: {e}") - # Fallback to basic truncation - content, was_truncated = self.truncate_to_limit(content) + # Fallback to token-based truncation + content, was_truncated = self.truncate_to_token_limit(content) if was_truncated: - self.logger.info(f"Content truncated to {self.max_chars} characters for {content_type}") + self.logger.info(f"Content truncated to fit token limit for {content_type}") prompt = self.create_extraction_prompt(content, content_type) practices = await self._extract_practices_from_chunk(prompt, content_type) diff --git a/src/services/scraper_service.py b/src/services/scraper_service.py index 30bafb9..087e38e 100644 --- a/src/services/scraper_service.py +++ b/src/services/scraper_service.py @@ -62,7 +62,6 @@ async def run_scrapers( self, sources: List[str] = None, max_posts_per_source: int = 10, - llm_max_chars: Optional[int] = None, ) -> Dict[str, Any]: """Run intelligent scraper on specified sources.""" @@ -70,10 +69,6 @@ async def run_scrapers( if sources is None: sources = self._get_default_sources() - # Override LLM max chars if provided - if llm_max_chars is not None: - settings.llm_max_chars = llm_max_chars - logger.info(f"Using custom LLM character limit: {llm_max_chars}") status = self.scraper_status["intelligent"] From 4d4233f60d000002b664f0dbf738ddd23fa365fd Mon Sep 17 00:00:00 2001 From: 
arahangua Date: Wed, 13 Aug 2025 16:55:38 +0900 Subject: [PATCH 5/7] updated: readme, .github contents for repo management --- .github/CODEOWNERS | 24 ++++++++ .github/ISSUE_TEMPLATE/feature_request.yaml | 61 +++++++++++++++++++++ .github/dependabot.yml | 45 +++++++++++++++ .github/release.yml | 34 ++++++++++++ .github/topics.txt | 39 +++++++++++++ CITATION.cff | 28 ++++++++++ CODE_OF_CONDUCT.md | 58 ++++++++++++++++++++ README.md | 14 +++-- 8 files changed, 299 insertions(+), 4 deletions(-) create mode 100644 .github/CODEOWNERS create mode 100644 .github/ISSUE_TEMPLATE/feature_request.yaml create mode 100644 .github/dependabot.yml create mode 100644 .github/release.yml create mode 100644 .github/topics.txt create mode 100644 CITATION.cff create mode 100644 CODE_OF_CONDUCT.md diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..8cadd0b --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,24 @@ +# CODEOWNERS file helps GitHub route pull requests to the right reviewers +# This also shows up in GitHub's interface showing who maintains what + +# Global owners +* @czero-cc + +# Documentation +*.md @czero-cc +/docs/ @czero-cc + +# Core functionality +/src/core/ @czero-cc +/src/scrapers/ @czero-cc +/src/services/ @czero-cc + +# Models and data +/models/ @czero-cc +/data/ @czero-cc + +# MCP Server +/mcp/ @czero-cc + +# CI/CD +/.github/ @czero-cc \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/feature_request.yaml b/.github/ISSUE_TEMPLATE/feature_request.yaml new file mode 100644 index 0000000..993ed37 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yaml @@ -0,0 +1,61 @@ +name: "✨ Feature Request" +description: Suggest an idea or improvement for SCAPO +title: "[Feature]: " +labels: ["enhancement", "needs-triage"] +body: + - type: markdown + attributes: + value: | + Thanks for helping make SCAPO better! 
🚀 + + - type: dropdown + id: feature-type + attributes: + label: Feature Type + description: What kind of feature are you requesting? + options: + - New AI service support + - Scraping improvement + - Data extraction enhancement + - UI/UX improvement + - API feature + - Documentation + - Other + validations: + required: true + + - type: textarea + id: problem + attributes: + label: Problem Statement + description: What problem does this feature solve? + placeholder: | + I'm always frustrated when... + It would be helpful if... + validations: + required: true + + - type: textarea + id: solution + attributes: + label: Proposed Solution + description: How would you like to see this implemented? + placeholder: Describe your ideal solution + validations: + required: true + + - type: textarea + id: alternatives + attributes: + label: Alternatives Considered + description: Have you considered any alternative solutions? + placeholder: Optional - any other approaches you've thought about + + - type: checkboxes + id: contribution + attributes: + label: Contribution + options: + - label: I'm willing to help implement this feature + - label: I can provide test cases or examples + - label: I can help with documentation \ No newline at end of file diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..965bd2b --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,45 @@ +# Dependabot configuration for automatic dependency updates +# This keeps the project secure and up-to-date + +version: 2 +updates: + # Python dependencies + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "weekly" + day: "monday" + open-pull-requests-limit: 5 + labels: + - "dependencies" + - "python" + commit-message: + prefix: "chore" + include: "scope" + + # npm dependencies for MCP server + - package-ecosystem: "npm" + directory: "/mcp" + schedule: + interval: "weekly" + day: "monday" + open-pull-requests-limit: 3 + labels: + - "dependencies" + - 
"javascript" + - "mcp" + commit-message: + prefix: "chore" + include: "scope" + + # GitHub Actions + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" + labels: + - "dependencies" + - "ci" + commit-message: + prefix: "ci" + include: "scope" \ No newline at end of file diff --git a/.github/release.yml b/.github/release.yml new file mode 100644 index 0000000..bb76c0f --- /dev/null +++ b/.github/release.yml @@ -0,0 +1,34 @@ +# GitHub Release configuration +# This helps auto-generate better release notes + +changelog: + exclude: + labels: + - ignore-for-release + authors: + - dependabot + categories: + - title: 🚀 New Features + labels: + - enhancement + - feature + - title: 🐛 Bug Fixes + labels: + - bug + - fix + - title: 📚 Documentation + labels: + - documentation + - docs + - title: 🔧 Maintenance + labels: + - chore + - maintenance + - dependencies + - title: 🎉 New AI Services + labels: + - new-service + - model + - title: Other Changes + labels: + - "*" \ No newline at end of file diff --git a/.github/topics.txt b/.github/topics.txt new file mode 100644 index 0000000..5584bcc --- /dev/null +++ b/.github/topics.txt @@ -0,0 +1,39 @@ +# GitHub Topics to add (via repo settings): +# These help with GitHub search and discovery + +prompt-engineering +ai-optimization +llm-tools +cost-optimization +reddit-scraper +ai-tips +generative-ai +mcp-server +openai +anthropic +claude +gpt-4 +midjourney +stable-diffusion +eleven-labs +github-copilot +ai-tools +machine-learning +nlp +text-generation +image-generation +video-generation +audio-generation +community-driven +knowledge-base +best-practices +optimization-tips +cost-reduction +performance-optimization +web-scraping +python +automation +developer-tools +ai-services +prompt-templates +parameter-tuning \ No newline at end of file diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..d02549a --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,28 @@ +# This CITATION file helps 
researchers cite your project properly +cff-version: 1.2.0 +title: 'SCAPO: Stay Calm and Prompt On' +message: 'If you use this software, please cite it as below.' +type: software +authors: + - name: "CZero Engine Team" + email: "info@czero.cc" + affiliation: "Fiefworks, Inc." +repository-code: 'https://github.com/czero-cc/scapo' +url: 'https://czero.cc' +abstract: 'A community-driven knowledge base that automatically extracts specific AI service optimization techniques from Reddit discussions, providing actionable tips for cost reduction and performance improvement.' +keywords: + - prompt-engineering + - ai-optimization + - llm-tools + - cost-optimization + - reddit-scraper + - ai-tips + - generative-ai + - mcp-server + - openai + - anthropic + - midjourney + - stable-diffusion +license: MIT +version: 0.1.0 +date-released: '2024-08-01' \ No newline at end of file diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..05985d4 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,58 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members +* Sharing optimization tips and discoveries openly + +Examples of unacceptable behavior: + +* The use of sexualized language or imagery +* Trolling, insulting or derogatory comments, and personal attacks +* Public or private harassment +* Publishing others' private information without permission +* Other conduct which could reasonably be considered inappropriate + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +info@czero.cc. + +All complaints will be reviewed and investigated promptly and fairly. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 
+ +[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html \ No newline at end of file diff --git a/README.md b/README.md index 499df72..2296e1d 100644 --- a/README.md +++ b/README.md @@ -6,12 +6,15 @@ **The Community-Driven Knowledge Base for AI Service Optimization** +[![Python 3.12+](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Made with Love](https://img.shields.io/badge/Made%20with-❤️-red.svg)](https://github.com/czero-cc/scapo) -[![No API Keys](https://img.shields.io/badge/API%20Keys-Not%20Required-brightgreen.svg)]() +[![No API Keys](https://img.shields.io/badge/API%20Keys-Not%20Required-brightgreen.svg)]()
[![LLM Powered](https://img.shields.io/badge/LLM-Powered-blue.svg)]() [![Browser Magic](https://img.shields.io/badge/Scraping-Browser%20Based-orange.svg)]() [![MCP Ready](https://img.shields.io/badge/Claude-MCP%20Ready-purple.svg)]() [![PRs Welcome](https://img.shields.io/badge/PRs-Welcome-brightgreen.svg)](CONTRIBUTING.md) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) ### 🎯 Real optimization tips from real users for AI services @@ -19,6 +22,8 @@ ## 🤔 What is SCAPO? +**Keywords**: AI cost optimization, prompt engineering, LLM tips, OpenAI, Claude, Anthropic, Midjourney, Stable Diffusion, ElevenLabs, GitHub Copilot, reduce AI costs, AI service best practices, Reddit scraper, community knowledge base + Ever burned through credits in minutes? Searching Reddit for that one optimization tip? Getting generic advice when you need specific settings? ![Classic AI Frustration](assets/guy_freaking_out2.png) @@ -293,7 +298,7 @@ Help us build the community knowledge base for AI service optimization! - [Configuration Guide](docs/CONFIGURATION.md) - [Quick Start Guide](QUICKSTART.md) - [Contributing Guide](CONTRIBUTING.md) -- [Add New Source Tutorial](docs/ADD_NEW_SOURCE.md) +- [Add New Source Tutorial (legacy method)](docs/ADD_NEW_SOURCE.md) ## 📜 License @@ -304,9 +309,10 @@ Built as part of the CZero Engine project to improve AI application development. 
## 🙏 Acknowledgments - Reddit communities for sharing real experiences -- OpenRouter for accessible AI APIs +- [OpenRouter](https://openrouter.ai/) for accessible AI APIs - Coffee ☕ for making this possible -- Ollama and LMstudio for awesome local LLM experience +- [Ollama](https://ollama.com/) and [LMstudio](https://lmstudio.ai/) for awesome local LLM experience +- [Awesome Generative AI](https://github.com/steven2358/awesome-generative-ai) & [Awesome AI Tools](https://github.com/mahseema/awesome-ai-tools) for service discovery - All opensource contributors in this AI space --- From a8a6cb410b7cccbf903ba696e40424a5f25d28d3 Mon Sep 17 00:00:00 2001 From: arahangua Date: Wed, 13 Aug 2025 16:59:55 +0900 Subject: [PATCH 6/7] fixed:citation date alias --- CITATION.cff | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CITATION.cff b/CITATION.cff index d02549a..51383c4 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -25,4 +25,4 @@ keywords: - stable-diffusion license: MIT version: 0.1.0 -date-released: '2024-08-01' \ No newline at end of file +date-released: '2025-08-13' \ No newline at end of file From d8d0e6d3c36c4851c1b4e4862933743c3aead380 Mon Sep 17 00:00:00 2001 From: arahangua Date: Wed, 13 Aug 2025 17:15:15 +0900 Subject: [PATCH 7/7] fixed:dependabot to target dev branch --- .github/dependabot.yml | 3 +++ .github/scripts/create_labels.sh | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 .github/scripts/create_labels.sh diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 965bd2b..cea6e6c 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -6,6 +6,7 @@ updates: # Python dependencies - package-ecosystem: "pip" directory: "/" + target-branch: "dev" schedule: interval: "weekly" day: "monday" @@ -20,6 +21,7 @@ updates: # npm dependencies for MCP server - package-ecosystem: "npm" directory: "/mcp" + target-branch: "dev" schedule: interval: "weekly" day: "monday" @@ -35,6 +37,7 @@ updates: # GitHub 
Actions - package-ecosystem: "github-actions" directory: "/" + target-branch: "dev" schedule: interval: "monthly" labels: diff --git a/.github/scripts/create_labels.sh b/.github/scripts/create_labels.sh new file mode 100644 index 0000000..0daa287 --- /dev/null +++ b/.github/scripts/create_labels.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# Script to create GitHub labels for Dependabot +# Run this script with GitHub CLI (gh) installed and authenticated + +echo "Creating GitHub labels for Dependabot..." + +# Create labels with appropriate colors +gh label create dependencies --description "Dependency updates" --color "0366d6" 2>/dev/null || echo "Label 'dependencies' already exists" +gh label create javascript --description "JavaScript related changes" --color "f7df1e" 2>/dev/null || echo "Label 'javascript' already exists" +gh label create mcp --description "MCP server related" --color "008672" 2>/dev/null || echo "Label 'mcp' already exists" +gh label create python --description "Python related changes" --color "3776ab" 2>/dev/null || echo "Label 'python' already exists" +gh label create ci --description "Continuous Integration" --color "e11d21" 2>/dev/null || echo "Label 'ci' already exists" + +echo "Label creation complete!" +echo "" +echo "Current labels:" +gh label list | grep -E "dependencies|javascript|mcp|python|ci" \ No newline at end of file