/**
 * Astrai Intelligent Router Summarization Endpoint with Redis Caching
 *
 * Routes to the optimal model/provider (OpenAI, Anthropic, Groq, etc.)
 * based on cost, latency, and task complexity.
 * Set ASTRAI_API_KEY to enable. Supports "auto" model selection.
 * Server-side Redis cache for cross-user deduplication.
 */

import { getCachedJson, setCachedJson, hashString } from './_upstash-cache.js';
import { getCorsHeaders, isDisallowedOrigin } from './_cors.js';

export const config = {
  runtime: 'edge',
};

const ASTRAI_API_URL = 'https://astrai-compute.fly.dev/v1/chat/completions';
const MODEL = 'auto'; // Let Astrai pick the optimal model per request
const CACHE_TTL_SECONDS = 86400; // 24 hours
const CACHE_VERSION = 'v3';
const MAX_BODY_BYTES = 51200; // reject request bodies larger than 50 KiB
const MAX_HEADLINES = 8; // cap headlines used for prompts and cache keys
const DUPLICATE_SIMILARITY = 0.6; // word-overlap ratio above which two headlines are "the same story"

/**
 * Build a JSON Response with the given status and extra headers.
 * Centralizes the Content-Type header so every branch of the handler
 * (including error branches) carries consistent headers.
 *
 * @param {object} payload - body to serialize
 * @param {number} status - HTTP status code
 * @param {object} [headers] - extra headers (e.g. CORS) merged in
 * @returns {Response}
 */
function jsonResponse(payload, status, headers = {}) {
  return new Response(JSON.stringify(payload), {
    status,
    headers: { ...headers, 'Content-Type': 'application/json' },
  });
}

/**
 * Build the Redis cache key for a summarization request.
 * The key format is shared with the Groq/OpenRouter endpoints so all
 * providers read/write the same cache entries.
 *
 * @param {string[]} headlines - input headlines (only the first 8 participate)
 * @param {string} mode - 'brief' | 'analysis' | 'translate' | other (synthesize)
 * @param {string} [geoContext] - optional intel context, hashed into a suffix
 * @param {string} [variant] - site variant; for 'translate' mode it carries the target language
 * @param {string} [lang] - output language code
 * @returns {string} namespaced, versioned cache key
 */
function getCacheKey(headlines, mode, geoContext = '', variant = 'full', lang = 'en') {
  // Sort so the same set of headlines in a different order hits the same key.
  const sorted = headlines.slice(0, MAX_HEADLINES).sort().join('|');
  const geoHash = geoContext ? ':g' + hashString(geoContext).slice(0, 6) : '';
  const hash = hashString(`${mode}:${sorted}`);
  const normalizedVariant = typeof variant === 'string' && variant ? variant.toLowerCase() : 'full';
  const normalizedLang = typeof lang === 'string' && lang ? lang.toLowerCase() : 'en';

  if (mode === 'translate') {
    // In translate mode the "variant" field carries the target language.
    const targetLang = normalizedVariant || normalizedLang;
    return `summary:${CACHE_VERSION}:${mode}:${targetLang}:${hash}${geoHash}`;
  }

  return `summary:${CACHE_VERSION}:${mode}:${normalizedVariant}:${normalizedLang}:${hash}${geoHash}`;
}

/**
 * Deduplicate similar headlines (same story from different sources).
 * Two headlines are duplicates when the overlap of their "significant"
 * words (length >= 4) exceeds DUPLICATE_SIMILARITY relative to the
 * smaller word set.
 *
 * @param {string[]} headlines
 * @returns {string[]} headlines with near-duplicates removed, original order kept
 */
function deduplicateHeadlines(headlines) {
  const seen = new Set(); // word-sets of headlines we have kept so far
  const unique = [];

  for (const headline of headlines) {
    const normalized = headline.toLowerCase()
      .replace(/[^\w\s]/g, '')
      .replace(/\s+/g, ' ')
      .trim();

    // Significant words only: short words ("the", "for", "new") carry no signal.
    const words = new Set(normalized.split(' ').filter(w => w.length >= 4));

    let isDuplicate = false;
    for (const seenWords of seen) {
      const intersection = [...words].filter(w => seenWords.has(w));
      const denom = Math.min(words.size, seenWords.size);
      // FIX: guard against 0/0 -> NaN when a headline has no significant
      // words. Two such "empty" headlines count as duplicates of each
      // other; an empty vs. non-empty pair does not.
      const similarity = denom > 0
        ? intersection.length / denom
        : (words.size === 0 && seenWords.size === 0 ? 1 : 0);
      if (similarity > DUPLICATE_SIMILARITY) {
        isDuplicate = true;
        break;
      }
    }

    if (!isDuplicate) {
      seen.add(words);
      unique.push(headline);
    }
  }

  return unique;
}

/**
 * Build the system/user prompt pair for the requested mode and variant.
 * Prompt text is shared verbatim with the previous inline implementation.
 *
 * @param {object} args
 * @param {string} args.mode - 'brief' | 'analysis' | 'translate' | default synthesize
 * @param {string} args.variant - site variant ('tech' changes focus); target language in translate mode
 * @param {string} args.lang - output language code
 * @param {string[]} args.headlines - raw headlines (translate mode uses headlines[0])
 * @param {string} args.headlineText - numbered, deduplicated headline list
 * @param {string} args.geoContext - optional intel context appended to user prompts
 * @returns {{systemPrompt: string, userPrompt: string}}
 */
function buildPrompts({ mode, variant, lang, headlines, headlineText, geoContext }) {
  const intelSection = geoContext ? `\n\n${geoContext}` : '';
  const isTechVariant = variant === 'tech';
  const dateContext = `Current date: ${new Date().toISOString().split('T')[0]}.${isTechVariant ? '' : ' Donald Trump is the current US President (second term, inaugurated Jan 2025).'}`;
  const langInstruction = lang && lang !== 'en' ? `\nIMPORTANT: Output the summary in ${lang.toUpperCase()} language.` : '';

  if (mode === 'brief') {
    const systemPrompt = isTechVariant
      ? `${dateContext}\n\nSummarize the key tech/startup development in 2-3 sentences.\nRules:\n- Focus ONLY on technology, startups, AI, funding, product launches, or developer news\n- IGNORE political news, trade policy, tariffs, government actions unless directly about tech regulation\n- Lead with the company/product/technology name\n- Start directly: "OpenAI announced...", "A new $50M Series B...", "GitHub released..."\n- No bullet points, no meta-commentary${langInstruction}`
      : `${dateContext}\n\nSummarize the key development in 2-3 sentences.\nRules:\n- Lead with WHAT happened and WHERE - be specific\n- NEVER start with "Breaking news", "Good evening", "Tonight", or TV-style openings\n- Start directly with the subject: "Iran's regime...", "The US Treasury...", "Protests in..."\n- CRITICAL FOCAL POINTS are the main actors - mention them by name\n- If focal points show news + signals convergence, that's the lead\n- No bullet points, no meta-commentary${langInstruction}`;
    return { systemPrompt, userPrompt: `Summarize the top story:\n${headlineText}${intelSection}` };
  }

  if (mode === 'analysis') {
    // NOTE(review): analysis prompts do not append langInstruction, unlike
    // brief/synthesize — presumably intentional, but worth confirming.
    const systemPrompt = isTechVariant
      ? `${dateContext}\n\nAnalyze the tech/startup trend in 2-3 sentences.\nRules:\n- Focus ONLY on technology implications: funding trends, AI developments, market shifts, product strategy\n- IGNORE political implications, trade wars, government unless directly about tech policy\n- Lead with the insight for tech industry\n- Connect to startup ecosystem, VC trends, or technical implications`
      : `${dateContext}\n\nProvide analysis in 2-3 sentences. Be direct and specific.\nRules:\n- Lead with the insight - what's significant and why\n- NEVER start with "Breaking news", "Tonight", "The key/dominant narrative is"\n- Start with substance: "Iran faces...", "The escalation in...", "Multiple signals suggest..."\n- CRITICAL FOCAL POINTS are your main actors - explain WHY they matter\n- If focal points show news-signal correlation, flag as escalation\n- Connect dots, be specific about implications`;
    const userPrompt = isTechVariant
      ? `What's the key tech trend or development?\n${headlineText}${intelSection}`
      : `What's the key pattern or risk?\n${headlineText}${intelSection}`;
    return { systemPrompt, userPrompt };
  }

  if (mode === 'translate') {
    const targetLang = variant; // variant carries the target language here
    return {
      systemPrompt: `You are a professional news translator. Translate the following news headlines/summaries into ${targetLang}.\nRules:\n- Maintain the original tone and journalistic style.\n- Do NOT add any conversational filler.\n- Output ONLY the translated text.`,
      userPrompt: `Translate to ${targetLang}:\n${headlines[0]}`,
    };
  }

  // Default: short synthesis.
  const systemPrompt = isTechVariant
    ? `${dateContext}\n\nSynthesize tech news in 2 sentences. Focus on startups, AI, funding, products. Ignore politics unless directly about tech regulation.${langInstruction}`
    : `${dateContext}\n\nSynthesize in 2 sentences max. Lead with substance. NEVER start with "Breaking news" or "Tonight" - just state the insight directly. CRITICAL focal points with news-signal convergence are significant.${langInstruction}`;
  return { systemPrompt, userPrompt: `Key takeaway:\n${headlineText}${intelSection}` };
}

/**
 * Edge handler: POST { headlines, mode?, geoContext?, variant?, lang? }.
 * Checks the shared Redis cache, otherwise calls the Astrai router with
 * the cheapest-model strategy and caches the result for 24h.
 * Responds 200 with { summary, model, provider, cached } on success;
 * error responses carry { fallback: true } so clients can fall through
 * to the next provider.
 *
 * FIX: every JSON response (including 403/400/429/5xx and cache hits)
 * now includes the CORS headers — previously only 405/413/success did,
 * so browsers could not read the error bodies cross-origin.
 */
export default async function handler(request) {
  const corsHeaders = getCorsHeaders(request, 'POST, OPTIONS');

  if (request.method === 'OPTIONS') {
    return new Response(null, { status: 204, headers: corsHeaders });
  }

  if (request.method !== 'POST') {
    return jsonResponse({ error: 'Method not allowed' }, 405, corsHeaders);
  }

  if (isDisallowedOrigin(request)) {
    return jsonResponse({ error: 'Origin not allowed' }, 403, corsHeaders);
  }

  const apiKey = process.env.ASTRAI_API_KEY;
  if (!apiKey) {
    // Soft-skip (200) so clients treat this as "provider unavailable",
    // not an error.
    return jsonResponse(
      { summary: null, fallback: true, skipped: true, reason: 'ASTRAI_API_KEY not configured' },
      200,
      corsHeaders,
    );
  }

  const contentLength = parseInt(request.headers.get('content-length') || '0', 10);
  if (contentLength > MAX_BODY_BYTES) {
    return jsonResponse({ error: 'Payload too large' }, 413, corsHeaders);
  }

  try {
    const { headlines, mode = 'brief', geoContext = '', variant = 'full', lang = 'en' } = await request.json();

    if (!Array.isArray(headlines) || headlines.length === 0) {
      return jsonResponse({ error: 'Headlines array required' }, 400, corsHeaders);
    }

    // Check cache first (shared key format with Groq/OpenRouter endpoints).
    const cacheKey = getCacheKey(headlines, mode, geoContext, variant, lang);
    const cached = await getCachedJson(cacheKey);
    if (cached && typeof cached === 'object' && cached.summary) {
      console.log('[Astrai] Cache hit:', cacheKey);
      return jsonResponse(
        {
          summary: cached.summary,
          model: cached.model || MODEL,
          provider: 'cache',
          cached: true,
        },
        200,
        corsHeaders,
      );
    }

    // Deduplicate similar headlines (same story from multiple sources).
    const uniqueHeadlines = deduplicateHeadlines(headlines.slice(0, MAX_HEADLINES));
    const headlineText = uniqueHeadlines.map((h, i) => `${i + 1}. ${h}`).join('\n');

    const { systemPrompt, userPrompt } = buildPrompts({
      mode,
      variant,
      lang,
      headlines,
      headlineText,
      geoContext,
    });

    const response = await fetch(ASTRAI_API_URL, {
      method: 'POST',
      headers: {
        'x-api-key': apiKey,
        'Content-Type': 'application/json',
        'X-Astrai-App': 'worldmonitor',
      },
      body: JSON.stringify({
        model: MODEL,
        messages: [
          { role: 'system', content: systemPrompt },
          { role: 'user', content: userPrompt },
        ],
        temperature: 0.3,
        max_tokens: 150,
        strategy: 'cheapest', // Summarization is a lightweight task — optimize for cost
      }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      console.error('[Astrai] API error:', response.status, errorText);

      if (response.status === 429) {
        return jsonResponse({ error: 'Rate limited', fallback: true }, 429, corsHeaders);
      }

      return jsonResponse({ error: 'Astrai API error', fallback: true }, response.status, corsHeaders);
    }

    const data = await response.json();
    const summary = data.choices?.[0]?.message?.content?.trim();

    if (!summary) {
      return jsonResponse({ error: 'Empty response', fallback: true }, 500, corsHeaders);
    }

    // Astrai reports the concrete model it routed to in _astrai_meta.
    const actualModel = data._astrai_meta?.model || data.model || MODEL;

    // Store in cache (shared with Groq/OpenRouter endpoints).
    await setCachedJson(
      cacheKey,
      { summary, model: actualModel, timestamp: Date.now() },
      CACHE_TTL_SECONDS,
    );

    return jsonResponse(
      {
        summary,
        model: actualModel,
        provider: 'astrai',
        cached: false,
        tokens: data.usage?.total_tokens || 0,
      },
      200,
      {
        ...corsHeaders,
        // Let CDNs/browsers reuse the successful summary for 30 min.
        'Cache-Control': 'public, max-age=1800, s-maxage=1800, stale-while-revalidate=300',
      },
    );
  } catch (error) {
    console.error('[Astrai] Error:', error.name, error.message, error.stack?.split('\n')[1]);
    return jsonResponse(
      { error: error.message, errorType: error.name, fallback: true },
      500,
      corsHeaders,
    );
  }
}
= 'groq' | 'openrouter' | 'browser' | 'cache'; +export type SummarizationProvider = 'groq' | 'astrai' | 'openrouter' | 'browser' | 'cache'; export interface SummarizationResult { summary: string; @@ -48,6 +48,35 @@ async function tryGroq(headlines: string[], geoContext?: string, lang?: string): } } +async function tryAstrai(headlines: string[], geoContext?: string, lang?: string): Promise { + if (!isFeatureAvailable('aiAstrai')) return null; + try { + const response = await fetch('/api/astrai-summarize', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ headlines, mode: 'brief', geoContext, variant: SITE_VARIANT, lang }), + }); + + if (!response.ok) { + const data = await response.json().catch(() => ({})); + if (data.fallback) return null; + throw new Error(`Astrai error: ${response.status}`); + } + + const data = await response.json(); + const provider = data.cached ? 'cache' : 'astrai'; + console.log(`[Summarization] ${provider === 'cache' ? 
'Redis cache hit' : 'Astrai success'}:`, data.model); + return { + summary: data.summary, + provider: provider as SummarizationProvider, + cached: !!data.cached, + }; + } catch (error) { + console.warn('[Summarization] Astrai failed:', error); + return null; + } +} + async function tryOpenRouter(headlines: string[], geoContext?: string, lang?: string): Promise { if (!isFeatureAvailable('aiOpenRouter')) return null; try { @@ -141,11 +170,15 @@ export async function generateSummary( const groqResult = await tryGroq(headlines, geoContext); if (groqResult) return groqResult; + onProgress?.(3, totalSteps, 'Trying Astrai router...'); + const astraiResult = await tryAstrai(headlines, geoContext); + if (astraiResult) return astraiResult; + onProgress?.(3, totalSteps, 'Trying OpenRouter...'); const openRouterResult = await tryOpenRouter(headlines, geoContext); if (openRouterResult) return openRouterResult; } else { - const totalSteps = 4; + const totalSteps = 5; console.log('[BETA] T5-small not loaded yet, using cloud providers first'); // Kick off model load in background for next time if (mlWorker.isAvailable) { @@ -160,25 +193,29 @@ export async function generateSummary( return groqResult; } - onProgress?.(2, totalSteps, 'Trying OpenRouter...'); + onProgress?.(2, totalSteps, 'Trying Astrai router...'); + const astraiResult = await tryAstrai(headlines, geoContext); + if (astraiResult) return astraiResult; + + onProgress?.(3, totalSteps, 'Trying OpenRouter...'); const openRouterResult = await tryOpenRouter(headlines, geoContext); if (openRouterResult) return openRouterResult; // Last resort: try browser T5 (may have finished loading by now) if (mlWorker.isAvailable) { - onProgress?.(3, totalSteps, 'Waiting for local AI model...'); + onProgress?.(4, totalSteps, 'Waiting for local AI model...'); const browserResult = await tryBrowserT5(headlines, 'summarization-beta'); if (browserResult) return browserResult; } - onProgress?.(4, totalSteps, 'No providers available'); + 
onProgress?.(5, totalSteps, 'No providers available'); } console.warn('[BETA] All providers failed'); return null; } - const totalSteps = 3; + const totalSteps = 4; // Step 1: Try Groq (fast, 14.4K/day with 8b-instant + Redis cache) onProgress?.(1, totalSteps, 'Connecting to Groq AI...'); @@ -187,15 +224,22 @@ export async function generateSummary( return groqResult; } - // Step 2: Try OpenRouter (fallback, 50/day + Redis cache) - onProgress?.(2, totalSteps, 'Trying OpenRouter...'); + // Step 2: Try Astrai (intelligent router — auto-selects cheapest model) + onProgress?.(2, totalSteps, 'Trying Astrai router...'); + const astraiResult = await tryAstrai(headlines, geoContext, lang); + if (astraiResult) { + return astraiResult; + } + + // Step 3: Try OpenRouter (fallback, 50/day + Redis cache) + onProgress?.(3, totalSteps, 'Trying OpenRouter...'); const openRouterResult = await tryOpenRouter(headlines, geoContext, lang); if (openRouterResult) { return openRouterResult; } - // Step 3: Try Browser T5 (local, unlimited but slower) - onProgress?.(3, totalSteps, 'Loading local AI model...'); + // Step 4: Try Browser T5 (local, unlimited but slower) + onProgress?.(4, totalSteps, 'Loading local AI model...'); const browserResult = await tryBrowserT5(headlines); if (browserResult) { return browserResult; @@ -220,7 +264,7 @@ export async function translateText( // Step 1: Try Groq if (isFeatureAvailable('aiGroq')) { - onProgress?.(1, 2, 'Translating with Groq...'); + onProgress?.(1, 3, 'Translating with Groq...'); try { const response = await fetch('/api/groq-summarize', { method: 'POST', @@ -241,9 +285,32 @@ export async function translateText( } } - // Step 2: Try OpenRouter + // Step 2: Try Astrai + if (isFeatureAvailable('aiAstrai')) { + onProgress?.(2, 3, 'Translating with Astrai...'); + try { + const response = await fetch('/api/astrai-summarize', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + headlines: [text], + mode: 
'translate', + variant: targetLang + }), + }); + + if (response.ok) { + const data = await response.json(); + return data.summary; + } + } catch (e) { + console.warn('Astrai translation failed', e); + } + } + + // Step 3: Try OpenRouter if (isFeatureAvailable('aiOpenRouter')) { - onProgress?.(2, 2, 'Translating with OpenRouter...'); + onProgress?.(3, 3, 'Translating with OpenRouter...'); try { const response = await fetch('/api/openrouter-summarize', { method: 'POST',