diff --git a/.kiro/skills/interview-extractor.md b/.kiro/skills/interview-extractor.md new file mode 100644 index 0000000..44f9a7a --- /dev/null +++ b/.kiro/skills/interview-extractor.md @@ -0,0 +1,241 @@ +--- +name: interview-extractor +description: > + Extracts and reformats a frontend interview experience from a URL or raw + pasted text into a clean, structured, third-person markdown case study. + Works universally — no external APIs required. Just give it a link or text. +triggers: + - 'extract interview' + - 'format this interview' + - 'rewrite this experience' + - 'process this article' + - 'structure this interview' + - 'convert to interview format' + - 'parse interview experience' +--- + +# Interview Experience Extractor Skill + +## Purpose + +Transform raw interview experience content (from a URL or pasted text) into a +clean, structured, third-person case study that is genuinely useful for +candidates preparing for frontend engineering interviews. + +--- + +## Step-by-Step Instructions + +When this skill is triggered, follow these steps in order: + +### Step 1 — Acquire the content + +- **If a URL is given**: Fetch the full text content of the page. Strip HTML + tags, navigation, footers, ads, and any non-article boilerplate. Keep only + the article body. +- **If raw text is given**: Use it directly. Strip any Medium/DEV/Hashnode + header noise (author name, read-time badge, clap counts, "Follow" buttons). +- If neither is provided, ask: "Please share either a URL or paste the raw + article text." + +### Step 2 — Assess quality + +Score the content 1–10 for being a **genuine frontend/web interview experience +narrative** using this rubric: + +| Score | Meaning | +| ----- | -------------------------------------------------------------------------------------------------- | +| 9–10 | Detailed narrative: specific rounds, verbatim/paraphrased questions, company named, outcome stated | +| 7–8 | Good narrative but missing some details (e.g. 
outcome unknown, fewer questions) | +| 5–6 | Mostly tips/guide format — not a personal experience | +| 1–4 | Not a frontend interview experience at all | + +- If score < 7, respond: "This doesn't appear to be a detailed frontend + interview experience (score: X/10). Here's why: [brief reason]. I can still + attempt a basic summary if you'd like." +- If score ≥ 7, proceed to Step 3. + +### Step 3 — Extract metadata + +Identify from the content: + +- `company_name` — the company being interviewed at +- `role` — job title (e.g. "Senior Software Engineer — Frontend") +- `level` — one of: intern / junior / mid / senior / staff +- `outcome` — one of: selected / rejected / pending / unknown +- `location` — city, remote, or hybrid if mentioned +- `author` — name of the person who wrote the experience (if available) +- `source_platform` — where it was originally published (Medium, DEV, + Hashnode, LinkedIn, etc.) derived from the URL or article metadata +- `ctc` — compensation details if mentioned, else omit +- `rounds_count` — total number of interview rounds + +### Step 4 — Rewrite in strict third person + +**CRITICAL RULE**: The entire output MUST be in **strict third person**. + +| ❌ WRONG (first person) | ✅ CORRECT (third person) | +| ----------------------------- | --------------------------------------------------- | +| "I was asked about closures" | "The candidate was asked about JavaScript closures" | +| "My first round was DSA" | "The first round focused on DSA" | +| "I solved it using a hashmap" | "The candidate approached it using a hashmap" | +| "I got the offer" | "The candidate received an offer" | + +**NDA / No questions disclosed**: If the article explicitly states an NDA and +gives no questions, acknowledge this in the round breakdown and focus on +round structure, skills tested, and difficulty instead of specific questions. + +### Step 5 — Output the structured markdown + +Produce the following sections in this exact order. Do not skip any section. 
+If data is unavailable for a section, write "Not mentioned" rather than +omitting the section. + +--- + +## Output Format + +```markdown +## Overview + +[2–3 sentences: company, role, total rounds, outcome. Third person only.] + +## Role & Compensation Details + +- **Position**: [role title] +- **Company**: [company name] +- **Location**: [city / remote / hybrid] +- **Level**: [intern/junior/mid/senior/staff] +- **Experience Required**: [years if mentioned, else "Not mentioned"] +- **CTC / Stipend**: [if mentioned, else omit this line] +- **Outcome**: [Selected ✅ / Rejected ❌ / Pending ⏳ / Unknown] + +## Interview Process Summary + +[Bullet list of all rounds in order — name, type, duration if available] + +- Round 1: [Name] — [Type] — [~duration if known] +- Round 2: ... + +## Round-by-Round Breakdown + +### Round N — [Round Name] + +**Type**: [coding / machine-coding / system-design / conceptual / behavioral / hr] +**Difficulty**: [Easy / Medium / Hard] + +**Questions asked:** + +1. [Question verbatim or paraphrased — mark "(paraphrased)" if not exact] +2. ... + +**What the interviewer focused on:** +[What skills/depth were being evaluated] + +**Tips for this round:** +[1–2 specific, actionable tips derived from this experience] + +--- + +[Repeat for each round] + +## Key Technical Topics Covered + +- **JavaScript / TypeScript**: [specific topics: closures, promises, generics…] +- **React / Framework**: [hooks, rendering, state management…] +- **CSS / Layout**: [flexbox, grid, animations…] +- **System Design**: [component design, API design, caching…] +- **DSA**: [data structures and algorithms covered] +- **Behavioral**: [themes: conflict, leadership, growth…] + +## Preparation Tips + +1. [Concrete, actionable tip specific to this company/role] +2. [Another tip] +3. [Another tip] +4. [Another tip — optional] +5. 
[Another tip — optional] + +## Verdict + +[2–3 sentences: outcome, overall difficulty rating (1–5 stars), whether the +candidate recommends the process/company, and any standout observation about +the hiring culture.] +``` + +--- + +## Metadata Block + +After the markdown, output this JSON block (fenced) for programmatic use: + +```json +{ + "company_name": "string", + "role": "string", + "level": "intern|junior|mid|senior|staff", + "outcome": "selected|rejected|pending|unknown", + "difficulty": 1-5, + "quality_score": 1-10, + "rounds": [ + { + "name": "string", + "type": "coding|machine-coding|system-design|conceptual|behavioral|hr", + "difficulty": "easy|medium|hard" + } + ], + "topics": ["react", "javascript", "..."], + "questions": [ + { + "question": "string", + "type": "machine-coding|dsa|system-design|conceptual|behavioral", + "difficulty": "easy|medium|hard", + "topics": ["string"] + } + ], + "source_platform": "Medium|DEV|Hashnode|LinkedIn|other", + "suggested_slug": "kebab-case-seo-slug-max-8-words" +} +``` + +--- + +## Edge Case Handling + +| Situation | How to handle | +| ------------------------------- | ---------------------------------------------------------------------------------------------------------------- | +| NDA — no questions disclosed | Focus on round structure, skills tested, difficulty. Note "Questions not disclosed (NDA)" in each round section. | +| First person throughout | Rewrite entirely in third person. Do not leave any "I/my/me/we" references. | +| No clear outcome | Set outcome to "unknown". Do not guess. | +| Multiple companies mentioned | Extract the PRIMARY company being interviewed at. | +| Tips/guide format (score 5–6) | Offer a partial extraction with a note that it's a guide, not a narrative. | +| Very short content (<500 words) | Note that detail is limited and some sections may be incomplete. | +| Paywalled or inaccessible URL | Ask the user to paste the text directly. 
/**
 * Known publishing hosts mapped to the label shown in the UI.
 * A key matches the host itself and any subdomain of it
 * (e.g. `user.hashnode.dev`, `name.substack.com`).
 */
const SOURCE_LABELS: Record<string, string> = {
  'medium.com': 'Medium',
  'dev.to': 'DEV Community',
  'hashnode.com': 'Hashnode',
  // Hashnode blogs are served from <user>.hashnode.dev, not hashnode.com.
  'hashnode.dev': 'Hashnode',
  'substack.com': 'Substack',
  'linkedin.com': 'LinkedIn',
  'github.com': 'GitHub',
  'leetcode.com': 'LeetCode',
  'geeksforgeeks.org': 'GeeksForGeeks',
};

/** Second-level labels that belong to the public suffix under a 2-letter TLD (co.uk, com.au, ...). */
const SECOND_LEVEL_SUFFIXES = new Set(['co', 'com', 'org', 'net', 'ac', 'gov', 'edu']);

/**
 * Derive a human-readable source name from a URL.
 *
 *   "https://medium.com/..."            -> "Medium"
 *   "https://user.hashnode.dev/..."     -> "Hashnode"
 *   "https://engineering.acme.com/..."  -> "Acme"
 *
 * @param url      Absolute URL of the original article; may be null/empty.
 * @param fallback Label returned when no name can be derived (missing or
 *                 unparseable URL, or an IP-literal host).
 * @returns A display label; never throws.
 */
function sourceFromUrl(url: string | null, fallback = 'Web'): string {
  if (!url) return fallback;

  let host: string;
  try {
    host = new URL(url).hostname.toLowerCase().replace(/^www\./, '');
  } catch {
    // Not an absolute URL.
    return fallback;
  }

  for (const [domain, label] of Object.entries(SOURCE_LABELS)) {
    if (host === domain || host.endsWith(`.${domain}`)) return label;
  }

  // IPv4 / IPv6 literals carry no meaningful site name.
  if (/^[\d.]+$/.test(host) || host.includes(':')) return fallback;

  const labels = host.split('.').filter(Boolean);
  if (labels.length === 0) return fallback;

  // Use the registrable name rather than the left-most subdomain:
  // the label before the TLD, or one further left for suffixes like "co.uk".
  let rootIndex = Math.max(labels.length - 2, 0);
  const tld = labels[labels.length - 1] ?? '';
  if (
    labels.length >= 3 &&
    tld.length === 2 &&
    SECOND_LEVEL_SUFFIXES.has(labels[rootIndex] ?? '')
  ) {
    rootIndex -= 1;
  }

  const root = labels[rootIndex] ?? '';
  return root ? root.charAt(0).toUpperCase() + root.slice(1) : fallback;
}
Pagination - const from = (page - 1) * limit; - const to = from + limit - 1; - query = query.range(from, to); - - const { data, count, error } = await query; + // Fetch ALL matching (no pagination yet — we merge with captured first) + const { data: unifiedData, count: unifiedCount, error } = await query; if (error) { console.error('Supabase query error:', error); throw error; } - // Transform to match local UI expectations - const transformedData = data.map((item) => ({ + // ── 2. Query captured_content (extension captures) ───────────────────── + // Only include when source is 'all' or 'web' (extension captures are web-sourced) + let capturedRows: any[] = []; + if (source === 'all' || source === 'web') { + let capturedQuery = supabase + .from('captured_content') + .select( + 'id, title, summary, slug, company, source, original_url, published_at, captured_at, topics, outcome, role' + ) + .eq('status', 'published'); + + if (companies && companies.length > 0) { + capturedQuery = capturedQuery.in('company', companies); + } + + if (year) { + capturedQuery = capturedQuery.gte( + 'published_at', + `${year}-01-01T00:00:00Z` + ); + capturedQuery = capturedQuery.lte( + 'published_at', + `${year}-12-31T23:59:59Z` + ); + } + + if (search && search.trim() !== '') { + const safeSearch = search.trim().replace(/%/g, '\\%'); + capturedQuery = capturedQuery.or( + `title.ilike.%${safeSearch}%,company.ilike.%${safeSearch}%,summary.ilike.%${safeSearch}%` + ); + } + + const { data: capturedData } = await capturedQuery; + + capturedRows = (capturedData || []).map((item) => ({ + id: item.id, + rawId: item.id, + title: item.title, + company: item.company, + description: item.summary || '', + tags: item.topics || [], + status: 'published', + date: item.published_at || item.captured_at, + type: 'captured', + slug: item.slug, + source: sourceFromUrl(item.original_url, item.source || 'Web'), + author: 'Community Member', + companyDomain: null, + blogLink: null, + link: item.slug ? 
`/interview-experience/${item.slug}` : '#', + imageSrc: '', + })); + } + + // ── 3. Merge, sort, paginate ──────────────────────────────────────────── + const unifiedRows = (unifiedData || []).map((item) => ({ id: item.id, rawId: item.raw_id, title: item.title, @@ -105,15 +176,31 @@ export async function fetchPaginatedExperiences(filters: ExperienceFilters) { author: item.author, companyDomain: item.company_domain, blogLink: item.blog_link, - // Helper function logically ported from getExperiences.ts link: getLink(item), - imageSrc: '', // Placeholder expected by UI + imageSrc: '', })); - return { - data: transformedData, - totalCount: count || 0, - }; + // Deduplicate by slug (captured may overlap with scraped in rare cases) + const seenSlugs = new Set(); + const allRows = [...unifiedRows, ...capturedRows].filter((item) => { + if (!item.slug) return true; + if (seenSlugs.has(item.slug)) return false; + seenSlugs.add(item.slug); + return true; + }); + + // Sort merged set + allRows.sort((a, b) => { + const da = new Date(a.date || 0).getTime(); + const db = new Date(b.date || 0).getTime(); + return sortBy === 'oldest' ? 
da - db : db - da; + }); + + const totalCount = allRows.length; + const from = (page - 1) * limit; + const paginatedData = allRows.slice(from, from + limit); + + return { data: paginatedData, totalCount }; } catch (error) { console.error('Failed to fetch paginated experiences:', error); return { data: [], totalCount: 0 }; @@ -132,17 +219,23 @@ function getLink(item: any) { export async function fetchCompanyAndYearStats() { try { - const { data, error } = await supabase - .from('unified_experiences') - .select('company, date') - .neq('status', 'pending'); + const [unifiedRes, capturedRes] = await Promise.all([ + supabase + .from('unified_experiences') + .select('company, date') + .neq('status', 'pending'), + supabase + .from('captured_content') + .select('company, published_at') + .eq('status', 'published'), + ]); - if (error) throw error; + if (unifiedRes.error) throw unifiedRes.error; const companies = new Set(); const years = new Set(); - data.forEach((item) => { + (unifiedRes.data || []).forEach((item) => { if (item.company) companies.add(item.company); if (item.date) { const year = new Date(item.date).getFullYear().toString(); @@ -150,6 +243,14 @@ export async function fetchCompanyAndYearStats() { } }); + (capturedRes.data || []).forEach((item) => { + if (item.company) companies.add(item.company); + if (item.published_at) { + const year = new Date(item.published_at).getFullYear().toString(); + if (!isNaN(Number(year))) years.add(year); + } + }); + return { companies: Array.from(companies).sort(), years: Array.from(years).sort().reverse(), diff --git a/app/admin/captured/page.tsx b/app/admin/captured/page.tsx new file mode 100644 index 0000000..145f328 --- /dev/null +++ b/app/admin/captured/page.tsx @@ -0,0 +1,260 @@ +'use client'; + +import { useState, useEffect, useRef } from 'react'; +import React from 'react'; +import { getSupabaseBrowserClient } from '@/lib/supabase-browser'; + +type CapturedItem = { + id: string; + title: string; + original_url: string; + 
/**
 * Run a pipeline action (`process`, or any other action string the manage
 * endpoint accepts) for one captured item and surface the outcome in
 * `actionMsg`.
 *
 * While a `process` request is in flight, a rotating progress message is
 * shown. The rotation timer and the per-row loading flag are always released
 * in `finally`, whatever happens to the request.
 *
 * @param id     Row id of the captured item.
 * @param action Action name forwarded to POST /api/pipeline/manage.
 */
const handleAction = async (id: string, action: string) => {
  setActionLoading(id);
  setActionMsg(null);

  const processSteps: string[] = [
    '📤 Sending content to Gemini AI...',
    '🧠 AI is scoring & rewriting...',
    '✍️ Extracting company, rounds, questions...',
  ];

  // Cycle contextual messages during AI processing.
  let stepTimer: ReturnType<typeof setInterval> | null = null;
  const stopSteps = () => {
    if (stepTimer !== null) {
      clearInterval(stepTimer);
      stepTimer = null;
    }
  };
  if (action === 'process') {
    let stepIndex = 0;
    setActionMsg({ id, msg: processSteps[0] ?? '', ok: true });
    stepTimer = setInterval(() => {
      stepIndex = (stepIndex + 1) % processSteps.length;
      setActionMsg({ id, msg: processSteps[stepIndex] ?? '', ok: true });
    }, 4000);
  }

  try {
    const res = await fetch('/api/pipeline/manage', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ id, action }),
    });
    stopSteps();

    // An error page (HTML, empty body) must not be reported as a network
    // failure: fall back to an empty payload and let `res.ok` decide.
    const data: {
      score?: number;
      status?: string;
      slug?: string;
      company?: string;
      message?: string;
      error?: string;
    } = (await res.json().catch(() => null)) ?? {};

    if (!res.ok) {
      setActionMsg({ id, msg: `❌ ${data.error || 'Failed'}`, ok: false });
      return;
    }

    await fetchItems();

    if (action !== 'process') {
      setActionMsg({ id, msg: `✅ ${data.message || 'Done'}`, ok: true });
      return;
    }

    const score = typeof data.score === 'number' ? data.score : 0;
    const scoreEmoji = score >= 8 ? '🚀' : score >= 7 ? '👀' : '❌';
    const statusLabel =
      data.status === 'published'
        ? 'Auto-published!'
        : data.status === 'review'
          ? 'Needs your review'
          : 'Rejected (low quality)';
    const link = data.slug ? `/interview-experience/${data.slug}` : null;
    setActionMsg({
      id,
      msg: `${scoreEmoji} Score ${score}/10 · ${statusLabel}${data.company ? ` · ${data.company}` : ''}`,
      ok: score >= 7,
      link,
    });
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    setActionMsg({ id, msg: `❌ Network error: ${reason}`, ok: false });
  } finally {
    stopSteps();
    setActionLoading(null);
  }
};
+ Loading... +
+ ); + + return ( +
+
+
+
+

Captured Content

+

+ Extension & manual captures. Process individually or wait for + cron. +

+
+
+ {items.filter((i) => i.status === 'queued').length} queued ·{' '} + {items.filter((i) => i.status === 'published').length} published +
+
+ + {items.length === 0 ? ( +

+ No captured content yet. Use the extension or /admin/ingest to add. +

+ ) : ( +
+ {items.map((item) => ( +
+
+
+
+ + {item.status} + + {item.quality_score && ( + + ⭐ {item.quality_score}/10 + + )} + {item.company && ( + + 🏢 {item.company} + + )} + + via {item.source} + +
+

{item.title}

+ + {item.original_url} + +
+ +
+ {(item.status === 'queued' || + item.status === 'rejected') && ( + + )} + {item.status === 'published' && ( + + )} + {item.status === 'review' && ( + + )} + {item.status !== 'rejected' && + item.status !== 'published' && ( + + )} +
+
+ {actionMsg?.id === item.id && ( +
+ {actionMsg.msg} + {actionMsg.link && ( + + View post → + + )} +
+ )} +
+ ))} +
+ )} +
+
/**
 * Validate the form and POST the pasted article to the ingest pipeline.
 *
 * Validation runs on trimmed values so whitespace-only input is rejected.
 * The loading flag is always cleared in `finally`. A response whose body is
 * not JSON is reported through `res.ok` and not as a thrown parse error.
 *
 * @param e Submit event of the ingest form.
 */
const handleSubmit = async (e: React.FormEvent) => {
  e.preventDefault();

  const trimmedUrl = url.trim();
  const trimmedContent = content.trim();
  if (!trimmedUrl || !trimmedContent) {
    setStatus({ msg: 'URL and content are required', type: 'error' });
    return;
  }
  if (trimmedContent.length < 200) {
    setStatus({ msg: 'Content too short (min 200 chars)', type: 'error' });
    return;
  }

  setLoading(true);
  setStatus(null);

  try {
    const res = await fetch('/api/pipeline/ingest', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        // NOTE(security): NEXT_PUBLIC_* variables are inlined into the browser
        // bundle, so this "secret" is readable by every visitor. The ingest
        // call should go through a server action / route that checks the
        // admin session and attaches the key server-side.
        'x-capture-key': process.env.NEXT_PUBLIC_CAPTURE_SECRET || '',
      },
      body: JSON.stringify({
        url: trimmedUrl,
        title: title.trim() || 'Untitled',
        content: trimmedContent,
        source: 'manual',
        capturedAt: new Date().toISOString(),
      }),
    });

    // Error pages may not be JSON; fall back to an empty payload.
    const data: { message?: string; error?: string } =
      (await res.json().catch(() => null)) ?? {};

    if (res.ok) {
      setStatus({
        msg: '\u2705 ' + (data.message || 'Captured!'),
        type: 'success',
      });
      setUrl('');
      setTitle('');
      setContent('');
    } else {
      setStatus({ msg: '\u274c ' + (data.error || 'Failed'), type: 'error' });
    }
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    setStatus({ msg: '\u274c ' + reason, type: 'error' });
  } finally {
    setLoading(false);
  }
};
+
+

Ingest Experience

+

+ Paste interview experience content to process via AI pipeline. +

+ +
+
+ + setUrl(e.target.value)} + placeholder='https://medium.com/...' + className='w-full px-4 py-2 rounded-lg border border-border bg-card text-foreground' + required + /> +
+ +
+ + setTitle(e.target.value)} + placeholder='My Google Frontend Interview Experience' + className='w-full px-4 py-2 rounded-lg border border-border bg-card text-foreground' + /> +
+ +
+ + +
0 chars
+ + +
/**
 * Popup script for the capture extension.
 *
 * On open it pre-fills URL/title from the active tab, tries to extract the
 * article text from the page, and on "Capture & Send" posts the content to the
 * ingest endpoint.
 */

// Endpoints tried in order; the next one is used only when the previous one
// could not be reached at all.
const API_URLS = [
  'https://www.frontend-junction.com/api/pipeline/ingest',
  'http://localhost:3000/api/pipeline/ingest',
];

// Set your capture key here or in extension options.
// NOTE(security): the default key below ships in the extension source and is
// therefore public; the server should not treat it as a secret.
const CAPTURE_KEY = readCaptureKey();

/** Read the capture key from storage, tolerating unavailable storage. */
function readCaptureKey() {
  try {
    return localStorage.getItem('fj_capture_key') || 'fj-capture-2026';
  } catch {
    return 'fj-capture-2026';
  }
}

/**
 * POST `body` as JSON to the first reachable endpoint.
 *
 * Only a network-level failure (fetch rejects) moves on to the next endpoint.
 * Once a server has answered, its response is returned even if the body is
 * not JSON, so a request is never re-sent to a second endpoint after the
 * first one already received it.
 *
 * @param {object} body Payload to send.
 * @returns {Promise<{res: Response, data: object}>} Response plus parsed body
 *          (`{}` when the body is empty or not JSON).
 * @throws {Error} When no endpoint could be reached.
 */
async function postToAPI(body) {
  const payload = JSON.stringify(body);
  for (const url of API_URLS) {
    let res;
    try {
      res = await fetch(url, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'x-capture-key': CAPTURE_KEY,
        },
        body: payload,
      });
    } catch {
      continue; // endpoint unreachable, try the next one
    }

    let data = {};
    try {
      data = (await res.json()) ?? {};
    } catch {
      // Non-JSON body (e.g. an HTML error page): keep the empty payload.
    }
    return { res, data };
  }
  throw new Error('All endpoints unreachable');
}

/** Wire up the popup once its DOM is ready. */
async function initPopup() {
  const urlEl = document.getElementById('url');
  const titleEl = document.getElementById('title');
  const contentEl = document.getElementById('content');
  const charCount = document.getElementById('char-count');
  const btn = document.getElementById('capture-btn');
  const status = document.getElementById('status');

  const showStatus = (msg, type) => {
    status.textContent = msg;
    status.className = `status ${type}`;
  };
  const updateCharCount = () => {
    charCount.textContent = `${contentEl.value.length} chars`;
  };

  // Attach listeners first so the popup stays usable even when the tab
  // lookup or the auto-extraction below fails or is slow.
  contentEl.addEventListener('input', updateCharCount);

  btn.addEventListener('click', async () => {
    const content = contentEl.value.trim();
    if (!content) {
      showStatus('Content is required', 'error');
      return;
    }
    if (content.length < 200) {
      showStatus('Too short (min 200 chars)', 'error');
      return;
    }

    btn.disabled = true;
    btn.textContent = 'Sending...';

    try {
      const { res, data } = await postToAPI({
        url: urlEl.value,
        title: titleEl.value,
        content: content.substring(0, 50000),
        source: 'extension',
        capturedAt: new Date().toISOString(),
      });
      if (res.ok) {
        showStatus('\u2705 Captured successfully!', 'success');
      } else {
        showStatus(`\u274c ${data.error || 'Failed'}`, 'error');
      }
    } catch (e) {
      showStatus(`\u274c Network error: ${e.message}`, 'error');
    } finally {
      btn.disabled = false;
      btn.textContent = 'Capture & Send';
    }
  });

  // Get current tab info; the query can come back empty.
  let tab;
  try {
    [tab] = await chrome.tabs.query({ active: true, currentWindow: true });
  } catch {
    tab = undefined;
  }
  urlEl.value = tab?.url || '';
  titleEl.value = tab?.title || '';

  // Auto-extract page text.
  try {
    if (tab?.id == null) throw new Error('No active tab');
    const [result] = await chrome.scripting.executeScript({
      target: { tabId: tab.id },
      func: extractPageContent,
    });
    // Do not overwrite text the user already typed or pasted.
    if (result?.result && !contentEl.value) {
      contentEl.value = result.result;
      updateCharCount();
    }
  } catch {
    contentEl.placeholder = 'Could not auto-extract. Paste content here...';
  }
}

if (typeof document !== 'undefined') {
  document.addEventListener('DOMContentLoaded', initPopup);
}

/**
 * Extract the main article text of the current page.
 *
 * This function runs in the page context: it is passed as `func` to
 * `chrome.scripting.executeScript`, so it must stay self-contained and must
 * not reference anything declared outside its own body.
 *
 * @returns {string} Cleaned article text, at most 50000 characters.
 */
function extractPageContent() {
  const selectors = [
    'article',
    'main',
    '[role="main"]',
    '.post-content',
    '.article-body',
    '.entry-content',
    '.prose',
    '.blog-post',
    '.story-content',
  ];
  for (const sel of selectors) {
    const el = document.querySelector(sel);
    if (el && el.innerText.trim().length > 300) {
      return cleanText(el.innerText);
    }
  }
  return cleanText(document.body.innerText);

  function cleanText(text) {
    // Drop short UI-chrome lines ("Sign in", "Share this article", "Clap 12").
    // A line is removed only when the keyword is a whole word, the rest of the
    // line is at most 30 characters, and it has no clause punctuation. This
    // keeps real sentences that merely start with one of these words, such as
    // "Follow-up question: ..." or "Like most candidates, ...".
    const boilerplateLine =
      /^(?:Skip to|Navigate to|Menu|Search|Sign [Ii]n|Log [Ii]n|Subscribe|Newsletter|Share|Tweet|Like|Comment|Follow|Clap|Cookie|Accept|Privacy|Terms)(?!\S)[^\n:,;?!]{0,30}$/gm;
    return text
      .replace(boilerplateLine, '')
      .replace(/\n{3,}/g, '\n\n')
      .trim()
      .substring(0, 50000);
  }
}
// Best-effort domain for a company when no explicit domain is stored.
// A curated website in COMPANY_WEBSITES wins (its hostname without "www.").
// Otherwise the lowercased name is reduced to [a-z0-9] and ".com" is appended,
// e.g. "Google Pay" → "googlepay.com", "Tata 1mg" → "tata1mg.com".
const guessDomain = (name: string): string => {
  const key = name.toLowerCase().trim();

  const knownSite = COMPANY_WEBSITES[key];
  if (knownSite) {
    try {
      const { hostname } = new URL(knownSite);
      return hostname.replace(/^www\./, '');
    } catch {
      // Malformed curated URL: fall through to the name-based guess.
    }
  }

  // Whitespace is not in [a-z0-9], so one pass removes spaces and symbols.
  const slug = key.replace(/[^a-z0-9]/g, '');
  return slug + '.com';
};
Guess domain from company name and try logo.dev (covers multi-word names too) + if (company) { + const guessed = guessDomain(company); + return `https://img.logo.dev/${guessed}?token=${LOGO_DEV_PUBLIC_KEY}`; } return null; }; - const logoUrl = getLogoUrl(); + const [logoError, setLogoError] = React.useState(false); + React.useEffect(() => { + setLogoError(false); + }, [company, companyDomain]); + const logoUrl = !logoError ? getLogoUrl() : null; return (
= ({ loading='eager' className='object-contain p-0.5' unoptimized={logoUrl.includes('logo.dev')} + onError={() => setLogoError(true)} />
@@ -509,7 +533,7 @@ const CardComponent: React.FC = ({

{title} diff --git a/components/common/site-header.tsx b/components/common/site-header.tsx index c0a1216..81fee89 100644 --- a/components/common/site-header.tsx +++ b/components/common/site-header.tsx @@ -32,6 +32,10 @@ export function SiteHeader({ isAdmin = false }: { isAdmin?: boolean }) { const { user } = useAuth(); const pathname = usePathname(); + // Derive admin status from the live auth context. + // The isAdmin prop is kept for backward compat but the context always wins. + const isAdminUser = isAdmin || user?.role === 'admin'; + const [open, setOpen] = React.useState(false); return ( @@ -68,7 +72,7 @@ export function SiteHeader({ isAdmin = false }: { isAdmin?: boolean }) { {item.name} ))} - {isAdmin && ( + {isAdminUser && ( <> ))} - {isAdmin && ( + {isAdminUser && (
= { + 'medium.com': 'Medium', + 'dev.to': 'DEV Community', + 'hashnode.com': 'Hashnode', + 'substack.com': 'Substack', + 'linkedin.com': 'LinkedIn', + 'github.com': 'GitHub', + 'leetcode.com': 'LeetCode', + 'geeksforgeeks.org': 'GeeksForGeeks', + }; + for (const [key, label] of Object.entries(MAP)) { + if (host === key || host.endsWith('.' + key)) return label; + } + return host.split('.')[0].replace(/^./, (c) => c.toUpperCase()); + } catch { + return fallback; + } +} + export async function getExperienceBySlug(identifierEncoded: string) { const identifier = decodeURIComponent(identifierEncoded); console.log(`[getExperienceBySlug] Lookup: "${identifier}"`); @@ -30,7 +54,41 @@ export async function getExperienceBySlug(identifierEncoded: string) { return getExperienceById(identifier); } - // 2. Try scraped_experiences (SEO Content) + // 2. Try captured_content (Extension captures — highest quality, checked first) + const { data: capturedData, error: capturedError } = await supabaseAdmin + .from('captured_content') + .select('*') + .eq('slug', identifier) + .eq('status', 'published') + .single(); + + if (capturedError && capturedError.code !== 'PGRST116') { + console.error(`[getExperienceBySlug] Captured Error:`, capturedError); + } + + if (capturedData) { + console.log( + `[getExperienceBySlug] Found in captured_content: ${capturedData.title}` + ); + return { + id: capturedData.id, + title: capturedData.title, + summary: capturedData.summary, + content: capturedData.processed_content || capturedData.raw_content, + author: 'Community Member', + source: sourceFromUrl( + capturedData.original_url, + capturedData.source || 'Web' + ), + original_link: capturedData.original_url, + date: capturedData.published_at || capturedData.captured_at, + tags: capturedData.topics || [], + company: capturedData.company, + type: 'captured', + }; + } + + // 3. 
Try scraped_experiences (SEO Content) let { data: scrapedData, error: scrapedError } = await supabaseAdmin .from('scraped_experiences') .select('*') @@ -62,7 +120,7 @@ export async function getExperienceBySlug(identifierEncoded: string) { }; } - // 3. Try new_interview (User Submissions) + // 4. Try new_interview (User Submissions) const { data: userData, error: userError } = await supabaseAdmin .from('new_interview') .select('*') @@ -88,7 +146,7 @@ export async function getExperienceBySlug(identifierEncoded: string) { }; } - // 4. Try experiences (Legacy) + // 5. Try experiences (Legacy) const { data: legacyData, error: legacyError } = await supabaseAdmin .from('experiences') .select('*') diff --git a/lib/parse-ai-json.ts b/lib/parse-ai-json.ts new file mode 100644 index 0000000..22bb7ff --- /dev/null +++ b/lib/parse-ai-json.ts @@ -0,0 +1,56 @@ +/** + * Robustly extracts and parses JSON from a Gemini AI response. + * Handles: markdown fences, leading/trailing text, truncated content field. + */ +export function parseAIJson(raw: string): any { + let text = raw.trim(); + + // 1. Strip markdown code fences + text = text.replace(/^```(?:json)?\s*/i, '').replace(/\s*```\s*$/, ''); + + // 2. Find the outermost JSON object boundaries + const start = text.indexOf('{'); + const end = text.lastIndexOf('}'); + if (start !== -1 && end !== -1 && end > start) { + text = text.slice(start, end + 1); + } + + // 3. Try direct parse first + try { + return JSON.parse(text); + } catch { + // 4. The `content` field (multi-line markdown) is the usual culprit. + // Extract all fields except `content` safely, then splice content back. + try { + // Replace the content value with a placeholder, parse, then restore. 
+ const contentMatch = text.match(/"content"\s*:\s*"([\s\S]*?)(?; + +let _client: SupabaseClient | null = null; + +const getClient = (): SupabaseClient => { + if (!_client) { + _client = createClient(); + } + return _client; +}; + +export const supabase = new Proxy({} as SupabaseClient, { + get(_target, prop, receiver) { + const client = getClient(); + const value = Reflect.get(client as object, prop, receiver); + // Bind methods to the real client so `this` is preserved. + return typeof value === 'function' ? value.bind(client) : value; + }, +}); diff --git a/lighthouserc.json b/lighthouserc.json index fc4cfc6..dd27737 100644 --- a/lighthouserc.json +++ b/lighthouserc.json @@ -13,30 +13,29 @@ "assert": { "preset": "lighthouse:no-pwa", "assertions": { - "categories:performance": [ - "warn", - { - "minScore": 0.9 - } - ], - "categories:accessibility": [ - "error", - { - "minScore": 0.9 - } - ], - "categories:best-practices": [ - "warn", - { - "minScore": 0.9 - } - ], - "categories:seo": [ - "warn", - { - "minScore": 0.9 - } - ] + "categories:performance": ["warn", { "minScore": 0.9 }], + "categories:accessibility": ["error", { "minScore": 0.9 }], + "categories:best-practices": ["warn", { "minScore": 0.9 }], + "categories:seo": ["warn", { "minScore": 0.9 }], + + "errors-in-console": "off", + + "image-delivery-insight": "warn", + "legacy-javascript-insight": "warn", + "legacy-javascript": "warn", + "network-dependency-tree-insight": "warn", + "render-blocking-insight": "warn", + "render-blocking-resources": "warn", + "unused-css-rules": "warn", + "unused-javascript": "warn", + "uses-responsive-images": "warn", + "uses-optimized-images": "warn", + "modern-image-formats": "warn", + "uses-rel-preconnect": "warn", + "largest-contentful-paint": "warn", + "interactive": "warn", + "total-byte-weight": "warn", + "dom-size": "warn" } } } diff --git a/scripts/create-captured-content.sql b/scripts/create-captured-content.sql new file mode 100644 index 0000000..7c6478d --- 
/dev/null +++ b/scripts/create-captured-content.sql @@ -0,0 +1,50 @@ +-- Separate bucket for extension/manual captures +-- Completely independent from scraped_experiences pipeline +-- Idempotent: safe to run multiple times + +CREATE TABLE IF NOT EXISTS captured_content ( + id UUID DEFAULT gen_random_uuid() PRIMARY KEY, + title TEXT NOT NULL DEFAULT 'Untitled', + original_url TEXT UNIQUE NOT NULL, + raw_content TEXT NOT NULL, + source TEXT NOT NULL DEFAULT 'extension', -- 'extension' | 'manual' + + -- AI processing results + status TEXT NOT NULL DEFAULT 'queued' CHECK (status IN ('queued', 'processing', 'approved', 'review', 'rejected', 'published')), + ai_processed BOOLEAN DEFAULT FALSE, + quality_score INT, + processed_content TEXT, + slug TEXT UNIQUE, + company TEXT, + role TEXT, + level TEXT, + outcome TEXT, + rounds JSONB DEFAULT '[]', + topics TEXT[] DEFAULT '{}', + summary TEXT, + + -- Timestamps + captured_at TIMESTAMPTZ DEFAULT NOW(), + processed_at TIMESTAMPTZ, + published_at TIMESTAMPTZ, + created_at TIMESTAMPTZ DEFAULT NOW() +); + +-- Indexes (IF NOT EXISTS requires Postgres 9.5+) +CREATE INDEX IF NOT EXISTS idx_captured_status ON captured_content(status); +CREATE INDEX IF NOT EXISTS idx_captured_ai_processed ON captured_content(ai_processed); +CREATE INDEX IF NOT EXISTS idx_captured_company ON captured_content(company); +CREATE INDEX IF NOT EXISTS idx_captured_slug ON captured_content(slug); + +-- RLS +ALTER TABLE captured_content ENABLE ROW LEVEL SECURITY; + +-- Anyone can read published experiences (public content) +DROP POLICY IF EXISTS "Public read published" ON captured_content; +CREATE POLICY "Public read published" ON captured_content + FOR SELECT USING (status = 'published'); + +-- Only service_role (your backend) can write/read all rows +DROP POLICY IF EXISTS "Service role full access" ON captured_content; +CREATE POLICY "Service role full access" ON captured_content + FOR ALL TO service_role USING (true) WITH CHECK (true); diff --git 
a/scripts/create-question-bank.sql b/scripts/create-question-bank.sql new file mode 100644 index 0000000..e655610 --- /dev/null +++ b/scripts/create-question-bank.sql @@ -0,0 +1,17 @@ +CREATE TABLE IF NOT EXISTS question_bank ( + id UUID DEFAULT gen_random_uuid() PRIMARY KEY, + company TEXT NOT NULL, + question TEXT NOT NULL, + type TEXT, + difficulty TEXT, + topics TEXT[] DEFAULT '{}', + frequency INT DEFAULT 1, + source_experience_ids UUID[] DEFAULT '{}', + first_seen_at TIMESTAMPTZ DEFAULT NOW(), + last_seen_at TIMESTAMPTZ DEFAULT NOW(), + UNIQUE(company, question) +); + +CREATE INDEX idx_question_bank_company ON question_bank(company); +CREATE INDEX idx_question_bank_type ON question_bank(type); +CREATE INDEX idx_question_bank_frequency ON question_bank(frequency DESC); diff --git a/scripts/sync-admin-role.cjs b/scripts/sync-admin-role.cjs new file mode 100644 index 0000000..cccf43c --- /dev/null +++ b/scripts/sync-admin-role.cjs @@ -0,0 +1,47 @@ +// Syncs app_metadata.role='admin' for all users with user_role='admin' in public.users +// Reads credentials from process.env first, then falls back to .env.local if present. 
+const fs = require('fs'); +const path = require('path'); + +function getEnvVar(key) { + if (process.env[key]) return process.env[key]; + // Fallback: parse .env.local if it exists + const envPath = path.resolve(process.cwd(), '.env.local'); + if (fs.existsSync(envPath)) { + const content = fs.readFileSync(envPath, 'utf8'); + const match = content.match(new RegExp(`^${key}=(.+)$`, 'm')); + if (match) return match[1].trim().replace(/^["']|["']$/g, ''); + } + return null; +} + +const url = getEnvVar('NEXT_PUBLIC_SUPABASE_URL'); +const serviceKey = getEnvVar('SUPABASE_SERVICE_ROLE_KEY'); + +if (!url || !serviceKey) { + console.error('Missing NEXT_PUBLIC_SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY'); + process.exit(1); +} + +const { createClient } = require('@supabase/supabase-js'); +const admin = createClient(url, serviceKey, { auth: { persistSession: false } }); + +(async () => { + const { data: adminRows, error } = await admin + .from('users') + .select('id, email, user_role') + .eq('user_role', 'admin'); + + if (error) { console.error('Error reading users:', error.message); process.exit(1); } + if (!adminRows?.length) { console.log('No admin rows in public.users'); return; } + + console.log(`Found ${adminRows.length} admin(s). 
Syncing app_metadata.role...`); + + for (const row of adminRows) { + const { data, error: updErr } = await admin.auth.admin.updateUserById(row.id, { + app_metadata: { role: 'admin' }, + }); + if (updErr) console.log(` ✗ ${row.email}: ${updErr.message}`); + else console.log(` ✓ ${row.email}: app_metadata.role = ${JSON.stringify(data.user.app_metadata?.role)}`); + } +})(); diff --git a/utils/supabase/client.ts b/utils/supabase/client.ts index e550dc8..cbf7b67 100644 --- a/utils/supabase/client.ts +++ b/utils/supabase/client.ts @@ -1,7 +1,13 @@ import { createBrowserClient } from '@supabase/ssr'; -const supabaseUrl = process.env.NEXT_PUBLIC_SUPABASE_URL; -const supabaseKey = process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY; +// During CI builds / static prerendering the NEXT_PUBLIC_SUPABASE_* env vars +// may be absent. createBrowserClient throws if URL/key are missing, which +// breaks `next build`. Fall back to harmless placeholders so client creation +// never throws at build time. Real values are always injected at runtime +// (browser + Vercel server), where actual queries run. +const supabaseUrl = + process.env.NEXT_PUBLIC_SUPABASE_URL || 'https://placeholder.supabase.co'; +const supabaseKey = + process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY || 'placeholder-anon-key'; -export const createClient = () => - createBrowserClient(supabaseUrl!, supabaseKey!); +export const createClient = () => createBrowserClient(supabaseUrl, supabaseKey); diff --git a/vercel.json b/vercel.json new file mode 100644 index 0000000..1444013 --- /dev/null +++ b/vercel.json @@ -0,0 +1,8 @@ +{ + "crons": [ + { + "path": "/api/pipeline/auto-process", + "schedule": "0 0 * * *" + } + ] +}