Skip to content
10 changes: 5 additions & 5 deletions app/composables/useNavigation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -134,14 +134,14 @@ const footerLinks = [{
}, {
label: 'Explore',
children: [{
label: 'Modules',
to: 'https://nuxt.com/modules'
}, {
label: 'Templates',
to: 'https://nuxt.com/templates'
to: '/templates'
}, {
label: 'Showcase',
to: 'https://nuxt.com/showcase'
to: '/showcase'
}, {
label: 'AI Evals',
to: '/evals'
}]
}, {
label: 'Enterprise',
Expand Down
304 changes: 304 additions & 0 deletions app/pages/evals.vue
Original file line number Diff line number Diff line change
@@ -0,0 +1,304 @@
<script setup lang="ts">
import { h, resolveComponent } from 'vue'
import type { TableColumn, TableRow } from '@nuxt/ui'
import rawData from '~~/public/agent-results.json'

// Resolve the Nuxt UI components by name so the column definitions further down
// can pass them to `h()` inside their cell render functions.
const UButton = resolveComponent('UButton')
const UBadge = resolveComponent('UBadge')
const UAvatar = resolveComponent('UAvatar')

// Page-level metadata.
// NOTE(review): `heroBackground` holds utility classes (opacity / z-index); presumably
// read by the layout that draws the hero background — confirm against the layout.
definePageMeta({
heroBackground: 'opacity-70 -z-10'
})

// Types

/** One evaluation outcome, as listed per experiment under `results` in the imported JSON. */
interface EvalResultItem {
/** Path identifying the evaluation; displayed in the "Evaluation" column. */
evalPath: string
result: {
/** Whether the evaluation passed; drives the Pass/Fail badge and the success rate. */
success: boolean
/** Run time; handed to `formatDuration`, which treats it as milliseconds. */
duration: number
evalPath: string
timestamp: string
}
}

/** An entry of `metadata.experiments` in the imported JSON, looked up by `name`. */
interface Experiment {
/** Key that matches the experiment's entry in `results`. */
name: string
timestamp: string
/** Shown in the "Model" column. */
modelName: string
/** Shown in the "Agent" column and used by the agent filter. */
agentHarness: string
}

/** One row of the main results table: the summary of a single experiment. */
interface ModelRow {
model: string
agent: string
/** Number of evaluations recorded for the experiment. */
totalEvals: number
/** Share of passed evaluations as a rounded whole-number percentage. */
successRate: number
/** The individual results, rendered in the nested table when the row is expanded. */
evals: EvalResultItem[]
}

// Fetch the page copy from the `evals` content collection (first entry).
const { data: page } = await useAsyncData('evals', () => queryCollection('evals').first())
// No content entry means there is nothing to render: fail with a fatal 404.
if (!page.value) {
throw createError({ statusCode: 404, statusMessage: 'Page not found', fatal: true })
}

// Read once at setup time; these are plain values, not reactive refs.
const title = page.value.title
const description = page.value.description

// Use the same title/description for the standard and the Open Graph meta tags.
// NOTE(review): `titleTemplate: '%s'` presumably overrides a site-wide title template
// so that the bare title is used — confirm against the app-level head config.
useSeoMeta({
titleTemplate: '%s',
title,
description,
ogDescription: description,
ogTitle: title
})
// Open Graph image rendered through the `Docs` OG image component with the same copy.
defineOgImageComponent('Docs', { title, description })

// Index the exported experiments by name so each result set can find its metadata.
// Yields an empty index when the JSON carries no experiment list.
const experimentMap = computed(() => {
  const byName: Record<string, Experiment> = {}
  const experiments = rawData?.metadata?.experiments
  if (!experiments) return byName

  experiments.forEach((experiment) => {
    byName[experiment.name] = experiment
  })
  return byName
})

// Summarise every experiment's evaluation list into one table row,
// ordered from the highest success rate to the lowest.
const allResults = computed<ModelRow[]>(() => {
  const resultsByExperiment = rawData?.results
  if (!resultsByExperiment) return []

  const summaries = Object.entries(resultsByExperiment).map(([experimentName, evals]): ModelRow => {
    const meta = experimentMap.value[experimentName]
    const total = evals.length
    const passed = evals.filter(item => item.result.success).length

    return {
      // Fall back to the raw experiment key when no metadata is available
      model: meta?.modelName || experimentName,
      agent: meta?.agentHarness || 'Unknown',
      totalEvals: total,
      // Whole-number percentage; an experiment without evals counts as 0%
      successRate: total ? Math.round((passed / total) * 100) : 0,
      evals
    }
  })

  // `summaries` is a freshly built array, so sorting it in place touches nothing shared.
  return summaries.sort((a, b) => b.successRate - a.successRate)
})
Comment thread
benjamincanac marked this conversation as resolved.

// Agent filter
// Distinct agent names, in first-seen order, offered as the filter options.
const agents = computed(() => Array.from(new Set(allResults.value.map(row => row.agent))))
// Agents currently picked in the filter; an empty selection means "show everything".
const selectedAgents = ref<string[]>([])

// Rows to display: all of them, or only those whose agent is among the picked ones.
const filteredResults = computed(() => {
  const picked = selectedAgents.value
  return picked.length === 0
    ? allResults.value
    : allResults.value.filter(row => picked.includes(row.agent))
})

// Format exported date
// Human-readable export date (e.g. "January 5, 2026"), or '' when it is missing
// or cannot be parsed.
// Both the locale and the time zone are pinned: without a fixed `timeZone` the
// server and the browser may each format the same instant in their own zone and
// land on different calendar days, which makes server-rendered markup and the
// client render disagree.
const formattedDate = computed(() => {
  const exportedAt = rawData?.metadata?.exportedAt
  if (!exportedAt) return ''

  const date = new Date(exportedAt)
  // An unparseable timestamp would otherwise be displayed as "Invalid Date"
  if (Number.isNaN(date.getTime())) return ''

  return date.toLocaleDateString('en-US', {
    month: 'long',
    day: 'numeric',
    year: 'numeric',
    timeZone: 'UTC'
  })
})
Comment thread
benjamincanac marked this conversation as resolved.

// Model icon mapping (matched by lowercase prefix of model name)
const modelIconMap: Record<string, string> = {
  claude: 'i-simple-icons-anthropic',
  gpt: 'i-simple-icons-openai',
  cursor: 'i-simple-icons-cursor',
  gemini: 'i-simple-icons-googlegemini'
}

/**
 * Pick the icon for a model name.
 *
 * The name is lowercased and compared against the prefixes of `modelIconMap`
 * in declaration order; the first prefix the name starts with wins.
 *
 * @param model - Model name as displayed in the table.
 * @returns The matching icon name, or the generic box icon when no prefix matches.
 */
function getModelIcon(model: string): string {
  const normalized = model.toLowerCase()
  const hit = Object.entries(modelIconMap).find(([prefix]) => normalized.startsWith(prefix))
  return hit ? hit[1] : 'i-lucide-box'
}

/**
 * Render a millisecond duration as seconds with two decimals.
 *
 * @param ms - Duration in milliseconds.
 * @returns The duration in seconds followed by `s`, e.g. `1234` becomes `"1.23s"`.
 */
function formatDuration(ms: number): string {
  const seconds = ms / 1000
  return seconds.toFixed(2) + 's'
}

// Expanded rows state
// Bound to the main table through `v-model:expanded`.
// NOTE(review): presumably a row-id -> boolean map maintained by the table — confirm
// against the table's expanded-state type.
const expanded = ref({})

// Toggle expand on row click
// Registered as the main table's `@select` handler; the event itself is unused.
function onSelect(_e: Event, row: TableRow<ModelRow>) {
row.toggleExpanded()
}

// Table columns
// Columns of the main results table (one row per experiment): an expand toggle,
// the model with its icon, the agent, the eval count and the success rate.
const columns: TableColumn<ModelRow>[] = [
  {
    // Chevron button that expands/collapses the row's nested eval table
    id: 'expand',
    meta: {
      class: {
        th: 'w-0',
        td: 'w-0'
      }
    },
    cell: ({ row }) => {
      const isExpanded = row.getIsExpanded()
      return h(UButton, {
        'color': 'neutral',
        'variant': 'ghost',
        'icon': 'i-lucide-chevron-right',
        'square': true,
        'size': 'sm',
        'aria-label': 'Expand',
        // Expose the open/closed state to assistive technology; the rotated
        // chevron alone only conveys it visually.
        'aria-expanded': isExpanded,
        'ui': {
          leadingIcon: ['transition-transform', isExpanded ? 'duration-200 rotate-90' : '']
        },
        'onClick': (e: Event) => {
          // The row's own select handler also toggles expansion; keep this click
          // from bubbling to it so the row is not toggled twice.
          e.stopPropagation()
          row.toggleExpanded()
        },
        'class': 'group-hover:bg-elevated'
      })
    }
  },
  {
    // Model name preceded by its vendor icon
    accessorKey: 'model',
    header: 'Model',
    cell: ({ row }) => h('div', { class: 'flex items-center gap-2' }, [
      h(UAvatar, { icon: getModelIcon(row.original.model), size: 'sm', class: 'ring ring-default ring-inset' }),
      h('span', {}, row.original.model)
    ])
  },
  {
    accessorKey: 'agent',
    header: 'Agent'
  },
  {
    accessorKey: 'totalEvals',
    header: 'Total Evals',
    meta: {
      class: {
        th: 'text-center',
        td: 'text-center'
      }
    }
  },
  {
    // Rounded percentage, displayed with a trailing "%"
    accessorKey: 'successRate',
    header: 'Success Rate',
    meta: {
      class: {
        th: 'text-right',
        td: 'text-right'
      }
    },
    cell: ({ row }) => h('span', {}, `${row.original.successRate}%`)
  }
]

// Columns of the nested table shown inside an expanded row:
// one line per evaluation with its path, pass/fail badge and duration.
const evalColumns: TableColumn<EvalResultItem>[] = [
  {
    accessorKey: 'evalPath',
    header: 'Evaluation'
  },
  {
    // Green "Pass" / red "Fail" badge
    id: 'score',
    header: 'Result',
    meta: {
      class: {
        th: 'text-center',
        td: 'text-center'
      }
    },
    cell: ({ row }) => {
      const passed = row.original.result.success
      return h(UBadge, {
        color: passed ? 'success' : 'error',
        variant: 'subtle'
      }, () => passed ? 'Pass' : 'Fail')
    }
  },
  {
    // Run time in seconds, right-aligned
    id: 'duration',
    header: 'Duration',
    meta: {
      class: {
        th: 'text-right',
        td: 'text-right'
      }
    },
    cell: ({ row }) => {
      const { duration } = row.original.result
      return h('span', {}, formatDuration(duration))
    }
  }
]
</script>

<template>
<div v-if="page && rawData">
<!-- Hero: title and description from the `evals` content entry, a link to the GitHub repository and the date of the last export -->
<UPageHero
:title="page.title"
:description="page.description"
:ui="{
title: 'text-4xl sm:text-5xl lg:text-6xl font-bold',
description: 'max-w-2xl mx-auto text-pretty',
links: 'items-center'
}"
>
<template #links>
<UButton
:to="page.githubUrl"
icon="i-simple-icons-github"
label="View on GitHub"
target="_blank"
color="neutral"
variant="ghost"
/>

<USeparator orientation="vertical" class="h-6" />

<span class="text-sm font-medium">Last run date: <span class="text-muted font-normal">{{ formattedDate }}</span></span>
</template>
</UPageHero>

<!-- Results: multi-select agent filter plus the summary table; selecting a row (or clicking its chevron) expands it to show the nested per-evaluation table -->
<UPageBody class="mt-0">
<UContainer class="max-w-6xl">
<div class="flex items-center justify-between mb-4">
<h2 class="text-2xl font-bold">
Agent Performance Results
</h2>

<USelectMenu
v-model="selectedAgents"
:items="agents"
multiple
placeholder="All Agents"
color="neutral"
variant="subtle"
class="w-52 bg-elevated/50 hover:bg-elevated data-[state=open]:bg-elevated group"
:ui="{ trailingIcon: 'group-data-[state=open]:rotate-180 transition-transform duration-200' }"
/>
</div>

<UTable
v-model:expanded="expanded"
:data="filteredResults"
:columns="columns"
:ui="{
thead: '[&>tr]:bg-elevated/50 border-b border-default',
tr: 'py-2.5 peer peer-data-[expanded=true]:[&>td]:p-4! group transition-colors',
td: 'py-2.5'
}"
class="flex-1 border border-default rounded-lg"
@select="onSelect"
>
<template #expanded="{ row }">
<UTable
:data="row.original.evals"
:columns="evalColumns"
:ui="{
thead: '[&>tr]:bg-elevated/50 border-b border-default',
tr: 'py-2.5',
td: 'py-2.5'
}"
class="flex-1 border border-default rounded-lg"
/>
</template>
</UTable>
</UContainer>
</UPageBody>
</div>
</template>
9 changes: 9 additions & 0 deletions content.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,15 @@ export default defineContentConfig({
links: z.array(Link)
}))
})
}),
// Copy for the AI evals page, loaded from `evals.yml` as a data collection:
// page title, description and the URL of the GitHub repository (must be a valid URL).
evals: defineCollection({
type: 'data',
source: 'evals.yml',
schema: z.object({
title: z.string(),
description: z.string(),
githubUrl: z.string().url()
})
})
}
})
3 changes: 3 additions & 0 deletions content/evals.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
title: AI Agent Evaluations
description: "Performance results of AI coding agents on Nuxt code generation tasks, measuring success rate and execution time."
githubUrl: "https://github.com/vercel/nuxt-evals"
Loading