Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions src/app/(home)/contributors/contributor-card.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import Link from "next/link";
import {
Card,
CardHeader,
CardTitle,
CardDescription,
} from "@/components/ui/card";

/** Props accepted by {@link ContributorCard}. */
interface ContributorCardProps {
  /** Display name; treated as absent when empty or identical to the handle. */
  name: string;
  /** GitHub username, used for the profile link and the `@handle` label. */
  githubHandle: string;
  /** Role label rendered in the card description. */
  role: string;
}

/**
 * Card that links to a contributor's GitHub profile in a new tab.
 *
 * When a distinct display name is available it becomes the title and the
 * `@handle` moves into the description next to the role; otherwise the
 * `@handle` is the title and the description shows the role alone.
 */
export function ContributorCard(props: ContributorCardProps) {
  const { name, githubHandle, role } = props;

  const handleLabel = `@${githubHandle}`;
  // A name that merely repeats the handle adds nothing, so it is not shown.
  const showName = Boolean(name) && name !== githubHandle;

  const title = showName ? name : handleLabel;
  const description = showName ? `${handleLabel} · ${role}` : role;

  return (
    <Link
      href={`https://github.com/${githubHandle}`}
      target="_blank"
      rel="noopener noreferrer"
      className="-mr-px -mt-px"
    >
      <Card className="shadow-none rounded-none h-full hover:bg-sidebar dark:hover:bg-accent transition-colors">
        <CardHeader>
          <CardTitle className="font-code text-lg">{title}</CardTitle>
          <CardDescription className="font-code text-xs">
            {description}
          </CardDescription>
        </CardHeader>
      </Card>
    </Link>
  );
}
11 changes: 11 additions & 0 deletions src/app/(home)/contributors/layout.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/** Props for {@link ContributorsLayout}. */
interface ContributorsLayoutProps {
  children: React.ReactNode;
}

/**
 * Layout wrapper for the contributors route: a centered, width-capped
 * `<main>` whose children are stacked with vertical spacing.
 */
export default function ContributorsLayout(props: ContributorsLayoutProps) {
  const { children } = props;

  return (
    <main className="flex flex-1 flex-col max-w-7xl w-full mx-auto px-4 pb-4 pt-6 sm:pt-12">
      <div className="space-y-6">{children}</div>
    </main>
  );
}
129 changes: 129 additions & 0 deletions src/app/(home)/contributors/page.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import contributionData from "../../../../utils/contributors/data/harbor_contribution.json";
import { ContributorCard } from "./contributor-card";
import Link from "next/link";

/** One entry of the generated `harbor_contribution.json` data file. */
interface Contributor {
  /** GitHub username. */
  github_handle: string;
  email: string;
  /** Display name; may be empty when no name is known for the handle. */
  name: string;
  affiliation: string;
  /** Role label shown on the contributor card. */
  role: string;
  /** Sort priority in the Harbor Contributors section (higher is listed first). */
  rank: number;
  /** Sort priority in the Adapter Contributors section (higher is listed first). */
  adapter_rank: number;
  pr_count: number;
  adapter_pr_count: number;
  non_adapter_pr_count: number;
  total_additions: number;
  total_deletions: number;
  pr_list: { pr_url: string; pr_title: string; pr_type: string }[];
}

/**
 * Returns the lower-cased sort key for a contributor: the last
 * whitespace-separated token of their display name, or their GitHub handle
 * when the name is empty or whitespace-only.
 *
 * @param c - Contributor whose sort key is wanted.
 * @returns Lower-cased last name, or lower-cased handle as a fallback.
 */
function lastName(c: Contributor): string {
  // The data is cast from JSON at the call site, so tolerate a missing name
  // at runtime even though the type declares it as a string.
  const tokens = (c.name ?? "").trim().split(/\s+/);
  const last = tokens[tokens.length - 1];
  // `"".split(/\s+/)` yields `[""]`, so the fallback must trigger on an empty
  // string as well as on undefined; `??` alone would never reach the handle.
  return (last || c.github_handle).toLowerCase();
}

/**
 * Splits the contributor list into the two groups shown on the page and
 * orders each group for display.
 *
 * A contributor belongs to the Harbor group when they have at least one
 * non-adapter PR or a non-zero `rank`, and to the adapter group when they
 * have at least one adapter PR or a non-zero `adapter_rank`. One contributor
 * may appear in both groups.
 *
 * Ordering within a group: rank (descending), then the group's PR count
 * (descending), then last name (ascending).
 *
 * @param data - All contributors; the array itself is not modified.
 * @returns The two ordered groups.
 */
function partitionContributors(data: Contributor[]) {
  const compareLastNames = (a: Contributor, b: Contributor): number =>
    lastName(a).localeCompare(lastName(b));

  // `filter` returns a fresh array, so the in-place `sort` never touches `data`.
  const harborContributors = data
    .filter((c) => c.non_adapter_pr_count > 0 || c.rank !== 0)
    .sort((a, b) => {
      const rankDelta = b.rank - a.rank;
      if (rankDelta) return rankDelta;
      const prDelta = b.non_adapter_pr_count - a.non_adapter_pr_count;
      if (prDelta) return prDelta;
      return compareLastNames(a, b);
    });

  const adapterContributors = data
    .filter((c) => c.adapter_pr_count > 0 || c.adapter_rank !== 0)
    .sort((a, b) => {
      const rankDelta = b.adapter_rank - a.adapter_rank;
      if (rankDelta) return rankDelta;
      const prDelta = b.adapter_pr_count - a.adapter_pr_count;
      if (prDelta) return prDelta;
      return compareLastNames(a, b);
    });

  return { harborContributors, adapterContributors };
}

/**
 * Contributors page: an intro with a link to the contributing guide, one
 * card grid per contributor group, and an acknowledgments section.
 */
export default function ContributorsPage() {
  const { harborContributors, adapterContributors } = partitionContributors(
    contributionData as Contributor[],
  );

  // Both grids share identical markup; only the heading, key prefix and
  // member list differ, so they are rendered from this table.
  const sections = [
    {
      heading: "Harbor Contributors",
      keyPrefix: "harbor",
      members: harborContributors,
    },
    {
      heading: "Harbor Adapter Contributors",
      keyPrefix: "adapter",
      members: adapterContributors,
    },
  ];

  return (
    <>
      <div className="space-y-2">
        <h1 className="text-4xl tracking-tighter font-code font-medium">
          Contributors
        </h1>
        <p className="text-muted-foreground">
          Harbor is built by an open community of contributors. Interested in
          contributing?{" "}
          <Link href="/docs/contributing" className="underline">
            Learn how to get started
          </Link>
          .
        </p>
      </div>

      {sections.map(({ heading, keyPrefix, members }) => (
        <section key={keyPrefix} className="space-y-4">
          <h2 className="text-2xl tracking-tighter font-code font-medium">
            {heading}
          </h2>
          <div className="border rounded-xl overflow-hidden">
            <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 -m-px bg-card">
              {members.map((member) => (
                <ContributorCard
                  key={`${keyPrefix}-${member.github_handle}`}
                  name={member.name}
                  githubHandle={member.github_handle}
                  role={member.role}
                />
              ))}
            </div>
          </div>
        </section>
      ))}

      <section className="space-y-4">
        <h2 className="text-2xl tracking-tighter font-code font-medium">
          Acknowledgments
        </h2>
        <div className="border rounded-xl p-6 bg-card">
          <p className="text-muted-foreground">
            API inference compute for parity experiments is generously supported
            by{" "}
            <Link
              href="https://www.2077ai.com/"
              target="_blank"
              rel="noopener noreferrer"
              className="underline"
            >
              2077AI
            </Link>
            .
          </p>
        </div>
      </section>
    </>
  );
}
5 changes: 5 additions & 0 deletions src/lib/layout.shared.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ export function baseOptions(): BaseLayoutProps {
text: "Registry",
active: "nested-url",
},
{
url: "/contributors",
text: "Contributors",
active: "nested-url",
},
{
url: "https://discord.gg/6xWPKhGDbA",
text: "Discord",
Expand Down
112 changes: 112 additions & 0 deletions utils/contributors/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# Contributors Data Pipeline

Scripts to collect and aggregate contributor data from the [laude-institute/harbor](https://github.com/laude-institute/harbor) repository. The output is consumed by the `/contributors` page.

## Prerequisites

- Python 3.10+
- [GitHub CLI (`gh`)](https://cli.github.com/) authenticated with access to the harbor repo (only needed for `refresh`, `refresh-prdata`, and `refresh-userdata`; `generate` makes no API calls)

## Usage

Use `ctbcli` to run the pipeline:

```bash
# Full refresh: collect PR data + user data from GitHub, then regenerate
./utils/contributors/ctbcli refresh

# PR data only: re-collect PR data from GitHub and regenerate
./utils/contributors/ctbcli refresh-prdata

# User data only: re-collect user profiles from GitHub and regenerate
./utils/contributors/ctbcli refresh-userdata

# Regenerate from existing data (no API calls — use after editing verified data)
./utils/contributors/ctbcli generate
```

## Directory Structure

```
utils/contributors/
├── ctbcli                                  # CLI entrypoint
├── README.md
├── src/                                    # Python scripts
│   ├── collect_pr_data.py
│   ├── collect_user_data.py
│   └── generate_contributions.py
└── data/                                   # Data files
    ├── verified_github_users_data.json     # Manually curated (source of truth)
    ├── raw_pr_data.json                    # Auto-generated from GitHub API
    ├── raw_github_users_data.json          # Auto-generated from GitHub API
    └── harbor_contribution.json            # Final output for the web page
```

## How It Works

### Data flow

```
GitHub API ──► raw_pr_data.json ──────────────────────┐
GitHub API ──► raw_github_users_data.json ──┐         │
                                            ▼         ▼
verified_github_users_data.json ──► generate_contributions.py
                                                │
                                                ▼
                                    harbor_contribution.json
                                                │
                                                ▼
                                       /contributors page
```

### Scripts (`src/`)

| Script | What it does | API calls |
|--------|-------------|-----------|
| `collect_pr_data.py` | Fetches all merged PRs from `laude-institute/harbor`, classifies each by type (`adapter`, `task`, `engineering`, `other`), and writes `raw_pr_data.json` | Yes (slowest) |
| `collect_user_data.py` | Reads unique author handles from `raw_pr_data.json`, fetches GitHub profiles, and writes `raw_github_users_data.json` | Yes |
| `generate_contributions.py` | Merges PR data with user data, ranks contributors, and writes `harbor_contribution.json` | No |

### Data files (`data/`)

| File | Source | Editable? | Description |
|------|--------|-----------|-------------|
| `verified_github_users_data.json` | Manual | **Yes** | Curated contributor info with verified names, affiliations, roles, and ranks. Takes precedence over raw GitHub data when a handle matches. |
| `raw_pr_data.json` | `collect_pr_data.py` | No | All merged PRs with author, additions/deletions, title, and type classification |
| `raw_github_users_data.json` | `collect_user_data.py` | No | GitHub profile data (name, email, company) for each PR author. Used as fallback when a handle is not in the verified data. |
| `harbor_contribution.json` | `generate_contributions.py` | No | Final aggregated output consumed by the `/contributors` page |

### Verified user data fields

The `verified_github_users_data.json` file supports these fields:

| Field | Required | Description |
|-------|----------|-------------|
| `github_handle` | Yes | GitHub username |
| `name` | Yes | Display name |
| `affiliation` | Yes | Organization or university |
| `email` | Yes | Contact email |
| `role` | No | Displayed on card (e.g. `"Co-lead"`, `"Adapter Lead"`, `"Advisor"`). Defaults to `"Contributor"` |
| `rank` | No | Sort priority for Harbor Contributors section (higher = listed first). Defaults to `0` |
| `adapter_rank` | No | Sort priority for Adapter Contributors section (higher = listed first). Defaults to `0` |

### Ranking logic

- **Harbor Contributors**: sorted by `rank` (desc) → non-adapter PR count (desc) → last name (asc)
- **Adapter Contributors**: sorted by `adapter_rank` (desc) → adapter PR count (desc) → last name (asc)

## Common Tasks

**New PRs merged — update the page:**
```bash
./utils/contributors/ctbcli refresh
```

**Edited verified user data (name, role, rank, etc.):**
```bash
./utils/contributors/ctbcli generate
```

**New contributor needs verified info:**
1. Add an entry to `data/verified_github_users_data.json`
2. Run `./utils/contributors/ctbcli generate`
55 changes: 55 additions & 0 deletions utils/contributors/ctbcli
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/usr/bin/env bash
set -euo pipefail

# Absolute directory containing this script; the pipeline scripts are run
# from "$SCRIPT_DIR/src". CDPATH is cleared for the `cd` because an exported
# CDPATH can make `cd` print the resolved directory, which would be captured
# together with the `pwd` output. `--` stops a path that begins with "-" from
# being parsed as an option.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)"
# Interpreter used to run the pipeline scripts; override with PYTHON=<path>.
PYTHON="${PYTHON:-python3}"

# Announce the step, then run src/collect_pr_data.py with the configured
# interpreter.
run_collect_pr_data() {
  local script="$SCRIPT_DIR/src/collect_pr_data.py"
  printf '%s\n' "==> Collecting PR data..."
  "$PYTHON" "$script"
}

# Announce the step, then run src/collect_user_data.py with the configured
# interpreter.
run_collect_user_data() {
  local script="$SCRIPT_DIR/src/collect_user_data.py"
  printf '%s\n' "==> Collecting user data..."
  "$PYTHON" "$script"
}

# Announce the step, then run src/generate_contributions.py with the
# configured interpreter.
run_generate() {
  local script="$SCRIPT_DIR/src/generate_contributions.py"
  printf '%s\n' "==> Generating contributions..."
  "$PYTHON" "$script"
}

# Print the command summary to stdout. The heredoc delimiter is unquoted, so
# $0 expands to the path this script was invoked with.
usage() {
cat <<EOF
Usage: $0 <command>

Commands:
refresh Collect PR data, user data, and regenerate contributions
refresh-prdata Collect PR data and regenerate contributions
refresh-userdata Collect user data and regenerate contributions
generate Regenerate contributions from existing data (no API calls)
EOF
}

# Dispatch on the first CLI argument. "${1:-}" keeps `set -u` from aborting
# when the script is run without arguments.
case "${1:-}" in
  refresh)
    run_collect_pr_data
    run_collect_user_data
    run_generate
    ;;
  refresh-prdata)
    run_collect_pr_data
    run_generate
    ;;
  refresh-userdata)
    run_collect_user_data
    run_generate
    ;;
  generate)
    run_generate
    ;;
  -h|--help)
    # An explicit help request is not an error: print to stdout, exit 0.
    usage
    ;;
  *)
    # Missing or unrecognized command: name the bad command if there is one,
    # and send all diagnostics to stderr so stdout stays clean for callers.
    if [ -n "${1:-}" ]; then
      echo "Unknown command: $1" >&2
    fi
    usage >&2
    exit 1
    ;;
esac
Loading