-
Notifications
You must be signed in to change notification settings - Fork 48
163 lines (150 loc) · 8.64 KB
/
Copy pathdeploy-dev.yml
File metadata and controls
163 lines (150 loc) · 8.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# Deploy to Dev
#
# Runs on every push to the `dev` branch. The job joins the Tailscale network,
# then opens an SSH session to the dev host and runs the inline script below,
# which: pulls `dev` into the persistent checkout at ~/trinity, tries to init
# the enterprise submodule, repairs data-volume ownership, rebuilds and
# restarts the compose services, and finally checks backend/scheduler health.
#
# Required repository secrets: TAILSCALE_AUTH_KEY, DEV_HOST, DEV_USER,
# DEV_SSH_KEY.
name: Deploy to Dev

on:
  push:
    branches: [dev]

jobs:
  deploy:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - name: Connect to Tailscale
        # SHA-pinned to mitigate supply-chain hijack of the v2 floating tag.
        uses: tailscale/github-action@4e4c49acaa9818630ce0bd7a564372c17e33fb4d # v2
        with:
          authkey: ${{ secrets.TAILSCALE_AUTH_KEY }}

      - name: Deploy
        # SHA-pinned to mitigate supply-chain hijack of the v1 floating tag.
        uses: appleboy/ssh-action@0ff4204d59e8e51228ff73bce53f80d53301dee2 # v1
        with:
          # Host + username come from repo secrets so this public workflow file
          # doesn't disclose the Tailscale topology of the dev environment.
          host: ${{ secrets.DEV_HOST }}
          username: ${{ secrets.DEV_USER }}
          key: ${{ secrets.DEV_SSH_KEY }}
          command_timeout: 25m
          script: |
            set -e
            cd ~/trinity

            echo "=== Pull ==="
            git fetch origin dev
            git checkout dev
            # The dev VM has a persistent checkout that may carry hand-applied
            # fixes (e.g. docker-compose.prod.yml tweaks). `git pull` aborts on
            # dirty tree, blocking every deploy. Stash with a deploy-run-keyed
            # message so the changes are recoverable via `git stash list` (and
            # survive in the reflog for ~90 days) without polluting history.
            # If the working tree is clean, stash is a no-op (exit 0).
            #
            # Long-term fix is to move the build to CI + push images to a
            # registry so the dev VM is stateless (see #942). Until then, this
            # keeps deploys unblocked while preserving the audit trail.
            # Workflow-syntax interpolation: GitHub expands `${{ github.run_id }}`
            # at YAML render time so the literal run id is baked into the script
            # before it ships over SSH. Using `$GITHUB_RUN_ID` here would fail
            # silently — that env var lives on the GH Actions runner, not on
            # the dev VM where this script actually executes.
            #
            # NO `-u`: the dev VM has Docker-volume directories (trinity-data/,
            # archives/, avatars/) owned by root inside containers. The trinity
            # user can't `rm` those, so `git stash -u` aborts mid-cleanup with
            # "Permission denied", leaves the stash entry created but the
            # working tree UNREVERTED and returns non-zero — and the subsequent
            # pull aborts on the still-dirty tracked file. Tracked-only stash is
            # what actually blocks the pull; untracked Docker-owned files don't
            # block git.
            #
            # NO `|| echo` fallback either: because a clean tree already exits
            # 0, a non-zero status from the stash is always a genuine failure.
            # Letting `set -e` abort here surfaces the real error instead of
            # reporting it as "no changes" and failing later in the pull.
            STASH_MSG="auto-stash-deploy-${{ github.run_id }}"
            git stash push -m "${STASH_MSG}"
            git pull --ff-only origin dev
            echo "Version: $(git log -1 --oneline)"

            echo "=== Submodule (enterprise) ==="
            # #847 — try to init the private trinity-enterprise submodule
            # so the dev deploy lights up enterprise features (audit
            # dashboard, etc.). If the VM lacks read access to the private
            # repo, this is a non-fatal warning — the conditional import
            # in main.py falls back to OSS-only mode and the deploy
            # continues. Enterprise UI surfaces simply stay hidden.
            if git submodule update --init --recursive src/backend/enterprise; then
              ENT_SHA=$(git -C src/backend/enterprise rev-parse --short HEAD 2>/dev/null || echo "unknown")
              echo "ENTERPRISE: initialized at ${ENT_SHA}"
            else
              echo "::warning::Enterprise submodule init failed — deploying in OSS-only mode"
              echo "ENTERPRISE: confirm the dev VM has read access to Abilityai/trinity-enterprise"
              echo "ENTERPRISE: (deploy key, PAT in a credential helper, or org membership all work)"
            fi
            # Compose file list: base prod + enterprise overlay. The
            # overlay bind-mounts ./src/backend/enterprise into
            # /app/enterprise inside the backend container. If the
            # submodule init above failed, the host directory will be
            # empty (or contain just .git), and the conditional import
            # in main.py will fall through to OSS-only mode — same
            # outcome as if the overlay weren't applied.
            COMPOSE_FILES="-f docker-compose.prod.yml -f docker-compose.prod.enterprise.yml"

            echo "=== Volume ownership (#874 / #958) ==="
            # Backend + scheduler run as UID 1000 (issue #874). Existing
            # deployments rolled before #874 still have data paths owned by
            # root from the prior root-container era. The first migration to
            # actually UPDATE existing rows (#922's null_legacy_schedule_
            # timeouts) hits "attempt to write a readonly database" on
            # startup, /health returns 503 forever, deploy times out.
            # Canonical recipe: docs/migrations/NON_ROOT_CONTAINERS_2026-05.md.
            # Both checks are idempotent — silent no-ops on healthy VMs.
            TRINITY_DATA_PATH="${TRINITY_DATA_PATH:-./trinity-data}"
            if [ -d "$TRINITY_DATA_PATH" ]; then
              DATA_UID=$(stat -c %u "$TRINITY_DATA_PATH")
              if [ "$DATA_UID" != "1000" ]; then
                echo "Re-owning $TRINITY_DATA_PATH from UID=$DATA_UID to UID=1000"
                sudo chown -R 1000:1000 "$TRINITY_DATA_PATH"
              fi
            fi
            # agent-configs is a named volume; check + chown via ephemeral
            # alpine container running as root.
            PROJECT=$(basename "$PWD")
            VOL_NAME="${PROJECT}_agent-configs"
            if sudo docker volume inspect "$VOL_NAME" >/dev/null 2>&1; then
              VOL_UID=$(sudo docker run --rm -v "$VOL_NAME:/v" alpine stat -c %u /v 2>/dev/null || echo "0")
              if [ "$VOL_UID" != "1000" ]; then
                echo "Re-owning volume $VOL_NAME from UID=$VOL_UID to UID=1000"
                sudo docker run --rm -v "$VOL_NAME:/v" --user 0 alpine chown -R 1000:1000 /v
              fi
            fi

            echo "=== Build ==="
            # Build-time provenance (#926 / #958 / #993). docker-compose.prod.yml
            # backend.build.args reads these env vars; absent them, the
            # Dockerfile defaults to "unknown" and Build Info in the UI
            # shows a wall of unknown values. Mirrors scripts/deploy/start.sh.
            #
            # #993: pass via `sudo env VAR=val …` — NOT `sudo VAR=val …`.
            # Under default Ubuntu sudoers (env_reset on, setenv off), env
            # assignments placed before the command are filtered out, so the
            # prior form silently dropped every var and compose fell back to
            # ${VAR:-unknown}. `env` is the program sudo execs, so the
            # assignments are its arguments and reach docker compose intact.
            GIT_COMMIT=$(git rev-parse HEAD)
            GIT_COMMIT_SUBJECT=$(git log -1 --pretty=%s)
            GIT_COMMIT_TIMESTAMP=$(git log -1 --pretty=%cI)
            GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
            BUILD_DATE=$(date -u +%Y-%m-%dT%H:%M:%SZ)
            # #993: dynamic version = VERSION file + git short sha (+ .dirty).
            BASE_VER=$(cat VERSION 2>/dev/null || echo unknown)
            SHORT_SHA=$(git rev-parse --short=8 HEAD)
            git diff --quiet HEAD 2>/dev/null || SHORT_SHA="${SHORT_SHA}.dirty"
            VERSION="${BASE_VER}+g${SHORT_SHA}"
            # ${COMPOSE_FILES} is intentionally unquoted (here and in the
            # restart below) so it word-splits into separate `-f <file>` args.
            sudo env \
              VERSION="${VERSION}" \
              GIT_COMMIT="${GIT_COMMIT}" \
              GIT_COMMIT_SUBJECT="${GIT_COMMIT_SUBJECT}" \
              GIT_COMMIT_TIMESTAMP="${GIT_COMMIT_TIMESTAMP}" \
              GIT_BRANCH="${GIT_BRANCH}" \
              BUILD_DATE="${BUILD_DATE}" \
              docker compose ${COMPOSE_FILES} build --no-cache backend frontend mcp-server scheduler

            echo "=== Restart ==="
            sudo docker compose ${COMPOSE_FILES} up -d backend frontend mcp-server scheduler

            echo "=== Health ==="
            # `set -e` does not abort on a failing command that sits on the
            # left-hand side of `&&`, so a bare `curl … && echo OK` lets an
            # unhealthy backend pass silently and the deploy reports success.
            # Poll instead: first probe after 10s (same as the previous fixed
            # sleep), then every 10s, up to HEALTH_ATTEMPTS tries. A failure is
            # remembered and turned into a non-zero exit at the very end, so
            # the scheduler status and log excerpt below are still printed.
            HEALTH_ATTEMPTS=12
            BACKEND_OK=0
            ATTEMPT=1
            while [ "$ATTEMPT" -le "$HEALTH_ATTEMPTS" ]; do
              sleep 10
              if curl -sf http://localhost:8000/health; then
                echo "Backend: OK"
                BACKEND_OK=1
                break
              fi
              echo "Backend not healthy yet (attempt ${ATTEMPT}/${HEALTH_ATTEMPTS})"
              ATTEMPT=$((ATTEMPT + 1))
            done
            if [ "$BACKEND_OK" -ne 1 ]; then
              echo "::error::Backend /health did not succeed after ${HEALTH_ATTEMPTS} attempts"
            fi
            SCHED=$(sudo docker inspect trinity-scheduler --format='{{.State.Health.Status}}')
            echo "Scheduler: $SCHED"
            [ "$SCHED" = "healthy" ] || echo "WARNING: scheduler not healthy yet"

            echo "=== Error check ==="
            # A pipeline's exit status is that of its LAST command (`head`),
            # which succeeds even on empty input — so a trailing
            # `|| echo "No errors"` on the pipeline can never fire. Capture the
            # matches and test for emptiness instead. `|| true` keeps a
            # no-match `grep` from tripping `set -e` should `pipefail` ever be
            # enabled in this shell.
            RECENT_ERRORS=$(sudo docker logs trinity-backend --tail 50 2>&1 | grep -iE 'error|exception|failed' | head -10 || true)
            if [ -n "$RECENT_ERRORS" ]; then
              printf '%s\n' "$RECENT_ERRORS"
            else
              echo "No errors"
            fi

            # Fail the step (and therefore the workflow run) if the backend
            # never became healthy; diagnostics above have already been shown.
            if [ "$BACKEND_OK" -ne 1 ]; then
              echo "=== Failed: backend unhealthy ==="
              exit 1
            fi
            echo "=== Done ==="