diff --git a/README.md b/README.md index a908e71..75047e2 100644 --- a/README.md +++ b/README.md @@ -20,26 +20,34 @@ ci-self up - `ci-self up` は `register + run-focus` を連続実行 - `verify.yml` / PRテンプレートが無ければ自動雛形を生成 +- 雛形の生成はローカルファイル変更のみ(GitHub反映には commit/push が必要) +- 対象リポジトリに `flake.nix` がある場合、runner マシンに `nix` の事前インストールが必要 + - `ci-self` / `verify.yml` は `nix-daemon.sh` を自動読み込みして `nix` を検出(毎回の手動 `source` は不要) + - 既存の `verify.yml` が古い場合は `bash ops/ci/scaffold_verify_workflow.sh --repo --apply --force` で更新 -## ネットワーク別の最短 +## Mac mini ワンコマンド(推奨) -同一LANの Mac mini へ SSH: +MacBook から 1 コマンドで「鍵認証確認 -> 同期 -> Mac mini 実行 -> 結果回収」まで行う: ```bash -ci-self remote-up --host --project-dir ~/dev/maakie-brainlab --repo mt4110/maakie-brainlab +ci-self remote-ci --host @ --project-dir '~/dev/maakie-brainlab' --repo mt4110/maakie-brainlab ``` -外出先(SSHあり): +`remote-ci` の実行内容: -```bash -ci-self remote-up --host --project-dir ~/dev/maakie-brainlab --repo mt4110/maakie-brainlab -``` +1. SSH 公開鍵認証(password禁止)を検証 +2. ローカル作業ツリーを Mac mini へ `rsync` 同期 +3. (repo指定時)runner bootstrap をベストエフォート実行 +4. Mac mini で `ops/ci/run_verify_full.sh` を実行 +5. `verify-full.status` と `out/logs` をローカル `out/remote//` に回収 -外出先(SSHなし): +公開鍵未登録時は、`authorized_keys` 登録のヒントを出して停止します。 -```bash -ci-self run-focus --repo mt4110/maakie-brainlab --ref main -``` +補足: + +- `--host` は `ssh` の接続先文字列(`user@host` / IP / `~/.ssh/config` のHost別名) +- `--project-dir` に `~` を使う場合は `--project-dir '~/'` のようにクオート +- runner 初期化/復旧専用の旧導線は `ci-self remote-up` ## さらに短縮する設定ファイル @@ -57,8 +65,8 @@ ci-self config-init CI_SELF_REPO=mt4110/maakie-brainlab CI_SELF_REF=main CI_SELF_PROJECT_DIR=/Users//dev/maakie-brainlab -CI_SELF_REMOTE_HOST=mac-mini.local -CI_SELF_REMOTE_PROJECT_DIR=~/dev/maakie-brainlab +CI_SELF_REMOTE_HOST=@mac-mini.local +CI_SELF_REMOTE_PROJECT_DIR=/Users//dev/maakie-brainlab CI_SELF_PR_BASE=main ``` @@ -68,8 +76,10 @@ CI_SELF_PR_BASE=main - `ci-self up`: ローカル最短(register + run-focus) - `ci-self focus`: run-focus 後、PR未作成なら自動作成し checks を監視 +- `ci-self remote-ci`: 鍵必須・同期・Mac mini実行・結果回収を1コマンドで実行 - `ci-self doctor --fix`: 依存/gh auth/colima/docker/runner_health を診断し可能な範囲で修復 -- `ci-self remote-up`: SSH先で register + run-focus +- `ci-self doctor --repo-dir `: `flake.nix` リポジトリの Nix 到達性も含めて診断 +- `ci-self remote-up`: SSH先で register + run-focus(同期しない旧導線) - `ci-self config-init`: `.ci-self.env` テンプレート生成 注: `doctor --fix` は `gh auth login` だけは自動化できないため、未ログイン時は手動ログインが必要です。 diff --git a/cmd/runner_health/main.go b/cmd/runner_health/main.go index 990e08c..628e1ef 100644 --- a/cmd/runner_health/main.go +++ b/cmd/runner_health/main.go @@ -1,7 +1,9 @@ package main import ( + "flag" "fmt" + "io" "os" "os/exec" "path/filepath" @@ -15,6 +17,10 @@ type checkResult struct { detail string } +type options struct { + repoDir string +} + func main() { defer func() { if r := recover(); r != nil { @@ -24,6 +30,14 @@ func main() { } }() + opts, err := parseOptions(os.Args[1:]) + if err != nil { + writeStatus("ERROR", "invalid_args="+err.Error(), nil) + fmt.Printf("ERROR: runner_health invalid_args=%s\n", err.Error()) + fmt.Println("STATUS: ERROR") + os.Exit(2) + } + fmt.Println("OK: runner_health start") results := []checkResult{} @@ -45,6 +59,7 @@ func main() { results = append(results, checkDiskDir("out")) results = append(results, checkDiskDir(".local")) results = append(results, checkDiskDir("cache")) + results = append(results, checkNixForRepo(opts.repoDir)) // Print results and determine overall status for _, r := range results { @@ -58,6 +73,20 @@ func main() { fmt.Printf("STATUS: %s\n", overallStatus) } +func parseOptions(args []string) (options, error) { + opts := options{} + fs := flag.NewFlagSet("runner_health", flag.ContinueOnError) + fs.SetOutput(io.Discard) + fs.StringVar(&opts.repoDir, "repo-dir", "", "target repository directory for context-aware checks") + if err := fs.Parse(args); err != nil { + return options{}, err + } + if fs.NArg() > 0 { + return options{}, fmt.Errorf("unexpected_args=%s", strings.Join(fs.Args(), ",")) + } + return opts, nil +} + func checkCommand(name, bin string, args ...string) checkResult { cmd := exec.Command(bin, args...) cmd.Stdout = nil @@ -158,6 +187,86 @@ func checkDiskDir(dir string) checkResult { } } +func checkNixForRepo(repoDir string) checkResult { + if strings.TrimSpace(repoDir) == "" { + return checkResult{ + name: "nix", + status: "SKIP", + detail: "reason=repo_dir_not_set", + } + } + + flakePath := filepath.Join(repoDir, "flake.nix") + fi, err := os.Stat(flakePath) + if err != nil { + if os.IsNotExist(err) { + return checkResult{ + name: "nix", + status: "SKIP", + detail: "reason=not_required(no_flake)", + } + } + return checkResult{ + name: "nix", + status: "ERROR", + detail: "reason=flake_stat_failed(" + err.Error() + ")", + } + } + if fi.IsDir() { + return checkResult{ + name: "nix", + status: "ERROR", + detail: "reason=flake_not_file", + } + } + + // flake.nix exists => nix is required. + if p, err := exec.LookPath("nix"); err == nil { + return checkResult{ + name: "nix", + status: "OK", + detail: "reason=available path=" + p, + } + } + + defaultProfileBin := "/nix/var/nix/profiles/default/bin/nix" + if _, err := os.Stat(defaultProfileBin); err == nil { + return checkResult{ + name: "nix", + status: "OK", + detail: "reason=available_via_default_profile path=" + defaultProfileBin, + } + } + + user := strings.TrimSpace(os.Getenv("USER")) + if user == "" { + user = "unknown" + } + perUserBin := filepath.Join("/nix/var/nix/profiles/per-user", user, "profile/bin/nix") + if _, err := os.Stat(perUserBin); err == nil { + return checkResult{ + name: "nix", + status: "OK", + detail: "reason=available_via_user_profile path=" + perUserBin, + } + } + + daemonProfile := "/nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh" + if _, err := os.Stat(daemonProfile); err == nil { + return checkResult{ + name: "nix", + status: "ERROR", + detail: "reason=not_in_path profile=" + daemonProfile, + } + } + + return checkResult{ + name: "nix", + status: "ERROR", + detail: "reason=not_installed", + } +} + func writeStatus(st string, reason string, results []checkResult) { outDir := "out" _ = os.MkdirAll(outDir, 0o755) diff --git a/docs/ci/QUICKSTART.md b/docs/ci/QUICKSTART.md index 9e7b7c1..0734e22 100644 --- a/docs/ci/QUICKSTART.md +++ b/docs/ci/QUICKSTART.md @@ -31,22 +31,38 @@ ci-self config-init CI_SELF_REPO=mt4110/maakie-brainlab CI_SELF_REF=main CI_SELF_PROJECT_DIR=/Users//dev/maakie-brainlab -CI_SELF_REMOTE_HOST=mac-mini.local -CI_SELF_REMOTE_PROJECT_DIR=~/dev/maakie-brainlab +CI_SELF_REMOTE_HOST=@mac-mini.local +CI_SELF_REMOTE_PROJECT_DIR=/Users//dev/maakie-brainlab CI_SELF_PR_BASE=main ``` ## ネットワーク別ワンコマンド -同一LAN / 外出先(SSHあり): +同一LAN / 外出先(SSHあり, 推奨): ```bash -ci-self remote-up +ci-self remote-ci ``` -`remote-up` は `.ci-self.env` の `CI_SELF_REMOTE_HOST` などが設定済みの場合の最短です。 +`remote-ci` は以下を 1 コマンドで実行します: + +1. SSH 鍵認証チェック(password不可) +2. ローカル変更を Mac mini に `rsync` 同期 +3. Mac mini 側 verify 実行 +4. `out/remote//` へ結果回収 + 未設定なら `--host --project-dir --repo` を明示してください。 +```bash +ci-self remote-ci --host @ --project-dir '~/dev/maakie-brainlab' --repo mt4110/maakie-brainlab +``` + +runner 初期化/復旧専用の旧導線: + +```bash +ci-self remote-up +``` + 外出先(SSHなし): ```bash diff --git a/ops/ci/ci_self.sh b/ops/ci/ci_self.sh index d5d02fd..e6f5d5c 100755 --- a/ops/ci/ci_self.sh +++ b/ops/ci/ci_self.sh @@ -46,6 +46,20 @@ expand_local_path() { fi } +run_go_cmd() { + if command -v go >/dev/null 2>&1; then + if go "$@"; then + return 0 + fi + echo "WARN: go command failed; retrying via mise" >&2 + fi + if command -v mise >/dev/null 2>&1; then + mise x -- go "$@" + return $? + fi + return 127 +} + unquote_value() { local v="$1" local n="${#v}" @@ -124,6 +138,7 @@ Commands: register One-command runner registration for current repo run-watch One-command verify workflow dispatch + watch run-focus run-watch + All Green check + PR template sync + remote-ci Key-only SSH + sync + remote verify + fetch results remote-register Run `register` over SSH on remote host remote-run-focus Run `run-focus` over SSH on remote host remote-up Run `remote-register` + `remote-run-focus` in one command @@ -139,6 +154,7 @@ Examples: ci-self register ci-self run-watch ci-self run-focus + ci-self remote-ci --host @ --project-dir '~/dev/maakie-brainlab' ci-self remote-up --host mac-mini.local --project-dir ~/dev/maakie-brainlab USAGE } @@ -173,6 +189,87 @@ current_branch() { git branch --show-current } +ensure_verify_workflow_nix_compat() { + local project_dir="${1:-}" + [[ -z "$project_dir" ]] && return 0 + [[ -d "$project_dir" ]] || return 0 + + if [[ ! -f "$project_dir/flake.nix" ]]; then + return 0 + fi + + local wf="$project_dir/.github/workflows/verify.yml" + if [[ ! -f "$wf" ]]; then + echo "ERROR: verify workflow not found for flake repo: $wf" >&2 + echo "HINT: bash $ROOT_DIR/ops/ci/scaffold_verify_workflow.sh --repo $project_dir --apply" >&2 + return 1 + fi + + if ! grep -Fq "nix-daemon.sh" "$wf"; then + echo "ERROR: verify.yml is outdated for nix runner env: $wf" >&2 + echo "HINT: bash $ROOT_DIR/ops/ci/scaffold_verify_workflow.sh --repo $project_dir --apply --force" >&2 + echo "HINT: commit/push updated verify.yml, then rerun ci-self" >&2 + return 1 + fi +} + +resolve_verify_workflow_id() { + local repo="$1" + local workflows="" + local id="" + local path="" + local name="" + local lc_path="" + local lc_name="" + local verify_yaml_id="" + local verify_name_id="" + local verify_path_id="" + + workflows="$(gh api "repos/$repo/actions/workflows" --jq '.workflows[]? | [.id, (.path // ""), (.name // "")] | @tsv')" || return 1 + + while IFS=$'\t' read -r id path name; do + [[ -n "$id" ]] || continue + lc_path="$(printf '%s' "$path" | tr '[:upper:]' '[:lower:]')" + lc_name="$(printf '%s' "$name" | tr '[:upper:]' '[:lower:]')" + + if [[ "$lc_path" == ".github/workflows/verify.yml" ]]; then + printf '%s\n' "$id" + return 0 + fi + [[ -z "$verify_yaml_id" && "$lc_path" == ".github/workflows/verify.yaml" ]] && verify_yaml_id="$id" + [[ -z "$verify_name_id" && "$lc_name" == "verify" ]] && verify_name_id="$id" + [[ -z "$verify_path_id" && "$lc_path" == *"/verify."* ]] && verify_path_id="$id" + done <<< "$workflows" + + if [[ -n "$verify_yaml_id" ]]; then + printf '%s\n' "$verify_yaml_id" + return 0 + fi + if [[ -n "$verify_name_id" ]]; then + printf '%s\n' "$verify_name_id" + return 0 + fi + if [[ -n "$verify_path_id" ]]; then + printf '%s\n' "$verify_path_id" + return 0 + fi +} + +print_verify_workflow_missing_hint() { + local repo="$1" + local project_dir="${2:-}" + echo "ERROR: verify workflow not found in remote repo ($repo)" >&2 + echo "HINT: expected .github/workflows/verify.yml (or verify.yaml) in $repo" >&2 + if [[ -n "$project_dir" && -d "$project_dir" ]]; then + if [[ -f "$project_dir/.github/workflows/verify.yml" || -f "$project_dir/.github/workflows/verify.yaml" ]]; then + echo "HINT: local workflow exists in $project_dir/.github/workflows; commit/push then rerun ci-self" >&2 + else + echo "HINT: bash $ROOT_DIR/ops/ci/scaffold_verify_workflow.sh --repo $project_dir --apply" >&2 + echo "HINT: commit/push generated .github/workflows/verify.yml, then rerun ci-self" >&2 + fi + fi +} + resolve_pr_number() { local repo="$1" local branch="$2" @@ -388,10 +485,19 @@ USAGE project_dir="$(expand_local_path "$project_dir")" [[ -z "$ref" ]] && ref="$(resolve_ref "$ref")" repo="$(resolve_repo "$repo")" - gh workflow run verify.yml --ref "$ref" -R "$repo" + ensure_verify_workflow_nix_compat "$project_dir" + + local workflow_id="" + workflow_id="$(resolve_verify_workflow_id "$repo")" + if [[ -z "$workflow_id" ]]; then + print_verify_workflow_missing_hint "$repo" "$project_dir" + return 1 + fi + + gh workflow run "$workflow_id" --ref "$ref" -R "$repo" local run_id - run_id="$(gh run list --workflow verify.yml -R "$repo" --limit 1 --json databaseId --jq '.[0].databaseId')" - [[ -n "$run_id" ]] || { echo "ERROR: failed to resolve verify run id" >&2; return 1; } + run_id="$(gh run list --workflow "$workflow_id" -R "$repo" --limit 1 --json databaseId --jq '.[0].databaseId')" + [[ -n "$run_id" ]] || { echo "ERROR: failed to resolve verify run id (workflow=$workflow_id)" >&2; return 1; } gh run watch "$run_id" -R "$repo" --exit-status if [[ "$all_green" -eq 1 || "$sync_pr_template" -eq 1 ]]; then @@ -432,7 +538,13 @@ USAGE done repo="$(resolve_repo "$repo")" if [[ -z "$run_id" ]]; then - run_id="$(gh run list --workflow verify.yml -R "$repo" --limit 1 --json databaseId --jq '.[0].databaseId')" + local workflow_id="" + workflow_id="$(resolve_verify_workflow_id "$repo")" + if [[ -z "$workflow_id" ]]; then + print_verify_workflow_missing_hint "$repo" + return 1 + fi + run_id="$(gh run list --workflow "$workflow_id" -R "$repo" --limit 1 --json databaseId --jq '.[0].databaseId')" fi [[ -n "$run_id" ]] || { echo "ERROR: failed to resolve verify run id" >&2; return 1; } gh run watch "$run_id" -R "$repo" --exit-status @@ -576,16 +688,18 @@ USAGE cmd_doctor() { local repo="" + local repo_dir="$PWD" local fix=0 local verbose=0 while [[ $# -gt 0 ]]; do case "$1" in --repo) repo="${2:-}"; shift 2 ;; + --repo-dir) repo_dir="${2:-}"; shift 2 ;; --fix) fix=1; shift ;; --verbose) verbose=1; shift ;; -h|--help) cat <<'USAGE' -Usage: ci-self doctor [--repo owner/repo] [--fix] [--verbose] +Usage: ci-self doctor [--repo owner/repo] [--repo-dir path] [--fix] [--verbose] USAGE return 0 ;; @@ -596,6 +710,10 @@ USAGE esac done + [[ -z "$repo_dir" ]] && repo_dir="$PWD" + [[ -n "$CONFIG_PROJECT_DIR" && "$repo_dir" == "$PWD" ]] && repo_dir="$CONFIG_PROJECT_DIR" + repo_dir="$(expand_local_path "$repo_dir")" + local failed=0 local item="" for item in gh colima docker; do @@ -664,12 +782,8 @@ USAGE else if [[ "$fix" -eq 1 ]]; then echo "OK: doctor fix=runner_setup repo=$repo" - if command -v go >/dev/null 2>&1; then - go run ./cmd/runner_setup --apply --repo "$repo" - elif command -v mise >/dev/null 2>&1; then - mise x -- go run ./cmd/runner_setup --apply --repo "$repo" - else - echo "ERROR: doctor fix=runner_setup reason=go_missing" + if ! run_go_cmd run ./cmd/runner_setup --apply --repo "$repo"; then + echo "ERROR: doctor fix=runner_setup reason=runner_setup_failed" failed=1 fi online_count="$(gh api "repos/$repo/actions/runners" --jq '[.runners[] | select(.status=="online")] | length' 2>/dev/null || echo "0")" @@ -686,24 +800,18 @@ USAGE fi fi - if command -v go >/dev/null 2>&1; then - if go run ./cmd/runner_health >/dev/null 2>&1; then - echo "OK: doctor check=runner_health reason=ok" - else + if run_go_cmd run ./cmd/runner_health --repo-dir "$repo_dir" >/dev/null 2>&1; then + echo "OK: doctor check=runner_health reason=ok" + else + if [[ "$verbose" -eq 1 ]]; then echo "ERROR: doctor check=runner_health reason=failed" - [[ "$verbose" -eq 1 ]] && go run ./cmd/runner_health || true - failed=1 - fi - elif command -v mise >/dev/null 2>&1; then - if mise x -- go run ./cmd/runner_health >/dev/null 2>&1; then - echo "OK: doctor check=runner_health reason=ok" + run_go_cmd run ./cmd/runner_health --repo-dir "$repo_dir" || true else echo "ERROR: doctor check=runner_health reason=failed" - [[ "$verbose" -eq 1 ]] && mise x -- go run ./cmd/runner_health || true - failed=1 fi - else - echo "ERROR: doctor check=go reason=missing_go_and_mise" + if ! command -v go >/dev/null 2>&1 && ! command -v mise >/dev/null 2>&1; then + echo "ERROR: doctor check=go reason=missing_go_and_mise" + fi failed=1 fi @@ -802,6 +910,154 @@ default_remote_project_dir() { printf '%s\n' "~/dev/$name" } +default_local_project_dir() { + if [[ -n "$CONFIG_PROJECT_DIR" ]]; then + printf '%s\n' "$CONFIG_PROJECT_DIR" + return + fi + git rev-parse --show-toplevel 2>/dev/null || pwd +} + +sanitize_for_path_segment() { + local raw="$1" + local out + out="$(printf '%s' "$raw" | tr '[:upper:]' '[:lower:]' | tr -c 'a-z0-9._-' '-')" + out="${out#-}" + out="${out%-}" + [[ -z "$out" ]] && out="remote" + printf '%s\n' "$out" +} + +remote_path_for_shell() { + local path="$1" + if [[ "$path" == "~/"* ]]; then + printf '\$HOME/%s\n' "${path#~/}" + else + printf '%q\n' "$path" + fi +} + +run_remote_command_in_dir() { + local host="$1" + local project_dir="$2" + shift 2 + local remote_cmd_q + local script_q + local remote_script + local remote_cd_q + + remote_cmd_q="$(quote_words "$@")" + remote_cd_q="$(remote_path_for_shell "$project_dir")" + printf -v remote_script 'set -euo pipefail; cd %s; %s' "$remote_cd_q" "$remote_cmd_q" + script_q="$(quote_words "$remote_script")" + echo "OK: ssh host=$host dir=$project_dir cmd=$*" + ssh "$host" "bash -lc $script_q" +} + +first_existing_public_key() { + local key="" + for key in \ + "$HOME/.ssh/id_ed25519.pub" \ + "$HOME/.ssh/id_ecdsa.pub" \ + "$HOME/.ssh/id_rsa.pub"; do + if [[ -f "$key" ]]; then + printf '%s\n' "$key" + return 0 + fi + done + return 1 +} + +ensure_ssh_key_auth() { + local host="$1" + if ssh -o BatchMode=yes -o PasswordAuthentication=no -o KbdInteractiveAuthentication=no "$host" "true" >/dev/null 2>&1; then + echo "OK: ssh key_auth host=$host" + return 0 + fi + + echo "ERROR: ssh key-based auth failed for host=$host" >&2 + local pub_key="" + pub_key="$(first_existing_public_key || true)" + if [[ -n "$pub_key" ]]; then + echo "HINT: register your public key to remote ~/.ssh/authorized_keys" >&2 + echo "HINT: cat $pub_key | ssh $host 'mkdir -p ~/.ssh && chmod 700 ~/.ssh && cat >> ~/.ssh/authorized_keys && chmod 600 ~/.ssh/authorized_keys'" >&2 + else + echo "HINT: generate a key first: ssh-keygen -t ed25519 -a 100" >&2 + fi + return 1 +} + +ensure_remote_project_dir() { + local host="$1" + local project_dir="$2" + local remote_dir_q + local script_q + local remote_script + + remote_dir_q="$(remote_path_for_shell "$project_dir")" + printf -v remote_script 'set -euo pipefail; mkdir -p %s' "$remote_dir_q" + script_q="$(quote_words "$remote_script")" + echo "OK: ssh host=$host ensure_dir=$project_dir" + ssh "$host" "bash -lc $script_q" +} + +sync_local_project_to_remote() { + local local_dir="$1" + local host="$2" + local project_dir="$3" + echo "OK: rsync host=$host src=$local_dir dst=$project_dir" + rsync -az --delete \ + --exclude ".local/" \ + --exclude "out/" \ + --exclude "cache/" \ + --exclude ".DS_Store" \ + "$local_dir/" "$host:$project_dir/" +} + +fetch_remote_verify_artifacts() { + local host="$1" + local project_dir="$2" + local out_dir="$3" + + mkdir -p "$out_dir" "$out_dir/logs" + local failed=0 + + if rsync -a "$host:$project_dir/out/verify-full.status" "$out_dir/"; then + echo "OK: fetch status_file=$out_dir/verify-full.status" + else + echo "ERROR: fetch status_file failed host=$host path=$project_dir/out/verify-full.status" >&2 + failed=1 + fi + + if rsync -a "$host:$project_dir/out/logs/" "$out_dir/logs/"; then + echo "OK: fetch logs_dir=$out_dir/logs" + else + echo "ERROR: fetch logs failed host=$host path=$project_dir/out/logs/" >&2 + failed=1 + fi + + return "$failed" +} + +read_verify_status_file() { + local status_file="$1" + if [[ ! -f "$status_file" ]]; then + return 0 + fi + if grep -q "status=OK" "$status_file"; then + echo "OK" + return 0 + fi + if grep -q "status=ERROR" "$status_file"; then + echo "ERROR" + return 0 + fi + if grep -q "status=SKIP" "$status_file"; then + echo "SKIP" + return 0 + fi +} + run_remote_ci_self() { local host="$1" local project_dir="$2" @@ -825,6 +1081,139 @@ run_remote_ci_self() { ssh "$host" "bash -lc $script_q" } +cmd_remote_ci() { + local host="" + local project_dir="" + local local_dir="" + local out_dir="" + local remote_cli="ci-self" + local repo="" + local labels="" + local runner_name="" + local runner_group="" + local discord_webhook_url="" + local skip_bootstrap=0 + local no_sync=0 + local verify_dry_run=1 + local verify_gha_sync=1 + + while [[ $# -gt 0 ]]; do + case "$1" in + --host) host="${2:-}"; shift 2 ;; + --project-dir) project_dir="${2:-}"; shift 2 ;; + --local-dir) local_dir="${2:-}"; shift 2 ;; + --out-dir) out_dir="${2:-}"; shift 2 ;; + --remote-cli) remote_cli="${2:-}"; shift 2 ;; + --repo) repo="${2:-}"; shift 2 ;; + --labels) labels="${2:-}"; shift 2 ;; + --runner-name) runner_name="${2:-}"; shift 2 ;; + --runner-group) runner_group="${2:-}"; shift 2 ;; + --discord-webhook-url) discord_webhook_url="${2:-}"; shift 2 ;; + --verify-dry-run) verify_dry_run="$(config_bool_to_int "${2:-}")"; shift 2 ;; + --verify-gha-sync) verify_gha_sync="$(config_bool_to_int "${2:-}")"; shift 2 ;; + --skip-bootstrap) skip_bootstrap=1; shift ;; + --no-sync) no_sync=1; shift ;; + -h|--help) + cat <<'USAGE' +Usage: ci-self remote-ci --host [--project-dir path] [--local-dir path] [--out-dir path] + [--repo owner/repo] [--remote-cli path] + [--labels csv] [--runner-name name] [--runner-group name] + [--discord-webhook-url url] + [--verify-dry-run 0|1] [--verify-gha-sync 0|1] + [--skip-bootstrap] [--no-sync] +USAGE + return 0 + ;; + *) + echo "ERROR: unknown option for remote-ci: $1" >&2 + return 2 + ;; + esac + done + + [[ -z "$host" ]] && host="$CONFIG_REMOTE_HOST" + [[ "$remote_cli" == "ci-self" && -n "$CONFIG_REMOTE_CLI" ]] && remote_cli="$CONFIG_REMOTE_CLI" + [[ -z "$repo" && -n "$CONFIG_REPO" ]] && repo="$CONFIG_REPO" + [[ -z "$labels" && -n "$CONFIG_LABELS" ]] && labels="$CONFIG_LABELS" + [[ -z "$runner_name" && -n "$CONFIG_RUNNER_NAME" ]] && runner_name="$CONFIG_RUNNER_NAME" + [[ -z "$runner_group" && -n "$CONFIG_RUNNER_GROUP" ]] && runner_group="$CONFIG_RUNNER_GROUP" + [[ -z "$discord_webhook_url" && -n "$CONFIG_DISCORD_WEBHOOK_URL" ]] && discord_webhook_url="$CONFIG_DISCORD_WEBHOOK_URL" + + [[ -n "$host" ]] || { echo "ERROR: --host is required" >&2; return 2; } + [[ -z "$project_dir" ]] && project_dir="$(default_remote_project_dir)" + [[ -z "$local_dir" ]] && local_dir="$(default_local_project_dir)" + local_dir="$(expand_local_path "$local_dir")" + [[ -d "$local_dir" ]] || { echo "ERROR: --local-dir not found: $local_dir" >&2; return 2; } + + if [[ -z "$out_dir" ]]; then + out_dir="$local_dir/out/remote/$(sanitize_for_path_segment "$host")" + fi + out_dir="$(expand_local_path "$out_dir")" + + command -v ssh >/dev/null 2>&1 || { echo "ERROR: ssh command not found" >&2; return 1; } + command -v rsync >/dev/null 2>&1 || { echo "ERROR: rsync command not found" >&2; return 1; } + + ensure_ssh_key_auth "$host" + ensure_remote_project_dir "$host" "$project_dir" + + if [[ "$no_sync" -eq 1 ]]; then + echo "SKIP: sync reason=no_sync_flag" + else + sync_local_project_to_remote "$local_dir" "$host" "$project_dir" + fi + + if [[ "$skip_bootstrap" -eq 1 ]]; then + echo "SKIP: bootstrap reason=skip_bootstrap_flag" + elif [[ -z "$repo" ]]; then + echo "SKIP: bootstrap reason=repo_not_set" + else + local register_args=(register --repo "$repo" --repo-dir "$project_dir" --skip-workflow --skip-dispatch) + [[ -n "$labels" ]] && register_args+=(--labels "$labels") + [[ -n "$runner_name" ]] && register_args+=(--runner-name "$runner_name") + [[ -n "$runner_group" ]] && register_args+=(--runner-group "$runner_group") + [[ -n "$discord_webhook_url" ]] && register_args+=(--discord-webhook-url "$discord_webhook_url") + if ! run_remote_ci_self "$host" "$project_dir" "$remote_cli" "${register_args[@]}"; then + echo "WARN: bootstrap failed; continuing standalone verify" >&2 + fi + fi + + local sha="" + local ref="" + sha="$(git -C "$local_dir" rev-parse HEAD 2>/dev/null || true)" + ref="$(git -C "$local_dir" rev-parse --abbrev-ref HEAD 2>/dev/null || true)" + [[ "$ref" == "HEAD" ]] && ref="" + + local remote_verify_args=(env "VERIFY_DRY_RUN=$verify_dry_run" "VERIFY_GHA_SYNC=$verify_gha_sync" "GITHUB_ACTIONS=true") + [[ -n "$sha" ]] && remote_verify_args+=("GITHUB_SHA=$sha") + [[ -n "$ref" ]] && remote_verify_args+=("GITHUB_REF_NAME=$ref") + remote_verify_args+=(sh ops/ci/run_verify_full.sh) + + local verify_failed=0 + if ! run_remote_command_in_dir "$host" "$project_dir" "${remote_verify_args[@]}"; then + echo "ERROR: remote verify command failed" >&2 + verify_failed=1 + fi + + local fetch_failed=0 + if ! fetch_remote_verify_artifacts "$host" "$project_dir" "$out_dir"; then + fetch_failed=1 + fi + + local status_file="$out_dir/verify-full.status" + local verify_status="" + verify_status="$(read_verify_status_file "$status_file")" + if [[ -z "$verify_status" ]]; then + echo "ERROR: verify status missing in $status_file" >&2 + return 1 + fi + + echo "OK: remote-ci result status=$verify_status status_file=$status_file" + if [[ "$verify_failed" -eq 1 || "$fetch_failed" -eq 1 || "$verify_status" != "OK" ]]; then + return 1 + fi + return 0 +} + cmd_remote_register() { local host="" local project_dir="" @@ -1017,6 +1406,7 @@ main() { register) cmd_register "$@" ;; run-watch) cmd_run_watch "$@" ;; run-focus) cmd_run_watch --all-green --sync-pr-template "$@" ;; + remote-ci) cmd_remote_ci "$@" ;; remote-register) cmd_remote_register "$@" ;; remote-run-focus) cmd_remote_run_focus "$@" ;; remote-up) cmd_remote_up "$@" ;; diff --git a/ops/ci/ci_self_test.go b/ops/ci/ci_self_test.go index 195c206..f496ea4 100644 --- a/ops/ci/ci_self_test.go +++ b/ops/ci/ci_self_test.go @@ -1,6 +1,7 @@ package ci_test import ( + "fmt" "os" "os/exec" "path/filepath" @@ -32,12 +33,20 @@ func runCiSelf(t *testing.T, args ...string) (string, error) { return runCiSelfInDirEnv(t, ".", nil, args...) } +func writeFakeGH(t *testing.T, dir string, body string) { + t.Helper() + ghPath := filepath.Join(dir, "gh") + if err := os.WriteFile(ghPath, []byte(body), 0o755); err != nil { + t.Fatalf("failed to write fake gh: %v", err) + } +} + func TestHelpListsRemoteCommands(t *testing.T) { out, err := runCiSelf(t, "help") if err != nil { t.Fatalf("help failed: %v\noutput:\n%s", err, out) } - for _, want := range []string{"up", "focus", "doctor", "config-init", "remote-register", "remote-run-focus", "remote-up"} { + for _, want := range []string{"up", "focus", "doctor", "config-init", "remote-ci", "remote-register", "remote-run-focus", "remote-up"} { if !strings.Contains(out, want) { t.Fatalf("help output missing %q\noutput:\n%s", want, out) } @@ -151,3 +160,208 @@ func TestRemoteUpUsesConfigHost(t *testing.T) { t.Fatalf("expected config host in output\noutput:\n%s", out) } } + +func TestRunWatchResolvesVerifyWorkflowID(t *testing.T) { + tmp := t.TempDir() + logPath := filepath.Join(tmp, "gh.log") + fakeGH := fmt.Sprintf(`#!/usr/bin/env bash +set -euo pipefail +echo "$*" >> %q +if [[ "${1:-}" == "api" && "${2:-}" == "repos/mt4110/zt-gateway/actions/workflows" ]]; then + printf '42\t.github/workflows/verify.yaml\tverify\n' + exit 0 +fi +if [[ "${1:-}" == "workflow" && "${2:-}" == "run" ]]; then + exit 0 +fi +if [[ "${1:-}" == "run" && "${2:-}" == "list" ]]; then + echo "98765" + exit 0 +fi +if [[ "${1:-}" == "run" && "${2:-}" == "watch" ]]; then + exit 0 +fi +echo "unexpected gh args: $*" >&2 +exit 1 +`, logPath) + writeFakeGH(t, tmp, fakeGH) + + out, err := runCiSelfInDirEnv( + t, + tmp, + []string{"PATH=" + tmp + ":" + os.Getenv("PATH")}, + "run-watch", + "--repo", + "mt4110/zt-gateway", + "--ref", + "main", + ) + if err != nil { + t.Fatalf("run-watch failed: %v\noutput:\n%s", err, out) + } + + logBody, readErr := os.ReadFile(logPath) + if readErr != nil { + t.Fatalf("failed to read gh log: %v", readErr) + } + logText := string(logBody) + if !strings.Contains(logText, "workflow run 42 --ref main -R mt4110/zt-gateway") { + t.Fatalf("expected workflow run to use resolved id\nlog:\n%s", logText) + } + if !strings.Contains(logText, "run list --workflow 42 -R mt4110/zt-gateway") { + t.Fatalf("expected run list to use resolved id\nlog:\n%s", logText) + } +} + +func TestRunWatchMissingVerifyWorkflowShowsHint(t *testing.T) { + tmp := t.TempDir() + fakeGH := `#!/usr/bin/env bash +set -euo pipefail +if [[ "${1:-}" == "api" && "${2:-}" == "repos/mt4110/zt-gateway/actions/workflows" ]]; then + exit 0 +fi +echo "unexpected gh args: $*" >&2 +exit 1 +` + writeFakeGH(t, tmp, fakeGH) + + out, err := runCiSelfInDirEnv( + t, + tmp, + []string{"PATH=" + tmp + ":" + os.Getenv("PATH")}, + "run-watch", + "--repo", + "mt4110/zt-gateway", + "--ref", + "main", + ) + if err == nil { + t.Fatalf("expected run-watch failure when verify workflow is missing\noutput:\n%s", out) + } + if !strings.Contains(out, "ERROR: verify workflow not found in remote repo (mt4110/zt-gateway)") { + t.Fatalf("expected missing-workflow error\noutput:\n%s", out) + } + if !strings.Contains(out, "scaffold_verify_workflow.sh") { + t.Fatalf("expected scaffold hint in output\noutput:\n%s", out) + } +} + +func TestRemoteCIRequiresKeyAuth(t *testing.T) { + tmp := t.TempDir() + localDir := filepath.Join(tmp, "repo") + if err := os.MkdirAll(localDir, 0o755); err != nil { + t.Fatalf("mkdir local repo failed: %v", err) + } + + sshPath := filepath.Join(tmp, "ssh") + sshScript := `#!/usr/bin/env bash +set -euo pipefail +if [[ "$*" == *"BatchMode=yes"* ]]; then + exit 1 +fi +exit 0 +` + if err := os.WriteFile(sshPath, []byte(sshScript), 0o755); err != nil { + t.Fatalf("write fake ssh failed: %v", err) + } + + rsyncPath := filepath.Join(tmp, "rsync") + rsyncScript := "#!/usr/bin/env bash\nexit 0\n" + if err := os.WriteFile(rsyncPath, []byte(rsyncScript), 0o755); err != nil { + t.Fatalf("write fake rsync failed: %v", err) + } + + out, err := runCiSelfInDirEnv( + t, + tmp, + []string{"PATH=" + tmp + ":" + os.Getenv("PATH")}, + "remote-ci", + "--host", + "mini-user@192.168.1.9", + "--local-dir", + localDir, + "--project-dir", + "~/dev/zt-gateway", + "--skip-bootstrap", + ) + if err == nil { + t.Fatalf("expected key auth failure\noutput:\n%s", out) + } + if !strings.Contains(out, "ERROR: ssh key-based auth failed") { + t.Fatalf("expected key auth error output\noutput:\n%s", out) + } +} + +func TestRemoteCIRunsSyncVerifyAndFetch(t *testing.T) { + tmp := t.TempDir() + localDir := filepath.Join(tmp, "repo") + if err := os.MkdirAll(filepath.Join(localDir, "ops", "ci"), 0o755); err != nil { + t.Fatalf("mkdir local repo failed: %v", err) + } + if err := os.WriteFile(filepath.Join(localDir, "ops", "ci", "run_verify_full.sh"), []byte("#!/usr/bin/env sh\nexit 0\n"), 0o755); err != nil { + t.Fatalf("write local verify script failed: %v", err) + } + + logPath := filepath.Join(tmp, "tool.log") + sshPath := filepath.Join(tmp, "ssh") + sshScript := fmt.Sprintf(`#!/usr/bin/env bash +set -euo pipefail +echo "ssh $*" >> %q +if [[ "$*" == *"BatchMode=yes"* ]]; then + exit 0 +fi +exit 0 +`, logPath) + if err := os.WriteFile(sshPath, []byte(sshScript), 0o755); err != nil { + t.Fatalf("write fake ssh failed: %v", err) + } + + rsyncPath := filepath.Join(tmp, "rsync") + rsyncScript := fmt.Sprintf(`#!/usr/bin/env bash +set -euo pipefail +echo "rsync $*" >> %q +src="${@: -2:1}" +dst="${@: -1}" +if printf '%%s' "$src" | grep -q '/out/verify-full.status$'; then + mkdir -p "$dst" + cat > "${dst%%/}/verify-full.status" <<'EOF' +status=OK +EOF + exit 0 +fi +if printf '%%s' "$src" | grep -q '/out/logs/$'; then + mkdir -p "${dst%%/}" + echo "ok" > "${dst%%/}/verify.log" + exit 0 +fi +exit 0 +`, logPath) + if err := os.WriteFile(rsyncPath, []byte(rsyncScript), 0o755); err != nil { + t.Fatalf("write fake rsync failed: %v", err) + } + + out, err := runCiSelfInDirEnv( + t, + tmp, + []string{"PATH=" + tmp + ":" + os.Getenv("PATH")}, + "remote-ci", + "--host", + "mini-user@192.168.1.9", + "--local-dir", + localDir, + "--project-dir", + "~/dev/zt-gateway", + "--skip-bootstrap", + ) + if err != nil { + t.Fatalf("remote-ci failed: %v\noutput:\n%s", err, out) + } + if !strings.Contains(out, "OK: remote-ci result status=OK") { + t.Fatalf("expected success status output\noutput:\n%s", out) + } + + statusPath := filepath.Join(localDir, "out", "remote", "mini-user-192.168.1.9", "verify-full.status") + if _, statErr := os.Stat(statusPath); statErr != nil { + t.Fatalf("expected fetched status file at %s: %v", statusPath, statErr) + } +} diff --git a/ops/ci/onboard_and_verify.sh b/ops/ci/onboard_and_verify.sh index 0594151..32a649c 100755 --- a/ops/ci/onboard_and_verify.sh +++ b/ops/ci/onboard_and_verify.sh @@ -34,8 +34,10 @@ cd "$ROOT_DIR" run_go() { if command -v go >/dev/null 2>&1; then - go "$@" - return + if go "$@"; then + return + fi + echo "WARN: go command failed; retrying via mise" >&2 fi if command -v mise >/dev/null 2>&1; then mise x -- go "$@" @@ -45,6 +47,24 @@ run_go() { exit 1 } +ensure_nix_on_path() { + if command -v nix >/dev/null 2>&1; then + return 0 + fi + + local daemon_profile="/nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh" + if [[ -f "$daemon_profile" ]]; then + # shellcheck disable=SC1091 + . "$daemon_profile" + fi + + if command -v nix >/dev/null 2>&1; then + return 0 + fi + + export PATH="/nix/var/nix/profiles/default/bin:/nix/var/nix/profiles/per-user/${USER:-$(id -un)}/profile/bin:$PATH" +} + REPO="" REPO_DIR="" REF="main" @@ -116,6 +136,14 @@ if [[ -z "$REPO" ]]; then exit 2 fi +if [[ -n "$REPO_DIR" ]]; then + if [[ ! -d "$REPO_DIR" ]]; then + echo "ERROR: --repo-dir not found: $REPO_DIR" >&2 + exit 2 + fi + REPO_DIR="$(cd "$REPO_DIR" && pwd)" +fi + echo "OK: target_repo=$REPO ref=$REF" if ! command -v gh >/dev/null 2>&1; then @@ -134,7 +162,23 @@ fi run_go run ./cmd/runner_setup "${runner_setup_args[@]}" echo "OK: runner_health_start" -run_go run ./cmd/runner_health +if [[ -n "$REPO_DIR" ]]; then + run_go run ./cmd/runner_health --repo-dir "$REPO_DIR" +else + run_go run ./cmd/runner_health +fi + +if [[ -n "$REPO_DIR" && -f "$REPO_DIR/flake.nix" ]]; then + ensure_nix_on_path + if ! command -v nix >/dev/null 2>&1; then + echo "ERROR: nix is required for flake-based repos (found: $REPO_DIR/flake.nix)" >&2 + echo "HINT: install nix first, then retry 'ci-self up'" >&2 + echo "HINT: if installed already, check /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh" >&2 + echo "HINT: https://nixos.org/download/" >&2 + exit 1 + fi + echo "OK: runner_health check=nix reason=available" +fi OWNER="$(gh repo view "$REPO" --json owner --jq .owner.login)" echo "OK: set_variable SELF_HOSTED_OWNER=$OWNER" diff --git a/ops/ci/scaffold_verify_workflow.sh b/ops/ci/scaffold_verify_workflow.sh index ac185b4..ef7f3d5 100755 --- a/ops/ci/scaffold_verify_workflow.sh +++ b/ops/ci/scaffold_verify_workflow.sh @@ -143,8 +143,16 @@ jobs: - name: Verify (Nix) shell: bash run: | + if ! command -v nix >/dev/null 2>&1 && [[ -f /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh ]]; then + # shellcheck disable=SC1091 + . /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh + fi + if ! command -v nix >/dev/null 2>&1; then + export PATH="/nix/var/nix/profiles/default/bin:/nix/var/nix/profiles/per-user/${USER:-$(id -un)}/profile/bin:$PATH" + fi if ! command -v nix >/dev/null 2>&1; then echo "ERROR: nix is required on this runner" + echo "HINT: install nix and/or ensure nix-daemon profile exists" exit 1 fi nix shell nixpkgs#go nixpkgs#uv nixpkgs#python312 -c env -u GOROOT -u GOTOOLDIR -u GOENV bash -c ' @@ -207,6 +215,12 @@ fi mkdir -p "$WORKFLOW_DIR" if [[ -f "$WORKFLOW_FILE" && "$FORCE" -ne 1 ]]; then echo "SKIP: $WORKFLOW_FILE already exists (use --force to overwrite)" + if [[ "$MODE" == "nix" ]]; then + if ! grep -Fq "nix-daemon.sh" "$WORKFLOW_FILE"; then + echo "WARN: existing verify.yml may not load nix in non-login shells" + echo "HINT: rerun with --force to refresh verify.yml template" + fi + fi if [[ "$UPDATE_GITIGNORE" -eq 1 ]]; then touch "$GITIGNORE_FILE" for entry in ".local/" "out/" "cache/"; do