Sync Knowledge #2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sync Knowledge
#
# Mirrors documentation from upstream repositories into this repository and
# opens a pull request with the result.
#
# Inputs read from the repository:
#   .github/sync-sources.json  - `.sources[]` entries with `id`, `repo`,
#                                `branch`, `sourcePath`, `targetPath`, optional
#                                `exclude[]` and optional `generate.*` settings.
#   .github/sync-state.json    - `.sources[<id>].lastSyncedCommit` /
#                                `.lastSyncedAt`, rewritten after each sync.
#
# Files handed from the "Sync sources" step to later steps:
#   /tmp/sync_changed_ids      - one source id per line, for every source that
#                                was synced in this run.
#   /tmp/pr_body.md            - Markdown body for the pull request.
name: Sync Knowledge

on:
  schedule:
    - cron: '0 8 * * 1-5' # Weekday mornings at 08:00 UTC
  workflow_dispatch:
    inputs:
      source_id:
        description: 'Sync a specific source by ID (leave empty for all)'
        type: string
        required: false
        default: ''

permissions:
  contents: write
  pull-requests: write

jobs:
  sync-repo-knowledge:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # Outputs:
      #   has_changes - 'true' when at least one source was synced.
      #   pr_title    - pull request title (only set when has_changes is true).
      - name: Sync sources
        id: sync
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          SOURCE_FILTER: ${{ inputs.source_id || '' }}
          WORKFLOW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        run: |
          set -euo pipefail

          CONFIG=".github/sync-sources.json"
          STATE=".github/sync-state.json"
          CHANGED_IDS_FILE="/tmp/sync_changed_ids"
          PR_SECTIONS_FILE="/tmp/sync_pr_sections.md"
          PR_BODY_FILE="/tmp/pr_body.md"

          # Start from a clean slate so leftovers from an earlier run on a
          # reused runner can never leak into this run's pull request.
          rm -f -- "$CHANGED_IDS_FILE" "$PR_SECTIONS_FILE" "$PR_BODY_FILE"

          # First run: the state file may not exist yet.
          if [ ! -f "$STATE" ]; then
            echo '{"sources":{}}' > "$STATE"
          fi

          # Read all source entries (or filter to one)
          if [ -n "$SOURCE_FILTER" ]; then
            sources=$(jq -c --arg id "$SOURCE_FILTER" '.sources[] | select(.id == $id)' "$CONFIG")
          else
            sources=$(jq -c '.sources[]' "$CONFIG")
          fi

          if [ -z "$sources" ]; then
            echo "No matching sources found."
            echo "has_changes=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          # The list is read on fd 3 so that commands inside the loop cannot
          # consume the remaining entries through stdin.
          while IFS= read -r source <&3; do
            id=$(jq -r '.id' <<< "$source")
            repo=$(jq -r '.repo' <<< "$source")
            branch=$(jq -r '.branch' <<< "$source")
            source_path=$(jq -r '.sourcePath' <<< "$source")
            target_path=$(jq -r '.targetPath' <<< "$source")

            echo "::group::Processing source: $id"
            echo " repo=$repo branch=$branch"
            echo " source=$source_path -> target=$target_path"

            # --- Get latest commit SHA ---
            latest_sha=$(gh api "repos/$repo/commits/$branch" --jq '.sha')
            echo " Latest commit: $latest_sha"

            # --- Read last synced SHA ---
            last_sha=$(jq -r --arg id "$id" '.sources[$id].lastSyncedCommit // ""' "$STATE")
            echo " Last synced: ${last_sha:-<never>}"

            # --- Skip if unchanged ---
            if [ "$latest_sha" = "$last_sha" ]; then
              echo " No new commits. Skipping."
              echo "::endgroup::"
              continue
            fi

            # --- Sparse checkout source repo ---
            src_dir=$(mktemp -d)
            echo " Cloning $repo (sparse: $source_path)..."
            # --quiet drops progress noise but keeps real errors visible.
            git clone --quiet --filter=blob:none --sparse --branch "$branch" --depth 1 \
              "https://github.com/$repo.git" "$src_dir"
            git -C "$src_dir" sparse-checkout set "$source_path"

            # --- Build rsync exclude args ---
            # An array keeps every pattern as a single argument; patterns such
            # as '*.tmp' must reach rsync verbatim instead of being expanded
            # by the shell.
            mapfile -t exclude_patterns < <(jq -r '.exclude[]? // empty' <<< "$source")
            exclude_args=()
            for pattern in "${exclude_patterns[@]}"; do
              exclude_args+=("--exclude=$pattern")
            done

            # --- Sync files ---
            mkdir -p "$target_path"
            rsync -av --delete "${exclude_args[@]}" "$src_dir/$source_path/" "$target_path/"
            rm -rf -- "$src_dir"

            # --- Get commit log, commit count and changed-file count ---
            # One compare request feeds all three values. Failure is not
            # fatal: the report falls back to placeholders.
            commit_log=""
            commit_count="?"
            changed_files="?"
            if [ -n "$last_sha" ]; then
              echo " Fetching commit log ${last_sha:0:7}..${latest_sha:0:7}..."
              if compare_json=$(gh api "repos/$repo/compare/${last_sha}...${latest_sha}" 2>/dev/null); then
                commit_log=$(jq -r '[.commits[] | "- [`\(.sha[0:7])`](\(.html_url)) \(.commit.message | split("\n") | .[0])"] | join("\n")' <<< "$compare_json")
                commit_count=$(jq -r '.commits | length' <<< "$compare_json")
                # Count only the files that live under sourcePath.
                changed_files=$(jq -r --arg sp "$source_path" \
                  '[(.files // [])[] | select(.filename | startswith($sp))] | length' <<< "$compare_json")
              else
                commit_log=" (Could not fetch commit log)"
              fi
            else
              commit_log="Initial sync from $repo ($branch)"
              commit_count="initial"
              changed_files="all"
            fi

            # --- Update sync state ---
            # Two separate commands: chained with '&&', a failing jq would be
            # exempt from 'set -e' and the state would silently stay stale.
            now=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
            jq --arg id "$id" --arg sha "$latest_sha" --arg ts "$now" \
              '.sources[$id] = {"lastSyncedCommit": $sha, "lastSyncedAt": $ts}' \
              "$STATE" > "${STATE}.tmp"
            mv -- "${STATE}.tmp" "$STATE"
            echo " Synced. Updated state to $latest_sha"

            # --- Record per-source PR info (aggregated after the loop) ---
            printf '%s\n' "$id" >> "$CHANGED_IDS_FILE"
            {
              echo "## Source: \`$id\`"
              echo ""
              echo "**Repository:** [$repo](https://github.com/$repo) (branch: \`$branch\`)"
              echo "**Path:** \`$source_path\` → \`$target_path\`"
              echo "**Commits:** $commit_count new (${last_sha:0:7}..${latest_sha:0:7})"
              echo "**Files in source path changed:** $changed_files"
              echo ""
              echo "### Commit Log"
              echo ""
              printf '%s\n' "$commit_log"
              echo ""
              echo "---"
            } >> "$PR_SECTIONS_FILE"

            echo "::endgroup::"
          done 3<<< "$sources"

          # --- Aggregate results ---
          if [ -s "$CHANGED_IDS_FILE" ]; then
            changed_ids=$(paste -sd, "$CHANGED_IDS_FILE" | sed 's/,/, /g')
            count=$(wc -l < "$CHANGED_IDS_FILE" | tr -d ' ')

            # The PR body goes to a file (too long for an env var / output).
            {
              printf '# Knowledge Sync Report\n\n'
              printf '**Synced sources:** %s\n' "$changed_ids"
              printf '**Workflow run:** [View run](%s)\n\n' "$WORKFLOW_RUN_URL"
              cat "$PR_SECTIONS_FILE"
            } > "$PR_BODY_FILE"

            echo "has_changes=true" >> "$GITHUB_OUTPUT"
            echo "pr_title=[Knowledge Sync] $changed_ids: $count source(s) updated" >> "$GITHUB_OUTPUT"
          else
            echo "has_changes=false" >> "$GITHUB_OUTPUT"
          fi

      # --- Generate experts for sources that need it ---
      - name: Setup Node.js
        if: steps.sync.outputs.has_changes == 'true'
        uses: actions/setup-node@v4
        with:
          node-version: '22'

      - name: Generate experts (if configured)
        if: steps.sync.outputs.has_changes == 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          COPILOT_GITHUB_TOKEN: ${{ secrets.PAT }}
        run: |
          set -euo pipefail

          CONFIG=".github/sync-sources.json"
          CHANGED_IDS_FILE="/tmp/sync_changed_ids"

          if [ ! -s "$CHANGED_IDS_FILE" ]; then
            echo "No synced sources recorded. Nothing to generate."
            exit 0
          fi

          # Only sources that were actually synced in this run AND have
          # generate.enabled = true are regenerated. The changed-id list
          # already reflects the optional source_id filter of the sync step.
          changed_json=$(jq -R -s 'split("\n") | map(select(length > 0))' "$CHANGED_IDS_FILE")
          gen_sources=$(jq -c --argjson changed "$changed_json" \
            '.sources[]
             | select(.generate.enabled == true)
             | select((.id | tostring) as $id | any($changed[]; . == $id))' "$CONFIG")

          if [ -z "$gen_sources" ]; then
            echo "No sources require expert generation."
            exit 0
          fi

          # Install Copilot CLI
          echo "Installing GitHub Copilot CLI..."
          npm install -g @github/copilot

          # Read on fd 3 so the CLI cannot swallow the remaining entries
          # through stdin.
          while IFS= read -r source <&3; do
            id=$(jq -r '.id' <<< "$source")
            target_path=$(jq -r '.targetPath' <<< "$source")
            template_path=$(jq -r '.generate.templatePath' <<< "$source")
            prompt_path=$(jq -r '.generate.promptPath' <<< "$source")
            output_path=$(jq -r '.generate.outputPath' <<< "$source")
            skill_name=$(jq -r '.generate.skillName // .id' <<< "$source")

            echo "::group::Generating experts for: $id"

            # A missing prompt is treated like a generation failure: warn and
            # let the PR carry the synced content only.
            if [ ! -r "$prompt_path" ]; then
              echo "::warning::Prompt file '$prompt_path' not readable for $id. PR will include synced content only."
              echo "::endgroup::"
              continue
            fi

            # Read prompt template
            prompt_template=$(cat "$prompt_path")

            # List synced source files for context: sorted first so the
            # selection is deterministic, then capped at 200 entries. awk reads
            # its whole input, so no upstream command is killed by SIGPIPE
            # (which would fail the step under pipefail).
            source_files=$(find "$target_path" -type f -name '*.md' | LC_ALL=C sort | awk 'NR <= 200')

            # Substitute template variables. The replacement is quoted so that
            # a '&' inside a value is inserted literally.
            prompt=${prompt_template//\{\{SOURCE_PATH\}\}/"$target_path"}
            prompt=${prompt//\{\{OUTPUT_PATH\}\}/"$output_path"}
            prompt=${prompt//\{\{TEMPLATE_PATH\}\}/"$template_path"}
            prompt=${prompt//\{\{SKILL_NAME\}\}/"$skill_name"}

            # Append file listing
            prompt="$prompt"$'\n\n'"## Source files found:"$'\n'"$source_files"

            echo " Running Copilot CLI to generate experts..."
            mkdir -p "$output_path"

            # Run Copilot CLI — graceful degradation on failure
            copilot --no-custom-instructions --no-ask-user --allow-all \
              --prompt "$prompt" 2>&1 || {
              echo "::warning::Copilot CLI failed for $id. PR will include synced content only."
            }

            echo "::endgroup::"
          done 3<<< "$gen_sources"

      # --- Create Pull Request ---
      - name: Create Pull Request
        if: steps.sync.outputs.has_changes == 'true'
        uses: peter-evans/create-pull-request@v7
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          commit-message: 'sync: update knowledge from upstream sources'
          branch: sync/knowledge
          delete-branch: true
          title: ${{ steps.sync.outputs.pr_title }}
          body-path: /tmp/pr_body.md
          labels: knowledge-sync