# Sync Knowledge #6
# (GitHub "hidden/bidirectional Unicode" viewer banner removed — it was UI
# boilerplate from the paste, not part of this workflow file.)
name: Sync Knowledge

on:
  schedule:
    - cron: '0 8 * * 1-5'  # Weekday mornings at 08:00 UTC
  workflow_dispatch:
    inputs:
      source_id:
        description: 'Sync a specific source by ID (leave empty for all)'
        type: string
        required: false
        default: ''

permissions:
  contents: write
  pull-requests: write
jobs:
  sync-repo-knowledge:
    runs-on: ubuntu-latest
    env:
      FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Full history so the later git diff / PR branch operations work.
          fetch-depth: 0
| - name: Sync sources | |
| id: sync | |
| env: | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| SOURCE_FILTER: ${{ inputs.source_id || '' }} | |
| WORKFLOW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} | |
| run: | | |
| set -euo pipefail | |
| CONFIG=".github/sync-sources.json" | |
| STATE=".github/sync-state.json" | |
| HAS_CHANGES="false" | |
| PR_TITLE="" | |
| PR_BODY="" | |
| # Read all source entries (or filter to one) | |
| if [ -n "$SOURCE_FILTER" ]; then | |
| sources=$(jq -c --arg id "$SOURCE_FILTER" '.sources[] | select(.id == $id)' "$CONFIG") | |
| else | |
| sources=$(jq -c '.sources[]' "$CONFIG") | |
| fi | |
| if [ -z "$sources" ]; then | |
| echo "No matching sources found." | |
| echo "has_changes=false" >> "$GITHUB_OUTPUT" | |
| exit 0 | |
| fi | |
| echo "$sources" | while IFS= read -r source; do | |
| id=$(echo "$source" | jq -r '.id') | |
| repo=$(echo "$source" | jq -r '.repo') | |
| branch=$(echo "$source" | jq -r '.branch') | |
| source_path=$(echo "$source" | jq -r '.sourcePath') | |
| target_path=$(echo "$source" | jq -r '.targetPath') | |
| generate_enabled=$(echo "$source" | jq -r '.generate.enabled // false') | |
| echo "::group::Processing source: $id" | |
| echo " repo=$repo branch=$branch" | |
| echo " source=$source_path -> target=$target_path" | |
| # --- Get latest commit SHA --- | |
| latest_sha=$(gh api "repos/$repo/commits/$branch" --jq '.sha') | |
| echo " Latest commit: $latest_sha" | |
| # --- Read last synced SHA --- | |
| last_sha=$(jq -r --arg id "$id" '.sources[$id].lastSyncedCommit // ""' "$STATE") | |
| echo " Last synced: ${last_sha:-<never>}" | |
| # --- Skip if unchanged --- | |
| if [ "$latest_sha" = "$last_sha" ]; then | |
| echo " No new commits. Skipping." | |
| echo "::endgroup::" | |
| continue | |
| fi | |
| # --- Sparse checkout source repo --- | |
| src_dir=$(mktemp -d) | |
| echo " Cloning $repo (sparse: $source_path)..." | |
| git clone --filter=blob:none --sparse --branch "$branch" --depth 1 \ | |
| "https://github.com/$repo.git" "$src_dir" 2>/dev/null | |
| (cd "$src_dir" && git sparse-checkout set "$source_path") | |
| # --- Build rsync exclude args --- | |
| exclude_args="" | |
| for pattern in $(echo "$source" | jq -r '.exclude[]? // empty'); do | |
| exclude_args="$exclude_args --exclude=$pattern" | |
| done | |
| # --- Sync files --- | |
| mkdir -p "$target_path" | |
| rsync -av --delete $exclude_args "$src_dir/$source_path/" "$target_path/" | |
| rm -rf "$src_dir" | |
| # --- Get commit log --- | |
| commit_log="" | |
| if [ -n "$last_sha" ]; then | |
| echo " Fetching commit log ${last_sha:0:7}..${latest_sha:0:7}..." | |
| commit_log=$(gh api "repos/$repo/compare/${last_sha}...${latest_sha}" \ | |
| --jq '[.commits[] | "- [`\(.sha[0:7])`](\(.html_url)) \(.commit.message | split("\n") | .[0])"] | join("\n")' \ | |
| 2>/dev/null || echo " (Could not fetch commit log)") | |
| # Filter to commits that touch sourcePath | |
| changed_files=$(gh api "repos/$repo/compare/${last_sha}...${latest_sha}" \ | |
| --jq --arg sp "$source_path" '[.files[] | select(.filename | startswith($sp)) | .filename] | length' \ | |
| 2>/dev/null || echo "?") | |
| else | |
| commit_log="Initial sync from $repo ($branch)" | |
| changed_files="all" | |
| fi | |
| # --- Update sync state --- | |
| now=$(date -u +"%Y-%m-%dT%H:%M:%SZ") | |
| jq --arg id "$id" --arg sha "$latest_sha" --arg ts "$now" \ | |
| '.sources[$id] = {"lastSyncedCommit": $sha, "lastSyncedAt": $ts}' \ | |
| "$STATE" > "${STATE}.tmp" && mv "${STATE}.tmp" "$STATE" | |
| echo " Synced. Updated state to $latest_sha" | |
| # --- Build PR metadata --- | |
| # Count commits | |
| if [ -n "$last_sha" ]; then | |
| commit_count=$(gh api "repos/$repo/compare/${last_sha}...${latest_sha}" \ | |
| --jq '.commits | length' 2>/dev/null || echo "?") | |
| else | |
| commit_count="initial" | |
| fi | |
| # Save per-source PR info to temp files (for aggregation after loop) | |
| echo "$id" >> /tmp/sync_changed_ids | |
| { | |
| echo "## Source: \`$id\`" | |
| echo "" | |
| echo "**Repository:** [$repo](https://github.com/$repo) (branch: \`$branch\`)" | |
| echo "**Path:** \`$source_path\` → \`$target_path\`" | |
| echo "**Commits:** $commit_count new (${last_sha:0:7}..${latest_sha:0:7})" | |
| echo "**Files in source path changed:** $changed_files" | |
| echo "" | |
| echo "### Commit Log" | |
| echo "" | |
| echo "$commit_log" | |
| echo "" | |
| echo "---" | |
| } >> "/tmp/sync_pr_body_$id" | |
| echo "::endgroup::" | |
| done | |
| # --- Aggregate results --- | |
| if [ -f /tmp/sync_changed_ids ]; then | |
| changed_ids=$(cat /tmp/sync_changed_ids | tr '\n' ', ' | sed 's/,$//') | |
| count=$(wc -l < /tmp/sync_changed_ids | tr -d ' ') | |
| # Combine PR bodies | |
| full_body="# Knowledge Sync Report"$'\n\n' | |
| full_body+="**Synced sources:** $changed_ids"$'\n' | |
| full_body+="**Workflow run:** [View run]($WORKFLOW_RUN_URL)"$'\n\n' | |
| for id_file in /tmp/sync_pr_body_*; do | |
| full_body+=$(cat "$id_file") | |
| full_body+=$'\n' | |
| done | |
| echo "has_changes=true" >> "$GITHUB_OUTPUT" | |
| echo "pr_title=[Knowledge Sync] $changed_ids: $count source(s) updated" >> "$GITHUB_OUTPUT" | |
| # Write PR body to file (too long for env var) | |
| echo "$full_body" > /tmp/pr_body.md | |
| else | |
| echo "has_changes=false" >> "$GITHUB_OUTPUT" | |
| fi | |
| # --- Generate experts for sources that need it --- | |
| - name: Setup Node.js | |
| if: steps.sync.outputs.has_changes == 'true' | |
| uses: actions/setup-node@v4 | |
| with: | |
| node-version: '22' | |
| - name: Generate experts (if configured) | |
| if: steps.sync.outputs.has_changes == 'true' | |
| env: | |
| GH_TOKEN: ${{ secrets.PAT || secrets.GITHUB_TOKEN }} | |
| SOURCE_FILTER: ${{ inputs.source_id || '' }} | |
| run: | | |
| set -euo pipefail | |
| CONFIG=".github/sync-sources.json" | |
| # Find sources with generate.enabled = true | |
| if [ -n "$SOURCE_FILTER" ]; then | |
| gen_sources=$(jq -c --arg id "$SOURCE_FILTER" \ | |
| '.sources[] | select(.id == $id and .generate.enabled == true)' "$CONFIG") | |
| else | |
| gen_sources=$(jq -c '.sources[] | select(.generate.enabled == true)' "$CONFIG") | |
| fi | |
| if [ -z "$gen_sources" ]; then | |
| echo "No sources require expert generation." | |
| exit 0 | |
| fi | |
| # Install Copilot CLI | |
| echo "Installing GitHub Copilot CLI..." | |
| npm install -g @github/copilot | |
| echo "$gen_sources" | while IFS= read -r source; do | |
| id=$(echo "$source" | jq -r '.id') | |
| target_path=$(echo "$source" | jq -r '.targetPath') | |
| template_path=$(echo "$source" | jq -r '.generate.templatePath') | |
| prompt_path=$(echo "$source" | jq -r '.generate.promptPath') | |
| output_path=$(echo "$source" | jq -r '.generate.outputPath') | |
| skill_name=$(echo "$source" | jq -r '.generate.skillName // .id') | |
| echo "::group::Generating experts for: $id" | |
| # Read prompt template and perform variable substitution | |
| prompt_template=$(cat "$prompt_path") | |
| # List synced source files for context | |
| source_files=$(find "$target_path" -type f -name '*.md' | head -200 | sort) | |
| # Substitute template variables | |
| prompt="${prompt_template//\{\{SOURCE_PATH\}\}/$target_path}" | |
| prompt="${prompt//\{\{OUTPUT_PATH\}\}/$output_path}" | |
| prompt="${prompt//\{\{TEMPLATE_PATH\}\}/$template_path}" | |
| prompt="${prompt//\{\{SKILL_NAME\}\}/$skill_name}" | |
| # Append file listing | |
| prompt="$prompt"$'\n\n'"## Source files found:"$'\n'"$source_files" | |
| echo " Running Copilot CLI to generate experts..." | |
| mkdir -p "$output_path" | |
| # Run Copilot CLI — graceful degradation on failure | |
| copilot -p "$prompt" --allow-all-tools 2>&1 || { | |
| echo "::warning::Copilot CLI failed for $id. PR will include synced content only." | |
| } | |
| echo "::endgroup::" | |
| done | |
| # --- Assess and update derived files --- | |
| - name: Assess derived files | |
| id: derived | |
| if: steps.sync.outputs.has_changes == 'true' | |
| env: | |
| GH_TOKEN: ${{ secrets.PAT || secrets.GITHUB_TOKEN }} | |
| HAS_PAT: ${{ secrets.PAT && 'true' || 'false' }} | |
| SOURCE_FILTER: ${{ inputs.source_id || '' }} | |
| run: | | |
| set -euo pipefail | |
| CONFIG=".github/sync-sources.json" | |
| PROMPT_TEMPLATE=".github/templates/assess-derived-file-prompt.md" | |
| VALIDATE_SCRIPT=".github/scripts/validate-derived.sh" | |
| chmod +x "$VALIDATE_SCRIPT" | |
| # Collect sources that had changes (written by sync step) | |
| if [ ! -f /tmp/sync_changed_ids ]; then | |
| echo "No changed sources. Skipping derived file assessment." | |
| exit 0 | |
| fi | |
| # Check if Copilot CLI is available (requires PAT) | |
| HAS_COPILOT="false" | |
| if [ "$HAS_PAT" = "true" ]; then | |
| if command -v copilot &>/dev/null; then | |
| HAS_COPILOT="true" | |
| else | |
| echo "Installing GitHub Copilot CLI..." | |
| npm install -g @github/copilot 2>/dev/null && HAS_COPILOT="true" || true | |
| fi | |
| fi | |
| # Initialize derived files report | |
| echo "## Derived Files" > /tmp/derived_report.md | |
| echo "" >> /tmp/derived_report.md | |
| echo "| File | Status | Details |" >> /tmp/derived_report.md | |
| echo "|------|--------|---------|" >> /tmp/derived_report.md | |
| derived_updated="false" | |
| while IFS= read -r changed_id; do | |
| # Get derivedFiles for this source | |
| derived_files=$(jq -c --arg id "$changed_id" \ | |
| '.sources[] | select(.id == $id) | .derivedFiles[]?' "$CONFIG") | |
| [ -z "$derived_files" ] && continue | |
| echo "$derived_files" | while IFS= read -r df; do | |
| df_path=$(echo "$df" | jq -r '.path') | |
| df_desc=$(echo "$df" | jq -r '.description') | |
| context_paths=$(echo "$df" | jq -r '.contextPaths[]') | |
| echo "::group::Assessing derived file: $df_path" | |
| if [ ! -f "$df_path" ]; then | |
| echo " Derived file does not exist. Skipping." | |
| echo "| \`$df_path\` | ⏭️ Skipped | File does not exist |" >> /tmp/derived_report.md | |
| echo "::endgroup::" | |
| continue | |
| fi | |
| if [ "$HAS_COPILOT" != "true" ]; then | |
| echo " Copilot CLI not available. Skipping assessment." | |
| echo "| \`$df_path\` | ⏭️ Skipped | Copilot CLI not available (set PAT secret) |" >> /tmp/derived_report.md | |
| echo "::endgroup::" | |
| continue | |
| fi | |
| # Build directory listing from contextPaths | |
| dir_listing="" | |
| for ctx in $context_paths; do | |
| if [ -d "$ctx" ]; then | |
| dir_listing+="$ctx/"$'\n' | |
| dir_listing+=$(find "$ctx" -type f -name '*.md' | sort | sed "s|^| |") | |
| dir_listing+=$'\n' | |
| fi | |
| done | |
| # Get changed files from git diff (staged + unstaged) | |
| changed_files_list=$(git diff --name-only HEAD -- 2>/dev/null || echo "(initial sync)") | |
| # Read current derived file | |
| current_content=$(cat "$df_path") | |
| # Build prompt from template | |
| prompt=$(cat "$PROMPT_TEMPLATE") | |
| prompt="${prompt//\{\{DERIVED_PATH\}\}/$df_path}" | |
| prompt="${prompt//\{\{DESCRIPTION\}\}/$df_desc}" | |
| prompt="${prompt//\{\{CURRENT_CONTENT\}\}/$current_content}" | |
| prompt="${prompt//\{\{DIR_LISTING\}\}/$dir_listing}" | |
| prompt="${prompt//\{\{CHANGED_FILES\}\}/$changed_files_list}" | |
| # Back up current file | |
| cp "$df_path" "${df_path}.bak" | |
| # Run Copilot CLI for assessment | |
| echo " Running Copilot CLI assessment..." | |
| copilot_output=$(copilot -p "$prompt" --allow-all-tools 2>&1) || { | |
| echo "::warning::Copilot CLI failed for $df_path. Keeping existing file." | |
| echo "| \`$df_path\` | ⚠️ Error | Copilot CLI failed |" >> /tmp/derived_report.md | |
| rm -f "${df_path}.bak" | |
| echo "::endgroup::" | |
| continue | |
| } | |
| # Parse verdict | |
| if echo "$copilot_output" | grep -q "VERDICT: COMPATIBLE"; then | |
| echo " Derived file is compatible. No changes needed." | |
| echo "| \`$df_path\` | ✅ Compatible | No changes needed |" >> /tmp/derived_report.md | |
| rm -f "${df_path}.bak" | |
| echo "::endgroup::" | |
| continue | |
| fi | |
| # Extract regenerated content | |
| if echo "$copilot_output" | grep -q "REGENERATED_FILE_START"; then | |
| regen_content=$(echo "$copilot_output" | sed -n '/REGENERATED_FILE_START/,/REGENERATED_FILE_END/p' | sed '1d;$d') | |
| if [ -z "$regen_content" ]; then | |
| echo "::warning::Empty regeneration output for $df_path. Keeping existing file." | |
| echo "| \`$df_path\` | ⚠️ Error | Empty regeneration output |" >> /tmp/derived_report.md | |
| mv "${df_path}.bak" "$df_path" | |
| echo "::endgroup::" | |
| continue | |
| fi | |
| # Write regenerated content | |
| echo "$regen_content" > "$df_path" | |
| # Validate | |
| if bash "$VALIDATE_SCRIPT" "$df_path"; then | |
| echo " Regenerated and validated successfully." | |
| # Get a short summary of changes | |
| diff_stat=$(diff --brief "${df_path}.bak" "$df_path" 2>/dev/null && echo "identical" || echo "updated") | |
| echo "| \`$df_path\` | ✅ Updated | Regenerated and validated |" >> /tmp/derived_report.md | |
| rm -f "${df_path}.bak" | |
| echo "true" > /tmp/derived_updated | |
| else | |
| echo "::warning::Validation failed for regenerated $df_path. Reverting to previous version." | |
| mv "${df_path}.bak" "$df_path" | |
| echo "| \`$df_path\` | ⚠️ Kept old | Validation failed on regenerated version |" >> /tmp/derived_report.md | |
| fi | |
| else | |
| echo " Verdict was INCOMPATIBLE but no regenerated content found." | |
| echo "| \`$df_path\` | ⚠️ Kept old | Incompatible but regeneration not produced |" >> /tmp/derived_report.md | |
| mv "${df_path}.bak" "$df_path" | |
| fi | |
| echo "::endgroup::" | |
| done | |
| done < /tmp/sync_changed_ids | |
| # Append derived report to PR body | |
| if [ -f /tmp/pr_body.md ] && [ -f /tmp/derived_report.md ]; then | |
| echo "" >> /tmp/pr_body.md | |
| cat /tmp/derived_report.md >> /tmp/pr_body.md | |
| fi | |
| # Safety cleanup: remove any stray .bak files | |
| find . -name '*.bak' -delete 2>/dev/null || true | |
| # --- Create Pull Request --- | |
| - name: Create Pull Request | |
| if: steps.sync.outputs.has_changes == 'true' | |
| uses: peter-evans/create-pull-request@v7 | |
| with: | |
| token: ${{ secrets.GITHUB_TOKEN }} | |
| commit-message: 'sync: update knowledge from upstream sources' | |
| branch: sync/knowledge | |
| delete-branch: true | |
| title: ${{ steps.sync.outputs.pr_title }} | |
| body-path: /tmp/pr_body.md | |
| labels: knowledge-sync |