From 58f88604ff36c7c2615969345ba47e7593fb94a9 Mon Sep 17 00:00:00 2001 From: xrendan Date: Thu, 2 Oct 2025 17:01:45 -0600 Subject: [PATCH 01/10] Fix PostgreSQL client version mismatch - Install PostgreSQL 17 client to match server version - Uses official PostgreSQL APT repository - Fixes pg_dump version mismatch error --- .github/workflows/database-dump.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/database-dump.yml b/.github/workflows/database-dump.yml index 11d5d72..74f3cef 100644 --- a/.github/workflows/database-dump.yml +++ b/.github/workflows/database-dump.yml @@ -14,10 +14,14 @@ jobs: actions: write steps: - - name: Set up PostgreSQL client + - name: Set up PostgreSQL 17 client run: | + # Add PostgreSQL official APT repository + sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list' + wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add - sudo apt-get update - sudo apt-get install -y postgresql-client + # Install PostgreSQL 17 client specifically + sudo apt-get install -y postgresql-client-17 - name: Create dump directory run: mkdir -p database_dumps From eb0a42a553934f73a157e125201b982edaf09387 Mon Sep 17 00:00:00 2001 From: xrendan Date: Thu, 2 Oct 2025 18:11:39 -0600 Subject: [PATCH 02/10] Use actions/upload-artifact instead of GitHub API - Simplifies artifact upload using official GitHub Action - More reliable than direct API calls - Maintains same functionality with cleaner implementation --- .github/workflows/database-dump.yml | 60 +++-------------------------- 1 file changed, 6 insertions(+), 54 deletions(-) diff --git a/.github/workflows/database-dump.yml b/.github/workflows/database-dump.yml index 74f3cef..9113401 100644 --- a/.github/workflows/database-dump.yml +++ b/.github/workflows/database-dump.yml @@ -52,60 +52,12 @@ jobs: echo "DUMP_FILE=${DUMP_FILE}.gz" >> $GITHUB_ENV echo "Dump created: ${DUMP_FILE}.gz" - - name: Upload dump as artifact using GitHub API - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - # Get the dump file name - DUMP_FILE_NAME=$(basename "$DUMP_FILE") - - # Create a unique artifact name with timestamp - ARTIFACT_NAME="database-dump-$(date +%Y%m%d-%H%M%S)" - - # Get workflow run ID - RUN_ID="${{ github.run_id }}" - - # Create artifact upload - echo "Creating artifact upload..." - UPLOAD_RESPONSE=$(curl -L \ - -X POST \ - -H "Accept: application/vnd.github+json" \ - -H "Authorization: Bearer $GITHUB_TOKEN" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - "https://api.github.com/repos/${{ github.repository }}/actions/runs/${RUN_ID}/artifacts" \ - -d "{\"name\":\"${ARTIFACT_NAME}\", \"retention_days\": 30}") - - # Extract upload URL and other details - UPLOAD_URL=$(echo "$UPLOAD_RESPONSE" | jq -r '.upload_url') - ARTIFACT_ID=$(echo "$UPLOAD_RESPONSE" | jq -r '.id') - - if [ "$UPLOAD_URL" = "null" ] || [ -z "$UPLOAD_URL" ]; then - echo "Failed to create artifact upload" - echo "Response: $UPLOAD_RESPONSE" - exit 1 - fi - - # Upload the file - echo "Uploading dump file..." - curl -L \ - -X PUT \ - -H "Accept: application/vnd.github+json" \ - -H "Authorization: Bearer $GITHUB_TOKEN" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - -H "Content-Type: application/gzip" \ - --data-binary "@$DUMP_FILE" \ - "$UPLOAD_URL" - - # Finalize the artifact - echo "Finalizing artifact..." - curl -L \ - -X POST \ - -H "Accept: application/vnd.github+json" \ - -H "Authorization: Bearer $GITHUB_TOKEN" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - "https://api.github.com/repos/${{ github.repository }}/actions/artifacts/${ARTIFACT_ID}/finalize" - - echo "Database dump uploaded as artifact: ${ARTIFACT_NAME}" + - name: Upload dump as artifact + uses: actions/upload-artifact@v4 + with: + name: database-dump-${{ github.run_number }}-${{ github.run_attempt }} + path: ${{ env.DUMP_FILE }} + retention-days: 30 - name: Clean up old artifacts env: From 17bf16dec7fe28c7baa433da47160a2cc9f7fd55 Mon Sep 17 00:00:00 2001 From: xrendan Date: Fri, 3 Oct 2025 11:53:22 -0600 Subject: [PATCH 03/10] Switch to PostgreSQL archive format and add restore rake task - Use pg_dump custom format (.dump) instead of plain SQL - Add rake db:fetch_and_restore to download and restore latest dump - Add rake db:restore to restore from a specific dump file - Add rake db:list_dumps to list available dump artifacts - Requires GITHUB_TOKEN or GITHUB_PAT environment variable --- .claude/settings.local.json | 10 ++ .github/workflows/database-dump.yml | 13 +- lib/tasks/database_restore.rake | 228 ++++++++++++++++++++++++++++ 3 files changed, 244 insertions(+), 7 deletions(-) create mode 100644 .claude/settings.local.json create mode 100644 lib/tasks/database_restore.rake diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..3247245 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,10 @@ +{ + "permissions": { + "allow": [ + "Bash(git add:*)", + "Bash(git commit:*)", + "Bash(git push:*)" + ], + "deny": [] + } +} \ No newline at end of file diff --git a/.github/workflows/database-dump.yml b/.github/workflows/database-dump.yml index 9113401..1fae6f7 100644 --- a/.github/workflows/database-dump.yml +++ b/.github/workflows/database-dump.yml @@ -37,20 +37,19 @@ jobs: export PGUSER=$(echo $DATABASE_URL | sed -E 's/postgres:\/\/([^:]+):.*/\1/') export PGPASSWORD=$(echo $DATABASE_URL | sed -E 's/postgres:\/\/[^:]+:([^@]+)@.*/\1/') - # Create dump excluding users table - DUMP_FILE="database_dumps/db_dump_$(date +%Y%m%d_%H%M%S).sql" + # Create dump excluding users table in custom archive format + DUMP_FILE="database_dumps/db_dump_$(date +%Y%m%d_%H%M%S).dump" - # Dump schema and data, excluding the users table + # Dump schema and data in custom format, excluding the users table pg_dump --no-owner --no-privileges \ + --format=custom \ --exclude-table=users \ --exclude-table=schema_migrations \ --exclude-table=ar_internal_metadata \ -f "$DUMP_FILE" - # Compress the dump - gzip "$DUMP_FILE" - echo "DUMP_FILE=${DUMP_FILE}.gz" >> $GITHUB_ENV - echo "Dump created: ${DUMP_FILE}.gz" + echo "DUMP_FILE=${DUMP_FILE}" >> $GITHUB_ENV + echo "Dump created: ${DUMP_FILE}" - name: Upload dump as artifact uses: actions/upload-artifact@v4 diff --git a/lib/tasks/database_restore.rake b/lib/tasks/database_restore.rake new file mode 100644 index 0000000..0794cfa --- /dev/null +++ b/lib/tasks/database_restore.rake @@ -0,0 +1,228 @@ +namespace :db do + desc "Fetch latest database dump from GitHub artifacts and restore it" + task :fetch_and_restore => :environment do + require 'net/http' + require 'json' + require 'fileutils' + require 'open3' + + # GitHub API configuration + github_token = ENV['GITHUB_TOKEN'] || ENV['GITHUB_PAT'] + repo = ENV['GITHUB_REPOSITORY'] || 'BuildCanada/OutcomeTrackerAPI' + + if github_token.nil? || github_token.empty? + puts "Error: GITHUB_TOKEN or GITHUB_PAT environment variable is required" + exit 1 + end + + puts "Fetching latest database dump artifact..." + + # Get list of artifacts + uri = URI("https://api.github.com/repos/#{repo}/actions/artifacts") + uri.query = URI.encode_www_form(per_page: 100) + + http = Net::HTTP.new(uri.host, uri.port) + http.use_ssl = true + + request = Net::HTTP::Get.new(uri) + request['Accept'] = 'application/vnd.github+json' + request['Authorization'] = "Bearer #{github_token}" + request['X-GitHub-Api-Version'] = '2022-11-28' + + response = http.request(request) + + if response.code != '200' + puts "Error fetching artifacts: #{response.code} #{response.body}" + exit 1 + end + + artifacts = JSON.parse(response.body)['artifacts'] + + # Find the latest database dump artifact + dump_artifacts = artifacts.select { |a| a['name'].start_with?('database-dump-') } + + if dump_artifacts.empty? + puts "No database dump artifacts found" + exit 1 + end + + latest_artifact = dump_artifacts.max_by { |a| DateTime.parse(a['created_at']) } + + puts "Found artifact: #{latest_artifact['name']} (created: #{latest_artifact['created_at']})" + + # Download the artifact + download_uri = URI("https://api.github.com/repos/#{repo}/actions/artifacts/#{latest_artifact['id']}/zip") + + request = Net::HTTP::Get.new(download_uri) + request['Accept'] = 'application/vnd.github+json' + request['Authorization'] = "Bearer #{github_token}" + request['X-GitHub-Api-Version'] = '2022-11-28' + + response = http.request(request) + + if response.code != '200' + puts "Error downloading artifact: #{response.code} #{response.body}" + exit 1 + end + + # Save the artifact + temp_dir = Rails.root.join('tmp', 'database_restore') + FileUtils.mkdir_p(temp_dir) + + zip_file = temp_dir.join('artifact.zip') + File.open(zip_file, 'wb') do |file| + file.write(response.body) + end + + puts "Downloaded artifact to #{zip_file}" + + # Extract the zip file + system("unzip -o #{zip_file} -d #{temp_dir}") or raise "Failed to extract artifact" + + # Find the dump file + dump_file = Dir.glob(temp_dir.join('*.dump')).first + + if dump_file.nil? + puts "No dump file found in artifact" + exit 1 + end + + puts "Found dump file: #{dump_file}" + + # Restore the database + Rake::Task['db:restore'].invoke(dump_file) + + # Cleanup + FileUtils.rm_rf(temp_dir) + + puts "Database restore complete!" + end + + desc "Restore database from a dump file" + task :restore, [:dump_file] => :environment do |t, args| + dump_file = args[:dump_file] + + if dump_file.nil? || !File.exist?(dump_file) + puts "Error: Dump file not found: #{dump_file}" + exit 1 + end + + # Confirm before proceeding + unless ENV['SKIP_CONFIRMATION'] == 'true' + puts "\nWARNING: This will restore the database from #{dump_file}" + puts "This will DROP and recreate all tables except 'users', 'schema_migrations', and 'ar_internal_metadata'" + puts "Are you sure? Type 'yes' to continue:" + + confirmation = STDIN.gets.chomp + unless confirmation.downcase == 'yes' + puts "Aborted" + exit 0 + end + end + + # Get database configuration + db_config = Rails.configuration.database_configuration[Rails.env] + + # Build pg_restore command + pg_restore_cmd = ['pg_restore'] + + # Connection parameters + pg_restore_cmd << "--host=#{db_config['host']}" if db_config['host'] + pg_restore_cmd << "--port=#{db_config['port']}" if db_config['port'] + pg_restore_cmd << "--username=#{db_config['username']}" if db_config['username'] + pg_restore_cmd << "--dbname=#{db_config['database']}" + + # Restore options + pg_restore_cmd << '--clean' # Clean (drop) database objects before recreating + pg_restore_cmd << '--if-exists' # Use IF EXISTS when dropping objects + pg_restore_cmd << '--no-owner' # Don't set ownership + pg_restore_cmd << '--no-privileges' # Don't restore access privileges + pg_restore_cmd << '--verbose' # Verbose output + + # The dump already excludes users, schema_migrations, and ar_internal_metadata + # so we don't need to exclude them again + + pg_restore_cmd << dump_file + + # Set PGPASSWORD environment variable if password is provided + env = {} + env['PGPASSWORD'] = db_config['password'] if db_config['password'] + + puts "Restoring database from #{dump_file}..." + + # Execute pg_restore + stdout, stderr, status = Open3.capture3(env, *pg_restore_cmd.map(&:to_s)) + + if status.success? + puts "Database restored successfully!" + else + puts "Error restoring database:" + puts stderr + exit 1 + end + + # Run any pending migrations that might have been added since the dump + puts "Running pending migrations..." + Rake::Task['db:migrate'].invoke + end + + desc "List available database dump artifacts" + task :list_dumps => :environment do + require 'net/http' + require 'json' + + # GitHub API configuration + github_token = ENV['GITHUB_TOKEN'] || ENV['GITHUB_PAT'] + repo = ENV['GITHUB_REPOSITORY'] || 'BuildCanada/OutcomeTrackerAPI' + + if github_token.nil? || github_token.empty? + puts "Error: GITHUB_TOKEN or GITHUB_PAT environment variable is required" + exit 1 + end + + # Get list of artifacts + uri = URI("https://api.github.com/repos/#{repo}/actions/artifacts") + uri.query = URI.encode_www_form(per_page: 100) + + http = Net::HTTP.new(uri.host, uri.port) + http.use_ssl = true + + request = Net::HTTP::Get.new(uri) + request['Accept'] = 'application/vnd.github+json' + request['Authorization'] = "Bearer #{github_token}" + request['X-GitHub-Api-Version'] = '2022-11-28' + + response = http.request(request) + + if response.code != '200' + puts "Error fetching artifacts: #{response.code} #{response.body}" + exit 1 + end + + artifacts = JSON.parse(response.body)['artifacts'] + + # Find database dump artifacts + dump_artifacts = artifacts.select { |a| a['name'].start_with?('database-dump-') } + + if dump_artifacts.empty? + puts "No database dump artifacts found" + exit 0 + end + + puts "\nAvailable database dumps:" + puts "-" * 80 + + dump_artifacts.sort_by { |a| DateTime.parse(a['created_at']) }.reverse.each do |artifact| + created_at = DateTime.parse(artifact['created_at']) + size_mb = artifact['size_in_bytes'].to_f / (1024 * 1024) + expires_at = DateTime.parse(artifact['expires_at']) + + puts "Name: #{artifact['name']}" + puts "Created: #{created_at.strftime('%Y-%m-%d %H:%M:%S UTC')}" + puts "Size: #{size_mb.round(2)} MB" + puts "Expires: #{expires_at.strftime('%Y-%m-%d %H:%M:%S UTC')}" + puts "ID: #{artifact['id']}" + puts "-" * 80 + end + end +end \ No newline at end of file From 937c35558ee91366339e5d4a169b0e6b15b5275d Mon Sep 17 00:00:00 2001 From: xrendan Date: Fri, 3 Oct 2025 12:10:00 -0600 Subject: [PATCH 04/10] Fix apt-key deprecation warning and add PostgreSQL caching - Replace deprecated apt-key with signed-by keyring method - Add GitHub Actions cache for PostgreSQL client installation - Cache will speed up subsequent workflow runs --- .github/workflows/database-dump.yml | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/.github/workflows/database-dump.yml b/.github/workflows/database-dump.yml index 1fae6f7..da753ff 100644 --- a/.github/workflows/database-dump.yml +++ b/.github/workflows/database-dump.yml @@ -14,13 +14,33 @@ jobs: actions: write steps: + - name: Cache PostgreSQL client + id: cache-postgresql + uses: actions/cache@v3 + with: + path: | + /usr/lib/postgresql/17 + /usr/share/postgresql/17 + /usr/bin/pg_dump + /usr/bin/pg_restore + /usr/bin/psql + key: ${{ runner.os }}-postgresql-client-17-${{ hashFiles('.github/workflows/database-dump.yml') }} + - name: Set up PostgreSQL 17 client + if: steps.cache-postgresql.outputs.cache-hit != 'true' run: | - # Add PostgreSQL official APT repository - sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list' - wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add - + # Add PostgreSQL official APT repository using the new method + sudo apt-get update + sudo apt-get install -y wget ca-certificates + + # Download and add the signing key + wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo tee /usr/share/keyrings/postgresql-archive-keyring.gpg >/dev/null + + # Add the repository with signed-by option + echo "deb [signed-by=/usr/share/keyrings/postgresql-archive-keyring.gpg] http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" | sudo tee /etc/apt/sources.list.d/pgdg.list + + # Update and install PostgreSQL 17 client sudo apt-get update - # Install PostgreSQL 17 client specifically sudo apt-get install -y postgresql-client-17 - name: Create dump directory From 170eed0cfc0274a37d42ccea5ac92d3fcc4ac748 Mon Sep 17 00:00:00 2001 From: xrendan Date: Fri, 3 Oct 2025 12:38:04 -0600 Subject: [PATCH 05/10] Remove authentication requirement for public artifacts - Rake tasks no longer require GITHUB_TOKEN for public repositories - Handle redirect when downloading artifacts - Add better error messages for expired/private artifacts --- lib/tasks/database_restore.rake | 39 +++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/lib/tasks/database_restore.rake b/lib/tasks/database_restore.rake index 0794cfa..c6185ed 100644 --- a/lib/tasks/database_restore.rake +++ b/lib/tasks/database_restore.rake @@ -7,13 +7,7 @@ namespace :db do require 'open3' # GitHub API configuration - github_token = ENV['GITHUB_TOKEN'] || ENV['GITHUB_PAT'] repo = ENV['GITHUB_REPOSITORY'] || 'BuildCanada/OutcomeTrackerAPI' - - if github_token.nil? || github_token.empty? - puts "Error: GITHUB_TOKEN or GITHUB_PAT environment variable is required" - exit 1 - end puts "Fetching latest database dump artifact..." @@ -26,7 +20,7 @@ namespace :db do request = Net::HTTP::Get.new(uri) request['Accept'] = 'application/vnd.github+json' - request['Authorization'] = "Bearer #{github_token}" + # No authorization needed for public repositories request['X-GitHub-Api-Version'] = '2022-11-28' response = http.request(request) @@ -50,18 +44,35 @@ namespace :db do puts "Found artifact: #{latest_artifact['name']} (created: #{latest_artifact['created_at']})" - # Download the artifact + # Download the artifact - for public repos, we need to use the API with no auth download_uri = URI("https://api.github.com/repos/#{repo}/actions/artifacts/#{latest_artifact['id']}/zip") + # For public repositories, the artifact download will redirect + # We need to follow the redirect to get the actual download URL + http = Net::HTTP.new(download_uri.host, download_uri.port) + http.use_ssl = true + request = Net::HTTP::Get.new(download_uri) request['Accept'] = 'application/vnd.github+json' - request['Authorization'] = "Bearer #{github_token}" request['X-GitHub-Api-Version'] = '2022-11-28' response = http.request(request) + # Follow redirect + if response.code == '302' + download_url = response['Location'] + uri = URI(download_url) + + http = Net::HTTP.new(uri.host, uri.port) + http.use_ssl = true + + request = Net::HTTP::Get.new(uri) + response = http.request(request) + end + if response.code != '200' - puts "Error downloading artifact: #{response.code} #{response.body}" + puts "Error downloading artifact: #{response.code}" + puts "This might be because the artifact has expired or the repository is private." exit 1 end @@ -172,13 +183,7 @@ namespace :db do require 'json' # GitHub API configuration - github_token = ENV['GITHUB_TOKEN'] || ENV['GITHUB_PAT'] repo = ENV['GITHUB_REPOSITORY'] || 'BuildCanada/OutcomeTrackerAPI' - - if github_token.nil? || github_token.empty? - puts "Error: GITHUB_TOKEN or GITHUB_PAT environment variable is required" - exit 1 - end # Get list of artifacts uri = URI("https://api.github.com/repos/#{repo}/actions/artifacts") @@ -189,7 +194,7 @@ namespace :db do request = Net::HTTP::Get.new(uri) request['Accept'] = 'application/vnd.github+json' - request['Authorization'] = "Bearer #{github_token}" + # No authorization needed for public repositories request['X-GitHub-Api-Version'] = '2022-11-28' response = http.request(request) From 2c444b8ae87ef989599aa4a095ab6a237ef96058 Mon Sep 17 00:00:00 2001 From: xrendan Date: Fri, 3 Oct 2025 12:39:39 -0600 Subject: [PATCH 06/10] Add developer onboarding documentation - Document database restore process for new developers - Explain weekly backup schedule and retention - Include quick start commands for fetching production data - Note PostgreSQL version requirements --- README.md | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e123dae..4959561 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ ### 🛠 Setup -Ensure you have Ruby and PostgresQL installed +Ensure you have Ruby and PostgreSQL installed ```bash # Install dependencies @@ -61,3 +61,46 @@ rails db:seed # Run the server rails s ``` + +### 🚀 Developer Onboarding + +For new developers joining the project, we provide a streamlined onboarding process using production database dumps: + +#### Quick Start with Production Data + +1. **Request Access**: Contact the team to ensure you have access to the GitHub repository + +2. **Restore from Latest Database Dump**: + ```bash + # List available database dumps + rake db:list_dumps + + # Fetch and restore the latest production database dump + # This will download the most recent weekly backup and restore it locally + rake db:fetch_and_restore + ``` + +3. **What's Included**: The database dump includes all production data except: + - User accounts (for privacy/security) + - Schema migrations metadata + - Internal Rails metadata + +4. **Post-Restore**: After restoring, the rake task automatically runs any pending migrations + +#### Manual Database Restore + +If you have a specific dump file: +```bash +rake db:restore[/path/to/dump.dump] +``` + +#### Database Dumps Schedule + +- Production database is automatically dumped weekly (every Monday at 2 AM UTC) +- Dumps are stored as GitHub Actions artifacts for 30 days +- Dumps use PostgreSQL's custom archive format for efficient storage and restore + +#### Notes +- No GitHub token required - artifacts are public +- Ensure your local PostgreSQL version is compatible with the production version (currently 17.x) +- The restore process will prompt for confirmation before proceeding From 72a589ff0d97c90cc02dbffc277ab4f21f7e4762 Mon Sep 17 00:00:00 2001 From: xrendan Date: Fri, 3 Oct 2025 12:43:28 -0600 Subject: [PATCH 07/10] Use GitHub CLI for artifact downloads - Switch to gh CLI for downloading artifacts - Add prerequisite check for gh installation - Update documentation to reflect gh CLI requirement - GitHub API requires authentication even for public repos --- .claude/settings.local.json | 3 +- README.md | 7 +++-- lib/tasks/database_restore.rake | 51 +++++++++++---------------------- 3 files changed, 24 insertions(+), 37 deletions(-) diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 3247245..8d10708 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -3,7 +3,8 @@ "allow": [ "Bash(git add:*)", "Bash(git commit:*)", - "Bash(git push:*)" + "Bash(git push:*)", + "Bash(curl:*)" ], "deny": [] } diff --git a/README.md b/README.md index 4959561..5d1e967 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,10 @@ For new developers joining the project, we provide a streamlined onboarding proc #### Quick Start with Production Data -1. **Request Access**: Contact the team to ensure you have access to the GitHub repository +1. **Prerequisites**: + - Install the GitHub CLI: https://cli.github.com/ + - Authenticate with: `gh auth login` + - Ensure you have access to the GitHub repository 2. **Restore from Latest Database Dump**: ```bash @@ -101,6 +104,6 @@ rake db:restore[/path/to/dump.dump] - Dumps use PostgreSQL's custom archive format for efficient storage and restore #### Notes -- No GitHub token required - artifacts are public +- GitHub CLI (`gh`) is required for downloading artifacts due to GitHub API limitations - Ensure your local PostgreSQL version is compatible with the production version (currently 17.x) - The restore process will prompt for confirmation before proceeding diff --git a/lib/tasks/database_restore.rake b/lib/tasks/database_restore.rake index c6185ed..a948c0b 100644 --- a/lib/tasks/database_restore.rake +++ b/lib/tasks/database_restore.rake @@ -44,47 +44,30 @@ namespace :db do puts "Found artifact: #{latest_artifact['name']} (created: #{latest_artifact['created_at']})" - # Download the artifact - for public repos, we need to use the API with no auth - download_uri = URI("https://api.github.com/repos/#{repo}/actions/artifacts/#{latest_artifact['id']}/zip") - - # For public repositories, the artifact download will redirect - # We need to follow the redirect to get the actual download URL - http = Net::HTTP.new(download_uri.host, download_uri.port) - http.use_ssl = true - - request = Net::HTTP::Get.new(download_uri) - request['Accept'] = 'application/vnd.github+json' - request['X-GitHub-Api-Version'] = '2022-11-28' - - response = http.request(request) - - # Follow redirect - if response.code == '302' - download_url = response['Location'] - uri = URI(download_url) - - http = Net::HTTP.new(uri.host, uri.port) - http.use_ssl = true - - request = Net::HTTP::Get.new(uri) - response = http.request(request) - end - - if response.code != '200' - puts "Error downloading artifact: #{response.code}" - puts "This might be because the artifact has expired or the repository is private." + # Check if gh CLI is installed + unless system('which gh > /dev/null 2>&1') + puts "Error: GitHub CLI (gh) is not installed." + puts "Please install it from: https://cli.github.com/" exit 1 end - # Save the artifact + # Create temp directory temp_dir = Rails.root.join('tmp', 'database_restore') FileUtils.mkdir_p(temp_dir) - zip_file = temp_dir.join('artifact.zip') - File.open(zip_file, 'wb') do |file| - file.write(response.body) + # Download the artifact using gh CLI + puts "Downloading artifact using GitHub CLI..." + download_cmd = "gh api repos/#{repo}/actions/artifacts/#{latest_artifact['id']}/zip > #{temp_dir}/artifact.zip" + + success = system(download_cmd) + + unless success + puts "Error downloading artifact" + puts "Make sure you're authenticated with: gh auth login" + exit 1 end - + + zip_file = temp_dir.join('artifact.zip') puts "Downloaded artifact to #{zip_file}" # Extract the zip file From ed0710c363a6644af400e66446855d54000c9888 Mon Sep 17 00:00:00 2001 From: xrendan Date: Fri, 3 Oct 2025 12:58:29 -0600 Subject: [PATCH 08/10] simplify readme --- README.md | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 5d1e967..36306c8 100644 --- a/README.md +++ b/README.md @@ -38,12 +38,12 @@ These are extracted using an LLM from the Entry's raw data. Each entry might have multiple activities. `Evidence`: - Evidence links an Activity to a Promise. They are linked using an LLM. + Evidence links an Activity to a Promise. They are linked using an LLM. ### 🛠 Setup -Ensure you have Ruby and PostgreSQL installed +Ensure you have Ruby, PostgreSQL and the Github CLI installed ```bash # Install dependencies @@ -55,8 +55,7 @@ sudo service postgresql start # Setup database rails db:create -rails db:migrate -rails db:seed +rake db:fetch_and_restore # Run the server rails s @@ -71,13 +70,12 @@ For new developers joining the project, we provide a streamlined onboarding proc 1. **Prerequisites**: - Install the GitHub CLI: https://cli.github.com/ - Authenticate with: `gh auth login` - - Ensure you have access to the GitHub repository 2. **Restore from Latest Database Dump**: ```bash # List available database dumps rake db:list_dumps - + # Fetch and restore the latest production database dump # This will download the most recent weekly backup and restore it locally rake db:fetch_and_restore @@ -90,20 +88,8 @@ For new developers joining the project, we provide a streamlined onboarding proc 4. **Post-Restore**: After restoring, the rake task automatically runs any pending migrations -#### Manual Database Restore - -If you have a specific dump file: -```bash -rake db:restore[/path/to/dump.dump] -``` - #### Database Dumps Schedule - Production database is automatically dumped weekly (every Monday at 2 AM UTC) - Dumps are stored as GitHub Actions artifacts for 30 days - Dumps use PostgreSQL's custom archive format for efficient storage and restore - -#### Notes -- GitHub CLI (`gh`) is required for downloading artifacts due to GitHub API limitations -- Ensure your local PostgreSQL version is compatible with the production version (currently 17.x) -- The restore process will prompt for confirmation before proceeding From 688024e09fcb690a8cd5a6a479422679591ef2ba Mon Sep 17 00:00:00 2001 From: xrendan Date: Fri, 3 Oct 2025 15:35:09 -0600 Subject: [PATCH 09/10] Fix PostgreSQL GPG key import - Use gpg --dearmor to convert ASCII armored key to binary format - Add gnupg package dependency - Create keyrings directory if missing - Fixes 'NO_PUBKEY' error during apt update --- .github/workflows/database-dump.yml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/database-dump.yml b/.github/workflows/database-dump.yml index da753ff..0105a72 100644 --- a/.github/workflows/database-dump.yml +++ b/.github/workflows/database-dump.yml @@ -31,13 +31,19 @@ jobs: run: | # Add PostgreSQL official APT repository using the new method sudo apt-get update - sudo apt-get install -y wget ca-certificates + sudo apt-get install -y wget ca-certificates gnupg - # Download and add the signing key - wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo tee /usr/share/keyrings/postgresql-archive-keyring.gpg >/dev/null + # Create keyrings directory if it doesn't exist + sudo mkdir -p /usr/share/keyrings + + # Download and add the signing key with proper conversion + wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | \ + gpg --dearmor | \ + sudo tee /usr/share/keyrings/postgresql-archive-keyring.gpg > /dev/null # Add the repository with signed-by option - echo "deb [signed-by=/usr/share/keyrings/postgresql-archive-keyring.gpg] http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" | sudo tee /etc/apt/sources.list.d/pgdg.list + echo "deb [signed-by=/usr/share/keyrings/postgresql-archive-keyring.gpg] http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" | \ + sudo tee /etc/apt/sources.list.d/pgdg.list # Update and install PostgreSQL 17 client sudo apt-get update From 9fddc53882470f42d80034722aba1811719000db Mon Sep 17 00:00:00 2001 From: xrendan Date: Tue, 7 Oct 2025 09:26:49 -0600 Subject: [PATCH 10/10] Fix styling --- lib/tasks/database_restore.rake | 162 ++++++++++++++++---------------- 1 file changed, 81 insertions(+), 81 deletions(-) diff --git a/lib/tasks/database_restore.rake b/lib/tasks/database_restore.rake index a948c0b..c2e6349 100644 --- a/lib/tasks/database_restore.rake +++ b/lib/tasks/database_restore.rake @@ -1,114 +1,114 @@ namespace :db do desc "Fetch latest database dump from GitHub artifacts and restore it" - task :fetch_and_restore => :environment do - require 'net/http' - require 'json' - require 'fileutils' - require 'open3' + task fetch_and_restore: :environment do + require "net/http" + require "json" + require "fileutils" + require "open3" # GitHub API configuration - repo = ENV['GITHUB_REPOSITORY'] || 'BuildCanada/OutcomeTrackerAPI' + repo = ENV["GITHUB_REPOSITORY"] || "BuildCanada/OutcomeTrackerAPI" puts "Fetching latest database dump artifact..." # Get list of artifacts uri = URI("https://api.github.com/repos/#{repo}/actions/artifacts") uri.query = URI.encode_www_form(per_page: 100) - + http = Net::HTTP.new(uri.host, uri.port) http.use_ssl = true - + request = Net::HTTP::Get.new(uri) - request['Accept'] = 'application/vnd.github+json' + request["Accept"] = "application/vnd.github+json" # No authorization needed for public repositories - request['X-GitHub-Api-Version'] = '2022-11-28' - + request["X-GitHub-Api-Version"] = "2022-11-28" + response = http.request(request) - - if response.code != '200' + + if response.code != "200" puts "Error fetching artifacts: #{response.code} #{response.body}" exit 1 end - artifacts = JSON.parse(response.body)['artifacts'] - + artifacts = JSON.parse(response.body)["artifacts"] + # Find the latest database dump artifact - dump_artifacts = artifacts.select { |a| a['name'].start_with?('database-dump-') } - + dump_artifacts = artifacts.select { |a| a["name"].start_with?("database-dump-") } + if dump_artifacts.empty? puts "No database dump artifacts found" exit 1 end - latest_artifact = dump_artifacts.max_by { |a| DateTime.parse(a['created_at']) } - + latest_artifact = dump_artifacts.max_by { |a| DateTime.parse(a["created_at"]) } + puts "Found artifact: #{latest_artifact['name']} (created: #{latest_artifact['created_at']})" # Check if gh CLI is installed - unless system('which gh > /dev/null 2>&1') + unless system("which gh > /dev/null 2>&1") puts "Error: GitHub CLI (gh) is not installed." puts "Please install it from: https://cli.github.com/" exit 1 end # Create temp directory - temp_dir = Rails.root.join('tmp', 'database_restore') + temp_dir = Rails.root.join("tmp", "database_restore") FileUtils.mkdir_p(temp_dir) - + # Download the artifact using gh CLI puts "Downloading artifact using GitHub CLI..." download_cmd = "gh api repos/#{repo}/actions/artifacts/#{latest_artifact['id']}/zip > #{temp_dir}/artifact.zip" - + success = system(download_cmd) - + unless success puts "Error downloading artifact" puts "Make sure you're authenticated with: gh auth login" exit 1 end - - zip_file = temp_dir.join('artifact.zip') + + zip_file = temp_dir.join("artifact.zip") puts "Downloaded artifact to #{zip_file}" # Extract the zip file system("unzip -o #{zip_file} -d #{temp_dir}") or raise "Failed to extract artifact" - + # Find the dump file - dump_file = Dir.glob(temp_dir.join('*.dump')).first - + dump_file = Dir.glob(temp_dir.join("*.dump")).first + if dump_file.nil? puts "No dump file found in artifact" exit 1 end puts "Found dump file: #{dump_file}" - + # Restore the database - Rake::Task['db:restore'].invoke(dump_file) - + Rake::Task["db:restore"].invoke(dump_file) + # Cleanup FileUtils.rm_rf(temp_dir) - + puts "Database restore complete!" end desc "Restore database from a dump file" - task :restore, [:dump_file] => :environment do |t, args| + task :restore, [ :dump_file ] => :environment do |t, args| dump_file = args[:dump_file] - + if dump_file.nil? || !File.exist?(dump_file) puts "Error: Dump file not found: #{dump_file}" exit 1 end # Confirm before proceeding - unless ENV['SKIP_CONFIRMATION'] == 'true' + unless ENV["SKIP_CONFIRMATION"] == "true" puts "\nWARNING: This will restore the database from #{dump_file}" puts "This will DROP and recreate all tables except 'users', 'schema_migrations', and 'ar_internal_metadata'" puts "Are you sure? Type 'yes' to continue:" - + confirmation = STDIN.gets.chomp - unless confirmation.downcase == 'yes' + unless confirmation.downcase == "yes" puts "Aborted" exit 0 end @@ -116,37 +116,37 @@ namespace :db do # Get database configuration db_config = Rails.configuration.database_configuration[Rails.env] - + # Build pg_restore command - pg_restore_cmd = ['pg_restore'] - + pg_restore_cmd = [ "pg_restore" ] + # Connection parameters - pg_restore_cmd << "--host=#{db_config['host']}" if db_config['host'] - pg_restore_cmd << "--port=#{db_config['port']}" if db_config['port'] - pg_restore_cmd << "--username=#{db_config['username']}" if db_config['username'] + pg_restore_cmd << "--host=#{db_config['host']}" if db_config["host"] + pg_restore_cmd << "--port=#{db_config['port']}" if db_config["port"] + pg_restore_cmd << "--username=#{db_config['username']}" if db_config["username"] pg_restore_cmd << "--dbname=#{db_config['database']}" - + # Restore options - pg_restore_cmd << '--clean' # Clean (drop) database objects before recreating - pg_restore_cmd << '--if-exists' # Use IF EXISTS when dropping objects - pg_restore_cmd << '--no-owner' # Don't set ownership - pg_restore_cmd << '--no-privileges' # Don't restore access privileges - pg_restore_cmd << '--verbose' # Verbose output - + pg_restore_cmd << "--clean" # Clean (drop) database objects before recreating + pg_restore_cmd << "--if-exists" # Use IF EXISTS when dropping objects + pg_restore_cmd << "--no-owner" # Don't set ownership + pg_restore_cmd << "--no-privileges" # Don't restore access privileges + pg_restore_cmd << "--verbose" # Verbose output + # The dump already excludes users, schema_migrations, and ar_internal_metadata # so we don't need to exclude them again - + pg_restore_cmd << dump_file - + # Set PGPASSWORD environment variable if password is provided env = {} - env['PGPASSWORD'] = db_config['password'] if db_config['password'] - + env["PGPASSWORD"] = db_config["password"] if db_config["password"] + puts "Restoring database from #{dump_file}..." - + # Execute pg_restore stdout, stderr, status = Open3.capture3(env, *pg_restore_cmd.map(&:to_s)) - + if status.success? puts "Database restored successfully!" else @@ -154,44 +154,44 @@ namespace :db do puts stderr exit 1 end - + # Run any pending migrations that might have been added since the dump puts "Running pending migrations..." - Rake::Task['db:migrate'].invoke + Rake::Task["db:migrate"].invoke end desc "List available database dump artifacts" - task :list_dumps => :environment do - require 'net/http' - require 'json' + task list_dumps: :environment do + require "net/http" + require "json" # GitHub API configuration - repo = ENV['GITHUB_REPOSITORY'] || 'BuildCanada/OutcomeTrackerAPI' + repo = ENV["GITHUB_REPOSITORY"] || "BuildCanada/OutcomeTrackerAPI" # Get list of artifacts uri = URI("https://api.github.com/repos/#{repo}/actions/artifacts") uri.query = URI.encode_www_form(per_page: 100) - + http = Net::HTTP.new(uri.host, uri.port) http.use_ssl = true - + request = Net::HTTP::Get.new(uri) - request['Accept'] = 'application/vnd.github+json' + request["Accept"] = "application/vnd.github+json" # No authorization needed for public repositories - request['X-GitHub-Api-Version'] = '2022-11-28' - + request["X-GitHub-Api-Version"] = "2022-11-28" + response = http.request(request) - - if response.code != '200' + + if response.code != "200" puts "Error fetching artifacts: #{response.code} #{response.body}" exit 1 end - artifacts = JSON.parse(response.body)['artifacts'] - + artifacts = JSON.parse(response.body)["artifacts"] + # Find database dump artifacts - dump_artifacts = artifacts.select { |a| a['name'].start_with?('database-dump-') } - + dump_artifacts = artifacts.select { |a| a["name"].start_with?("database-dump-") } + if dump_artifacts.empty? puts "No database dump artifacts found" exit 0 @@ -199,12 +199,12 @@ namespace :db do puts "\nAvailable database dumps:" puts "-" * 80 - - dump_artifacts.sort_by { |a| DateTime.parse(a['created_at']) }.reverse.each do |artifact| - created_at = DateTime.parse(artifact['created_at']) - size_mb = artifact['size_in_bytes'].to_f / (1024 * 1024) - expires_at = DateTime.parse(artifact['expires_at']) - + + dump_artifacts.sort_by { |a| DateTime.parse(a["created_at"]) }.reverse.each do |artifact| + created_at = DateTime.parse(artifact["created_at"]) + size_mb = artifact["size_in_bytes"].to_f / (1024 * 1024) + expires_at = DateTime.parse(artifact["expires_at"]) + puts "Name: #{artifact['name']}" puts "Created: #{created_at.strftime('%Y-%m-%d %H:%M:%S UTC')}" puts "Size: #{size_mb.round(2)} MB" @@ -213,4 +213,4 @@ namespace :db do puts "-" * 80 end end -end \ No newline at end of file +end