@@ -487,21 +487,82 @@ EOF
487487
488488 if [ $(( wait_time % 15 )) -eq 0 ]; then
489489 log " INFO" " Still waiting for WAL archiving... (${wait_time} s/${max_wait} s)"
490+ # Force another WAL switch to trigger archiving
491+ if ! su-exec postgres psql -d " $pg_database " -c " SELECT pg_switch_wal();" 2> /dev/null; then
492+ log " WARN" " Failed to force WAL switch"
493+ fi
490494 fi
491495 done
492496
493497 if [ " $archived_count " -eq 0 ]; then
494- log " WARN " " No archived WAL files found after ${max_wait} seconds"
495- log " WARN " " This may cause backup failures. Check PostgreSQL logs for archive errors ."
498+ log " ERROR " " No archived WAL files found after ${max_wait} seconds"
499+ log " ERROR " " WAL archiving is not working properly. This will cause backup failures ."
496500
497501 # Show PostgreSQL log for debugging
498502 log " INFO" " Recent PostgreSQL log entries:"
499503 tail -20 " $pgdata /log/" * .log 2> /dev/null || log " WARN" " Could not read PostgreSQL logs"
504+
505+ # Show current archive_command
506+ local current_archive_cmd=$( su-exec postgres psql -d " $pg_database " -t -c " SHOW archive_command;" 2> /dev/null | sed ' s/^[ \t]*//;s/[ \t]*$//' )
507+ log " ERROR" " Current archive_command: $current_archive_cmd "
508+
509+ return 1
500510 fi
501511
502512 return 0
503513}
504514
# Verify WAL archiving is working before backup
#
# Forces a WAL segment switch and waits until at least one WAL file that was
# written AFTER this function started shows up in the pgBackRest archive
# directory for the stanza.
#
# Globals (read):
#   PGBACKREST_STANZA      stanza name (default: main)
#   POSTGRES_DB            database psql connects to (default: postgres)
#   PGBACKREST_REPO1_PATH  repository root (default: /var/lib/pgbackrest)
# Arguments:
#   $1 - optional timeout in seconds (default: 60)
# Returns:
#   0 if a freshly archived WAL file was found, 1 otherwise
verify_wal_archiving() {
    local stanza_name="${PGBACKREST_STANZA:-main}"
    local pg_database="${POSTGRES_DB:-postgres}"
    local max_wait="${1:-60}"
    local repo_path="${PGBACKREST_REPO1_PATH:-/var/lib/pgbackrest}"
    local archive_dir="${repo_path}/archive/${stanza_name}"
    local poll_interval=2
    local wait_time=0
    local status=1
    local pre_switch_lsn marker archived_count

    # Fall back to the default when the timeout is not a plain non-negative integer
    case "$max_wait" in
        '' | *[!0-9]*) max_wait=60 ;;
    esac

    log "INFO" "Verifying WAL archiving is working..."

    # Declared above and assigned here so `local` does not mask the result
    pre_switch_lsn=$(su-exec postgres psql -d "$pg_database" -t -c "SELECT pg_current_wal_lsn();" 2>/dev/null | tr -d '[:space:]') || pre_switch_lsn=""

    if [ -z "$pre_switch_lsn" ]; then
        log "ERROR" "Failed to get current WAL LSN"
        return 1
    fi

    log "INFO" "Current WAL LSN before switch: $pre_switch_lsn"

    # Reference point in time: only files modified after this marker count as
    # "newly archived". Comparing against the archive directory itself would
    # also accept stale files left over from earlier, possibly long-gone,
    # successful archiving.
    if ! marker=$(mktemp); then
        log "ERROR" "Failed to create timestamp marker file"
        return 1
    fi

    # pg_switch_wal() does nothing when there has been no WAL activity since the
    # previous switch; a restore point guarantees there is a record to flush.
    # Best effort only: the switch below is what actually has to succeed.
    if ! su-exec postgres psql -d "$pg_database" -c "SELECT pg_create_restore_point('wal_archive_check');" >/dev/null 2>&1; then
        log "WARN" "Could not create restore point; WAL switch may be a no-op"
    fi

    # Force WAL switch
    if ! su-exec postgres psql -d "$pg_database" -c "SELECT pg_switch_wal();" 2>/dev/null; then
        log "ERROR" "Failed to force WAL switch"
        rm -f -- "$marker"
        return 1
    fi

    log "INFO" "WAL switch forced, waiting for archiving..."

    # Poll until a new file appears or the timeout elapses. The check runs
    # before the timeout test so the final sleep is always followed by a check.
    while :; do
        if [ -d "$archive_dir" ]; then
            archived_count=$(find "$archive_dir" -type f \( -name "*.gz" -o -name "*.lz4" -o -name "*.xz" -o -name "*.bz2" -o -name "*-*" \) -newer "$marker" 2>/dev/null | wc -l)
            if [ "$archived_count" -gt 0 ]; then
                log "INFO" "WAL archiving verified - found newly archived WAL files"
                status=0
                break
            fi
        fi

        if [ "$wait_time" -ge "$max_wait" ]; then
            break
        fi

        sleep "$poll_interval"
        wait_time=$((wait_time + poll_interval))

        if [ $((wait_time % 10)) -eq 0 ]; then
            log "INFO" "Still waiting for WAL archiving... (${wait_time}s/${max_wait}s)"
        fi
    done

    rm -f -- "$marker"

    if [ "$status" -ne 0 ]; then
        log "ERROR" "WAL archiving verification failed - no WAL files archived within ${max_wait} seconds"
    fi

    return "$status"
}
565+
505566# Perform full backup using pgbackrest
506567perform_pgbackrest_backup () {
507568 local stanza_name=" ${PGBACKREST_STANZA:- main} "
@@ -515,42 +576,16 @@ perform_pgbackrest_backup() {
515576 return 1
516577 fi
517578
518- # For full backups, ensure we have some WAL files archived first
579+ # For full backups, ensure WAL archiving is working
519580 if [ " $backup_type " = " full" ]; then
520- log " INFO" " Checking for archived WAL files before starting full backup..."
521-
522- # Force a few WAL switches to ensure we have archived WAL files
523- for i in 1 2 3; do
524- log " INFO" " Forcing WAL switch $i /3..."
525- su-exec postgres psql -d " ${POSTGRES_DB:- postgres} " -c " SELECT pg_switch_wal();" || true
526- sleep 2
527- done
528-
529- # Wait for archiving to complete
530- sleep 10
581+ log " INFO" " Verifying WAL archiving before starting full backup..."
531582
532- # Check if we have archived WAL files
533- if [ -d " /var/lib/pgbackrest/archive/${stanza_name} " ]; then
534- archived_count=$( find " /var/lib/pgbackrest/archive/${stanza_name} " -type f | wc -l)
535- log " INFO" " Found ${archived_count} archived WAL files"
536-
537- if [ " $archived_count " -eq 0 ]; then
538- log " WARN" " No archived WAL files found. Backup may fail."
539- log " INFO" " Attempting to run archive-push manually..."
540-
541- # Try to manually archive any pending WAL files
542- wal_dir=" ${PGDATA:-/ var/ lib/ postgresql/ data} /pg_wal"
543- if [ -d " $wal_dir " ]; then
544- for wal_file in " $wal_dir " /[0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F]; do
545- if [ -f " $wal_file " ]; then
546- log " INFO" " Trying to archive WAL file: $( basename " $wal_file " ) "
547- su-exec postgres pgbackrest --stanza=" ${stanza_name} " archive-push " $wal_file " || true
548- break
549- fi
550- done
551- fi
552- fi
583+ if ! verify_wal_archiving; then
584+ log " ERROR" " WAL archiving verification failed - backup will likely fail"
585+ return 1
553586 fi
587+
588+ log " INFO" " WAL archiving verified successfully"
554589 fi
555590
556591 # Perform the backup using su-exec
0 commit comments