diff --git a/frontend/src/components/config/HealthConfigSection.tsx b/frontend/src/components/config/HealthConfigSection.tsx index b05e1dd99..71dcb8490 100644 --- a/frontend/src/components/config/HealthConfigSection.tsx +++ b/frontend/src/components/config/HealthConfigSection.tsx @@ -384,6 +384,23 @@ export function HealthConfigSection({ validation of all segments (slower).

+
+ Hybrid Data Verification + +

+ When enabled, verify a subset of segments by downloading them to ensure data integrity. + This helps detect "ghost" files that exist on the provider but are corrupted. +

+
{formData.segment_sample_percentage !== undefined && !formData.check_all_segments && (
Segment Sample Percentage diff --git a/frontend/src/types/config.ts b/frontend/src/types/config.ts index 0259742af..6d8819314 100644 --- a/frontend/src/types/config.ts +++ b/frontend/src/types/config.ts @@ -68,6 +68,7 @@ export interface HealthConfig { library_sync_interval_minutes?: number; // Library sync interval in minutes (optional) check_all_segments?: boolean; // Whether to check all segments or use sampling resolve_repair_on_import?: boolean; // Automatically resolve pending repairs in the same directory when a new file is imported + data_verification?: boolean; // Whether to perform actual data download verification (hybrid approach) } // Library sync types @@ -296,6 +297,7 @@ export interface HealthUpdateRequest { library_sync_interval_minutes?: number; // Library sync interval in minutes (optional) check_all_segments?: boolean; // Whether to check all segments or use sampling resolve_repair_on_import?: boolean; + data_verification?: boolean; } // RClone update request diff --git a/internal/config/accessors.go b/internal/config/accessors.go index f0e0eb963..841ab3ce2 100644 --- a/internal/config/accessors.go +++ b/internal/config/accessors.go @@ -39,6 +39,14 @@ func (c *Config) GetSegmentSamplePercentage() int { return c.Health.SegmentSamplePercentage } +// GetDataVerification returns whether data verification is enabled with a default fallback. +func (c *Config) GetDataVerification() bool { + if c.Health.DataVerification == nil { + return false // Default: false + } + return *c.Health.DataVerification +} + // GetLibrarySyncInterval returns the library sync interval with a default fallback. func (c *Config) GetLibrarySyncInterval() time.Duration { if c.Health.LibrarySyncIntervalMinutes <= 0 { diff --git a/internal/config/manager.go b/internal/config/manager.go index 27c1e5464..8a7e31f94 100644 --- a/internal/config/manager.go +++ b/internal/config/manager.go @@ -192,6 +192,7 @@ type HealthConfig struct { LibrarySyncIntervalMinutes int `yaml:"library_sync_interval_minutes" mapstructure:"library_sync_interval_minutes" json:"library_sync_interval_minutes,omitempty"` LibrarySyncConcurrency int `yaml:"library_sync_concurrency" mapstructure:"library_sync_concurrency" json:"library_sync_concurrency,omitempty"` ResolveRepairOnImport *bool `yaml:"resolve_repair_on_import" mapstructure:"resolve_repair_on_import" json:"resolve_repair_on_import,omitempty"` + DataVerification *bool `yaml:"data_verification" mapstructure:"data_verification" json:"data_verification,omitempty"` } // GenerateProviderID creates a unique ID based on host, port, and username @@ -856,6 +857,7 @@ func DefaultConfig(configDir ...string) *Config { skipHealthCheck := true watchIntervalSeconds := 10 // Default watch interval cleanupAutomaticImportFailure := false + dataVerification := false // Set paths based on whether we're running in Docker or have a specific config directory var dbPath, metadataPath, logPath, rclonePath, cachePath string @@ -982,6 +984,7 @@ func DefaultConfig(configDir ...string) *Config { SegmentSamplePercentage: 5, // Default: 5% segment sampling LibrarySyncIntervalMinutes: 360, // Default: sync every 6 hours ResolveRepairOnImport: &resolveRepairOnImport, // Enabled by default + DataVerification: &dataVerification, // Disabled by default }, SABnzbd: SABnzbdConfig{ Enabled: &sabnzbdEnabled, diff --git a/internal/health/checker.go b/internal/health/checker.go index c3fdd90bb..b25bd088c 100644 --- a/internal/health/checker.go +++ b/internal/health/checker.go @@ -132,6 +132,7 @@ func (hc *HealthChecker) checkSingleFile(ctx context.Context, filePath string, f hc.poolManager, cfg.GetMaxConnectionsForHealthChecks(), samplePercentage, + cfg.GetDataVerification(), nil, // No progress callback for health checks 30*time.Second, ) diff --git a/internal/importer/validation/segments.go b/internal/importer/validation/segments.go index 481763d8d..ce15c2b80 100644 --- a/internal/importer/validation/segments.go +++ b/internal/importer/validation/segments.go @@ -78,7 +78,7 @@ func ValidateSegmentsForFile( } // Validate segment availability using shared validation logic - if err := usenet.ValidateSegmentAvailability(ctx, segments, poolManager, maxGoroutines, samplePercentage, progressTracker, timeout); err != nil { + if err := usenet.ValidateSegmentAvailability(ctx, segments, poolManager, maxGoroutines, samplePercentage, false, progressTracker, timeout); err != nil { return err } diff --git a/internal/usenet/validation.go b/internal/usenet/validation.go index 6776bb3fc..ce4ed27f5 100644 --- a/internal/usenet/validation.go +++ b/internal/usenet/validation.go @@ -3,6 +3,7 @@ package usenet import ( "context" "fmt" + "io" "math/rand" "sync/atomic" "time" @@ -33,6 +34,7 @@ func ValidateSegmentAvailability( poolManager pool.Manager, maxConnections int, samplePercentage int, + verifyData bool, progressTracker progress.ProgressTracker, timeout time.Duration, ) error { @@ -57,6 +59,37 @@ func ValidateSegmentAvailability( // Atomic counter for progress tracking (thread-safe for concurrent validation) var validatedCount int32 + // Determine which segments need FULL download verification if verifyData is enabled + segmentsToDownload := make(map[string]bool) + if verifyData { + // First segment + if len(segments) > 0 { + segmentsToDownload[segments[0].Id] = true + } + // Last segment + if len(segments) > 1 { + segmentsToDownload[segments[len(segments)-1].Id] = true + } + // Random sample of ~18 others from the SELECTED segments + candidates := make([]string, 0, len(segmentsToValidate)) + for _, s := range segmentsToValidate { + if !segmentsToDownload[s.Id] { + candidates = append(candidates, s.Id) + } + } + + if len(candidates) > 0 { + perm := rand.Perm(len(candidates)) + count := 18 + if count > len(candidates) { + count = len(candidates) + } + for i := 0; i < count; i++ { + segmentsToDownload[candidates[perm[i]]] = true + } + } + } + // Validate segments concurrently with connection limit pl := concpool.New().WithErrors().WithFirstError().WithMaxGoroutines(maxConnections) for _, segment := range segmentsToValidate { @@ -65,7 +98,15 @@ func ValidateSegmentAvailability( checkCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - _, err := usenetPool.Stat(checkCtx, seg.Id, []string{}) + var err error + if verifyData && segmentsToDownload[seg.Id] { + // Download check (Body) + _, err = usenetPool.Body(checkCtx, seg.Id, io.Discard, []string{}) + } else { + // Stat check + _, err = usenetPool.Stat(checkCtx, seg.Id, []string{}) + } + if err != nil { return fmt.Errorf("segment with ID %s unreachable: %w", seg.Id, err) } @@ -102,6 +143,7 @@ func ValidateSegmentAvailabilityDetailed( poolManager pool.Manager, maxConnections int, samplePercentage int, + verifyData bool, progressTracker progress.ProgressTracker, timeout time.Duration, ) (ValidationResult, error) { @@ -135,6 +177,37 @@ func ValidateSegmentAvailabilityDetailed( // We use a channel to collect missing IDs to avoid locking missingChan := make(chan string, len(segmentsToValidate)) + // Determine which segments need FULL download verification if verifyData is enabled + segmentsToDownload := make(map[string]bool) + if verifyData { + // First segment + if len(segments) > 0 { + segmentsToDownload[segments[0].Id] = true + } + // Last segment + if len(segments) > 1 { + segmentsToDownload[segments[len(segments)-1].Id] = true + } + // Random sample of ~18 others from the SELECTED segments + candidates := make([]string, 0, len(segmentsToValidate)) + for _, s := range segmentsToValidate { + if !segmentsToDownload[s.Id] { + candidates = append(candidates, s.Id) + } + } + + if len(candidates) > 0 { + perm := rand.Perm(len(candidates)) + count := 18 + if count > len(candidates) { + count = len(candidates) + } + for i := 0; i < count; i++ { + segmentsToDownload[candidates[perm[i]]] = true + } + } + } + // Validate segments concurrently with connection limit // We don't use WithFirstError because we want to check all selected segments pl := concpool.New().WithErrors().WithMaxGoroutines(maxConnections) @@ -144,7 +217,15 @@ func ValidateSegmentAvailabilityDetailed( checkCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - _, err := usenetPool.Stat(checkCtx, seg.Id, []string{}) + var err error + if verifyData && segmentsToDownload[seg.Id] { + // Download check (Body) + _, err = usenetPool.Body(checkCtx, seg.Id, io.Discard, []string{}) + } else { + // Stat check + _, err = usenetPool.Stat(checkCtx, seg.Id, []string{}) + } + if err != nil { atomic.AddInt32(&missingCount, 1) missingChan <- seg.Id