diff --git a/client/common/constants.go b/client/common/constants.go
index 4f5fd6c..6f9bc64 100644
--- a/client/common/constants.go
+++ b/client/common/constants.go
@@ -2,6 +2,7 @@ package common
 
 import (
 	"fmt"
+	"log"
 	"os"
 	"os/exec"
 	"strconv"
@@ -18,7 +19,7 @@ const (
 	MB
 	// GB is gigabytes
 	GB
-	// TB is terrabytes
+	// TB is terabytes
 	TB
 )
 const (
@@ -75,10 +76,10 @@ const (
 	HeaderContentType   = "Content-Type"
 	MIMEApplicationJSON = "application/json"
 
-	// FileSizeLimit is the maximun single file size for non-multipart upload (5GB)
+	// FileSizeLimit is the maximum single file size for non-multipart upload (5GB)
 	FileSizeLimit = 5 * GB
 
-	// MultipartFileSizeLimit is the maximun single file size for multipart upload (5TB)
+	// MultipartFileSizeLimit is the maximum single file size for multipart upload (5TB)
 	MultipartFileSizeLimit = 5 * TB
 
 	MinMultipartChunkSize = 10 * MB
@@ -89,7 +90,6 @@ const (
 	MaxMultipartParts    = 10000
 	MaxConcurrentUploads = 10
 	MaxRetries           = 5
-	// MinChunkSize = 10 * 1024 * 1024
 )
 
 var (
@@ -100,12 +100,12 @@ var (
 func init() {
 	v, err := GetLfsCustomTransferInt("lfs.customtransfer.drs.multipart-min-chunk-size", 10)
 	if err != nil {
+		log.Printf("Warning: Could not read git config for multipart-min-chunk-size, using default (10 MB): %v\n", err)
 		MinChunkSize = int64(10) * MB
 		return
 	}
 
 	MinChunkSize = int64(v) * MB
-
 }
 
 func GetLfsCustomTransferInt(key string, defaultValue int64) (int64, error) {
@@ -121,7 +121,7 @@ func GetLfsCustomTransferInt(key string, defaultValue int64) (int64, error) {
 
 	parsed, err := strconv.ParseInt(value, 10, 64)
 	if err != nil {
-		return defaultValue, fmt.Errorf("invalid int value for %s: >%q<", key, value)
+		return defaultValue, fmt.Errorf("invalid int value for %s: %q", key, value)
 	}
 
 	if parsed < 0 {
diff --git a/client/upload/utils.go b/client/upload/utils.go
index a9c1c40..c26f3fc 100644
--- a/client/upload/utils.go
+++ b/client/upload/utils.go
@@ -148,7 +148,6 @@ func OptimalChunkSize(fileSize int64) int64 {
 	switch {
 	case fileSize <= 100*common.MB:
 		// Single PUT: return whole file size
-
 		return fileSize
 
 	case fileSize <= 1*common.GB:
diff --git a/client/upload/utils_test.go b/client/upload/utils_test.go
index 5991c8a..8681096 100644
--- a/client/upload/utils_test.go
+++ b/client/upload/utils_test.go
@@ -11,49 +11,97 @@ func TestOptimalChunkSize(t *testing.T) {
 		name          string
 		fileSize      int64
 		wantChunkSize int64
-		wantChunks    int64
+		wantParts     int64
 	}{
+		{
+			name:          "0 bytes",
+			fileSize:      0,
+			wantChunkSize: 1 * common.MB,
+			wantParts:     0,
+		},
+		{
+			name:          "1MB",
+			fileSize:      1 * common.MB,
+			wantChunkSize: 1 * common.MB,
+			wantParts:     1,
+		},
 		{
 			name:          "100MB",
 			fileSize:      100 * common.MB,
 			wantChunkSize: 100 * common.MB,
-			wantChunks:    1,
+			wantParts:     1,
+		},
+		{
+			name:          "100MB+1B",
+			fileSize:      100*common.MB + 1,
+			wantChunkSize: 10 * common.MB,
+			wantParts:     11,
+		},
+		{
+			name:          "500MB",
+			fileSize:      500 * common.MB,
+			wantChunkSize: 10 * common.MB,
+			wantParts:     50,
 		},
 		{
 			name:          "1GB",
 			fileSize:      1 * common.GB,
 			wantChunkSize: 10 * common.MB,
-			wantChunks:    103,
+			wantParts:     103,
+		},
+		{
+			name:          "1GB+1B",
+			fileSize:      1*common.GB + 1,
+			wantChunkSize: 25 * common.MB,
+			wantParts:     41,
 		},
 		{
 			name:          "5GB",
 			fileSize:      5 * common.GB,
 			wantChunkSize: 70 * common.MB,
-			wantChunks:    74,
+			wantParts:     74,
 		},
 		{
 			name:          "10GB",
 			fileSize:      10 * common.GB,
 			wantChunkSize: 128 * common.MB,
-			wantChunks:    80,
+			wantParts:     80,
+		},
+		{
+			name:          "10GB+1B",
+			fileSize:      10*common.GB + 1,
+			wantChunkSize: 256 * common.MB,
+			wantParts:     41,
 		},
 		{
 			name:          "50GB",
 			fileSize:      50 * common.GB,
 			wantChunkSize: 256 * common.MB,
-			wantChunks:    200,
+			wantParts:     200,
 		},
 		{
 			name:          "100GB",
 			fileSize:      100 * common.GB,
 			wantChunkSize: 256 * common.MB,
-			wantChunks:    400,
+			wantParts:     400,
+		},
+		{
+			name:          "100GB+1B",
+			fileSize:      100*common.GB + 1,
+			wantChunkSize: 512 * common.MB,
+			wantParts:     201,
+		},
+		{
+			name:          "500GB",
+			fileSize:      500 * common.GB,
+			wantChunkSize: 739 * common.MB,
+			wantParts:     693,
 		},
 		{
 			name:          "1TB",
 			fileSize:      1 * common.TB,
 			wantChunkSize: 1 * common.GB,
-			wantChunks:    1024,
+			wantParts:     1024,
 		},
 	}
 
@@ -64,9 +112,12 @@ func TestOptimalChunkSize(t *testing.T) {
 				t.Fatalf("chunk size = %d, want %d", chunkSize, tt.wantChunkSize)
 			}
 
-			chunks := (tt.fileSize + chunkSize - 1) / chunkSize
-			if chunks != tt.wantChunks {
-				t.Fatalf("chunks = %d, want %d", chunks, tt.wantChunks)
+			parts := int64(0)
+			if tt.fileSize > 0 && chunkSize > 0 {
+				parts = (tt.fileSize + chunkSize - 1) / chunkSize
+			}
+			if parts != tt.wantParts {
+				t.Fatalf("parts = %d, want %d", parts, tt.wantParts)
 			}
 		})
 	}
diff --git a/docs/optimal-chunk-size.md b/docs/optimal-chunk-size.md
index a3f3e6c..86019ba 100644
--- a/docs/optimal-chunk-size.md
+++ b/docs/optimal-chunk-size.md
@@ -30,6 +30,39 @@ ## NEW
 
+OptimalChunkSize determines the ideal chunk/part size for multipart upload based on file size.
+The chunk size (also known as "message size" or "part size") affects upload performance and
+must comply with S3 constraints.
+
+Calculation logic:
+  - For files ≤ 100 MB: Returns the file size itself (single PUT, no multipart)
+  - For files > 100 MB and ≤ 1 GB: Returns 10 MB chunks
+  - For files > 1 GB and ≤ 10 GB: Scales linearly between 25 MB and 128 MB
+  - For files > 10 GB and ≤ 100 GB: Returns 256 MB chunks
+  - For files > 100 GB: Scales linearly between 512 MB and 1024 MB (capped at 1 TB for ratio purposes)
+  - All chunk sizes are rounded down to the nearest MB
+  - Minimum chunk size is 1 MB (for zero or negative input)
+
+This results in:
+  - Files ≤ 100 MB: Single PUT upload
+  - Files 100 MB - 1 GB: 10 MB chunks
+  - Files 1 GB - 10 GB: 25-128 MB chunks (scaled)
+  - Files 10 GB - 100 GB: 256 MB chunks
+  - Files > 100 GB: 512-1024 MB chunks (scaled)
+
+Examples:
+  - 100 MB file → 100 MB chunk (1 part, single PUT)
+  - 500 MB file → 10 MB chunks (50 parts)
+  - 1 GB file → 10 MB chunks (103 parts)
+  - 5 GB file → 70 MB chunks (74 parts, scaled)
+  - 10 GB file → 128 MB chunks (80 parts)
+  - 50 GB file → 256 MB chunks (200 parts)
+  - 100 GB file → 256 MB chunks (400 parts)
+  - 500 GB file → 739 MB chunks (693 parts, scaled)
+  - 1 TB file → 1024 MB chunks (1024 parts)
+
+### Testing
+
 ```bash
 go test ./client/upload -run '^TestOptimalChunkSize$' -v
 ```
 
@@ -60,7 +93,7 @@ Parameterized test cases (file size ⇒ expected chunk ⇒ expected parts)
  - parts: `1`
 
 4. `100 MB + 1 B`
- - chunk: `10 MB` (> 100 MB \- <= 1 GB)
+ - chunk: `10 MB` (> 100 MB - <= 1 GB)
  - parts: ceil((100 MB + 1 B) / 10 MB) = `11`
 
 5. `500 MB`
@@ -72,7 +105,7 @@ Parameterized test cases (file size ⇒ expected chunk ⇒ expected parts)
  - parts: ceil(1024 / 10) = `103`
 
 7. `1 GB + 1 B`
- - chunk: `25 MB` (start of 1 GB \- 10 GB scaled range)
+ - chunk: `25 MB` (start of 1 GB - 10 GB scaled range)
  - parts: ceil((1024 MB + 1 B) / 25 MB) = `41`
 
 8. `5 GB` (5120 MB)
@@ -80,11 +113,11 @@ Parameterized test cases (file size ⇒ expected chunk ⇒ expected parts)
  - parts: ceil(5120 / 70) = `74`
 
 9. `10 GB` (10240 MB)
- - chunk: `128 MB` (end of 1 GB \- 10 GB scaled range)
+ - chunk: `128 MB` (end of 1 GB - 10 GB scaled range)
  - parts: `80`
 
 10. `10 GB + 1 B`
- - chunk: `256 MB` (> 10 GB \- <= 100 GB fixed)
+ - chunk: `256 MB` (> 10 GB - <= 100 GB fixed)
  - parts: ceil((10240 MB + 1 B) / 256 MB) = `41`
 
 11. `50 GB` (51200 MB)
@@ -96,7 +129,7 @@ Parameterized test cases (file size ⇒ expected chunk ⇒ expected parts)
  - parts: `400`
 
 13. `100 GB + 1 B`
- - chunk: `512 MB` (start of \> 100 GB scaled range)
+ - chunk: `512 MB` (start of > 100 GB scaled range)
  - parts: ceil((102400 MB + 1 B) / 512 MB) = `201`
 
 14. `500 GB` (512000 MB)
@@ -108,9 +141,9 @@ Parameterized test cases (file size ⇒ expected chunk ⇒ expected parts)
  - parts: 1,048,576 / 1024 = `1024`
 
 Test design notes (concise)
-1. Use table\-driven subtests in `client/upload/utils_test.go`. Include fields: name, `fileSize int64`, `wantChunk int64`, `wantParts int64`.
+1. Use table-driven subtests in `client/upload/utils_test.go`. Include fields: name, `fileSize int64`, `wantChunkSize int64`, `wantParts int64`.
 2. For scaled cases assert: MB alignment, clamped to min/max, and exact `wantParts`. Use integer arithmetic for parts.
-3. Add explicit boundary triples for each threshold: exact, \-1 byte, \+1 byte.
+3. Add explicit boundary triples for each threshold: exact, -1 byte, +1 byte.
 4. Include negative and zero cases to verify fallback behavior.
 5. Keep tests deterministic and fast (no external deps).
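
For reviewers who want to sanity-check the documented tiers outside the repository, here is a minimal, self-contained sketch of the selection logic. The tier boundaries come straight from the doc changes above; the linear-scaling denominators (9 GB for the 1-10 GB tier, 900 GB for the >100 GB tier) and the `optimalChunkSize` name are inferred from the documented examples (5 GB → 70 MB, 500 GB → 739 MB) and are assumptions, not a copy of the repository's `OptimalChunkSize`.

```go
// Sketch of the chunk-size tiers described in docs/optimal-chunk-size.md.
// The scaling denominators below are inferred from the documented examples
// and are assumptions, not the repository's actual implementation.
package main

import "fmt"

const (
	MB = int64(1) << 20
	GB = int64(1) << 30
	TB = int64(1) << 40
)

func optimalChunkSize(fileSize int64) int64 {
	switch {
	case fileSize <= 0:
		return 1 * MB // documented minimum for zero/negative input
	case fileSize <= 100*MB:
		return fileSize // single PUT: whole file in one request
	case fileSize <= 1*GB:
		return 10 * MB
	case fileSize <= 10*GB:
		// Scale linearly from 25 MB (at 1 GB) to 128 MB (at 10 GB).
		ratio := float64(fileSize-1*GB) / float64(9*GB)
		chunk := 25*MB + int64(ratio*float64(103*MB))
		return chunk / MB * MB // round down to a whole MB
	case fileSize <= 100*GB:
		return 256 * MB
	default:
		// Scale linearly from 512 MB (at 100 GB) toward 1024 MB, capping
		// the ratio at 1 so chunks never exceed 1 GB for very large files.
		ratio := float64(fileSize-100*GB) / float64(900*GB)
		if ratio > 1 {
			ratio = 1
		}
		chunk := 512*MB + int64(ratio*float64(512*MB))
		return chunk / MB * MB
	}
}

func main() {
	for _, size := range []int64{5 * GB, 500 * GB, 1 * TB} {
		chunk := optimalChunkSize(size)
		parts := (size + chunk - 1) / chunk // ceiling division, as in the test
		fmt.Printf("%4d GB -> %4d MB chunks, %4d parts\n", size/GB, chunk/MB, parts)
	}
}
```

Run against the sampled sizes, this sketch reproduces the table's 70 MB/74 parts for 5 GB, 739 MB/693 parts for 500 GB, and 1024 MB/1024 parts for 1 TB; if the real implementation uses different interpolation endpoints, only the two `ratio` lines would change.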