12 changes: 6 additions & 6 deletions client/common/constants.go
@@ -2,6 +2,7 @@ package common

import (
"fmt"
"log"
"os"
"os/exec"
"strconv"
@@ -18,7 +19,7 @@ const (
MB
// GB is gigabytes
GB
// TB is terrabytes
// TB is terabytes
TB
)
const (
@@ -75,10 +76,10 @@ const (
HeaderContentType = "Content-Type"
MIMEApplicationJSON = "application/json"

// FileSizeLimit is the maximun single file size for non-multipart upload (5GB)
// FileSizeLimit is the maximum single file size for non-multipart upload (5GB)
FileSizeLimit = 5 * GB

// MultipartFileSizeLimit is the maximun single file size for multipart upload (5TB)
// MultipartFileSizeLimit is the maximum single file size for multipart upload (5TB)
MultipartFileSizeLimit = 5 * TB
MinMultipartChunkSize = 10 * MB

@@ -89,7 +90,6 @@ const (
MaxMultipartParts = 10000
MaxConcurrentUploads = 10
MaxRetries = 5
// MinChunkSize = 10 * 1024 * 1024
)

var (
@@ -100,12 +100,12 @@ var (
func init() {
v, err := GetLfsCustomTransferInt("lfs.customtransfer.drs.multipart-min-chunk-size", 10)
if err != nil {
log.Printf("Warning: Could not read git config for multipart-min-chunk-size, using default (10 MB): %v\n", err)
MinChunkSize = int64(10) * MB
return
}

MinChunkSize = int64(v) * MB

}

func GetLfsCustomTransferInt(key string, defaultValue int64) (int64, error) {
@@ -121,7 +121,7 @@ func GetLfsCustomTransferInt(key string, defaultValue int64) (int64, error) {

parsed, err := strconv.ParseInt(value, 10, 64)
if err != nil {
return defaultValue, fmt.Errorf("invalid int value for %s: >%q<", key, value)
return defaultValue, fmt.Errorf("invalid int value for %s: %q", key, value)
}

if parsed < 0 {
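
As a usage note (an assumption inferred from the key name read in `init` above, not a documented workflow): the minimum multipart chunk size can presumably be overridden through git config, with the value interpreted as a number of megabytes since the client multiplies it by `MB`.

```bash
# Hypothetical example: set the minimum multipart chunk size to 20 MB.
# The key name comes from the init() code above; the client multiplies
# the value by MB, so a plain integer is expected.
git config lfs.customtransfer.drs.multipart-min-chunk-size 20

# Verify the value the client will read.
git config --get lfs.customtransfer.drs.multipart-min-chunk-size
```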
1 change: 0 additions & 1 deletion client/upload/utils.go
@@ -148,7 +148,6 @@ func OptimalChunkSize(fileSize int64) int64 {
switch {
case fileSize <= 100*common.MB:
// Single PUT: return whole file size

return fileSize

case fileSize <= 1*common.GB:
73 changes: 62 additions & 11 deletions client/upload/utils_test.go
@@ -11,49 +11,97 @@ func TestOptimalChunkSize(t *testing.T) {
name string
fileSize int64
wantChunkSize int64
wantChunks int64
wantParts int64
}{
{
name: "0 bytes",
fileSize: 0,
wantChunkSize: 1 * common.MB,
wantParts: 0,
},
{
name: "1MB",
fileSize: 1 * common.MB,
wantChunkSize: 1 * common.MB,
wantParts: 1,
},
Comment on lines +16 to +27

Copilot AI Jan 22, 2026

The test table exercises the 0-byte, exact-threshold, and +1 byte boundary cases, but it does not include the documented -1 byte boundaries or a negative fileSize, even though docs/optimal-chunk-size.md ("Test design notes" #3 and #4) explicitly calls for those cases. To fully match the documented test plan and guard against off-by-one or fallback regressions, consider adding subtests for fileSize < 0 and for each X - 1 byte threshold (e.g., 100 MB - 1 B, 1 GB - 1 B, etc.).
{
name: "100MB",
fileSize: 100 * common.MB,
wantChunkSize: 100 * common.MB,
wantChunks: 1,
wantParts: 1,
},
{
name: "100MB+1B",
fileSize: 100*common.MB + 1,
wantChunkSize: 10 * common.MB,
wantParts: 11,
},
{
name: "500MB",
fileSize: 500 * common.MB,
wantChunkSize: 10 * common.MB,
wantParts: 50,
},
{
name: "1GB",
fileSize: 1 * common.GB,
wantChunkSize: 10 * common.MB,
wantChunks: 103,
wantParts: 103,
},
{
name: "1GB+1B",
fileSize: 1*common.GB + 1,
wantChunkSize: 25 * common.MB,
wantParts: 41,
},
{
name: "5GB",
fileSize: 5 * common.GB,
wantChunkSize: 70 * common.MB,
wantChunks: 74,
wantParts: 74,
},
{
name: "10GB",
fileSize: 10 * common.GB,
wantChunkSize: 128 * common.MB,
wantChunks: 80,
wantParts: 80,
},
{
name: "10GB+1B",
fileSize: 10*common.GB + 1,
wantChunkSize: 256 * common.MB,
wantParts: 41,
},
{
name: "50GB",
fileSize: 50 * common.GB,
wantChunkSize: 256 * common.MB,
wantChunks: 200,
wantParts: 200,
},
{
name: "100GB",
fileSize: 100 * common.GB,
wantChunkSize: 256 * common.MB,
wantChunks: 400,
wantParts: 400,
},
{
name: "100GB+1B",
fileSize: 100*common.GB + 1,
wantChunkSize: 512 * common.MB,
wantParts: 201,
},
{
name: "500GB",
fileSize: 500 * common.GB,
wantChunkSize: 739 * common.MB,
wantParts: 693,
},
{
name: "1TB",
fileSize: 1 * common.TB,
wantChunkSize: 1 * common.GB,
wantChunks: 1024,
wantParts: 1024,
},
}

@@ -64,9 +112,12 @@ func TestOptimalChunkSize(t *testing.T) {
t.Fatalf("chunk size = %d, want %d", chunkSize, tt.wantChunkSize)
}

chunks := (tt.fileSize + chunkSize - 1) / chunkSize
if chunks != tt.wantChunks {
t.Fatalf("chunks = %d, want %d", chunks, tt.wantChunks)
parts := int64(0)
if tt.fileSize > 0 && chunkSize > 0 {
parts = (tt.fileSize + chunkSize - 1) / chunkSize
}
if parts != tt.wantParts {
t.Fatalf("parts = %d, want %d", parts, tt.wantParts)
}
})
}
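
Following up on the review comment above, a hedged sketch of additional table entries for the missing negative and -1 byte boundaries could look like the following. The expected values are derived from the behavior documented in docs/optimal-chunk-size.md (1 MB minimum for non-positive sizes, single PUT at or below 100 MB), not from running the code, so they should be verified before adoption.

```go
// Hypothetical additions to the test table in TestOptimalChunkSize;
// expected values follow docs/optimal-chunk-size.md, not verified output.
{
	name:          "negative size",
	fileSize:      -1,
	wantChunkSize: 1 * common.MB, // documented fallback minimum
	wantParts:     0,             // parts are only computed for fileSize > 0
},
{
	name:          "100MB-1B",
	fileSize:      100*common.MB - 1,
	wantChunkSize: 100*common.MB - 1, // still a single PUT
	wantParts:     1,
},
{
	name:          "1GB-1B",
	fileSize:      1*common.GB - 1,
	wantChunkSize: 10 * common.MB, // still in the 100 MB–1 GB tier
	wantParts:     103,            // ceil((1 GB - 1 B) / 10 MB)
},
```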
47 changes: 40 additions & 7 deletions docs/optimal-chunk-size.md
@@ -30,6 +30,39 @@

## NEW

OptimalChunkSize determines the ideal chunk/part size for multipart upload based on file size.
The chunk size (also known as "message size" or "part size") affects upload performance and
must comply with S3 constraints.

Calculation logic:
- For files ≤ 100 MB: Returns the file size itself (single PUT, no multipart)
- For files > 100 MB and ≤ 1 GB: Returns 10 MB chunks
- For files > 1 GB and ≤ 10 GB: Scales linearly between 25 MB and 128 MB
- For files > 10 GB and ≤ 100 GB: Returns 256 MB chunks
- For files > 100 GB: Scales linearly between 512 MB and 1024 MB (capped at 1 TB for ratio purposes)
- All chunk sizes are rounded down to the nearest MB
- Minimum chunk size is 1 MB (for zero or negative input)

This results in:
- Files ≤ 100 MB: Single PUT upload
- Files 100 MB - 1 GB: 10 MB chunks
- Files 1 GB - 10 GB: 25-128 MB chunks (scaled)
- Files 10 GB - 100 GB: 256 MB chunks
- Files > 100 GB: 512-1024 MB chunks (scaled)

Examples:
- 100 MB file → 100 MB chunk (1 part, single PUT)
- 500 MB file → 10 MB chunks (50 parts)
- 1 GB file → 10 MB chunks (103 parts)
- 5 GB file → 70 MB chunks (74 parts, scaled)
- 10 GB file → 128 MB chunks (80 parts)
- 50 GB file → 256 MB chunks (200 parts)
- 100 GB file → 256 MB chunks (400 parts)
- 500 GB file → 739 MB chunks (693 parts, scaled)
- 1 TB file → 1024 MB chunks (1024 parts)
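
To make the tiering concrete, below is a minimal, hypothetical Go sketch of the approach described above. It is not the repository's actual `OptimalChunkSize`; the tier boundaries are taken from this document, but the interpolation endpoints and rounding are assumptions, so scaled results (for example the 500 GB case) may differ slightly from the examples listed.

```go
// Illustrative sketch only — not the OptimalChunkSize from client/upload/utils.go.
// Tier boundaries follow the description above; interpolation endpoints and
// rounding are assumptions.
package main

import "fmt"

const (
	mb = int64(1) << 20
	gb = int64(1) << 30
	tb = int64(1) << 40
)

// lerpMB linearly interpolates between loMB and hiMB over [loSize, hiSize]
// and rounds the result down to a whole number of megabytes.
func lerpMB(size, loSize, hiSize, loMB, hiMB int64) int64 {
	return (loMB + (hiMB-loMB)*(size-loSize)/(hiSize-loSize)) * mb
}

func chunkSizeSketch(fileSize int64) int64 {
	switch {
	case fileSize <= 0:
		return 1 * mb // documented minimum fallback
	case fileSize <= 100*mb:
		return fileSize // single PUT, no multipart
	case fileSize <= 1*gb:
		return 10 * mb
	case fileSize <= 10*gb:
		return lerpMB(fileSize, 1*gb, 10*gb, 25, 128) // 25–128 MB, scaled
	case fileSize <= 100*gb:
		return 256 * mb
	default:
		if fileSize > tb {
			fileSize = tb // cap the scaling ratio at 1 TB
		}
		return lerpMB(fileSize, 100*gb, tb, 512, 1024) // 512–1024 MB, scaled
	}
}

func main() {
	for _, size := range []int64{5 * gb, 50 * gb, 1 * tb} {
		fmt.Printf("%d bytes -> %d MB chunks\n", size, chunkSizeSketch(size)/mb)
	}
}
```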

### Testing


```bash
go test ./client/upload -run '^TestOptimalChunkSize$' -v
@@ -60,7 +93,7 @@ Parameterized test cases (file size ⇒ expected chunk ⇒ expected parts)
- parts: `1`

4. `100 MB + 1 B`
- chunk: `10 MB` (> 100 MB \- <= 1 GB)
- chunk: `10 MB` (> 100 MB - <= 1 GB)
- parts: ceil((100 MB + 1 B) / 10 MB) = `11`

5. `500 MB`
@@ -72,19 +105,19 @@ Parameterized test cases (file size ⇒ expected chunk ⇒ expected parts)
- parts: ceil(1024 / 10) = `103`

7. `1 GB + 1 B`
- chunk: `25 MB` (start of 1 GB \- 10 GB scaled range)
- chunk: `25 MB` (start of 1 GB - 10 GB scaled range)
- parts: ceil((1024 MB + 1 B) / 25 MB) = `41`

8. `5 GB` (5120 MB)
- chunk: linear between `25 MB` and `128 MB` → ≈ `70 MB` (rounded down)
- parts: ceil(5120 / 70) = `74`

9. `10 GB` (10240 MB)
- chunk: `128 MB` (end of 1 GB \- 10 GB scaled range)
- chunk: `128 MB` (end of 1 GB - 10 GB scaled range)
- parts: `80`

10. `10 GB + 1 B`
- chunk: `256 MB` (> 10 GB \- <= 100 GB fixed)
- chunk: `256 MB` (> 10 GB - <= 100 GB fixed)
- parts: ceil((10240 MB + 1 B) / 256 MB) = `41`

11. `50 GB` (51200 MB)
@@ -96,7 +129,7 @@ Parameterized test cases (file size ⇒ expected chunk ⇒ expected parts)
- parts: `400`

13. `100 GB + 1 B`
- chunk: `512 MB` (start of \> 100 GB scaled range)
- chunk: `512 MB` (start of > 100 GB scaled range)
- parts: ceil((102400 MB + 1 B) / 512 MB) = `201`

14. `500 GB` (512000 MB)
@@ -108,9 +141,9 @@ Parameterized test cases (file size ⇒ expected chunk ⇒ expected parts)
- parts: 1,048,576 / 1024 = `1024`

Test design notes (concise)
1. Use table\-driven subtests in `client/upload/utils_test.go`. Include fields: name, `fileSize int64`, `wantChunk int64`, `wantParts int64`.
1. Use table-driven subtests in `client/upload/utils_test.go`. Include fields: name, `fileSize int64`, `wantChunk int64`, `wantParts int64`.
2. For scaled cases assert: MB alignment, clamped to min/max, and exact `wantParts`. Use integer arithmetic for parts.
3. Add explicit boundary triples for each threshold: exact, \-1 byte, \+1 byte.
3. Add explicit boundary triples for each threshold: exact, -1 byte, +1 byte.
4. Include negative and zero cases to verify fallback behavior.
5. Keep tests deterministic and fast (no external deps).
