diff --git a/config.example.yaml b/config.example.yaml index ea17d15..8f37450 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -29,6 +29,23 @@ storage: # Empty or "0" means unlimited max_size: "" + # Redirect cached artifact downloads to presigned storage URLs (HTTP 302) + # instead of streaming through the proxy. Only effective for S3 and Azure. + # Leave disabled if clients reach the proxy through an authenticating gateway, + # since presigned URLs bypass it. + direct_serve: false + + # How long presigned URLs remain valid (e.g. "5m", "1h"). Default: "15m". + direct_serve_ttl: "15m" + + # Public base URL to substitute into presigned URLs. Set this when the + # proxy reaches storage at an internal address (127.0.0.1, a Docker + # service name) but clients must use a public hostname. Only scheme and + # host are used; the signed path and query are preserved. For S3/MinIO + # the reverse proxy at this address must forward requests with the + # internal Host header or the SigV4 signature will not validate. + # direct_serve_base_url: "https://minio.example.com" + # Database configuration database: # Database driver: "sqlite" (default) or "postgres" diff --git a/go.mod b/go.mod index 1067a3a..6226ab6 100644 --- a/go.mod +++ b/go.mod @@ -28,6 +28,8 @@ require ( require ( 4d63.com/gocheckcompilerdirectives v1.3.0 // indirect 4d63.com/gochecknoglobals v0.2.2 // indirect + cloud.google.com/go/auth v0.18.2 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect cloud.google.com/go/compute/metadata v0.9.0 // indirect codeberg.org/chavacava/garif v0.2.0 // indirect codeberg.org/polyfloyd/go-errorlint v1.9.0 // indirect @@ -40,6 +42,13 @@ require ( github.com/Antonboom/errname v1.1.1 // indirect github.com/Antonboom/nilnil v1.1.1 // indirect github.com/Antonboom/testifylint v1.6.4 // indirect + github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 // indirect + github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 // indirect + github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 // indirect + github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.3 // indirect + github.com/Azure/go-autorest v14.2.0+incompatible // indirect + github.com/Azure/go-autorest/autorest/to v0.4.1 // indirect + github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 // indirect github.com/BurntSushi/toml v1.6.0 // indirect github.com/Djarvur/go-err113 v0.1.1 // indirect github.com/KyleBanks/depth v1.2.1 // indirect @@ -138,6 +147,7 @@ require ( github.com/gobwas/glob v0.2.3 // indirect github.com/godoc-lint/godoc-lint v0.11.2 // indirect github.com/gofrs/flock v0.13.0 // indirect + github.com/golang-jwt/jwt/v5 v5.3.0 // indirect github.com/golangci/asciicheck v0.5.0 // indirect github.com/golangci/dupl v0.0.0-20250308024227-f665c8d69b32 // indirect github.com/golangci/go-printf-func-name v0.1.1 // indirect @@ -150,8 +160,10 @@ require ( github.com/golangci/swaggoswag v0.0.0-20250504205917-77f2aca3143e // indirect github.com/golangci/unconvert v0.0.0-20250410112200-a129a6e6413e // indirect github.com/google/go-cmp v0.7.0 // indirect + github.com/google/s2a-go v0.1.9 // indirect github.com/google/uuid v1.6.0 // indirect github.com/google/wire v0.7.0 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.12 // indirect github.com/googleapis/gax-go/v2 v2.17.0 // indirect github.com/gordonklaus/ineffassign v0.2.0 // indirect github.com/gostaticanalysis/analysisutil v0.7.1 // indirect @@ -174,6 +186,7 @@ require ( github.com/kkHAIKE/contextcheck v1.1.6 // indirect github.com/kulti/thelper v0.7.1 // indirect github.com/kunwardeep/paralleltest v1.0.15 // indirect + github.com/kylelemons/godebug v1.1.0 // indirect github.com/lasiar/canonicalheader v1.1.2 // indirect github.com/ldez/exptostd v0.4.5 // indirect github.com/ldez/gomoddirectives v0.8.0 // indirect @@ -209,6 +222,7 @@ require ( github.com/pandatix/go-cvss v0.6.2 // indirect github.com/pelletier/go-toml v1.9.5 // indirect github.com/pelletier/go-toml/v2 v2.2.4 // indirect + github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/common v0.67.5 // indirect github.com/prometheus/procfs v0.20.1 // indirect @@ -277,10 +291,12 @@ require ( go.uber.org/zap v1.27.1 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/crypto v0.48.0 // indirect golang.org/x/exp v0.0.0-20260218203240-3dfff04db8fa // indirect golang.org/x/exp/typeparams v0.0.0-20260209203927-2842357ff358 // indirect golang.org/x/mod v0.33.0 // indirect golang.org/x/net v0.51.0 // indirect + golang.org/x/oauth2 v0.35.0 // indirect golang.org/x/sys v0.42.0 // indirect golang.org/x/text v0.34.0 // indirect golang.org/x/tools v0.42.0 // indirect diff --git a/go.sum b/go.sum index 49b11f9..26647ed 100644 --- a/go.sum +++ b/go.sum @@ -43,6 +43,26 @@ github.com/Antonboom/nilnil v1.1.1 h1:9Mdr6BYd8WHCDngQnNVV0b554xyisFioEKi30sksuf github.com/Antonboom/nilnil v1.1.1/go.mod h1:yCyAmSw3doopbOWhJlVci+HuyNRuHJKIv6V2oYQa8II= github.com/Antonboom/testifylint v1.6.4 h1:gs9fUEy+egzxkEbq9P4cpcMB6/G0DYdMeiFS87UiqmQ= github.com/Antonboom/testifylint v1.6.4/go.mod h1:YO33FROXX2OoUfwjz8g+gUxQXio5i9qpVy7nXGbxDD4= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 h1:JXg2dwJUmPB9JmtVmdEB16APJ7jurfbY5jnfXpJoRMc= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0/go.mod h1:YD5h/ldMsG0XiIw7PdyNhLxaM317eFh5yNLccNfGdyw= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 h1:Hk5QBxZQC1jb2Fwj6mpzme37xbCDdNTxU7O9eb5+LB4= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1/go.mod h1:IYus9qsFobWIc2YVwe/WPjcnyCkPKtnHAqUYeebc8z0= +github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.2 h1:yz1bePFlP5Vws5+8ez6T3HWXPmwOK7Yvq8QxDBD3SKY= +github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.2/go.mod h1:Pa9ZNPuoNu/GztvBSKk9J1cDJW6vk/n0zLtV4mgd8N8= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 h1:9iefClla7iYpfYWdzPCRDozdmndjTm8DXdpCzPajMgA= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2/go.mod h1:XtLgD3ZD34DAaVIIAyG3objl5DynM3CQ/vMcbBNJZGI= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.8.1 h1:/Zt+cDPnpC3OVDm/JKLOs7M2DKmLRIIp3XIx9pHHiig= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.8.1/go.mod h1:Ng3urmn6dYe8gnbCMoHHVl5APYz2txho3koEkV2o2HA= +github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.3 h1:ZJJNFaQ86GVKQ9ehwqyAFE6pIfyicpuJ8IkVaPBc6/4= +github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.3/go.mod h1:URuDvhmATVKqHBH9/0nOiNKk0+YcwfQ3WkK5PqHKxc8= +github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs= +github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= +github.com/Azure/go-autorest/autorest/to v0.4.1 h1:CxNHBqdzTr7rLtdrtb5CMjJcDut+WNGCVv7OmS5+lTc= +github.com/Azure/go-autorest/autorest/to v0.4.1/go.mod h1:EtaofgU4zmtvn1zT2ARsjRFdq9vXx0YWtmElwL+GZ9M= +github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1 h1:WJTmL004Abzc5wDB5VtZG2PJk5ndYDgVacGqfirKxjM= +github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1/go.mod h1:tCcJZ0uHAmvjsVYzEFivsRTN00oz5BEsRgQHu5JZ9WE= +github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 h1:XRzhVemXdgvJqCH0sFfrBUTnUJSBrBf7++ypk+twtRs= +github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0/go.mod h1:HKpQxkWaGLJ+D/5H8QRpyQXA1eKjxkFlOMwck5+33Jk= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/toml v1.6.0 h1:dRaEfpa2VI55EwlIW72hMRHdWouJeRF7TPYhI+AUQjk= github.com/BurntSushi/toml v1.6.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= @@ -305,6 +325,8 @@ github.com/godoc-lint/godoc-lint v0.11.2 h1:Bp0FkJWoSdNsBikdNgIcgtaoo+xz6I/Y9s5W github.com/godoc-lint/godoc-lint v0.11.2/go.mod h1:iVpGdL1JCikNH2gGeAn3Hh+AgN5Gx/I/cxV+91L41jo= github.com/gofrs/flock v0.13.0 h1:95JolYOvGMqeH31+FC7D2+uULf6mG61mEZ/A8dRYMzw= github.com/gofrs/flock v0.13.0/go.mod h1:jxeyy9R1auM5S6JYDBhDt+E2TCo7DkratH4Pgi8P+Z0= +github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= +github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golangci/asciicheck v0.5.0 h1:jczN/BorERZwK8oiFBOGvlGPknhvq0bjnysTj4nUfo0= @@ -396,6 +418,8 @@ github.com/julz/importas v0.2.0 h1:y+MJN/UdL63QbFJHws9BVC5RpA2iq0kpjrFajTGivjQ= github.com/julz/importas v0.2.0/go.mod h1:pThlt589EnCYtMnmhmRYY/qn9lCf/frPOK+WMx3xiJY= github.com/karamaru-alpha/copyloopvar v1.2.2 h1:yfNQvP9YaGQR7VaWLYcfZUlRP2eo2vhExWKxD/fP6q0= github.com/karamaru-alpha/copyloopvar v1.2.2/go.mod h1:oY4rGZqZ879JkJMtX3RRkcXRkmUvH0x35ykgaKgsgJY= +github.com/keybase/go-keychain v0.0.1 h1:way+bWYa6lDppZoZcgMbYsvC7GxljxrskdNInRtuthU= +github.com/keybase/go-keychain v0.0.1/go.mod h1:PdEILRW3i9D8JcdM+FmY6RwkHGnhHxXwkPPMeUgOK1k= github.com/kisielk/errcheck v1.9.0 h1:9xt1zI9EBfcYBvdU1nVrzMzzUPUtPKs9bVSIM3TAb3M= github.com/kisielk/errcheck v1.9.0/go.mod h1:kQxWMMVZgIkDq7U8xtG/n2juOjbLgZtedi0D+/VL/i8= github.com/kkHAIKE/contextcheck v1.1.6 h1:7HIyRcnyzxL9Lz06NGhiKvenXq7Zw6Q0UQu/ttjfJCE= @@ -509,6 +533,8 @@ github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0 github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= github.com/peterbourgon/g2s v0.0.0-20170223122336-d4e7ad98afea h1:sKwxy1H95npauwu8vtF95vG/syrL0p8fSZo/XlDg5gk= github.com/peterbourgon/g2s v0.0.0-20170223122336-d4e7ad98afea/go.mod h1:1VcHEd3ro4QMoHfiNl/j7Jkln9+KQuorp0PItHMJYNg= +github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= +github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -771,6 +797,7 @@ golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211105183446-c75c47738b0c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/internal/config/config.go b/internal/config/config.go index ad0acc0..7dafab1 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -51,6 +51,7 @@ import ( "encoding/base64" "encoding/json" "fmt" + "net/url" "os" "path/filepath" "strconv" @@ -133,6 +134,21 @@ type StorageConfig struct { // When exceeded, least recently used artifacts are evicted. // Empty or "0" means unlimited. MaxSize string `json:"max_size" yaml:"max_size"` + + // DirectServe enables redirecting cached artifact downloads to presigned + // storage URLs (HTTP 302) instead of streaming bytes through the proxy. + // Only effective for backends that support URL signing (S3, Azure). + DirectServe bool `json:"direct_serve" yaml:"direct_serve"` + + // DirectServeTTL is how long presigned URLs remain valid. + // Uses Go duration syntax (e.g. "5m", "1h"). Default: "15m". + DirectServeTTL string `json:"direct_serve_ttl" yaml:"direct_serve_ttl"` + + // DirectServeBaseURL overrides the scheme and host of presigned URLs + // before returning them to clients. Useful when the proxy reaches + // storage at an internal address (e.g. 127.0.0.1 or a Docker hostname) + // but clients must use a public one. + DirectServeBaseURL string `json:"direct_serve_base_url" yaml:"direct_serve_base_url"` } // DatabaseConfig configures the cache database. @@ -303,6 +319,15 @@ func (c *Config) LoadFromEnv() { if v := os.Getenv("PROXY_STORAGE_MAX_SIZE"); v != "" { c.Storage.MaxSize = v } + if v := os.Getenv("PROXY_STORAGE_DIRECT_SERVE"); v != "" { + c.Storage.DirectServe = envBool(v) + } + if v := os.Getenv("PROXY_STORAGE_DIRECT_SERVE_TTL"); v != "" { + c.Storage.DirectServeTTL = v + } + if v := os.Getenv("PROXY_STORAGE_DIRECT_SERVE_BASE_URL"); v != "" { + c.Storage.DirectServeBaseURL = v + } if v := os.Getenv("PROXY_DATABASE_DRIVER"); v != "" { c.Database.Driver = v } @@ -322,10 +347,10 @@ func (c *Config) LoadFromEnv() { c.Cooldown.Default = v } if v := os.Getenv("PROXY_CACHE_METADATA"); v != "" { - c.CacheMetadata = v == "true" || v == "1" + c.CacheMetadata = envBool(v) } if v := os.Getenv("PROXY_MIRROR_API"); v != "" { - c.MirrorAPI = v == "true" || v == "1" + c.MirrorAPI = envBool(v) } if v := os.Getenv("PROXY_METADATA_TTL"); v != "" { c.MetadataTTL = v @@ -379,6 +404,21 @@ func (c *Config) Validate() error { } } + // Validate direct serve TTL if specified + if c.Storage.DirectServeTTL != "" { + if _, err := time.ParseDuration(c.Storage.DirectServeTTL); err != nil { + return fmt.Errorf("invalid storage.direct_serve_ttl %q: %w", c.Storage.DirectServeTTL, err) + } + } + + // Validate direct serve base URL if specified + if c.Storage.DirectServeBaseURL != "" { + u, err := url.Parse(c.Storage.DirectServeBaseURL) + if err != nil || u.Scheme == "" || u.Host == "" { + return fmt.Errorf("invalid storage.direct_serve_base_url %q: must be an absolute URL", c.Storage.DirectServeBaseURL) + } + } + // Validate metadata TTL if specified if c.MetadataTTL != "" && c.MetadataTTL != "0" { if _, err := time.ParseDuration(c.MetadataTTL); err != nil { @@ -389,7 +429,10 @@ func (c *Config) Validate() error { return nil } -const defaultMetadataTTL = 5 * time.Minute //nolint:mnd // sensible default +const ( + defaultMetadataTTL = 5 * time.Minute //nolint:mnd // sensible default + defaultDirectServeTTL = 15 * time.Minute //nolint:mnd // sensible default +) // ParseMetadataTTL returns the metadata TTL duration. // Returns 5 minutes if unset, 0 if explicitly disabled. @@ -407,6 +450,19 @@ func (c *Config) ParseMetadataTTL() time.Duration { return d } +// ParseDirectServeTTL returns the presigned URL expiry duration. +// Returns 15 minutes if unset. +func (c *Config) ParseDirectServeTTL() time.Duration { + if c.Storage.DirectServeTTL == "" { + return defaultDirectServeTTL + } + d, err := time.ParseDuration(c.Storage.DirectServeTTL) + if err != nil { + return defaultDirectServeTTL + } + return d +} + // ParseSize parses a human-readable size string (e.g., "10GB", "500MB"). // Returns the size in bytes. func ParseSize(s string) (int64, error) { @@ -487,3 +543,7 @@ func (a *AuthConfig) Header() (name, value string) { func expandEnv(s string) string { return os.Expand(s, os.Getenv) } + +func envBool(v string) bool { + return v == "true" || v == "1" +} diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 6e8c3a0..2d85fd6 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -355,3 +355,77 @@ func TestLoadMetadataTTLFromEnv(t *testing.T) { t.Errorf("MetadataTTL = %q, want %q", cfg.MetadataTTL, "10m") } } + +func TestParseDirectServeTTL(t *testing.T) { + tests := []struct { + name string + ttl string + want time.Duration + }{ + {"empty defaults to 15m", "", 15 * time.Minute}, + {"5 minutes", "5m", 5 * time.Minute}, + {"1 hour", "1h", 1 * time.Hour}, + {"invalid defaults to 15m", "not-a-duration", 15 * time.Minute}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cfg := Default() + cfg.Storage.DirectServeTTL = tt.ttl + got := cfg.ParseDirectServeTTL() + if got != tt.want { + t.Errorf("ParseDirectServeTTL() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestValidateDirectServeTTL(t *testing.T) { + cfg := Default() + cfg.Storage.DirectServeTTL = "invalid" + if err := cfg.Validate(); err == nil { + t.Error("expected validation error for invalid storage.direct_serve_ttl") + } + + cfg.Storage.DirectServeTTL = "5m" + if err := cfg.Validate(); err != nil { + t.Errorf("unexpected error for valid storage.direct_serve_ttl: %v", err) + } +} + +func TestLoadDirectServeFromEnv(t *testing.T) { + cfg := Default() + t.Setenv("PROXY_STORAGE_DIRECT_SERVE", "true") + t.Setenv("PROXY_STORAGE_DIRECT_SERVE_TTL", "30m") + t.Setenv("PROXY_STORAGE_DIRECT_SERVE_BASE_URL", "https://cdn.example.com") + cfg.LoadFromEnv() + + if !cfg.Storage.DirectServe { + t.Error("Storage.DirectServe should be true") + } + if cfg.Storage.DirectServeTTL != "30m" { + t.Errorf("Storage.DirectServeTTL = %q, want %q", cfg.Storage.DirectServeTTL, "30m") + } + if cfg.Storage.DirectServeBaseURL != "https://cdn.example.com" { + t.Errorf("Storage.DirectServeBaseURL = %q, want %q", cfg.Storage.DirectServeBaseURL, "https://cdn.example.com") + } +} + +func TestValidateDirectServeBaseURL(t *testing.T) { + cfg := Default() + + cfg.Storage.DirectServeBaseURL = "not a url" + if err := cfg.Validate(); err == nil { + t.Error("expected validation error for relative direct_serve_base_url") + } + + cfg.Storage.DirectServeBaseURL = "://bad" + if err := cfg.Validate(); err == nil { + t.Error("expected validation error for unparseable direct_serve_base_url") + } + + cfg.Storage.DirectServeBaseURL = "https://cdn.example.com" + if err := cfg.Validate(); err != nil { + t.Errorf("unexpected error for valid direct_serve_base_url: %v", err) + } +} diff --git a/internal/handler/handler.go b/internal/handler/handler.go index d7d79c9..746b9e8 100644 --- a/internal/handler/handler.go +++ b/internal/handler/handler.go @@ -10,6 +10,7 @@ import ( "io" "log/slog" "net/http" + "net/url" "strconv" "strings" "time" @@ -61,15 +62,21 @@ func ReadMetadata(r io.Reader) ([]byte, error) { // Proxy provides shared functionality for protocol handlers. type Proxy struct { - DB *database.DB - Storage storage.Storage - Fetcher fetch.FetcherInterface - Resolver *fetch.Resolver - Logger *slog.Logger - Cooldown *cooldown.Config - CacheMetadata bool - MetadataTTL time.Duration - HTTPClient *http.Client + DB *database.DB + Storage storage.Storage + Fetcher fetch.FetcherInterface + Resolver *fetch.Resolver + Logger *slog.Logger + Cooldown *cooldown.Config + CacheMetadata bool + MetadataTTL time.Duration + DirectServe bool + DirectServeTTL time.Duration + // DirectServeBaseURL, if set, replaces the scheme and host of presigned + // URLs so clients receive a public address even when the proxy reaches + // storage at an internal one. + DirectServeBaseURL string + HTTPClient *http.Client } // NewProxy creates a new Proxy with the given dependencies. @@ -92,6 +99,7 @@ func NewProxy(db *database.DB, store storage.Storage, fetcher fetch.FetcherInter // CacheResult contains information about a cached or fetched artifact. type CacheResult struct { Reader io.ReadCloser + RedirectURL string Size int64 ContentType string Hash string @@ -138,6 +146,26 @@ func (p *Proxy) checkCache(ctx context.Context, pkgPURL, versionPURL, filename s return nil, nil } + result := &CacheResult{ + Size: artifact.Size.Int64, + ContentType: artifact.ContentType.String, + Hash: artifact.ContentHash.String, + Cached: true, + } + + if p.DirectServe { + signed, err := p.Storage.SignedURL(ctx, artifact.StoragePath.String, p.DirectServeTTL) + if err == nil { + result.RedirectURL = rewriteSignedURLHost(signed, p.DirectServeBaseURL) + p.recordCacheHit(pkgPURL, versionPURL, filename) + return result, nil + } + if !errors.Is(err, storage.ErrSignedURLUnsupported) { + p.Logger.Warn("failed to sign storage URL, falling back to streaming", + "path", artifact.StoragePath.String, "error", err) + } + } + start := time.Now() reader, err := p.Storage.Open(ctx, artifact.StoragePath.String) metrics.RecordStorageOperation("read", time.Since(start)) @@ -148,20 +176,36 @@ func (p *Proxy) checkCache(ctx context.Context, pkgPURL, versionPURL, filename s return nil, nil } - _ = p.DB.RecordArtifactHit(versionPURL, filename) + result.Reader = reader + p.recordCacheHit(pkgPURL, versionPURL, filename) + return result, nil +} - // Extract ecosystem from pkgPURL for metrics - if p, err := purl.Parse(pkgPURL); err == nil { - metrics.RecordCacheHit(purl.PURLTypeToEcosystem(p.Type)) +// rewriteSignedURLHost replaces the scheme and host of a signed URL with those +// from baseURL, preserving the path and query (which carry the signature). +// Returns signed unchanged if baseURL is empty or either URL fails to parse. +func rewriteSignedURLHost(signed, baseURL string) string { + if baseURL == "" { + return signed + } + s, err := url.Parse(signed) + if err != nil { + return signed + } + b, err := url.Parse(baseURL) + if err != nil || b.Scheme == "" || b.Host == "" { + return signed } + s.Scheme = b.Scheme + s.Host = b.Host + return s.String() +} - return &CacheResult{ - Reader: reader, - Size: artifact.Size.Int64, - ContentType: artifact.ContentType.String, - Hash: artifact.ContentHash.String, - Cached: true, - }, nil +func (p *Proxy) recordCacheHit(pkgPURL, versionPURL, filename string) { + _ = p.DB.RecordArtifactHit(versionPURL, filename) + if parsed, err := purl.Parse(pkgPURL); err == nil { + metrics.RecordCacheHit(purl.PURLTypeToEcosystem(parsed.Type)) + } } func (p *Proxy) fetchAndCache(ctx context.Context, ecosystem, name, version, filename, pkgPURL, versionPURL string) (*CacheResult, error) { @@ -276,6 +320,15 @@ func (p *Proxy) updateCacheDB(ecosystem, name, filename, pkgPURL, versionPURL, u // ServeArtifact writes a CacheResult to an HTTP response. func ServeArtifact(w http.ResponseWriter, result *CacheResult) { + if result.RedirectURL != "" { + if result.Hash != "" { + w.Header().Set("ETag", fmt.Sprintf(`"%s"`, result.Hash)) + } + w.Header().Set("Location", result.RedirectURL) + w.WriteHeader(http.StatusFound) + return + } + defer func() { _ = result.Reader.Close() }() if result.ContentType != "" { diff --git a/internal/handler/handler_test.go b/internal/handler/handler_test.go index 78ed415..3a1d2ab 100644 --- a/internal/handler/handler_test.go +++ b/internal/handler/handler_test.go @@ -21,9 +21,11 @@ import ( // mockStorage implements storage.Storage for testing. type mockStorage struct { - files map[string][]byte - storeErr error - openErr error + files map[string][]byte + storeErr error + openErr error + signedURL string + signErr error } func newMockStorage() *mockStorage { @@ -79,6 +81,16 @@ func (s *mockStorage) UsedSpace(_ context.Context) (int64, error) { return total, nil } +func (s *mockStorage) SignedURL(_ context.Context, _ string, _ time.Duration) (string, error) { + if s.signErr != nil { + return "", s.signErr + } + if s.signedURL == "" { + return "", storage.ErrSignedURLUnsupported + } + return s.signedURL, nil +} + func (s *mockStorage) URL() string { return "mem://" } func (s *mockStorage) Close() error { return nil } @@ -311,6 +323,213 @@ func TestGetOrFetchArtifactFromURL_CacheMiss_StorageMissing(t *testing.T) { } } +func TestGetOrFetchArtifact_DirectServe_Redirect(t *testing.T) { + proxy, db, store, fetcher := setupTestProxy(t) + seedPackage(t, db, store, "npm", "lodash", "4.17.21", "lodash-4.17.21.tgz", "cached content") + + proxy.DirectServe = true + proxy.DirectServeTTL = 15 * time.Minute + store.signedURL = "https://bucket.s3.amazonaws.com/npm/lodash?X-Amz-Signature=abc" + + result, err := proxy.GetOrFetchArtifact(context.Background(), "npm", "lodash", "4.17.21", "lodash-4.17.21.tgz") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if !result.Cached { + t.Error("expected result to be cached") + } + if result.RedirectURL != store.signedURL { + t.Errorf("RedirectURL = %q, want %q", result.RedirectURL, store.signedURL) + } + if result.Reader != nil { + t.Error("Reader should be nil when redirecting") + } + if fetcher.fetchCalled { + t.Error("fetcher should not be called on cache hit") + } + + // Hit count should still be recorded on the redirect path. + art, _ := db.GetArtifact("pkg:npm/lodash@4.17.21", "lodash-4.17.21.tgz") + if art == nil || art.HitCount != 1 { + t.Errorf("artifact hit count not recorded: %+v", art) + } +} + +func TestGetOrFetchArtifact_DirectServe_BaseURLRewrite(t *testing.T) { + proxy, db, store, _ := setupTestProxy(t) + seedPackage(t, db, store, "npm", "lodash", "4.17.21", "lodash-4.17.21.tgz", "cached content") + + proxy.DirectServe = true + proxy.DirectServeBaseURL = "https://cdn.example.com" + store.signedURL = "http://127.0.0.1:9000/bucket/npm/lodash?X-Amz-Signature=abc&X-Amz-Expires=900" + + result, err := proxy.GetOrFetchArtifact(context.Background(), "npm", "lodash", "4.17.21", "lodash-4.17.21.tgz") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + want := "https://cdn.example.com/bucket/npm/lodash?X-Amz-Signature=abc&X-Amz-Expires=900" + if result.RedirectURL != want { + t.Errorf("RedirectURL = %q, want %q", result.RedirectURL, want) + } +} + +func TestRewriteSignedURLHost(t *testing.T) { + tests := []struct { + name string + signed string + baseURL string + want string + }{ + { + "empty base url is no-op", + "http://127.0.0.1:9000/bucket/key?sig=abc", + "", + "http://127.0.0.1:9000/bucket/key?sig=abc", + }, + { + "replaces scheme and host", + "http://127.0.0.1:9000/bucket/key?sig=abc", + "https://cdn.example.com", + "https://cdn.example.com/bucket/key?sig=abc", + }, + { + "preserves path and query", + "http://minio:9000/bucket/npm/lodash/4.17.21/lodash.tgz?X-Amz-Signature=abc&X-Amz-Date=20260101", + "https://files.example.com", + "https://files.example.com/bucket/npm/lodash/4.17.21/lodash.tgz?X-Amz-Signature=abc&X-Amz-Date=20260101", + }, + { + "ignores base url path", + "http://127.0.0.1:9000/bucket/key?sig=abc", + "https://cdn.example.com/ignored", + "https://cdn.example.com/bucket/key?sig=abc", + }, + { + "invalid base url is no-op", + "http://127.0.0.1:9000/bucket/key?sig=abc", + "://bad", + "http://127.0.0.1:9000/bucket/key?sig=abc", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := rewriteSignedURLHost(tt.signed, tt.baseURL) + if got != tt.want { + t.Errorf("rewriteSignedURLHost(%q, %q) = %q, want %q", tt.signed, tt.baseURL, got, tt.want) + } + }) + } +} + +func TestGetOrFetchArtifact_DirectServe_FallbackOnUnsupported(t *testing.T) { + proxy, db, store, _ := setupTestProxy(t) + seedPackage(t, db, store, "npm", "lodash", "4.17.21", "lodash-4.17.21.tgz", "cached content") + + proxy.DirectServe = true + // store.signedURL is empty so SignedURL returns ErrSignedURLUnsupported. + + result, err := proxy.GetOrFetchArtifact(context.Background(), "npm", "lodash", "4.17.21", "lodash-4.17.21.tgz") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer func() { _ = result.Reader.Close() }() + + if result.RedirectURL != "" { + t.Errorf("RedirectURL should be empty, got %q", result.RedirectURL) + } + if result.Reader == nil { + t.Fatal("Reader should be set when signing is unsupported") + } + body, _ := io.ReadAll(result.Reader) + if string(body) != "cached content" { + t.Errorf("got body %q, want %q", body, "cached content") + } +} + +func TestGetOrFetchArtifact_DirectServe_FallbackOnError(t *testing.T) { + proxy, db, store, _ := setupTestProxy(t) + seedPackage(t, db, store, "npm", "lodash", "4.17.21", "lodash-4.17.21.tgz", "cached content") + + proxy.DirectServe = true + store.signErr = errors.New("signing failed") + + result, err := proxy.GetOrFetchArtifact(context.Background(), "npm", "lodash", "4.17.21", "lodash-4.17.21.tgz") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer func() { _ = result.Reader.Close() }() + + if result.RedirectURL != "" { + t.Errorf("RedirectURL should be empty on signing error, got %q", result.RedirectURL) + } + if result.Reader == nil { + t.Fatal("Reader should be set when signing fails") + } +} + +func TestGetOrFetchArtifact_DirectServe_DisabledIgnoresSigning(t *testing.T) { + proxy, db, store, _ := setupTestProxy(t) + seedPackage(t, db, store, "npm", "lodash", "4.17.21", "lodash-4.17.21.tgz", "cached content") + + proxy.DirectServe = false + store.signedURL = "https://bucket.example/should-not-be-used" + + result, err := proxy.GetOrFetchArtifact(context.Background(), "npm", "lodash", "4.17.21", "lodash-4.17.21.tgz") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer func() { _ = result.Reader.Close() }() + + if result.RedirectURL != "" { + t.Errorf("RedirectURL should be empty when DirectServe is off, got %q", result.RedirectURL) + } +} + +func TestServeArtifact_Redirect(t *testing.T) { + w := httptest.NewRecorder() + ServeArtifact(w, &CacheResult{ + RedirectURL: "https://bucket.s3.amazonaws.com/file?sig=abc", + Hash: "abc123", + Cached: true, + }) + + if w.Code != http.StatusFound { + t.Errorf("status = %d, want %d", w.Code, http.StatusFound) + } + if loc := w.Header().Get("Location"); loc != "https://bucket.s3.amazonaws.com/file?sig=abc" { + t.Errorf("Location = %q", loc) + } + if etag := w.Header().Get("ETag"); etag != `"abc123"` { + t.Errorf("ETag = %q, want %q", etag, `"abc123"`) + } + if cl := w.Header().Get("Content-Length"); cl != "" { + t.Errorf("Content-Length should not be set on redirect, got %q", cl) + } +} + +func TestServeArtifact_Stream(t *testing.T) { + w := httptest.NewRecorder() + ServeArtifact(w, &CacheResult{ + Reader: io.NopCloser(strings.NewReader("payload")), + Size: 7, + ContentType: "application/octet-stream", + Hash: "abc123", + }) + + if w.Code != http.StatusOK { + t.Errorf("status = %d, want %d", w.Code, http.StatusOK) + } + if w.Body.String() != "payload" { + t.Errorf("body = %q, want %q", w.Body.String(), "payload") + } + if ct := w.Header().Get("Content-Type"); ct != "application/octet-stream" { + t.Errorf("Content-Type = %q", ct) + } +} + func TestGetOrFetchArtifactFromURL_CacheHit(t *testing.T) { proxy, db, store, fetcher := setupTestProxy(t) seedPackage(t, db, store, "pypi", "requests", "2.28.0", "requests-2.28.0.tar.gz", "pypi content") diff --git a/internal/server/server.go b/internal/server/server.go index 5d544a2..fdfda20 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -148,6 +148,9 @@ func (s *Server) Start() error { proxy.Cooldown = cd proxy.CacheMetadata = s.cfg.CacheMetadata proxy.MetadataTTL = s.cfg.ParseMetadataTTL() + proxy.DirectServe = s.cfg.Storage.DirectServe + proxy.DirectServeTTL = s.cfg.ParseDirectServeTTL() + proxy.DirectServeBaseURL = s.cfg.Storage.DirectServeBaseURL // Create router with Chi r := chi.NewRouter() diff --git a/internal/storage/blob.go b/internal/storage/blob.go index 2d6af46..dc41668 100644 --- a/internal/storage/blob.go +++ b/internal/storage/blob.go @@ -6,12 +6,15 @@ import ( "encoding/hex" "fmt" "io" + "net/http" "os" "path/filepath" "runtime" "strings" + "time" "gocloud.dev/blob" + _ "gocloud.dev/blob/azureblob" _ "gocloud.dev/blob/fileblob" _ "gocloud.dev/blob/s3blob" "gocloud.dev/gcerrors" @@ -138,6 +141,20 @@ func (b *Blob) Delete(ctx context.Context, path string) error { return nil } +func (b *Blob) SignedURL(ctx context.Context, path string, expiry time.Duration) (string, error) { + url, err := b.bucket.SignedURL(ctx, path, &blob.SignedURLOptions{ + Method: http.MethodGet, + Expiry: expiry, + }) + if err != nil { + if gcerrors.Code(err) == gcerrors.Unimplemented { + return "", ErrSignedURLUnsupported + } + return "", fmt.Errorf("signing URL: %w", err) + } + return url, nil +} + func (b *Blob) Size(ctx context.Context, path string) (int64, error) { attrs, err := b.bucket.Attributes(ctx, path) if err != nil { diff --git a/internal/storage/blob_test.go b/internal/storage/blob_test.go index bb2d089..d80290b 100644 --- a/internal/storage/blob_test.go +++ b/internal/storage/blob_test.go @@ -10,6 +10,7 @@ import ( "runtime" "strings" "testing" + "time" ) func TestOpenBucket(t *testing.T) { @@ -188,6 +189,18 @@ func TestBlobLargeFile(t *testing.T) { assertLargeFileRoundTrip(t, createTestBlob(t)) } +func TestBlobSignedURLUnsupported(t *testing.T) { + b := createTestBlob(t) + ctx := context.Background() + + // fileblob has no URL signer configured, so this must surface as + // ErrSignedURLUnsupported rather than a generic error. + _, err := b.SignedURL(ctx, "test/file.txt", time.Minute) + if !errors.Is(err, ErrSignedURLUnsupported) { + t.Errorf("SignedURL on fileblob = %v, want ErrSignedURLUnsupported", err) + } +} + func TestBlobOverwrite(t *testing.T) { b := createTestBlob(t) ctx := context.Background() diff --git a/internal/storage/filesystem.go b/internal/storage/filesystem.go index cf6a1fe..7a4147a 100644 --- a/internal/storage/filesystem.go +++ b/internal/storage/filesystem.go @@ -8,6 +8,7 @@ import ( "io" "os" "path/filepath" + "time" ) // Filesystem implements Storage using the local filesystem. @@ -129,6 +130,10 @@ func (fs *Filesystem) Delete(ctx context.Context, path string) error { return nil } +func (fs *Filesystem) SignedURL(_ context.Context, _ string, _ time.Duration) (string, error) { + return "", ErrSignedURLUnsupported +} + func (fs *Filesystem) Size(ctx context.Context, path string) (int64, error) { fullPath := fs.fullPath(path) diff --git a/internal/storage/filesystem_test.go b/internal/storage/filesystem_test.go index 7b7828d..cdd7f6b 100644 --- a/internal/storage/filesystem_test.go +++ b/internal/storage/filesystem_test.go @@ -10,6 +10,7 @@ import ( "path/filepath" "strings" "testing" + "time" ) func TestNewFilesystem(t *testing.T) { @@ -236,6 +237,15 @@ func TestFilesystemLargeFile(t *testing.T) { assertLargeFileRoundTrip(t, createTestFilesystem(t)) } +func TestFilesystemSignedURLUnsupported(t *testing.T) { + fs := createTestFilesystem(t) + + _, err := fs.SignedURL(context.Background(), "test/file.txt", time.Minute) + if !errors.Is(err, ErrSignedURLUnsupported) { + t.Errorf("SignedURL = %v, want ErrSignedURLUnsupported", err) + } +} + func createTestFilesystem(t *testing.T) *Filesystem { t.Helper() dir := t.TempDir() diff --git a/internal/storage/storage.go b/internal/storage/storage.go index 8a9026c..0dba46a 100644 --- a/internal/storage/storage.go +++ b/internal/storage/storage.go @@ -15,12 +15,17 @@ import ( "encoding/hex" "errors" "io" + "time" ) const dirPermissions = 0755 var ( ErrNotFound = errors.New("artifact not found") + + // ErrSignedURLUnsupported is returned by SignedURL when the backend + // cannot generate presigned URLs (e.g. local filesystem). + ErrSignedURLUnsupported = errors.New("signed URLs not supported by storage backend") ) // Storage defines the interface for artifact storage backends. @@ -45,6 +50,10 @@ type Storage interface { // Returns ErrNotFound if the path does not exist. Size(ctx context.Context, path string) (int64, error) + // SignedURL returns a presigned URL granting time-limited GET access to path. + // Returns ErrSignedURLUnsupported if the backend cannot generate presigned URLs. + SignedURL(ctx context.Context, path string, expiry time.Duration) (string, error) + // UsedSpace returns the total bytes used by all stored content. UsedSpace(ctx context.Context) (int64, error)