diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 82ee83c..e0d3cfb 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -38,7 +38,7 @@ jobs: images: ghcr.io/${{ github.repository }} - name: Build and push Docker image - uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf + uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f with: context: . push: true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c23de56..baba90b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -27,7 +27,7 @@ jobs: go-version-file: go.mod cache: false - - uses: goreleaser/goreleaser-action@5daf1e915a5f0af01ddbcd89a43b8061ff4f1a89 # v7.2.2 + - uses: goreleaser/goreleaser-action@1a80836c5c9d9e5755a25cb59ec6f45a3b5f41a8 # v7.2.1 with: version: "~> v2" args: release --clean diff --git a/.github/workflows/zizmor.yml b/.github/workflows/zizmor.yml index f621254..e34bdc6 100644 --- a/.github/workflows/zizmor.yml +++ b/.github/workflows/zizmor.yml @@ -26,4 +26,4 @@ jobs: persist-credentials: false - name: Run zizmor - uses: zizmorcore/zizmor-action@5f14fd08f7cf1cb1609c1e344975f152c7ee938d # v0.5.6 + uses: zizmorcore/zizmor-action@a16621b09c6db4281f81a93cb393b05dcd7b7165 # v0.5.5 diff --git a/cmd/proxy/main.go b/cmd/proxy/main.go index 15a71c0..a8c8bdf 100644 --- a/cmd/proxy/main.go +++ b/cmd/proxy/main.go @@ -470,7 +470,6 @@ func runMirror() { proxy := handler.NewProxy(db, store, fetcher, resolver, logger) proxy.CacheMetadata = true // mirror always caches metadata proxy.MetadataTTL = cfg.ParseMetadataTTL() - proxy.MetadataMaxSize = cfg.ParseMetadataMaxSize() m := mirror.New(proxy, db, store, logger, *concurrency) diff --git a/docs/configuration.md b/docs/configuration.md index 1310bd0..cf6c101 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -265,16 +265,6 @@ Set to `"0"` to always revalidate with upstream (ETag-based conditional requests When upstream is unreachable and the cached entry is past its TTL, the proxy serves the stale cached copy with a `Warning: 110 - "Response is Stale"` header so clients can tell the data may be outdated. -### Metadata size limit - -Upstream metadata responses are buffered in memory before being rewritten and served. `metadata_max_size` caps that buffer to protect against OOM from a misbehaving upstream. Some npm packages with thousands of versions (for example `renovate`) exceed the 100 MB default, so raise this if you see `metadata response exceeds size limit` in the logs. - -```yaml -metadata_max_size: "100MB" # default -``` - -Or via environment variable: `PROXY_METADATA_MAX_SIZE=250MB`. - ## Mirror API The `/api/mirror` endpoints are disabled by default. Enable them to allow starting mirror jobs via HTTP: diff --git a/internal/config/config.go b/internal/config/config.go index 0e8405d..87e23ac 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -96,11 +96,6 @@ type Config struct { // Default: "5m". Set to "0" to always revalidate. MetadataTTL string `json:"metadata_ttl" yaml:"metadata_ttl"` - // MetadataMaxSize is the maximum size of an upstream metadata response - // the proxy will buffer (e.g. "100MB", "250MB"). Responses over this - // size return ErrMetadataTooLarge. Default: "100MB". - MetadataMaxSize string `json:"metadata_max_size" yaml:"metadata_max_size"` - // MirrorAPI enables the /api/mirror endpoints for starting mirror jobs via HTTP. // Disabled by default to prevent unauthenticated users from triggering downloads. MirrorAPI bool `json:"mirror_api" yaml:"mirror_api"` @@ -429,9 +424,6 @@ func (c *Config) LoadFromEnv() { if v := os.Getenv("PROXY_METADATA_TTL"); v != "" { c.MetadataTTL = v } - if v := os.Getenv("PROXY_METADATA_MAX_SIZE"); v != "" { - c.MetadataMaxSize = v - } if v := os.Getenv("PROXY_GRADLE_BUILD_CACHE_READ_ONLY"); v != "" { c.Gradle.BuildCache.ReadOnly = v == "true" || v == "1" } @@ -521,10 +513,6 @@ func (c *Config) Validate() error { } } - if err := validateMetadataMaxSize(c.MetadataMaxSize); err != nil { - return err - } - if err := c.Health.Validate(); err != nil { return err } @@ -594,7 +582,6 @@ func (g *GradleBuildCacheConfig) Validate() error { const ( defaultMetadataTTL = 5 * time.Minute //nolint:mnd // sensible default defaultDirectServeTTL = 15 * time.Minute //nolint:mnd // sensible default - defaultMetadataMaxSize = 100 << 20 defaultGradleBuildCacheMaxUploadSize = 100 << 20 defaultGradleBuildCacheSweepInterval = 10 * time.Minute defaultGradleMaxUploadSizeStr = "100MB" @@ -614,33 +601,6 @@ func (c *Config) ParseMaxSize() int64 { return size } -func validateMetadataMaxSize(s string) error { - if s == "" { - return nil - } - size, err := ParseSize(s) - if err != nil { - return fmt.Errorf("invalid metadata_max_size: %w", err) - } - if size <= 0 { - return fmt.Errorf("invalid metadata_max_size %q: must be positive", s) - } - return nil -} - -// ParseMetadataMaxSize returns the maximum metadata response size in bytes. -// Returns 100MB if unset or invalid. -func (c *Config) ParseMetadataMaxSize() int64 { - if c.MetadataMaxSize == "" { - return defaultMetadataMaxSize - } - size, err := ParseSize(c.MetadataMaxSize) - if err != nil || size <= 0 { - return defaultMetadataMaxSize - } - return size -} - // ParseMetadataTTL returns the metadata TTL duration. // Returns 5 minutes if unset, 0 if explicitly disabled. func (c *Config) ParseMetadataTTL() time.Duration { diff --git a/internal/config/config_test.go b/internal/config/config_test.go index d633c25..05fec3a 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -428,52 +428,6 @@ func TestParseMetadataTTL(t *testing.T) { } } -func TestParseMetadataMaxSize(t *testing.T) { - tests := []struct { - name string - size string - want int64 - }{ - {"unset uses default", "", defaultMetadataMaxSize}, - {"explicit value", "250MB", 250 << 20}, - {"bytes", "1024", 1024}, - {"invalid uses default", "lots", defaultMetadataMaxSize}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - cfg := Default() - cfg.MetadataMaxSize = tt.size - got := cfg.ParseMetadataMaxSize() - if got != tt.want { - t.Errorf("ParseMetadataMaxSize() = %d, want %d", got, tt.want) - } - }) - } -} - -func TestValidateMetadataMaxSize(t *testing.T) { - cfg := Default() - cfg.MetadataMaxSize = "not-a-size" - if err := cfg.Validate(); err == nil { - t.Error("expected validation error for invalid metadata_max_size") - } - - cfg.MetadataMaxSize = "0" - if err := cfg.Validate(); err == nil { - t.Error("expected validation error for zero metadata_max_size") - } - - cfg.MetadataMaxSize = "250MB" - if err := cfg.Validate(); err != nil { - t.Errorf("unexpected error for valid metadata_max_size: %v", err) - } - - cfg.MetadataMaxSize = "" - if err := cfg.Validate(); err != nil { - t.Errorf("unexpected error for unset metadata_max_size: %v", err) - } -} - func TestValidateMetadataTTL(t *testing.T) { cfg := Default() cfg.MetadataTTL = "invalid" diff --git a/internal/handler/conda.go b/internal/handler/conda.go index cfa20c8..1336f94 100644 --- a/internal/handler/conda.go +++ b/internal/handler/conda.go @@ -161,7 +161,7 @@ func (h *CondaHandler) handleRepodata(w http.ResponseWriter, r *http.Request) { return } - body, err := h.proxy.ReadMetadata(resp.Body) + body, err := ReadMetadata(resp.Body) if err != nil { http.Error(w, "failed to read response", http.StatusInternalServerError) return diff --git a/internal/handler/handler.go b/internal/handler/handler.go index d06ca83..3df2777 100644 --- a/internal/handler/handler.go +++ b/internal/handler/handler.go @@ -52,25 +52,23 @@ const contentTypeJSON = "application/json" const headerAcceptEncoding = "Accept-Encoding" -// defaultMetadataMaxSize is used when Proxy.MetadataMaxSize is unset. -const defaultMetadataMaxSize = 100 << 20 +// maxMetadataSize is the maximum size of upstream metadata responses (100 MB). +// Package metadata (e.g. npm with many versions) can be large, but unbounded +// reads risk OOM if an upstream misbehaves. +const maxMetadataSize = 100 << 20 -// ErrMetadataTooLarge is returned when upstream metadata exceeds the configured limit. +// ErrMetadataTooLarge is returned when upstream metadata exceeds maxMetadataSize. var ErrMetadataTooLarge = errors.New("metadata response exceeds size limit") // ReadMetadata reads an upstream response body with a size limit to prevent OOM // from unexpectedly large responses. Returns ErrMetadataTooLarge if the response // is truncated by the limit. -func (p *Proxy) ReadMetadata(r io.Reader) ([]byte, error) { - limit := p.MetadataMaxSize - if limit <= 0 { - limit = defaultMetadataMaxSize - } - data, err := io.ReadAll(io.LimitReader(r, limit+1)) +func ReadMetadata(r io.Reader) ([]byte, error) { + data, err := io.ReadAll(io.LimitReader(r, maxMetadataSize+1)) if err != nil { return nil, err } - if int64(len(data)) > limit { + if int64(len(data)) > maxMetadataSize { return nil, ErrMetadataTooLarge } return data, nil @@ -86,7 +84,6 @@ type Proxy struct { Cooldown *cooldown.Config CacheMetadata bool MetadataTTL time.Duration - MetadataMaxSize int64 GradleReadOnly bool GradleMaxUploadSize int64 DirectServe bool @@ -477,7 +474,7 @@ func (p *Proxy) FetchOrCacheMetadata(ctx context.Context, ecosystem, cacheKey, u cached, readErr := p.Storage.Open(ctx, entry.StoragePath) if readErr == nil { defer func() { _ = cached.Close() }() - data, readErr := p.ReadMetadata(cached) + data, readErr := ReadMetadata(cached) if readErr == nil { ct := contentTypeJSON if entry.ContentType.Valid { @@ -522,7 +519,7 @@ func (p *Proxy) FetchOrCacheMetadata(ctx context.Context, ecosystem, cacheKey, u } defer func() { _ = cached.Close() }() - data, readErr := p.ReadMetadata(cached) + data, readErr := ReadMetadata(cached) if readErr != nil { return nil, "", fmt.Errorf("upstream failed and cached read error: %w", err) } @@ -564,7 +561,7 @@ func (p *Proxy) fetchUpstreamMetadata(ctx context.Context, upstreamURL string, e return nil, "", "", zeroTime, errStale304 } defer func() { _ = cached.Close() }() - data, readErr := p.ReadMetadata(cached) + data, readErr := ReadMetadata(cached) if readErr != nil { return nil, "", "", zeroTime, errStale304 } @@ -586,7 +583,7 @@ func (p *Proxy) fetchUpstreamMetadata(ctx context.Context, upstreamURL string, e return nil, "", "", zeroTime, fmt.Errorf("upstream returned %d", resp.StatusCode) } - body, err := p.ReadMetadata(resp.Body) + body, err := ReadMetadata(resp.Body) if err != nil { return nil, "", "", zeroTime, fmt.Errorf("reading response: %w", err) } diff --git a/internal/handler/nuget.go b/internal/handler/nuget.go index 40b8b5f..3cce7f8 100644 --- a/internal/handler/nuget.go +++ b/internal/handler/nuget.go @@ -193,7 +193,7 @@ func (h *NuGetHandler) handleRegistration(w http.ResponseWriter, r *http.Request return } - body, err := h.proxy.ReadMetadata(resp.Body) + body, err := ReadMetadata(resp.Body) if err != nil { http.Error(w, "failed to read response", http.StatusInternalServerError) return diff --git a/internal/handler/read_metadata_test.go b/internal/handler/read_metadata_test.go index b13bddb..60c1cf2 100644 --- a/internal/handler/read_metadata_test.go +++ b/internal/handler/read_metadata_test.go @@ -7,12 +7,9 @@ import ( ) func TestReadMetadata(t *testing.T) { - const limit = 1024 - p := &Proxy{MetadataMaxSize: limit} - t.Run("small body", func(t *testing.T) { data := []byte("hello world") - got, err := p.ReadMetadata(bytes.NewReader(data)) + got, err := ReadMetadata(bytes.NewReader(data)) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -22,39 +19,27 @@ func TestReadMetadata(t *testing.T) { }) t.Run("exactly at limit", func(t *testing.T) { - data := make([]byte, limit) + data := make([]byte, maxMetadataSize) for i := range data { data[i] = 'x' } - got, err := p.ReadMetadata(bytes.NewReader(data)) + got, err := ReadMetadata(bytes.NewReader(data)) if err != nil { t.Fatalf("unexpected error: %v", err) } - if len(got) != limit { - t.Errorf("got length %d, want %d", len(got), limit) + if len(got) != int(maxMetadataSize) { + t.Errorf("got length %d, want %d", len(got), maxMetadataSize) } }) t.Run("over limit returns error", func(t *testing.T) { - data := make([]byte, limit+100) + data := make([]byte, maxMetadataSize+100) for i := range data { data[i] = 'x' } - _, err := p.ReadMetadata(bytes.NewReader(data)) + _, err := ReadMetadata(bytes.NewReader(data)) if !errors.Is(err, ErrMetadataTooLarge) { t.Errorf("got error %v, want ErrMetadataTooLarge", err) } }) - - t.Run("zero limit uses default", func(t *testing.T) { - p := &Proxy{} - data := make([]byte, 1<<20) - got, err := p.ReadMetadata(bytes.NewReader(data)) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(got) != len(data) { - t.Errorf("got length %d, want %d", len(got), len(data)) - } - }) } diff --git a/internal/server/server.go b/internal/server/server.go index 7de5041..251386e 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -160,7 +160,6 @@ func (s *Server) Start() error { proxy.Cooldown = cd proxy.CacheMetadata = s.cfg.CacheMetadata proxy.MetadataTTL = s.cfg.ParseMetadataTTL() - proxy.MetadataMaxSize = s.cfg.ParseMetadataMaxSize() proxy.GradleReadOnly = s.cfg.Gradle.BuildCache.ReadOnly proxy.GradleMaxUploadSize = s.cfg.ParseGradleBuildCacheMaxUploadSize() proxy.DirectServe = s.cfg.Storage.DirectServe