diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e67fc10..784d851 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: persist-credentials: false - name: Set up Go - uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0 + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 with: go-version: ${{ matrix.go-version }} @@ -40,7 +40,7 @@ jobs: persist-credentials: false - name: Set up Go - uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0 + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 with: go-version: '1.25' diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e34e687..69cac16 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -19,7 +19,7 @@ jobs: persist-credentials: false - name: Set up Go - uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0 + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 with: go-version-file: go.mod cache: false diff --git a/.github/workflows/swagger.yml b/.github/workflows/swagger.yml index 85c2fba..6bc3514 100644 --- a/.github/workflows/swagger.yml +++ b/.github/workflows/swagger.yml @@ -17,7 +17,7 @@ jobs: persist-credentials: false - name: Set up Go - uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0 + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 with: go-version: '1.25' diff --git a/README.md b/README.md index ff35672..93ec8ba 100644 --- a/README.md +++ b/README.md @@ -20,14 +20,14 @@ A 3-day cooldown means that when `lodash` publishes version `4.18.0`, your build Resolution order: package override, then ecosystem override, then global default. This lets you set a conservative default and carve out exceptions for packages where you need faster updates. -Currently works with npm, PyPI, pub.dev, and Composer, which all include publish timestamps in their metadata. See [docs/configuration.md](docs/configuration.md) for the full config reference. +Currently works with npm, PyPI, pub.dev, Composer, and Cargo, which all include publish timestamps in their metadata. See [docs/configuration.md](docs/configuration.md) for the full config reference. ## Supported Registries | Registry | Language/Platform | Cooldown | Completed | |----------|-------------------|:--------:|:---------:| | npm | JavaScript | Yes | ✓ | -| Cargo | Rust | | ✓ | +| Cargo | Rust | Yes | ✓ | | RubyGems | Ruby | | ✓ | | Go proxy | Go | | ✓ | | Hex | Elixir | | ✓ | @@ -688,7 +688,7 @@ Response: "cached_artifacts": 142, "total_size_bytes": 523456789, "total_size": "499.2 MB", - "storage_path": "./cache/artifacts", + "storage_url": "file:///path/to/cache/artifacts", "database_path": "./cache/proxy.db" } ``` diff --git a/docs/swagger/docs.go b/docs/swagger/docs.go index 2343f32..76d835d 100644 --- a/docs/swagger/docs.go +++ b/docs/swagger/docs.go @@ -939,7 +939,7 @@ const docTemplate = `{ "database_path": { "type": "string" }, - "storage_path": { + "storage_url": { "type": "string" }, "total_size": { diff --git a/docs/swagger/swagger.json b/docs/swagger/swagger.json index 0f2e364..8f0edb9 100644 --- a/docs/swagger/swagger.json +++ b/docs/swagger/swagger.json @@ -932,7 +932,7 @@ "database_path": { "type": "string" }, - "storage_path": { + "storage_url": { "type": "string" }, "total_size": { diff --git a/go.mod b/go.mod index 4ac7b16..0f5f271 100644 --- a/go.mod +++ b/go.mod @@ -5,10 +5,10 @@ go 1.25.6 require ( github.com/git-pkgs/archives v0.2.0 github.com/git-pkgs/enrichment v0.2.1 - github.com/git-pkgs/purl v0.1.9 - github.com/git-pkgs/registries v0.3.0 - github.com/git-pkgs/spdx v0.1.1 - github.com/git-pkgs/vers v0.2.3 + github.com/git-pkgs/purl v0.1.10 + github.com/git-pkgs/registries v0.4.0 + github.com/git-pkgs/spdx v0.1.2 + github.com/git-pkgs/vers v0.2.4 github.com/git-pkgs/vulns v0.1.3 github.com/go-chi/chi/v5 v5.2.5 github.com/jmoiron/sqlx v1.4.0 @@ -276,7 +276,7 @@ require ( golang.org/x/exp/typeparams v0.0.0-20260209203927-2842357ff358 // indirect golang.org/x/mod v0.33.0 // indirect golang.org/x/net v0.51.0 // indirect - golang.org/x/sync v0.19.0 // indirect + golang.org/x/sync v0.20.0 // indirect golang.org/x/sys v0.42.0 // indirect golang.org/x/text v0.34.0 // indirect golang.org/x/tools v0.42.0 // indirect diff --git a/go.sum b/go.sum index 5ee370b..63f37e1 100644 --- a/go.sum +++ b/go.sum @@ -230,14 +230,14 @@ github.com/git-pkgs/enrichment v0.2.1 h1:mJJt4YQBzl9aOfu4226ylnC9H6YO9YZDjGpbSPV github.com/git-pkgs/enrichment v0.2.1/go.mod h1:q9eDZpRrUbYwzD4Mtg/T6LRdBMlt2DYRIvVRDULFnKg= github.com/git-pkgs/packageurl-go v0.3.1 h1:WM3RBABQZLaRBxgKyYughc3cVBE8KyQxbSC6Jt5ak7M= github.com/git-pkgs/packageurl-go v0.3.1/go.mod h1:rcIxiG37BlQLB6FZfgdj9Fm7yjhRQd3l+5o7J0QPAk4= -github.com/git-pkgs/purl v0.1.9 h1:zSHKBVwRTJiMGwiYIiHgoIUfJTdtC7kVQ0+0RHckwxc= -github.com/git-pkgs/purl v0.1.9/go.mod h1:6YX25yhztts1Byktw4pOlykru57GOJaanA+WmOBFtdU= -github.com/git-pkgs/registries v0.3.0 h1:eIM78ry7l1CfwbPMXQ/vCsN9xJNWN1uDmkl76MS+OT8= -github.com/git-pkgs/registries v0.3.0/go.mod h1:RAqG9XyGLV56F8tBXXyzmEaHTBkub7MWFD9KGjt4WtQ= -github.com/git-pkgs/spdx v0.1.1 h1:jjchxLhvTnTR7fLcdXdNVDh/tLq6B2S6LnaKEzBjhRQ= -github.com/git-pkgs/spdx v0.1.1/go.mod h1:nbZdJ09OuZg9/bgRnnyEM5F5uR8K7Iwf5oDHQvK3WcE= -github.com/git-pkgs/vers v0.2.3 h1:elyuJZ2mBRIncRUF6SjpnwIwSuRRnPdAEJBZcVgU450= -github.com/git-pkgs/vers v0.2.3/go.mod h1:biTbSQK1qdbrsxDEKnqe3Jzclxz8vW6uDcwKjfUGcOo= +github.com/git-pkgs/purl v0.1.10 h1:NMjeF10nzFn3tdQlz6rbmHB+i+YkyrFQxho3e33ePTQ= +github.com/git-pkgs/purl v0.1.10/go.mod h1:C5Vp/kyZ/wGckCLexx4wPVfUxEiToRkdsOPh5Z7ig/I= +github.com/git-pkgs/registries v0.4.0 h1:GO7fQ8/jot0ulSQHBdxLSNSX/p8eB3gEXWO+98fmoEo= +github.com/git-pkgs/registries v0.4.0/go.mod h1:49UCPFWQmwNV7rBEr9TrTDWKR7vYxFcxp3VfdkeFbdE= +github.com/git-pkgs/spdx v0.1.2 h1:wHSK+CqFsO5N7yDTPvxDmer5LgNEa7vAsiZhi5Aci0A= +github.com/git-pkgs/spdx v0.1.2/go.mod h1:V98MgZapNgYw54/pdGR82d7RU93qzJoybahbpZqTfw8= +github.com/git-pkgs/vers v0.2.4 h1:Zr3jR/Xf1i/6cvBaJKPxhCwjzqz7uvYHE0Fhid/GPBk= +github.com/git-pkgs/vers v0.2.4/go.mod h1:biTbSQK1qdbrsxDEKnqe3Jzclxz8vW6uDcwKjfUGcOo= github.com/git-pkgs/vulns v0.1.3 h1:Q9GixxhAYpP5vVDetKNMACHxGnWwB8aE5c9kbE8xxqU= github.com/git-pkgs/vulns v0.1.3/go.mod h1:/PVy7S1oZNVF9X8yVOZ9SX5MFpyVWCtLnIX0kAfPjY0= github.com/github/go-spdx/v2 v2.4.0 h1:+4IwVwJJbm3rzvrQ6P1nI9BDMcy3la4RchRy5uehV/M= @@ -738,8 +738,8 @@ golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sync v0.4.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= -golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= -golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/internal/handler/cargo.go b/internal/handler/cargo.go index b5a3fb0..9602fe6 100644 --- a/internal/handler/cargo.go +++ b/internal/handler/cargo.go @@ -1,11 +1,15 @@ package handler import ( + "bufio" "encoding/json" "fmt" "io" "net/http" "strings" + "time" + + "github.com/git-pkgs/purl" ) const ( @@ -60,7 +64,7 @@ func (h *CargoHandler) Routes() http.Handler { // CargoConfig is the registry configuration returned by config.json. type CargoConfig struct { - DL string `json:"dl"` + DL string `json:"dl"` API string `json:"api,omitempty"` } @@ -120,8 +124,68 @@ func (h *CargoHandler) handleIndex(w http.ResponseWriter, r *http.Request) { w.Header().Set("Last-Modified", lastMod) } - w.WriteHeader(http.StatusOK) - _, _ = io.Copy(w, resp.Body) + h.applyCooldownFiltering(w, resp.Body) + +} + +type crateIndexEntry struct { + Name string `json:"name"` + Version string `json:"vers"` + PublishTime string `json:"pubtime,omitempty"` +} + +func (h *CargoHandler) applyCooldownFiltering(downstreamResponse io.Writer, upstreamBody io.Reader) { + if h.proxy.Cooldown == nil || !h.proxy.Cooldown.Enabled() { + // not using cooldowns, just copy the upstream to the downstream + _, _ = io.Copy(downstreamResponse, upstreamBody) + return + } + + // create a scanner on the body of the http response + requestScanner := bufio.NewScanner(upstreamBody) + + // the response is newline-delimited JSON, loop through each line + for requestScanner.Scan() { + line := requestScanner.Text() + + // decode the line + var crate crateIndexEntry + err := json.Unmarshal([]byte(line), &crate) + + if err != nil { + // if there is an error parsing this line then exclude it and move to the next entry + h.proxy.Logger.Error("failed to parse json entry in index", "error", err) + continue + } + + // parse publish time + publishedAt, err := time.Parse(time.RFC3339, crate.PublishTime) + + if crate.PublishTime == "" || err != nil { + // publish time is empty/missing/invalid, presumably was published before pubtime was added as a field + // write line to response + _, _ = downstreamResponse.Write([]byte(line + "\n")) + continue + } + + // make PURL + cratePURL := purl.MakePURLString("cargo", crate.Name, "") + + if !h.proxy.Cooldown.IsAllowed("cargo", cratePURL, publishedAt) { + // crate is not allowed, move to next crate + h.proxy.Logger.Info("cooldown: filtering cargo version", + "crate", crate.Name, "version", crate.Version, + "published", crate.PublishTime) + continue + } + + // crate passes, write to response + _, _ = downstreamResponse.Write([]byte(line + "\n")) + } + + if err := requestScanner.Err(); err != nil { + h.proxy.Logger.Error("error reading index response", "error", err) + } } // buildIndexPath builds the sparse index path for a crate name. diff --git a/internal/handler/cargo_test.go b/internal/handler/cargo_test.go index 3fe0307..5e7f2e4 100644 --- a/internal/handler/cargo_test.go +++ b/internal/handler/cargo_test.go @@ -1,11 +1,16 @@ package handler import ( + "bytes" "encoding/json" "log/slog" "net/http" "net/http/httptest" + "strings" "testing" + "time" + + "github.com/git-pkgs/proxy/internal/cooldown" ) func cargoTestProxy() *Proxy { @@ -28,8 +33,8 @@ func TestCargoBuildIndexPath(t *testing.T) { {"abcd", "ab/cd/abcd"}, {"serde", "se/rd/serde"}, {"tokio", "to/ki/tokio"}, - {"A", "1/a"}, // lowercase - {"SERDE", "se/rd/serde"}, // lowercase + {"A", "1/a"}, // lowercase + {"SERDE", "se/rd/serde"}, // lowercase {"rand_core", "ra/nd/rand_core"}, } @@ -146,3 +151,57 @@ func TestCargoRoutes(t *testing.T) { t.Errorf("config.json status = %d, want %d", w.Code, http.StatusOK) } } + +type filterTestCase struct { + line string + expected bool +} + +func TestCargoCooldown(t *testing.T) { + now := time.Now() + + createCase := func(name string, version string, age time.Duration, expected bool) filterTestCase { + return filterTestCase{line: `{"name":"` + name + `","vers":"` + version + `","cksum":"abcd","features":{},"yanked":false,"pubtime":"` + now.Add(-1*age).Format(time.RFC3339) + `"}`, expected: expected} + } + + testCases := []filterTestCase{ + // one week ago + createCase("serde", "1.0.0", 168*time.Hour, true), + // one hour ago + createCase("serde", "1.0.1", 1*time.Hour, false), + // two hours ago with custom filter (1h) + createCase("tokio", "1.0.0", 2*time.Hour, true), + // one hour ago with custom filter (1h) + createCase("tokio", "1.0.0", 1*time.Minute, false), + } + + var testInput strings.Builder + var expectedOutput strings.Builder + + for _, testCase := range testCases { + testInput.WriteString(testCase.line + "\n") + if testCase.expected { + expectedOutput.WriteString(testCase.line + "\n") + } + } + + proxy := testProxy() + proxy.Cooldown = &cooldown.Config{ + Default: "3d", + Packages: map[string]string{"pkg:cargo/tokio": "1h"}, + } + + h := &CargoHandler{ + proxy: proxy, + proxyURL: "http://localhost:8080", + } + + var outputBuffer bytes.Buffer + h.applyCooldownFiltering(&outputBuffer, strings.NewReader(testInput.String())) + output := outputBuffer.String() + + if output != expectedOutput.String() { + t.Errorf("output = %q, want %q", output, expectedOutput.String()) + } + +} diff --git a/internal/handler/container.go b/internal/handler/container.go index fc5f98c..8aa82eb 100644 --- a/internal/handler/container.go +++ b/internal/handler/container.go @@ -103,20 +103,22 @@ func (h *ContainerHandler) handleBlobDownload(w http.ResponseWriter, r *http.Req return } - // Try to get from cache first + // Try to get from cache, or fetch from upstream with auth filename := digest - result, err := h.proxy.GetOrFetchArtifactFromURL( + headers := http.Header{"Authorization": {"Bearer " + token}} + result, err := h.proxy.GetOrFetchArtifactFromURLWithHeaders( r.Context(), "oci", name, digest, // use digest as version filename, fmt.Sprintf("%s/v2/%s/blobs/%s", h.registryURL, name, digest), + headers, ) if err != nil { - // Fetch directly with auth - h.proxyBlobWithAuth(w, r, name, digest, token) + h.proxy.Logger.Error("failed to fetch blob", "error", err) + h.containerError(w, http.StatusBadGateway, "BLOB_UNKNOWN", "failed to fetch blob") return } @@ -304,34 +306,6 @@ func (h *ContainerHandler) proxyBlobHead(w http.ResponseWriter, r *http.Request, w.WriteHeader(resp.StatusCode) } -// proxyBlobWithAuth proxies a blob download with authentication. -func (h *ContainerHandler) proxyBlobWithAuth(w http.ResponseWriter, r *http.Request, name, digest, token string) { - upstreamURL := fmt.Sprintf("%s/v2/%s/blobs/%s", h.registryURL, name, digest) - - req, err := http.NewRequestWithContext(r.Context(), http.MethodGet, upstreamURL, nil) - if err != nil { - h.containerError(w, http.StatusInternalServerError, "INTERNAL_ERROR", "failed to create request") - return - } - - req.Header.Set("Authorization", "Bearer "+token) - - resp, err := h.proxy.HTTPClient.Do(req) - if err != nil { - h.containerError(w, http.StatusBadGateway, "INTERNAL_ERROR", "failed to fetch from upstream") - return - } - defer func() { _ = resp.Body.Close() }() - - for _, header := range []string{"Content-Type", "Content-Length", "Docker-Content-Digest"} { - if v := resp.Header.Get(header); v != "" { - w.Header().Set(header, v) - } - } - - w.WriteHeader(resp.StatusCode) - _, _ = io.Copy(w, resp.Body) -} // containerError writes an OCI-compliant error response. func (h *ContainerHandler) containerError(w http.ResponseWriter, status int, code, message string) { diff --git a/internal/handler/container_test.go b/internal/handler/container_test.go index b84adfd..b34a250 100644 --- a/internal/handler/container_test.go +++ b/internal/handler/container_test.go @@ -1,9 +1,17 @@ package handler import ( + "bytes" + "context" + "encoding/json" + "io" + "log/slog" "net/http" "net/http/httptest" "testing" + + "github.com/git-pkgs/proxy/internal/database" + "github.com/git-pkgs/registries/fetch" ) func TestContainerHandler_parseBlobPath(t *testing.T) { @@ -127,6 +135,92 @@ func TestContainerHandler_parseTagsListPath(t *testing.T) { } } +func TestContainerHandler_BlobDownload_CachesWithAuth(t *testing.T) { + // Set up a mock auth server that returns a token + authServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]string{"token": "test-token-123"}) + })) + defer authServer.Close() + + // Set up mock fetcher that captures headers + var capturedHeaders http.Header + mf := &mockFetcherWithHeaders{ + fetchFn: func(_ context.Context, _ string, headers http.Header) (*fetch.Artifact, error) { + capturedHeaders = headers + return &fetch.Artifact{ + Body: io.NopCloser(bytes.NewReader([]byte("blob-content"))), + Size: 12, + ContentType: "application/octet-stream", + }, nil + }, + } + + dir := t.TempDir() + db, err := database.Create(dir + "/test.db") + if err != nil { + t.Fatalf("failed to create test database: %v", err) + } + t.Cleanup(func() { _ = db.Close() }) + + store := newMockStorage() + logger := slog.New(slog.NewTextHandler(io.Discard, nil)) + proxy := &Proxy{ + DB: db, + Storage: store, + Fetcher: mf, + Logger: logger, + HTTPClient: &http.Client{}, + } + + h := &ContainerHandler{ + proxy: proxy, + registryURL: "https://registry-1.docker.io", + authURL: authServer.URL, + proxyURL: "http://localhost:8080", + } + + handler := h.Routes() + req := httptest.NewRequest(http.MethodGet, "/library/nginx/blobs/sha256:abc123def456abc123def456abc123def456abc123def456abc123def456abcd", nil) + w := httptest.NewRecorder() + handler.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Errorf("got status %d, want %d; body: %s", w.Code, http.StatusOK, w.Body.String()) + } + + // Verify auth header was passed to the fetcher + if capturedHeaders == nil { + t.Fatal("expected headers to be passed to fetcher, got nil") + } + auth := capturedHeaders.Get("Authorization") + if auth != "Bearer test-token-123" { + t.Errorf("Authorization = %q, want %q", auth, "Bearer test-token-123") + } + + // Verify response headers + if got := w.Header().Get("Docker-Content-Digest"); got != "sha256:abc123def456abc123def456abc123def456abc123def456abc123def456abcd" { + t.Errorf("Docker-Content-Digest = %q, want digest", got) + } +} + +// mockFetcherWithHeaders captures headers passed to FetchWithHeaders. +type mockFetcherWithHeaders struct { + fetchFn func(ctx context.Context, url string, headers http.Header) (*fetch.Artifact, error) +} + +func (f *mockFetcherWithHeaders) Fetch(ctx context.Context, url string) (*fetch.Artifact, error) { + return f.FetchWithHeaders(ctx, url, nil) +} + +func (f *mockFetcherWithHeaders) FetchWithHeaders(ctx context.Context, url string, headers http.Header) (*fetch.Artifact, error) { + return f.fetchFn(ctx, url, headers) +} + +func (f *mockFetcherWithHeaders) Head(_ context.Context, _ string) (int64, string, error) { + return 0, "", nil +} + func TestContainerHandler_Routes_VersionCheck(t *testing.T) { h := NewContainerHandler(nil, "http://localhost:8080") diff --git a/internal/handler/download_test.go b/internal/handler/download_test.go index a6e0cb3..d560b82 100644 --- a/internal/handler/download_test.go +++ b/internal/handler/download_test.go @@ -522,6 +522,11 @@ func TestCondaHandler_CacheMiss(t *testing.T) { if !fetcher.fetchCalled { t.Error("expected fetcher to be called on cache miss") } + + want := upstream.URL + "/conda-forge/linux-64/pandas-2.0.0-py311h320fe9a_0.conda" + if fetcher.fetchedURL != want { + t.Errorf("upstream URL = %q, want %q", fetcher.fetchedURL, want) + } } func TestCRANHandler_SourceDownloadCacheHit(t *testing.T) { @@ -615,7 +620,7 @@ func TestCRANHandler_CacheMiss(t *testing.T) { } h := NewCRANHandler(proxy, "http://localhost") - h.upstreamURL = "http://should-not-be-reached" + h.upstreamURL = "https://cran.r-project.org" srv := httptest.NewServer(h.Routes()) defer srv.Close() @@ -629,6 +634,40 @@ func TestCRANHandler_CacheMiss(t *testing.T) { if !fetcher.fetchCalled { t.Error("expected fetcher to be called on cache miss") } + + want := "https://cran.r-project.org/src/contrib/tidyr_1.3.0.tar.gz" + if fetcher.fetchedURL != want { + t.Errorf("upstream URL = %q, want %q", fetcher.fetchedURL, want) + } +} + +func TestCRANHandler_BinaryDownloadCacheMiss(t *testing.T) { + proxy, _, _, fetcher := setupTestProxy(t) + fetcher.artifact = &fetch.Artifact{ + Body: io.NopCloser(strings.NewReader("fetched binary")), + ContentType: "application/zip", + } + + h := NewCRANHandler(proxy, "http://localhost") + h.upstreamURL = "https://cran.r-project.org" + + srv := httptest.NewServer(h.Routes()) + defer srv.Close() + + resp, err := http.Get(srv.URL + "/bin/windows/contrib/4.3/dplyr_1.1.0.zip") + if err != nil { + t.Fatalf("request failed: %v", err) + } + defer func() { _ = resp.Body.Close() }() + + if !fetcher.fetchCalled { + t.Error("expected fetcher to be called on cache miss") + } + + want := "https://cran.r-project.org/bin/windows/contrib/4.3/dplyr_1.1.0.zip" + if fetcher.fetchedURL != want { + t.Errorf("upstream URL = %q, want %q", fetcher.fetchedURL, want) + } } func TestMavenHandler_DownloadCacheHit(t *testing.T) { @@ -764,4 +803,144 @@ func TestMavenHandler_CacheMiss(t *testing.T) { if !fetcher.fetchCalled { t.Error("expected fetcher to be called on cache miss") } + + want := "https://repo1.maven.org/maven2/org/apache/commons/commons-lang3/3.14.0/commons-lang3-3.14.0.jar" + if fetcher.fetchedURL != want { + t.Errorf("upstream URL = %q, want %q", fetcher.fetchedURL, want) + } +} + +func TestNuGetHandler_DownloadCacheMiss(t *testing.T) { + proxy, _, _, fetcher := setupTestProxy(t) + fetcher.artifact = &fetch.Artifact{ + Body: io.NopCloser(strings.NewReader("fetched nupkg")), + ContentType: "application/octet-stream", + } + + h := NewNuGetHandler(proxy, "http://localhost") + srv := httptest.NewServer(h.Routes()) + defer srv.Close() + + resp, err := http.Get(srv.URL + "/v3-flatcontainer/newtonsoft.json/13.0.3/newtonsoft.json.13.0.3.nupkg") + if err != nil { + t.Fatalf("request failed: %v", err) + } + defer func() { _ = resp.Body.Close() }() + + if !fetcher.fetchCalled { + t.Error("expected fetcher to be called on cache miss") + } + + want := "https://api.nuget.org/v3-flatcontainer/newtonsoft.json/13.0.3/newtonsoft.json.13.0.3.nupkg" + if fetcher.fetchedURL != want { + t.Errorf("upstream URL = %q, want %q", fetcher.fetchedURL, want) + } +} + +func TestConanHandler_RecipeFileCacheMiss(t *testing.T) { + proxy, _, _, fetcher := setupTestProxy(t) + fetcher.artifact = &fetch.Artifact{ + Body: io.NopCloser(strings.NewReader("conan export")), + ContentType: "application/octet-stream", + } + + h := NewConanHandler(proxy, "http://localhost") + srv := httptest.NewServer(h.Routes()) + defer srv.Close() + + resp, err := http.Get(srv.URL + "/v2/files/zlib/1.3/_/_/abc123/recipe/conan_export.tgz") + if err != nil { + t.Fatalf("request failed: %v", err) + } + defer func() { _ = resp.Body.Close() }() + + if !fetcher.fetchCalled { + t.Error("expected fetcher to be called on cache miss") + } + + want := "https://center.conan.io/v2/files/zlib/1.3/_/_/abc123/recipe/conan_export.tgz" + if fetcher.fetchedURL != want { + t.Errorf("upstream URL = %q, want %q", fetcher.fetchedURL, want) + } +} + +func TestConanHandler_PackageFileCacheMiss(t *testing.T) { + proxy, _, _, fetcher := setupTestProxy(t) + fetcher.artifact = &fetch.Artifact{ + Body: io.NopCloser(strings.NewReader("conan package")), + ContentType: "application/octet-stream", + } + + h := NewConanHandler(proxy, "http://localhost") + srv := httptest.NewServer(h.Routes()) + defer srv.Close() + + resp, err := http.Get(srv.URL + "/v2/files/zlib/1.3/_/_/abc123/package/def456/ghi789/conan_package.tgz") + if err != nil { + t.Fatalf("request failed: %v", err) + } + defer func() { _ = resp.Body.Close() }() + + if !fetcher.fetchCalled { + t.Error("expected fetcher to be called on cache miss") + } + + want := "https://center.conan.io/v2/files/zlib/1.3/_/_/abc123/package/def456/ghi789/conan_package.tgz" + if fetcher.fetchedURL != want { + t.Errorf("upstream URL = %q, want %q", fetcher.fetchedURL, want) + } +} + +func TestDebianHandler_DownloadCacheMiss(t *testing.T) { + proxy, _, _, fetcher := setupTestProxy(t) + fetcher.artifact = &fetch.Artifact{ + Body: io.NopCloser(strings.NewReader("fetched deb")), + ContentType: "application/vnd.debian.binary-package", + } + + h := NewDebianHandler(proxy, "http://localhost") + srv := httptest.NewServer(h.Routes()) + defer srv.Close() + + resp, err := http.Get(srv.URL + "/pool/main/n/nginx/nginx_1.18.0-6_amd64.deb") + if err != nil { + t.Fatalf("request failed: %v", err) + } + defer func() { _ = resp.Body.Close() }() + + if !fetcher.fetchCalled { + t.Error("expected fetcher to be called on cache miss") + } + + want := "http://deb.debian.org/debian/pool/main/n/nginx/nginx_1.18.0-6_amd64.deb" + if fetcher.fetchedURL != want { + t.Errorf("upstream URL = %q, want %q", fetcher.fetchedURL, want) + } +} + +func TestRPMHandler_DownloadCacheMiss(t *testing.T) { + proxy, _, _, fetcher := setupTestProxy(t) + fetcher.artifact = &fetch.Artifact{ + Body: io.NopCloser(strings.NewReader("fetched rpm")), + ContentType: "application/x-rpm", + } + + h := NewRPMHandler(proxy, "http://localhost") + srv := httptest.NewServer(h.Routes()) + defer srv.Close() + + resp, err := http.Get(srv.URL + "/releases/39/Everything/x86_64/os/Packages/n/nginx-1.24.0-1.fc39.x86_64.rpm") + if err != nil { + t.Fatalf("request failed: %v", err) + } + defer func() { _ = resp.Body.Close() }() + + if !fetcher.fetchCalled { + t.Error("expected fetcher to be called on cache miss") + } + + want := "https://dl.fedoraproject.org/pub/fedora/linux/releases/39/Everything/x86_64/os/Packages/n/nginx-1.24.0-1.fc39.x86_64.rpm" + if fetcher.fetchedURL != want { + t.Errorf("upstream URL = %q, want %q", fetcher.fetchedURL, want) + } } diff --git a/internal/handler/handler.go b/internal/handler/handler.go index 91d8960..109eacd 100644 --- a/internal/handler/handler.go +++ b/internal/handler/handler.go @@ -380,6 +380,13 @@ func JSONError(w http.ResponseWriter, status int, message string) { // GetOrFetchArtifactFromURL retrieves an artifact from cache or fetches from a specific URL. // This is useful for registries where download URLs are determined from metadata. func (p *Proxy) GetOrFetchArtifactFromURL(ctx context.Context, ecosystem, name, version, filename, downloadURL string) (*CacheResult, error) { + return p.GetOrFetchArtifactFromURLWithHeaders(ctx, ecosystem, name, version, filename, downloadURL, nil) +} + +// GetOrFetchArtifactFromURLWithHeaders retrieves an artifact from cache or fetches from a URL +// with additional HTTP headers. This is needed for registries that require authentication +// (e.g. Docker Hub requires a Bearer token even for public images). +func (p *Proxy) GetOrFetchArtifactFromURLWithHeaders(ctx context.Context, ecosystem, name, version, filename, downloadURL string, headers http.Header) (*CacheResult, error) { pkgPURL := purl.MakePURLString(ecosystem, name, "") versionPURL := purl.MakePURLString(ecosystem, name, version) @@ -389,14 +396,14 @@ func (p *Proxy) GetOrFetchArtifactFromURL(ctx context.Context, ecosystem, name, return cached, nil } - return p.fetchAndCacheFromURL(ctx, ecosystem, name, version, filename, pkgPURL, versionPURL, downloadURL) + return p.fetchAndCacheFromURL(ctx, ecosystem, name, version, filename, pkgPURL, versionPURL, downloadURL, headers) } -func (p *Proxy) fetchAndCacheFromURL(ctx context.Context, ecosystem, name, version, filename, pkgPURL, versionPURL, downloadURL string) (*CacheResult, error) { +func (p *Proxy) fetchAndCacheFromURL(ctx context.Context, ecosystem, name, version, filename, pkgPURL, versionPURL, downloadURL string, headers http.Header) (*CacheResult, error) { p.Logger.Info("fetching from upstream", "ecosystem", ecosystem, "name", name, "version", version, "url", downloadURL) - artifact, err := p.Fetcher.Fetch(ctx, downloadURL) + artifact, err := p.Fetcher.FetchWithHeaders(ctx, downloadURL, headers) if err != nil { return nil, fmt.Errorf("fetching from upstream: %w", err) } diff --git a/internal/handler/handler_test.go b/internal/handler/handler_test.go index dd85a17..4c71319 100644 --- a/internal/handler/handler_test.go +++ b/internal/handler/handler_test.go @@ -79,6 +79,10 @@ func (s *mockStorage) UsedSpace(_ context.Context) (int64, error) { return total, nil } +func (s *mockStorage) URL() string { return "mem://" } + +func (s *mockStorage) Close() error { return nil } + // mockFetcher implements fetch.FetcherInterface for testing. type mockFetcher struct { artifact *fetch.Artifact @@ -87,7 +91,11 @@ type mockFetcher struct { fetchedURL string } -func (f *mockFetcher) Fetch(_ context.Context, url string) (*fetch.Artifact, error) { +func (f *mockFetcher) Fetch(ctx context.Context, url string) (*fetch.Artifact, error) { + return f.FetchWithHeaders(ctx, url, nil) +} + +func (f *mockFetcher) FetchWithHeaders(_ context.Context, url string, _ http.Header) (*fetch.Artifact, error) { f.fetchCalled = true f.fetchedURL = url if f.fetchErr != nil { diff --git a/internal/handler/pypi.go b/internal/handler/pypi.go index 51b2871..aac33a7 100644 --- a/internal/handler/pypi.go +++ b/internal/handler/pypi.go @@ -451,8 +451,10 @@ func (h *PyPIHandler) handleDownload(w http.ResponseWriter, r *http.Request) { h.proxy.Logger.Info("pypi download request", "name", name, "version", version, "filename", filename) - // Construct upstream URL - upstreamURL := fmt.Sprintf("https://files.pythonhosted.org/packages/%s", path) + // Construct upstream URL; the incoming path starts with + // '/packages' so there is no need to include it in the format + // string + upstreamURL := fmt.Sprintf("https://files.pythonhosted.org/%s", path) result, err := h.proxy.GetOrFetchArtifactFromURL(r.Context(), "pypi", name, version, filename, upstreamURL) if err != nil { diff --git a/internal/handler/pypi_test.go b/internal/handler/pypi_test.go index 18d2637..9e2ade0 100644 --- a/internal/handler/pypi_test.go +++ b/internal/handler/pypi_test.go @@ -2,11 +2,16 @@ package handler import ( "encoding/json" + "io" "log/slog" + "net/http" + "net/http/httptest" + "strings" "testing" "time" "github.com/git-pkgs/proxy/internal/cooldown" + "github.com/git-pkgs/registries/fetch" ) func TestPyPIParseFilename(t *testing.T) { @@ -111,3 +116,79 @@ func TestIsPythonTag(t *testing.T) { } } } + +func TestPyPIHandler_DownloadUpstreamURL(t *testing.T) { + proxy, _, _, fetcher := setupTestProxy(t) + fetcher.artifact = &fetch.Artifact{ + Body: io.NopCloser(strings.NewReader("wheel data")), + ContentType: "application/octet-stream", + } + + h := NewPyPIHandler(proxy, "http://localhost") + srv := httptest.NewServer(h.Routes()) + defer srv.Close() + + // The path wildcard {path...} captures everything after /packages/, + // which includes "packages/" from the rewritten URL. The upstream URL + // must not double the "packages" segment. + resp, err := http.Get(srv.URL + "/packages/packages/ab/cd/ef0123456789/requests-2.31.0-py3-none-any.whl") + if err != nil { + t.Fatalf("request failed: %v", err) + } + defer func() { _ = resp.Body.Close() }() + + if !fetcher.fetchCalled { + t.Fatal("expected fetcher to be called on cache miss") + } + + want := "https://files.pythonhosted.org/packages/ab/cd/ef0123456789/requests-2.31.0-py3-none-any.whl" + if fetcher.fetchedURL != want { + t.Errorf("upstream URL = %q, want %q", fetcher.fetchedURL, want) + } +} + +func TestPyPIHandler_DownloadCacheHit(t *testing.T) { + proxy, db, store, _ := setupTestProxy(t) + seedPackage(t, db, store, "pypi", "requests", "2.31.0", + "requests-2.31.0-py3-none-any.whl", "wheel binary data") + + h := NewPyPIHandler(proxy, "http://localhost") + srv := httptest.NewServer(h.Routes()) + defer srv.Close() + + resp, err := http.Get(srv.URL + "/packages/packages/ab/cd/ef0123456789/requests-2.31.0-py3-none-any.whl") + if err != nil { + t.Fatalf("request failed: %v", err) + } + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusOK { + t.Errorf("status = %d, want %d", resp.StatusCode, http.StatusOK) + } + body, _ := io.ReadAll(resp.Body) + if string(body) != "wheel binary data" { + t.Errorf("body = %q, want %q", body, "wheel binary data") + } +} + +func TestPyPIHandler_DownloadCacheMiss(t *testing.T) { + proxy, _, _, fetcher := setupTestProxy(t) + fetcher.artifact = &fetch.Artifact{ + Body: io.NopCloser(strings.NewReader("fetched wheel")), + ContentType: "application/octet-stream", + } + + h := NewPyPIHandler(proxy, "http://localhost") + srv := httptest.NewServer(h.Routes()) + defer srv.Close() + + resp, err := http.Get(srv.URL + "/packages/packages/ab/cd/ef0123456789/newpkg-1.0.0.tar.gz") + if err != nil { + t.Fatalf("request failed: %v", err) + } + defer func() { _ = resp.Body.Close() }() + + if !fetcher.fetchCalled { + t.Error("expected fetcher to be called on cache miss") + } +} diff --git a/internal/server/server.go b/internal/server/server.go index 8e6b588..bc3187b 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -112,9 +112,19 @@ func New(cfg *config.Config, logger *slog.Logger) (*Server, error) { return nil, fmt.Errorf("initializing storage: %w", err) } + // Verify storage is accessible (catches bad S3 credentials/endpoints early). + // Exists returns (false, nil) for a missing key, so only real connectivity + // or permission errors surface here. + if _, err := store.Exists(context.Background(), ".health-check"); err != nil { + _ = store.Close() + _ = db.Close() + return nil, fmt.Errorf("verifying storage connectivity: %w", err) + } + // Load templates templates, err := NewTemplates() if err != nil { + _ = store.Close() _ = db.Close() return nil, fmt.Errorf("loading templates: %w", err) } @@ -244,7 +254,7 @@ func (s *Server) Start() error { s.logger.Info("starting server", "listen", s.cfg.Listen, "base_url", s.cfg.BaseURL, - "storage", s.cfg.Storage.Path, //nolint:staticcheck // backwards compat + "storage", s.storage.URL(), "database", s.cfg.Database.Path) // Start background goroutine to update cache stats metrics @@ -287,6 +297,12 @@ func (s *Server) Shutdown(ctx context.Context) error { } } + if s.storage != nil { + if err := s.storage.Close(); err != nil { + errs = append(errs, fmt.Errorf("storage close: %w", err)) + } + } + if s.db != nil { if err := s.db.Close(); err != nil { errs = append(errs, fmt.Errorf("database close: %w", err)) @@ -707,7 +723,7 @@ type StatsResponse struct { CachedArtifacts int64 `json:"cached_artifacts"` TotalSize int64 `json:"total_size_bytes"` TotalSizeHuman string `json:"total_size"` - StoragePath string `json:"storage_path"` + StorageURL string `json:"storage_url"` DatabasePath string `json:"database_path"` } @@ -739,7 +755,7 @@ func (s *Server) handleStats(w http.ResponseWriter, r *http.Request) { CachedArtifacts: count, TotalSize: size, TotalSizeHuman: formatSize(size), - StoragePath: s.cfg.Storage.Path, //nolint:staticcheck // backwards compat + StorageURL: s.storage.URL(), DatabasePath: s.cfg.Database.Path, } diff --git a/internal/server/server_test.go b/internal/server/server_test.go index 69f36e8..7e56f2c 100644 --- a/internal/server/server_test.go +++ b/internal/server/server_test.go @@ -223,6 +223,10 @@ func TestStatsEndpoint(t *testing.T) { if stats.CachedArtifacts != 0 { t.Errorf("expected 0 cached artifacts, got %d", stats.CachedArtifacts) } + + if !strings.HasPrefix(stats.StorageURL, "file://") { + t.Errorf("expected storage_url to start with file://, got %q", stats.StorageURL) + } } func TestDashboard(t *testing.T) { @@ -867,3 +871,56 @@ func TestHandlePackagesListPage(t *testing.T) { t.Error("expected packages list to contain seeded package") } } + +func TestNewServer_StorageConnectivityCheck(t *testing.T) { + tempDir := t.TempDir() + dbPath := filepath.Join(tempDir, "test.db") + storagePath := filepath.Join(tempDir, "artifacts") + + cfg := &config.Config{ + Listen: ":0", + BaseURL: "http://localhost:8080", + Storage: config.StorageConfig{URL: "file://" + storagePath}, + Database: config.DatabaseConfig{Path: dbPath}, + } + + logger := slog.New(slog.NewTextHandler(io.Discard, nil)) + + srv, err := New(cfg, logger) + if err != nil { + t.Fatalf("New() failed: %v", err) + } + + // On Windows, OpenBucket normalises to file:///C:/path; on Unix the + // absolute path already starts with /, so file:// + /path == file:///path. + wantPrefix := "file://" + wantSuffix := filepath.ToSlash(storagePath) + got := srv.storage.URL() + if !strings.HasPrefix(got, wantPrefix) || !strings.HasSuffix(got, wantSuffix) { + t.Errorf("expected storage URL ending with %s, got %s", wantSuffix, got) + } + + _ = srv.db.Close() +} + +func TestStatsEndpoint_StorageURL(t *testing.T) { + ts := newTestServer(t) + defer ts.close() + + req := httptest.NewRequest("GET", "/stats", nil) + w := httptest.NewRecorder() + ts.handler.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("expected status 200, got %d", w.Code) + } + + // Verify the JSON response uses storage_url (not storage_path) + body := w.Body.String() + if !strings.Contains(body, `"storage_url"`) { + t.Errorf("expected JSON key storage_url in response, got: %s", body) + } + if strings.Contains(body, `"storage_path"`) { + t.Errorf("unexpected JSON key storage_path in response (should be storage_url)") + } +} diff --git a/internal/storage/filesystem.go b/internal/storage/filesystem.go index 8dec48b..cf6a1fe 100644 --- a/internal/storage/filesystem.go +++ b/internal/storage/filesystem.go @@ -172,3 +172,11 @@ func (fs *Filesystem) Root() string { func (fs *Filesystem) FullPath(path string) string { return fs.fullPath(path) } + +func (fs *Filesystem) URL() string { + return "file://" + filepath.ToSlash(fs.root) +} + +func (fs *Filesystem) Close() error { + return nil +} diff --git a/internal/storage/storage.go b/internal/storage/storage.go index 93053ca..8a9026c 100644 --- a/internal/storage/storage.go +++ b/internal/storage/storage.go @@ -47,6 +47,12 @@ type Storage interface { // UsedSpace returns the total bytes used by all stored content. UsedSpace(ctx context.Context) (int64, error) + + // URL returns the storage backend URL (e.g. "file:///path" or "s3://bucket"). + URL() string + + // Close releases any resources held by the storage backend. + Close() error } // ArtifactPath builds a storage path for an artifact.