diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index e0d3cfb..82ee83c 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -38,7 +38,7 @@ jobs: images: ghcr.io/${{ github.repository }} - name: Build and push Docker image - uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f + uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf with: context: . push: true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index cc79d4a..c23de56 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -19,7 +19,7 @@ jobs: fetch-depth: 0 persist-credentials: false - - uses: sigstore/cosign-installer@cad07c2e89fa2edd6e2d7bab4c1aa38e53f76003 # v4.1.1 + - uses: sigstore/cosign-installer@6f9f17788090df1f26f669e9d70d6ae9567deba6 # v4.1.2 - name: Set up Go uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 @@ -27,7 +27,7 @@ jobs: go-version-file: go.mod cache: false - - uses: goreleaser/goreleaser-action@e24998b8b67b290c2fa8b7c14fcfa7de2c5c9b8c # v7.1.0 + - uses: goreleaser/goreleaser-action@5daf1e915a5f0af01ddbcd89a43b8061ff4f1a89 # v7.2.2 with: version: "~> v2" args: release --clean diff --git a/.github/workflows/zizmor.yml b/.github/workflows/zizmor.yml index 4acd19b..f621254 100644 --- a/.github/workflows/zizmor.yml +++ b/.github/workflows/zizmor.yml @@ -26,4 +26,4 @@ jobs: persist-credentials: false - name: Run zizmor - uses: zizmorcore/zizmor-action@b1d7e1fb5de872772f31590499237e7cce841e8e # v0.5.3 + uses: zizmorcore/zizmor-action@5f14fd08f7cf1cb1609c1e344975f152c7ee938d # v0.5.6 diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..9d4b957 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,28 @@ +version: "2" + +linters: + enable: + - gocritic + - gocognit + - gocyclo + - maintidx + - dupl + - mnd + - unparam + - ireturn + - goconst + - errcheck + settings: + goconst: + min-len: 4 + min-occurrences: 5 + ignore-tests: true + ignore-string-values: + - "^[a-z]+$" + exclusions: + rules: + - path: _test\.go + linters: + - goconst + - dupl + - mnd diff --git a/Dockerfile b/Dockerfile index 71a9fcc..5124b1d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.25-alpine AS builder +FROM golang:1.26.3-alpine AS builder WORKDIR /src @@ -15,7 +15,7 @@ COPY . . # Build the binary RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /proxy ./cmd/proxy -FROM alpine:3.21 +FROM alpine:3.23.4 RUN apk add --no-cache ca-certificates diff --git a/README.md b/README.md index 22beaaf..abf3e11 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,7 @@ Resolution order: package override, then ecosystem override, then global default | Conan | C/C++ | | ✓ | | Conda | Python/R | Yes | ✓ | | CRAN | R | | ✓ | +| Julia | Julia | | ✓ | | Container | Docker/OCI | | ✓ | | Debian | Debian/Ubuntu | | ✓ | | RPM | RHEL/Fedora | | ✓ | @@ -209,6 +210,18 @@ Add to your `~/.m2/settings.xml`: ``` +The `/maven/` endpoint uses Maven Central as primary upstream and falls back to the Gradle Plugin Portal for Gradle plugin marker metadata and related artifacts when the primary upstream returns not found. + +For Gradle plugin resolution via the same proxy endpoint: + +```kotlin +pluginManagement { + repositories { + maven(url = "http://localhost:8080/maven/") + } +} +``` + ### Gradle HTTP Build Cache Configure in `settings.gradle(.kts)`: @@ -312,6 +325,21 @@ local({ }) ``` +### Julia + +Set the Pkg server before starting Julia: + +```bash +export JULIA_PKG_SERVER=http://localhost:8080/julia +``` + +Or inside a running session: + +```julia +ENV["JULIA_PKG_SERVER"] = "http://localhost:8080/julia" +using Pkg; Pkg.update() +``` + ### Docker / Container Registry Configure Docker to use the proxy as a registry mirror in `/etc/docker/daemon.json`: @@ -370,6 +398,7 @@ sudo dnf update ## Configuration The proxy can be configured via: + 1. Command line flags (highest priority) 2. Environment variables 3. Configuration file (YAML or JSON) @@ -577,7 +606,7 @@ Recently cached: | Endpoint | Description | |----------|-------------| | `GET /` | Dashboard (web UI) | -| `GET /health` | Health check (returns "ok" if healthy) | +| `GET /health` | Health check (JSON; HTTP 200 healthy, 503 unhealthy) | | `GET /stats` | Cache statistics (JSON) | | `GET /metrics` | Prometheus metrics | | `GET /npm/*` | npm registry protocol | @@ -593,6 +622,7 @@ Recently cached: | `GET /conan/*` | Conan C/C++ protocol | | `GET /conda/*` | Conda/Anaconda protocol | | `GET /cran/*` | CRAN (R) protocol | +| `GET /julia/*` | Julia Pkg server protocol | | `GET /v2/*` | OCI/Docker registry protocol | | `GET /debian/*` | Debian/APT repository protocol | | `GET /rpm/*` | RPM/Yum repository protocol | @@ -815,9 +845,28 @@ The proxy exposes Prometheus metrics at `GET /metrics`. All metric names are pre | `proxy_storage_operation_duration_seconds` | histogram | `operation` | Storage read/write latency | | `proxy_storage_errors_total` | counter | `operation` | Storage read/write failures | | `proxy_active_requests` | gauge | | In-flight requests | +| `proxy_health_probe_failures_total` | counter | `step` | Storage health probe failures by failing step (`write`, `size`, `read`, `verify`, `delete`). | Cache size and artifact count are refreshed every 60 seconds. The remaining metrics update on each request. +### Health Check + +`/health` returns a structured JSON report of subsystem health. HTTP 200 if all checks pass; 503 if any fail. + +```json +{ + "status": "ok", + "checks": { + "database": {"status": "ok"}, + "storage": {"status": "ok"} + } +} +``` + +Failing checks include an `"error"` field. Storage failures also include a `"step"` field identifying which probe step failed (`write`, `size`, `read`, `verify`, `delete`). When the database check fails, the storage entry reports `{"status": "skipped"}` so the response always carries the same key set. + +Storage probe results are cached for `health.storage_probe_interval` (default 30s) to bound the cost of probing remote backends. A probe holds an internal mutex for up to 10 seconds (the hardcoded per-probe timeout), so `/health` is intended as a Kubernetes **readiness** probe rather than a liveness probe — a slow S3 round-trip should pull the pod from rotation, not restart it. + Scrape config for Prometheus: ```yaml @@ -941,6 +990,7 @@ The proxy will recreate the database on next start. ## Building from Source Requirements: + - Go 1.25 or later ```bash diff --git a/cmd/proxy/main.go b/cmd/proxy/main.go index 946d12a..15a71c0 100644 --- a/cmd/proxy/main.go +++ b/cmd/proxy/main.go @@ -72,11 +72,14 @@ // PROXY_DATABASE_URL - PostgreSQL connection URL // PROXY_LOG_LEVEL - Log level // PROXY_LOG_FORMAT - Log format +// PROXY_UPSTREAM_MAVEN - Maven repository upstream URL +// PROXY_UPSTREAM_GRADLE_PLUGIN_PORTAL - Gradle Plugin Portal upstream URL // PROXY_GRADLE_BUILD_CACHE_READ_ONLY - Disable Gradle PUT uploads // PROXY_GRADLE_BUILD_CACHE_MAX_UPLOAD_SIZE - Max Gradle PUT request body size // PROXY_GRADLE_BUILD_CACHE_MAX_AGE - Gradle cache max age eviction // PROXY_GRADLE_BUILD_CACHE_MAX_SIZE - Gradle cache max total size // PROXY_GRADLE_BUILD_CACHE_SWEEP_INTERVAL - Gradle cache eviction sweep interval +// PROXY_HEALTH_STORAGE_PROBE_INTERVAL - Storage health probe cache interval (default "30s") // // Example: // @@ -198,11 +201,14 @@ func runServe() { fmt.Fprintf(os.Stderr, " PROXY_DATABASE_URL PostgreSQL connection URL\n") fmt.Fprintf(os.Stderr, " PROXY_LOG_LEVEL Log level\n") fmt.Fprintf(os.Stderr, " PROXY_LOG_FORMAT Log format\n") + fmt.Fprintf(os.Stderr, " PROXY_UPSTREAM_MAVEN Maven repository upstream URL\n") + fmt.Fprintf(os.Stderr, " PROXY_UPSTREAM_GRADLE_PLUGIN_PORTAL Gradle Plugin Portal upstream URL\n") fmt.Fprintf(os.Stderr, " PROXY_GRADLE_BUILD_CACHE_READ_ONLY Disable Gradle PUT uploads\n") fmt.Fprintf(os.Stderr, " PROXY_GRADLE_BUILD_CACHE_MAX_UPLOAD_SIZE Max Gradle PUT request body size\n") fmt.Fprintf(os.Stderr, " PROXY_GRADLE_BUILD_CACHE_MAX_AGE Gradle cache max age eviction\n") fmt.Fprintf(os.Stderr, " PROXY_GRADLE_BUILD_CACHE_MAX_SIZE Gradle cache max total size\n") fmt.Fprintf(os.Stderr, " PROXY_GRADLE_BUILD_CACHE_SWEEP_INTERVAL Gradle cache eviction sweep interval\n") + fmt.Fprintf(os.Stderr, " PROXY_HEALTH_STORAGE_PROBE_INTERVAL Storage health probe cache interval\n") } _ = fs.Parse(os.Args[1:]) @@ -464,6 +470,7 @@ func runMirror() { proxy := handler.NewProxy(db, store, fetcher, resolver, logger) proxy.CacheMetadata = true // mirror always caches metadata proxy.MetadataTTL = cfg.ParseMetadataTTL() + proxy.MetadataMaxSize = cfg.ParseMetadataMaxSize() m := mirror.New(proxy, db, store, logger, *concurrency) diff --git a/config.example.yaml b/config.example.yaml index 4505849..11c751c 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -71,6 +71,12 @@ upstream: # npm registry URL npm: "https://registry.npmjs.org" + # Maven repository URL (used by /maven endpoint) + maven: "https://repo1.maven.org/maven2" + + # Gradle Plugin Portal Maven URL (fallback for plugin marker artifacts) + gradle_plugin_portal: "https://plugins.gradle.org/m2" + # Cargo sparse index URL cargo: "https://index.crates.io" @@ -128,6 +134,15 @@ gradle: # How often eviction runs when max_age or max_size is set sweep_interval: "10m" +# Health endpoint configuration. +health: + # Minimum time between storage backend probes. + # The /health endpoint runs a write/read/verify/delete round-trip + # against the configured storage backend and caches the result for + # this interval. Set to "0" to probe on every request. + # Default: "30s". + storage_probe_interval: "30s" + # Version cooldown configuration # Hides package versions published too recently, giving the community time # to spot malicious releases before they're pulled into projects. diff --git a/docs/architecture.md b/docs/architecture.md index 81c41cf..85e5aaf 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -14,7 +14,7 @@ The proxy is a caching HTTP server that sits between package manager clients and │ │ /npm/* -> NPMHandler /health -> healthHandler │ │ │ │ /cargo/* -> CargoHandler /stats -> statsHandler │ │ │ │ /gem/* -> GemHandler /metrics -> prometheus │ │ -│ │ ...16 ecosystems /api/* -> APIHandler │ │ +│ │ ...17 ecosystems /api/* -> APIHandler │ │ │ │ / -> Web UI │ │ │ └──────────────────────────────────────────────────────────┘ │ │ │ │ │ │ @@ -277,15 +277,15 @@ HTTP server setup, web UI, and API handlers. - Web UI: dashboard, package browser, source browser, version comparison - Templates are embedded in the binary via `//go:embed` - Enrichment API for package metadata, vulnerability scanning, and outdated detection -- Health, stats, and Prometheus metrics endpoints +- Health, stats, and Prometheus metrics endpoints. `/health` runs an active write → size-check → read → verify → delete probe against the storage backend and returns a structured JSON response (`HealthResponse`) with `"ok"` / `"error"` status per subsystem. Probe results are cached (default 30 s, configurable via `health.storage_probe_interval`) to avoid overwhelming remote backends. ### `internal/metrics` Prometheus metrics for cache performance, upstream latency, storage operations, and active requests. See the Monitoring section of the README for the full metric list. -### `internal/cooldown` +### Cooldown -Version age filtering for supply chain attack mitigation. Configurable at global, ecosystem, and per-package levels. Supported by npm, PyPI, pub.dev, and Composer handlers. +Version age filtering for supply chain attack mitigation, provided by [github.com/git-pkgs/cooldown](https://github.com/git-pkgs/cooldown). Configurable at global, ecosystem, and per-package levels. Supported by npm, PyPI, pub.dev, and Composer handlers. ### `internal/enrichment` diff --git a/docs/configuration.md b/docs/configuration.md index ac85d54..1310bd0 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -114,6 +114,8 @@ Override default upstream registry URLs: ```yaml upstream: npm: "https://registry.npmjs.org" + maven: "https://repo1.maven.org/maven2" + gradle_plugin_portal: "https://plugins.gradle.org/m2" cargo: "https://index.crates.io" cargo_download: "https://static.crates.io/crates" ``` @@ -263,6 +265,16 @@ Set to `"0"` to always revalidate with upstream (ETag-based conditional requests When upstream is unreachable and the cached entry is past its TTL, the proxy serves the stale cached copy with a `Warning: 110 - "Response is Stale"` header so clients can tell the data may be outdated. +### Metadata size limit + +Upstream metadata responses are buffered in memory before being rewritten and served. `metadata_max_size` caps that buffer to protect against OOM from a misbehaving upstream. Some npm packages with thousands of versions (for example `renovate`) exceed the 100 MB default, so raise this if you see `metadata response exceeds size limit` in the logs. + +```yaml +metadata_max_size: "100MB" # default +``` + +Or via environment variable: `PROXY_METADATA_MAX_SIZE=250MB`. + ## Mirror API The `/api/mirror` endpoints are disabled by default. Enable them to allow starting mirror jobs via HTTP: diff --git a/docs/swagger/docs.go b/docs/swagger/docs.go index 34aedbf..23ff54a 100644 --- a/docs/swagger/docs.go +++ b/docs/swagger/docs.go @@ -399,7 +399,7 @@ const docTemplate = `{ "/health": { "get": { "produces": [ - "text/plain" + "application/json" ], "tags": [ "meta" @@ -409,13 +409,13 @@ const docTemplate = `{ "200": { "description": "OK", "schema": { - "type": "string" + "$ref": "#/definitions/server.HealthResponse" } }, "503": { "description": "Service Unavailable", "schema": { - "type": "string" + "$ref": "#/definitions/server.HealthResponse" } } } @@ -515,6 +515,34 @@ const docTemplate = `{ } } }, + "server.HealthCheck": { + "type": "object", + "properties": { + "error": { + "type": "string" + }, + "status": { + "type": "string" + }, + "step": { + "type": "string" + } + } + }, + "server.HealthResponse": { + "type": "object", + "properties": { + "checks": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/server.HealthCheck" + } + }, + "status": { + "type": "string" + } + } + }, "server.OutdatedPackage": { "type": "object", "properties": { diff --git a/docs/swagger/swagger.json b/docs/swagger/swagger.json index b775e63..c2b4dfc 100644 --- a/docs/swagger/swagger.json +++ b/docs/swagger/swagger.json @@ -392,7 +392,7 @@ "/health": { "get": { "produces": [ - "text/plain" + "application/json" ], "tags": [ "meta" @@ -402,13 +402,13 @@ "200": { "description": "OK", "schema": { - "type": "string" + "$ref": "#/definitions/server.HealthResponse" } }, "503": { "description": "Service Unavailable", "schema": { - "type": "string" + "$ref": "#/definitions/server.HealthResponse" } } } @@ -508,6 +508,34 @@ } } }, + "server.HealthCheck": { + "type": "object", + "properties": { + "error": { + "type": "string" + }, + "status": { + "type": "string" + }, + "step": { + "type": "string" + } + } + }, + "server.HealthResponse": { + "type": "object", + "properties": { + "checks": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/server.HealthCheck" + } + }, + "status": { + "type": "string" + } + } + }, "server.OutdatedPackage": { "type": "object", "properties": { diff --git a/go.mod b/go.mod index 87fd2db..199c8c8 100644 --- a/go.mod +++ b/go.mod @@ -3,13 +3,15 @@ module github.com/git-pkgs/proxy go 1.25.6 require ( - github.com/CycloneDX/cyclonedx-go v0.10.0 + github.com/BurntSushi/toml v1.6.0 + github.com/CycloneDX/cyclonedx-go v0.11.0 github.com/git-pkgs/archives v0.3.0 - github.com/git-pkgs/enrichment v0.2.2 + github.com/git-pkgs/cooldown v0.1.1 + github.com/git-pkgs/enrichment v0.2.3 github.com/git-pkgs/purl v0.1.12 - github.com/git-pkgs/registries v0.5.1 - github.com/git-pkgs/spdx v0.1.3 - github.com/git-pkgs/vers v0.2.5 + github.com/git-pkgs/registries v0.6.1 + github.com/git-pkgs/spdx v0.1.4 + github.com/git-pkgs/vers v0.2.6 github.com/git-pkgs/vulns v0.1.5 github.com/go-chi/chi/v5 v5.2.5 github.com/jmoiron/sqlx v1.4.0 @@ -22,7 +24,7 @@ require ( golang.org/x/sync v0.20.0 google.golang.org/protobuf v1.36.11 gopkg.in/yaml.v3 v3.0.1 - modernc.org/sqlite v1.50.0 + modernc.org/sqlite v1.50.1 ) require ( @@ -49,7 +51,6 @@ require ( github.com/Azure/go-autorest v14.2.0+incompatible // indirect github.com/Azure/go-autorest/autorest/to v0.4.1 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 // indirect - github.com/BurntSushi/toml v1.6.0 // indirect github.com/Djarvur/go-err113 v0.1.1 // indirect github.com/KyleBanks/depth v1.2.1 // indirect github.com/Masterminds/semver/v3 v3.4.0 // indirect @@ -128,7 +129,7 @@ require ( github.com/ghostiam/protogetter v0.3.20 // indirect github.com/git-pkgs/packageurl-go v0.3.1 // indirect github.com/git-pkgs/pom v0.1.4 // indirect - github.com/github/go-spdx/v2 v2.6.0 // indirect + github.com/github/go-spdx/v2 v2.7.0 // indirect github.com/go-critic/go-critic v0.14.3 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect @@ -309,7 +310,7 @@ require ( gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect honnef.co/go/tools v0.7.0 // indirect - modernc.org/libc v1.72.0 // indirect + modernc.org/libc v1.72.3 // indirect modernc.org/mathutil v1.7.1 // indirect modernc.org/memory v1.11.0 // indirect mvdan.cc/gofumpt v0.9.2 // indirect diff --git a/go.sum b/go.sum index 80df597..23c3df7 100644 --- a/go.sum +++ b/go.sum @@ -66,8 +66,8 @@ github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0/go.mod h1:HKpQ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/toml v1.6.0 h1:dRaEfpa2VI55EwlIW72hMRHdWouJeRF7TPYhI+AUQjk= github.com/BurntSushi/toml v1.6.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= -github.com/CycloneDX/cyclonedx-go v0.10.0 h1:7xyklU7YD+CUyGzSFIARG18NYLsKVn4QFg04qSsu+7Y= -github.com/CycloneDX/cyclonedx-go v0.10.0/go.mod h1:vUvbCXQsEm48OI6oOlanxstwNByXjCZ2wuleUlwGEO8= +github.com/CycloneDX/cyclonedx-go v0.11.0 h1:GokP8FiRC+foiuwWhSSLpSD5H4hSWtGnR3wo7apkBFI= +github.com/CycloneDX/cyclonedx-go v0.11.0/go.mod h1:vUvbCXQsEm48OI6oOlanxstwNByXjCZ2wuleUlwGEO8= github.com/Djarvur/go-err113 v0.1.1 h1:eHfopDqXRwAi+YmCUas75ZE0+hoBHJ2GQNLYRSxao4g= github.com/Djarvur/go-err113 v0.1.1/go.mod h1:IaWJdYFLg76t2ihfflPZnM1LIQszWOsFDh2hhhAVF6k= github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0 h1:sBEjpZlNHzK1voKq9695PJSX2o5NEXl7/OL3coiIY0c= @@ -252,24 +252,26 @@ github.com/ghostiam/protogetter v0.3.20 h1:oW7OPFit2FxZOpmMRPP9FffU4uUpfeE/rEdE1 github.com/ghostiam/protogetter v0.3.20/go.mod h1:FjIu5Yfs6FT391m+Fjp3fbAYJ6rkL/J6ySpZBfnODuI= github.com/git-pkgs/archives v0.3.0 h1:iXKyO83jEFub1PGEDlHmk2tQ7XeV5LySTc0sEkH3x78= github.com/git-pkgs/archives v0.3.0/go.mod h1:LTJ1iQVFA7otizWMOyiI82NYVmyBWAPRzwu/e30rcXU= -github.com/git-pkgs/enrichment v0.2.2 h1:vaQu5vs3tjQB5JI0gzBrUCynUc9z3l5byPhgKFaNZrc= -github.com/git-pkgs/enrichment v0.2.2/go.mod h1:5JWGmlHWcv5HQHUrctcpnRUNpEF5VAixD2z4zvqKejs= +github.com/git-pkgs/cooldown v0.1.1 h1:9OqqzCB8gANz/y44SmqGD0Jp8Qtu81D1sCbKl6Ehg7w= +github.com/git-pkgs/cooldown v0.1.1/go.mod h1:v7APuK/UouTiu8mWQZbdDmj7DfxxkGUeuhjaRB5gv9E= +github.com/git-pkgs/enrichment v0.2.3 h1:42mqoUhQZNGhlEO671pboI/Cu6F+DoffJoFbVhb2jlw= +github.com/git-pkgs/enrichment v0.2.3/go.mod h1:MBv5nhHzjwLxeSgx2+7waCcpReUjhCD+9B0bvufpMO0= github.com/git-pkgs/packageurl-go v0.3.1 h1:WM3RBABQZLaRBxgKyYughc3cVBE8KyQxbSC6Jt5ak7M= github.com/git-pkgs/packageurl-go v0.3.1/go.mod h1:rcIxiG37BlQLB6FZfgdj9Fm7yjhRQd3l+5o7J0QPAk4= github.com/git-pkgs/pom v0.1.4 h1:C6st+XSbF75eKuwfdkDZZtYHoTcaWRIEQYar5VtszUo= github.com/git-pkgs/pom v0.1.4/go.mod h1:ufdMBe1lKzqOeP9IUb9NPZ458xKV8E8NvuyBMxOfwIk= github.com/git-pkgs/purl v0.1.12 h1:qCskrEU1LWQhCkIVZd992W5++Bsxazvx2Cx1/65qCvU= github.com/git-pkgs/purl v0.1.12/go.mod h1:ofp4mHsR0cUeVONQaf33n6Wxg2QTEvtUdRfCedI8ouA= -github.com/git-pkgs/registries v0.5.1 h1:UPE42CyZAsOfqO3N5bDelu28wS4Ifx/aOj0XZS4qYeI= -github.com/git-pkgs/registries v0.5.1/go.mod h1:BY0YW+V0WDGBMuDR2aSMR3NzOPFK4K+F3j6+ch+cq3M= -github.com/git-pkgs/spdx v0.1.3 h1:YQou23mLfzbW//6JlHUuc5x1P5VNIIDSku5gvauf86I= -github.com/git-pkgs/spdx v0.1.3/go.mod h1:4HGGWyC8tg4DjOhrtBTYl4Lu+5i2BFuauGX8zcVcYPg= -github.com/git-pkgs/vers v0.2.5 h1:tDtUMik9Iw1lyPHdT5V6LXjLo9LsJc0xOawURz7ibQU= -github.com/git-pkgs/vers v0.2.5/go.mod h1:biTbSQK1qdbrsxDEKnqe3Jzclxz8vW6uDcwKjfUGcOo= +github.com/git-pkgs/registries v0.6.1 h1:xZfVZQmffIfdeJthn5o2EozbVJ6gBeImYwKQnfdKUfU= +github.com/git-pkgs/registries v0.6.1/go.mod h1:a3BP/56VW3O/CFRqiJCtSy+OqRrSH25wF1PWHP76ka0= +github.com/git-pkgs/spdx v0.1.4 h1:eQ0waEV3uUeItpWAOvdN1K1rL9hTgsU7fF74r1mDXMs= +github.com/git-pkgs/spdx v0.1.4/go.mod h1:cqRoZcvl530s/W+oGNvwjt4ODN8T1W6D/20MUZEFdto= +github.com/git-pkgs/vers v0.2.6 h1:IelZd7BP/JhzTloUTDY67nehUgoYva3g9viqAMCHJg8= +github.com/git-pkgs/vers v0.2.6/go.mod h1:biTbSQK1qdbrsxDEKnqe3Jzclxz8vW6uDcwKjfUGcOo= github.com/git-pkgs/vulns v0.1.5 h1:mtX88/27toFl+B95kaH5QbAdOCQ3YIDGjJrlrrnqQTE= github.com/git-pkgs/vulns v0.1.5/go.mod h1:bZFikfrR/5gC0ZMwXh7qcEu2gpKfXMBhVsy4kF12Ae0= -github.com/github/go-spdx/v2 v2.6.0 h1:Y/Chr7L8oG85Ilbzl11xkUSQFUfG1kGkLP18LyInvhg= -github.com/github/go-spdx/v2 v2.6.0/go.mod h1:Ftc45YYG1WzpzwEPKRVm9Jv8vDqOrN4gWoCkK+bHer0= +github.com/github/go-spdx/v2 v2.7.0 h1:GzfXx4wFdlilARxmFRXW/mgUy3A4vSqZocCMFV6XFdQ= +github.com/github/go-spdx/v2 v2.7.0/go.mod h1:Ftc45YYG1WzpzwEPKRVm9Jv8vDqOrN4gWoCkK+bHer0= github.com/go-chi/chi/v5 v5.2.5 h1:Eg4myHZBjyvJmAFjFvWgrqDTXFyOzjj7YIm3L3mu6Ug= github.com/go-chi/chi/v5 v5.2.5/go.mod h1:X7Gx4mteadT3eDOMTsXzmI4/rwUpOwBHLpAfupzFJP0= github.com/go-critic/go-critic v0.14.3 h1:5R1qH2iFeo4I/RJU8vTezdqs08Egi4u5p6vOESA0pog= @@ -882,10 +884,10 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.7.0 h1:w6WUp1VbkqPEgLz4rkBzH/CSU6HkoqNLp6GstyTx3lU= honnef.co/go/tools v0.7.0/go.mod h1:pm29oPxeP3P82ISxZDgIYeOaf9ta6Pi0EWvCFoLG2vc= -modernc.org/cc/v4 v4.27.3 h1:uNCgn37E5U09mTv1XgskEVUJ8ADKpmFMPxzGJ0TSo+U= -modernc.org/cc/v4 v4.27.3/go.mod h1:3YjcbCqhoTTHPycJDRl2WZKKFj0nwcOIPBfEZK0Hdk8= -modernc.org/ccgo/v4 v4.32.4 h1:L5OB8rpEX4ZsXEQwGozRfJyJSFHbbNVOoQ59DU9/KuU= -modernc.org/ccgo/v4 v4.32.4/go.mod h1:lY7f+fiTDHfcv6YlRgSkxYfhs+UvOEEzj49jAn2TOx0= +modernc.org/cc/v4 v4.28.2 h1:3tQ0lf2ADtoby2EtSP+J7IE2SHwEJdP8ioR59wx7XpY= +modernc.org/cc/v4 v4.28.2/go.mod h1:OnovgIhbbMXMu1aISnJ0wvVD1KnW+cAUJkIrAWh+kVI= +modernc.org/ccgo/v4 v4.34.0 h1:yRLPFZieg532OT4rp4JFNIVcquwalMX26G95WQDqwCQ= +modernc.org/ccgo/v4 v4.34.0/go.mod h1:AS5WYMyBakQ+fhsHhtP8mWB82KTGPkNNJDGfGQCe0/A= modernc.org/fileutil v1.4.0 h1:j6ZzNTftVS054gi281TyLjHPp6CPHr2KCxEXjEbD6SM= modernc.org/fileutil v1.4.0/go.mod h1:EqdKFDxiByqxLk8ozOxObDSfcVOv/54xDs/DUHdvCUU= modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI= @@ -894,18 +896,18 @@ modernc.org/gc/v3 v3.1.2 h1:ZtDCnhonXSZexk/AYsegNRV1lJGgaNZJuKjJSWKyEqo= modernc.org/gc/v3 v3.1.2/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY= modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks= modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI= -modernc.org/libc v1.72.0 h1:IEu559v9a0XWjw0DPoVKtXpO2qt5NVLAnFaBbjq+n8c= -modernc.org/libc v1.72.0/go.mod h1:tTU8DL8A+XLVkEY3x5E/tO7s2Q/q42EtnNWda/L5QhQ= +modernc.org/libc v1.72.3 h1:ZnDF4tXn4NBXFutMMQC4vtbTFSXhhKzR73fv0beZEAU= +modernc.org/libc v1.72.3/go.mod h1:dn0dZNnnn1clLyvRxLxYExxiKRZIRENOfqQ8XEeg4Qs= modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU= modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg= modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI= modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw= -modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8= -modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns= +modernc.org/opt v0.2.0 h1:tGyef5ApycA7FSEOMraay9SaTk5zmbx7Tu+cJs4QKZg= +modernc.org/opt v0.2.0/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns= modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w= modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE= -modernc.org/sqlite v1.50.0 h1:eMowQSWLK0MeiQTdmz3lqoF5dqclujdlIKeJA11+7oM= -modernc.org/sqlite v1.50.0/go.mod h1:m0w8xhwYUVY3H6pSDwc3gkJ/irZT/0YEXwBlhaxQEew= +modernc.org/sqlite v1.50.1 h1:l+cQvn0sd0zJJtfygGHuQJ5AjlrwXmWPw4KP3ZMwr9w= +modernc.org/sqlite v1.50.1/go.mod h1:tcNzv5p84E0skkmJn038y+hWJbLQXQqEnQfeh5r2JLM= modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0= modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A= modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= diff --git a/internal/config/config.go b/internal/config/config.go index b728f68..0e8405d 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -96,12 +96,20 @@ type Config struct { // Default: "5m". Set to "0" to always revalidate. MetadataTTL string `json:"metadata_ttl" yaml:"metadata_ttl"` + // MetadataMaxSize is the maximum size of an upstream metadata response + // the proxy will buffer (e.g. "100MB", "250MB"). Responses over this + // size return ErrMetadataTooLarge. Default: "100MB". + MetadataMaxSize string `json:"metadata_max_size" yaml:"metadata_max_size"` + // MirrorAPI enables the /api/mirror endpoints for starting mirror jobs via HTTP. // Disabled by default to prevent unauthenticated users from triggering downloads. MirrorAPI bool `json:"mirror_api" yaml:"mirror_api"` // Gradle configures Gradle HttpBuildCache behavior. Gradle GradleConfig `json:"gradle" yaml:"gradle"` + + // Health configures the /health endpoint behavior. + Health HealthConfig `json:"health" yaml:"health"` } // CooldownConfig configures version cooldown periods. @@ -182,6 +190,14 @@ type GradleBuildCacheConfig struct { SweepInterval string `json:"sweep_interval" yaml:"sweep_interval"` } +// HealthConfig configures the /health endpoint. +type HealthConfig struct { + // StorageProbeInterval is the minimum time between storage backend probes. + // Uses Go duration syntax (e.g. "30s", "1m"). Default: "30s". + // Set to "0" to probe on every /health request (useful for low-traffic deployments). + StorageProbeInterval string `json:"storage_probe_interval" yaml:"storage_probe_interval"` +} + // DatabaseConfig configures the cache database. type DatabaseConfig struct { // Driver is the database driver: "sqlite" or "postgres". @@ -210,6 +226,15 @@ type UpstreamConfig struct { // Default: https://registry.npmjs.org NPM string `json:"npm" yaml:"npm"` + // Maven is the upstream Maven repository URL. + // Default: https://repo1.maven.org/maven2 + Maven string `json:"maven" yaml:"maven"` + + // GradlePluginPortal is the upstream Gradle Plugin Portal Maven URL. + // Used to resolve Gradle plugin marker artifacts. + // Default: https://plugins.gradle.org/m2 + GradlePluginPortal string `json:"gradle_plugin_portal" yaml:"gradle_plugin_portal"` + // Cargo is the upstream cargo index URL. // Default: https://index.crates.io Cargo string `json:"cargo" yaml:"cargo"` @@ -287,17 +312,19 @@ func Default() *Config { Format: "text", }, Upstream: UpstreamConfig{ - NPM: "https://registry.npmjs.org", - Cargo: "https://index.crates.io", - CargoDownload: "https://static.crates.io/crates", + NPM: "https://registry.npmjs.org", + Maven: "https://repo1.maven.org/maven2", + GradlePluginPortal: "https://plugins.gradle.org/m2", + Cargo: "https://index.crates.io", + CargoDownload: "https://static.crates.io/crates", }, Gradle: GradleConfig{ BuildCache: GradleBuildCacheConfig{ ReadOnly: false, - MaxUploadSize: "100MB", + MaxUploadSize: defaultGradleMaxUploadSizeStr, MaxAge: "168h", MaxSize: "", - SweepInterval: "10m", + SweepInterval: defaultGradleSweepIntervalStr, }, }, } @@ -343,6 +370,7 @@ func Load(path string) (*Config, error) { // - PROXY_DATABASE_PATH // - PROXY_LOG_LEVEL // - PROXY_LOG_FORMAT +// - PROXY_HEALTH_STORAGE_PROBE_INTERVAL func (c *Config) LoadFromEnv() { if v := os.Getenv("PROXY_LISTEN"); v != "" { c.Listen = v @@ -383,6 +411,12 @@ func (c *Config) LoadFromEnv() { if v := os.Getenv("PROXY_LOG_FORMAT"); v != "" { c.Log.Format = v } + if v := os.Getenv("PROXY_UPSTREAM_MAVEN"); v != "" { + c.Upstream.Maven = v + } + if v := os.Getenv("PROXY_UPSTREAM_GRADLE_PLUGIN_PORTAL"); v != "" { + c.Upstream.GradlePluginPortal = v + } if v := os.Getenv("PROXY_COOLDOWN_DEFAULT"); v != "" { c.Cooldown.Default = v } @@ -395,6 +429,9 @@ func (c *Config) LoadFromEnv() { if v := os.Getenv("PROXY_METADATA_TTL"); v != "" { c.MetadataTTL = v } + if v := os.Getenv("PROXY_METADATA_MAX_SIZE"); v != "" { + c.MetadataMaxSize = v + } if v := os.Getenv("PROXY_GRADLE_BUILD_CACHE_READ_ONLY"); v != "" { c.Gradle.BuildCache.ReadOnly = v == "true" || v == "1" } @@ -410,6 +447,9 @@ func (c *Config) LoadFromEnv() { if v := os.Getenv("PROXY_GRADLE_BUILD_CACHE_SWEEP_INTERVAL"); v != "" { c.Gradle.BuildCache.SweepInterval = v } + if v := os.Getenv("PROXY_HEALTH_STORAGE_PROBE_INTERVAL"); v != "" { + c.Health.StorageProbeInterval = v + } } // Validate checks the configuration for errors. @@ -481,51 +521,84 @@ func (c *Config) Validate() error { } } - // Validate Gradle build cache upload size (always required and must be > 0). - if c.Gradle.BuildCache.MaxUploadSize == "" { - c.Gradle.BuildCache.MaxUploadSize = "100MB" + if err := validateMetadataMaxSize(c.MetadataMaxSize); err != nil { + return err } - uploadSize, err := ParseSize(c.Gradle.BuildCache.MaxUploadSize) + + if err := c.Health.Validate(); err != nil { + return err + } + + if err := c.Gradle.BuildCache.Validate(); err != nil { + return err + } + + return nil +} + +// Validate checks the /health configuration. An unset interval is allowed +// (the cache uses its default); explicit values must parse and be non-negative. +func (h *HealthConfig) Validate() error { + if h.StorageProbeInterval == "" || h.StorageProbeInterval == "0" { + return nil + } + d, err := time.ParseDuration(h.StorageProbeInterval) + if err != nil { + return fmt.Errorf("invalid health.storage_probe_interval %q: %w", h.StorageProbeInterval, err) + } + if d < 0 { + return fmt.Errorf("invalid health.storage_probe_interval %q: must be non-negative", h.StorageProbeInterval) + } + return nil +} + +// Validate checks Gradle build cache settings, applying the default upload +// size if unset. +func (g *GradleBuildCacheConfig) Validate() error { + if g.MaxUploadSize == "" { + g.MaxUploadSize = defaultGradleMaxUploadSizeStr + } + uploadSize, err := ParseSize(g.MaxUploadSize) if err != nil { return fmt.Errorf("invalid gradle.build_cache.max_upload_size: %w", err) } if uploadSize <= 0 { - return fmt.Errorf("invalid gradle.build_cache.max_upload_size %q: must be > 0", c.Gradle.BuildCache.MaxUploadSize) + return fmt.Errorf("invalid gradle.build_cache.max_upload_size %q: must be > 0", g.MaxUploadSize) } - // Validate Gradle max age if specified. - if c.Gradle.BuildCache.MaxAge != "" && c.Gradle.BuildCache.MaxAge != "0" { - if _, err := time.ParseDuration(c.Gradle.BuildCache.MaxAge); err != nil { - return fmt.Errorf("invalid gradle.build_cache.max_age %q: %w", c.Gradle.BuildCache.MaxAge, err) + if g.MaxAge != "" && g.MaxAge != "0" { + if _, err := time.ParseDuration(g.MaxAge); err != nil { + return fmt.Errorf("invalid gradle.build_cache.max_age %q: %w", g.MaxAge, err) } } - // Validate Gradle max size if specified. - if c.Gradle.BuildCache.MaxSize != "" { - if _, err := ParseSize(c.Gradle.BuildCache.MaxSize); err != nil { + if g.MaxSize != "" { + if _, err := ParseSize(g.MaxSize); err != nil { return fmt.Errorf("invalid gradle.build_cache.max_size: %w", err) } } - // Validate Gradle sweep interval if specified. - if c.Gradle.BuildCache.SweepInterval != "" { - d, err := time.ParseDuration(c.Gradle.BuildCache.SweepInterval) + if g.SweepInterval != "" { + d, err := time.ParseDuration(g.SweepInterval) if err != nil { - return fmt.Errorf("invalid gradle.build_cache.sweep_interval %q: %w", c.Gradle.BuildCache.SweepInterval, err) + return fmt.Errorf("invalid gradle.build_cache.sweep_interval %q: %w", g.SweepInterval, err) } if d <= 0 { - return fmt.Errorf("invalid gradle.build_cache.sweep_interval %q: must be > 0", c.Gradle.BuildCache.SweepInterval) + return fmt.Errorf("invalid gradle.build_cache.sweep_interval %q: must be > 0", g.SweepInterval) } } return nil } -const defaultGradleBuildCacheMaxUploadSize = 100 << 20 -const defaultGradleBuildCacheSweepInterval = 10 * time.Minute const ( - defaultMetadataTTL = 5 * time.Minute //nolint:mnd // sensible default - defaultDirectServeTTL = 15 * time.Minute //nolint:mnd // sensible default + defaultMetadataTTL = 5 * time.Minute //nolint:mnd // sensible default + defaultDirectServeTTL = 15 * time.Minute //nolint:mnd // sensible default + defaultMetadataMaxSize = 100 << 20 + defaultGradleBuildCacheMaxUploadSize = 100 << 20 + defaultGradleBuildCacheSweepInterval = 10 * time.Minute + defaultGradleMaxUploadSizeStr = "100MB" + defaultGradleSweepIntervalStr = "10m" ) // ParseMaxSize returns the maximum cache size in bytes. @@ -541,6 +614,33 @@ func (c *Config) ParseMaxSize() int64 { return size } +func validateMetadataMaxSize(s string) error { + if s == "" { + return nil + } + size, err := ParseSize(s) + if err != nil { + return fmt.Errorf("invalid metadata_max_size: %w", err) + } + if size <= 0 { + return fmt.Errorf("invalid metadata_max_size %q: must be positive", s) + } + return nil +} + +// ParseMetadataMaxSize returns the maximum metadata response size in bytes. +// Returns 100MB if unset or invalid. +func (c *Config) ParseMetadataMaxSize() int64 { + if c.MetadataMaxSize == "" { + return defaultMetadataMaxSize + } + size, err := ParseSize(c.MetadataMaxSize) + if err != nil || size <= 0 { + return defaultMetadataMaxSize + } + return size +} + // ParseMetadataTTL returns the metadata TTL duration. // Returns 5 minutes if unset, 0 if explicitly disabled. func (c *Config) ParseMetadataTTL() time.Duration { diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 26a0fc6..d633c25 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -31,6 +31,12 @@ func TestDefault(t *testing.T) { if cfg.Gradle.BuildCache.MaxAge != "168h" { t.Errorf("Gradle.BuildCache.MaxAge = %q, want %q", cfg.Gradle.BuildCache.MaxAge, "168h") } + if cfg.Upstream.Maven != "https://repo1.maven.org/maven2" { + t.Errorf("Upstream.Maven = %q, want %q", cfg.Upstream.Maven, "https://repo1.maven.org/maven2") + } + if cfg.Upstream.GradlePluginPortal != "https://plugins.gradle.org/m2" { + t.Errorf("Upstream.GradlePluginPortal = %q, want %q", cfg.Upstream.GradlePluginPortal, "https://plugins.gradle.org/m2") + } } func TestValidate(t *testing.T) { @@ -264,6 +270,8 @@ func TestLoadFromEnv(t *testing.T) { t.Setenv("PROXY_BASE_URL", "https://env.example.com") t.Setenv("PROXY_STORAGE_PATH", "/env/cache") t.Setenv("PROXY_LOG_LEVEL", testLevelDebug) + t.Setenv("PROXY_UPSTREAM_MAVEN", "https://maven.example.com/repository/maven-public") + t.Setenv("PROXY_UPSTREAM_GRADLE_PLUGIN_PORTAL", "https://plugins.example.com/m2") t.Setenv("PROXY_GRADLE_BUILD_CACHE_READ_ONLY", "true") t.Setenv("PROXY_GRADLE_BUILD_CACHE_MAX_UPLOAD_SIZE", "32MB") t.Setenv("PROXY_GRADLE_BUILD_CACHE_MAX_AGE", "12h") @@ -284,6 +292,12 @@ func TestLoadFromEnv(t *testing.T) { if cfg.Log.Level != testLevelDebug { t.Errorf("Log.Level = %q, want %q", cfg.Log.Level, testLevelDebug) } + if cfg.Upstream.Maven != "https://maven.example.com/repository/maven-public" { + t.Errorf("Upstream.Maven = %q, want %q", cfg.Upstream.Maven, "https://maven.example.com/repository/maven-public") + } + if cfg.Upstream.GradlePluginPortal != "https://plugins.example.com/m2" { + t.Errorf("Upstream.GradlePluginPortal = %q, want %q", cfg.Upstream.GradlePluginPortal, "https://plugins.example.com/m2") + } if !cfg.Gradle.BuildCache.ReadOnly { t.Error("Gradle.BuildCache.ReadOnly = false, want true") } @@ -414,6 +428,52 @@ func TestParseMetadataTTL(t *testing.T) { } } +func TestParseMetadataMaxSize(t *testing.T) { + tests := []struct { + name string + size string + want int64 + }{ + {"unset uses default", "", defaultMetadataMaxSize}, + {"explicit value", "250MB", 250 << 20}, + {"bytes", "1024", 1024}, + {"invalid uses default", "lots", defaultMetadataMaxSize}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cfg := Default() + cfg.MetadataMaxSize = tt.size + got := cfg.ParseMetadataMaxSize() + if got != tt.want { + t.Errorf("ParseMetadataMaxSize() = %d, want %d", got, tt.want) + } + }) + } +} + +func TestValidateMetadataMaxSize(t *testing.T) { + cfg := Default() + cfg.MetadataMaxSize = "not-a-size" + if err := cfg.Validate(); err == nil { + t.Error("expected validation error for invalid metadata_max_size") + } + + cfg.MetadataMaxSize = "0" + if err := cfg.Validate(); err == nil { + t.Error("expected validation error for zero metadata_max_size") + } + + cfg.MetadataMaxSize = "250MB" + if err := cfg.Validate(); err != nil { + t.Errorf("unexpected error for valid metadata_max_size: %v", err) + } + + cfg.MetadataMaxSize = "" + if err := cfg.Validate(); err != nil { + t.Errorf("unexpected error for unset metadata_max_size: %v", err) + } +} + func TestValidateMetadataTTL(t *testing.T) { cfg := Default() cfg.MetadataTTL = "invalid" @@ -432,6 +492,34 @@ func TestValidateMetadataTTL(t *testing.T) { } } +func TestValidateHealthStorageProbeInterval(t *testing.T) { + cfg := Default() + cfg.Health.StorageProbeInterval = "not-a-duration" + if err := cfg.Validate(); err == nil { + t.Error("expected validation error for invalid health.storage_probe_interval") + } + + cfg.Health.StorageProbeInterval = "30s" + if err := cfg.Validate(); err != nil { + t.Errorf("unexpected error for valid health.storage_probe_interval: %v", err) + } + + cfg.Health.StorageProbeInterval = "0" + if err := cfg.Validate(); err != nil { + t.Errorf("unexpected error for zero health.storage_probe_interval: %v", err) + } + + cfg.Health.StorageProbeInterval = "" + if err := cfg.Validate(); err != nil { + t.Errorf("unexpected error for empty health.storage_probe_interval: %v", err) + } + + cfg.Health.StorageProbeInterval = "-5s" + if err := cfg.Validate(); err == nil { + t.Error("expected validation error for negative health.storage_probe_interval") + } +} + func TestLoadMetadataTTLFromEnv(t *testing.T) { cfg := Default() t.Setenv("PROXY_METADATA_TTL", "10m") diff --git a/internal/cooldown/cooldown.go b/internal/cooldown/cooldown.go deleted file mode 100644 index f37a2b9..0000000 --- a/internal/cooldown/cooldown.go +++ /dev/null @@ -1,125 +0,0 @@ -package cooldown - -import ( - "fmt" - "strconv" - "strings" - "time" -) - -const hoursPerDay = 24 - -// Config holds cooldown settings for version filtering. -// Cooldown hides package versions published too recently, giving the community -// time to spot malicious releases before they're pulled into projects. -type Config struct { - // Default is the global default cooldown duration (e.g., "3d", "48h"). - Default string `json:"default" yaml:"default"` - - // Ecosystems overrides the default for specific ecosystems. - // Keys are ecosystem names (e.g., "npm", "pypi"). - Ecosystems map[string]string `json:"ecosystems" yaml:"ecosystems"` - - // Packages overrides the cooldown for specific packages. - // Keys are PURLs (e.g., "pkg:npm/lodash", "pkg:npm/@babel/core"). - Packages map[string]string `json:"packages" yaml:"packages"` - - defaultDuration time.Duration - ecosystemDurations map[string]time.Duration - packageDurations map[string]time.Duration - parsed bool -} - -// parse resolves all string durations into time.Duration values. -// Called lazily on first use. -func (c *Config) parse() { - if c.parsed { - return - } - c.parsed = true - - c.defaultDuration, _ = ParseDuration(c.Default) - - c.ecosystemDurations = make(map[string]time.Duration, len(c.Ecosystems)) - for k, v := range c.Ecosystems { - d, _ := ParseDuration(v) - c.ecosystemDurations[k] = d - } - - c.packageDurations = make(map[string]time.Duration, len(c.Packages)) - for k, v := range c.Packages { - d, _ := ParseDuration(v) - c.packageDurations[k] = d - } -} - -// For returns the effective cooldown duration for a given ecosystem and package PURL. -// Resolution order: package override > ecosystem override > global default. -func (c *Config) For(ecosystem, packagePURL string) time.Duration { - c.parse() - - if d, ok := c.packageDurations[packagePURL]; ok { - return d - } - if d, ok := c.ecosystemDurations[ecosystem]; ok { - return d - } - return c.defaultDuration -} - -// IsAllowed returns true if a version with the given publish time has passed -// the cooldown period for this ecosystem/package. -func (c *Config) IsAllowed(ecosystem, packagePURL string, publishedAt time.Time) bool { - d := c.For(ecosystem, packagePURL) - if d == 0 { - return true - } - if publishedAt.IsZero() { - return true - } - return time.Since(publishedAt) >= d -} - -// Enabled returns true if any cooldown is configured. -func (c *Config) Enabled() bool { - c.parse() - if c.defaultDuration > 0 { - return true - } - for _, d := range c.ecosystemDurations { - if d > 0 { - return true - } - } - for _, d := range c.packageDurations { - if d > 0 { - return true - } - } - return false -} - -// ParseDuration parses a duration string supporting days (e.g., "3d"), -// in addition to Go's standard time.ParseDuration formats ("48h", "30m"). -// "0" means disabled (returns 0). -func ParseDuration(s string) (time.Duration, error) { - s = strings.TrimSpace(s) - if s == "" || s == "0" { - return 0, nil - } - - // Handle day suffix - if numStr, ok := strings.CutSuffix(s, "d"); ok { - days, err := strconv.ParseFloat(numStr, 64) - if err != nil { - return 0, fmt.Errorf("invalid duration %q: %w", s, err) - } - return time.Duration(days * float64(hoursPerDay*time.Hour)), nil - } - - d, err := time.ParseDuration(s) - if err != nil { - return 0, fmt.Errorf("invalid duration %q: %w", s, err) - } - return d, nil -} diff --git a/internal/cooldown/cooldown_test.go b/internal/cooldown/cooldown_test.go deleted file mode 100644 index c366077..0000000 --- a/internal/cooldown/cooldown_test.go +++ /dev/null @@ -1,133 +0,0 @@ -package cooldown - -import ( - "testing" - "time" -) - -func TestParseDuration(t *testing.T) { - tests := []struct { - input string - want time.Duration - wantErr bool - }{ - {"", 0, false}, - {"0", 0, false}, - {"3d", 3 * 24 * time.Hour, false}, - {"7d", 7 * 24 * time.Hour, false}, - {"14d", 14 * 24 * time.Hour, false}, - {"1.5d", 36 * time.Hour, false}, - {"48h", 48 * time.Hour, false}, - {"30m", 30 * time.Minute, false}, - {"1h30m", 90 * time.Minute, false}, - {"invalid", 0, true}, - {"d", 0, true}, - {"xd", 0, true}, - } - - for _, tt := range tests { - got, err := ParseDuration(tt.input) - if (err != nil) != tt.wantErr { - t.Errorf("ParseDuration(%q) error = %v, wantErr %v", tt.input, err, tt.wantErr) - continue - } - if got != tt.want { - t.Errorf("ParseDuration(%q) = %v, want %v", tt.input, got, tt.want) - } - } -} - -func TestConfigFor(t *testing.T) { - c := &Config{ - Default: "3d", - Ecosystems: map[string]string{ - "npm": "7d", - "cargo": "0", - }, - Packages: map[string]string{ - "pkg:npm/lodash": "0", - "pkg:npm/@babel/core": "14d", - }, - } - - tests := []struct { - ecosystem string - packagePURL string - want time.Duration - }{ - // Package override takes priority - {"npm", "pkg:npm/lodash", 0}, - {"npm", "pkg:npm/@babel/core", 14 * 24 * time.Hour}, - // Ecosystem override - {"npm", "pkg:npm/express", 7 * 24 * time.Hour}, - {"cargo", "pkg:cargo/serde", 0}, - // Global default - {"pypi", "pkg:pypi/requests", 3 * 24 * time.Hour}, - {"pub", "pkg:pub/flutter", 3 * 24 * time.Hour}, - } - - for _, tt := range tests { - got := c.For(tt.ecosystem, tt.packagePURL) - if got != tt.want { - t.Errorf("For(%q, %q) = %v, want %v", tt.ecosystem, tt.packagePURL, got, tt.want) - } - } -} - -func TestConfigIsAllowed(t *testing.T) { - c := &Config{ - Default: "3d", - Packages: map[string]string{ - "pkg:npm/lodash": "0", - }, - } - - now := time.Now() - - tests := []struct { - name string - ecosystem string - packagePURL string - publishedAt time.Time - want bool - }{ - {"old enough", "npm", "pkg:npm/express", now.Add(-4 * 24 * time.Hour), true}, - {"too recent", "npm", "pkg:npm/express", now.Add(-1 * 24 * time.Hour), false}, - {"exactly at boundary", "npm", "pkg:npm/express", now.Add(-3 * 24 * time.Hour), true}, - {"exempt package", "npm", "pkg:npm/lodash", now.Add(-1 * time.Minute), true}, - {"zero time", "npm", "pkg:npm/express", time.Time{}, true}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := c.IsAllowed(tt.ecosystem, tt.packagePURL, tt.publishedAt) - if got != tt.want { - t.Errorf("IsAllowed(%q, %q, %v) = %v, want %v", - tt.ecosystem, tt.packagePURL, tt.publishedAt, got, tt.want) - } - }) - } -} - -func TestConfigEnabled(t *testing.T) { - tests := []struct { - name string - cfg Config - want bool - }{ - {"empty config", Config{}, false}, - {"default only", Config{Default: "3d"}, true}, - {"ecosystem only", Config{Ecosystems: map[string]string{"npm": "7d"}}, true}, - {"package only", Config{Packages: map[string]string{"pkg:npm/x": "1d"}}, true}, - {"all zero", Config{Default: "0", Ecosystems: map[string]string{"npm": "0"}}, false}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := tt.cfg.Enabled() - if got != tt.want { - t.Errorf("Enabled() = %v, want %v", got, tt.want) - } - }) - } -} diff --git a/internal/database/schema.go b/internal/database/schema.go index e6f284f..c8d8d1e 100644 --- a/internal/database/schema.go +++ b/internal/database/schema.go @@ -6,7 +6,11 @@ import ( "time" ) -const postgresTimestamp = "TIMESTAMP" +const ( + postgresTimestamp = "TIMESTAMP" + sqliteDatetime = "DATETIME" + colTypeText = "TEXT" +) // Schema for proxy-specific tables. The packages and versions tables // are compatible with git-pkgs, allowing the proxy to use an existing @@ -369,9 +373,9 @@ func isTableNotFound(err error) bool { func (db *DB) createMigrationsTable() error { var ts string if db.dialect == DialectPostgres { - ts = "TIMESTAMP" + ts = postgresTimestamp } else { - ts = "DATETIME" + ts = sqliteDatetime } query := fmt.Sprintf(`CREATE TABLE IF NOT EXISTS migrations ( @@ -457,12 +461,12 @@ func (db *DB) MigrateSchema() error { func migrateAddPackagesEnrichmentColumns(db *DB) error { columns := map[string]string{ - "registry_url": "TEXT", - "supplier_name": "TEXT", - "supplier_type": "TEXT", - "source": "TEXT", - "enriched_at": "DATETIME", - "vulns_synced_at": "DATETIME", + "registry_url": colTypeText, + "supplier_name": colTypeText, + "supplier_type": colTypeText, + "source": colTypeText, + "enriched_at": sqliteDatetime, + "vulns_synced_at": sqliteDatetime, } if db.dialect == DialectPostgres { @@ -487,10 +491,10 @@ func migrateAddPackagesEnrichmentColumns(db *DB) error { func migrateAddVersionsEnrichmentColumns(db *DB) error { columns := map[string]string{ - "integrity": "TEXT", + "integrity": colTypeText, "yanked": "INTEGER DEFAULT 0", - "source": "TEXT", - "enriched_at": "DATETIME", + "source": colTypeText, + "enriched_at": sqliteDatetime, } if db.dialect == DialectPostgres { diff --git a/internal/handler/cargo_test.go b/internal/handler/cargo_test.go index 5ce81b6..10d3faf 100644 --- a/internal/handler/cargo_test.go +++ b/internal/handler/cargo_test.go @@ -9,7 +9,7 @@ import ( "testing" "time" - "github.com/git-pkgs/proxy/internal/cooldown" + "github.com/git-pkgs/cooldown" ) func cargoTestProxy() *Proxy { diff --git a/internal/handler/composer.go b/internal/handler/composer.go index 0933ece..065ddf9 100644 --- a/internal/handler/composer.go +++ b/internal/handler/composer.go @@ -16,6 +16,7 @@ import ( const ( composerUpstream = "https://packagist.org" composerRepo = "https://repo.packagist.org" + composerUnset = "__unset" vendorPackageParts = 2 ) @@ -150,7 +151,8 @@ func (h *ComposerHandler) rewriteMetadata(body []byte) ([]byte, error) { // expandMinifiedVersions expands the Composer v2 minified format where each // version entry only contains fields that differ from the previous entry. -// The "~dev" sentinel string resets the inheritance chain. +// The "~dev" sentinel string resets the inheritance chain, and the "__unset" +// value removes a field from the inherited state. func expandMinifiedVersions(versionList []any) []any { expanded := make([]any, 0, len(versionList)) inherited := map[string]any{} @@ -174,6 +176,10 @@ func expandMinifiedVersions(versionList []any) []any { merged[k] = deepCopyValue(val) } for k, val := range vmap { + if val == composerUnset { + delete(merged, k) + continue + } merged[k] = val } diff --git a/internal/handler/composer_test.go b/internal/handler/composer_test.go index 94ff8cb..baf13b6 100644 --- a/internal/handler/composer_test.go +++ b/internal/handler/composer_test.go @@ -7,7 +7,7 @@ import ( "testing" "time" - "github.com/git-pkgs/proxy/internal/cooldown" + "github.com/git-pkgs/cooldown" ) func TestComposerRewriteMetadata(t *testing.T) { @@ -177,6 +177,80 @@ func TestComposerRewriteMetadataMinifiedDevReset(t *testing.T) { } } +func TestComposerRewriteMetadataUnset(t *testing.T) { + h := &ComposerHandler{ + proxy: &Proxy{Logger: slog.Default()}, + proxyURL: "http://localhost:8080", + } + + // In the minified format, "__unset" removes a field from the inherited + // state. v1.29.0 has require-dev, v1.28.0 unsets it, v1.27.0 inherits the + // unset state. Composer rejects metadata where require-dev (or any link + // field) is the literal string "__unset" rather than an object. + input := `{ + "minified": "composer/2.0", + "packages": { + "venturecraft/revisionable": [ + { + "name": "venturecraft/revisionable", + "version": "1.29.0", + "require": {"php": ">=5.4"}, + "require-dev": {"orchestra/testbench": "~3.0"}, + "dist": {"url": "https://example.com/a.zip", "type": "zip"} + }, + { + "version": "1.28.0", + "require-dev": "__unset" + }, + { + "version": "1.27.0" + }, + { + "version": "1.26.0", + "require-dev": {"foo/bar": "1.0"} + } + ] + } + }` + + output, err := h.rewriteMetadata([]byte(input)) + if err != nil { + t.Fatalf("rewriteMetadata failed: %v", err) + } + + var result map[string]any + if err := json.Unmarshal(output, &result); err != nil { + t.Fatalf("failed to parse output: %v", err) + } + + versions := result["packages"].(map[string]any)["venturecraft/revisionable"].([]any) + if len(versions) != 4 { + t.Fatalf("expected 4 versions, got %d", len(versions)) + } + + byVersion := map[string]map[string]any{} + for _, v := range versions { + vmap := v.(map[string]any) + byVersion[vmap["version"].(string)] = vmap + } + + if _, ok := byVersion["1.29.0"]["require-dev"].(map[string]any); !ok { + t.Errorf("1.29.0 require-dev should be an object, got %T", byVersion["1.29.0"]["require-dev"]) + } + if rd, ok := byVersion["1.28.0"]["require-dev"]; ok { + t.Errorf("1.28.0 require-dev should be absent, got %v", rd) + } + if rd, ok := byVersion["1.27.0"]["require-dev"]; ok { + t.Errorf("1.27.0 require-dev should be absent (inherited unset), got %v", rd) + } + if _, ok := byVersion["1.26.0"]["require-dev"].(map[string]any); !ok { + t.Errorf("1.26.0 require-dev should be an object, got %T", byVersion["1.26.0"]["require-dev"]) + } + if _, ok := byVersion["1.27.0"]["require"].(map[string]any); !ok { + t.Error("1.27.0 should still inherit require from 1.29.0") + } +} + func TestComposerRewriteMetadataCooldownPreservesNames(t *testing.T) { now := time.Now() old := now.Add(-10 * 24 * time.Hour).Format(time.RFC3339) diff --git a/internal/handler/conda.go b/internal/handler/conda.go index a986f01..cfa20c8 100644 --- a/internal/handler/conda.go +++ b/internal/handler/conda.go @@ -140,7 +140,7 @@ func (h *CondaHandler) handleRepodata(w http.ResponseWriter, r *http.Request) { http.Error(w, "failed to create request", http.StatusInternalServerError) return } - req.Header.Set("Accept-Encoding", "gzip") + req.Header.Set(headerAcceptEncoding, "gzip") resp, err := h.proxy.HTTPClient.Do(req) if err != nil { @@ -161,7 +161,7 @@ func (h *CondaHandler) handleRepodata(w http.ResponseWriter, r *http.Request) { return } - body, err := ReadMetadata(resp.Body) + body, err := h.proxy.ReadMetadata(resp.Body) if err != nil { http.Error(w, "failed to read response", http.StatusInternalServerError) return @@ -241,5 +241,5 @@ func (h *CondaHandler) proxyCached(w http.ResponseWriter, r *http.Request) { // proxyUpstream forwards a request to Anaconda without caching. func (h *CondaHandler) proxyUpstream(w http.ResponseWriter, r *http.Request) { - h.proxy.ProxyUpstream(w, r, h.upstreamURL+r.URL.Path, []string{"Accept-Encoding"}) + h.proxy.ProxyUpstream(w, r, h.upstreamURL+r.URL.Path, []string{headerAcceptEncoding}) } diff --git a/internal/handler/conda_test.go b/internal/handler/conda_test.go index 24b0236..1b57039 100644 --- a/internal/handler/conda_test.go +++ b/internal/handler/conda_test.go @@ -8,7 +8,7 @@ import ( "testing" "time" - "github.com/git-pkgs/proxy/internal/cooldown" + "github.com/git-pkgs/cooldown" ) func TestCondaParseFilename(t *testing.T) { diff --git a/internal/handler/cran.go b/internal/handler/cran.go index 246fcaa..0ecd2a3 100644 --- a/internal/handler/cran.go +++ b/internal/handler/cran.go @@ -159,5 +159,5 @@ func (h *CRANHandler) proxyCached(w http.ResponseWriter, r *http.Request) { // proxyUpstream forwards a request to CRAN without caching. func (h *CRANHandler) proxyUpstream(w http.ResponseWriter, r *http.Request) { - h.proxy.ProxyUpstream(w, r, h.upstreamURL+r.URL.Path, []string{"Accept-Encoding"}) + h.proxy.ProxyUpstream(w, r, h.upstreamURL+r.URL.Path, []string{headerAcceptEncoding}) } diff --git a/internal/handler/download_test.go b/internal/handler/download_test.go index 639e976..980e234 100644 --- a/internal/handler/download_test.go +++ b/internal/handler/download_test.go @@ -673,7 +673,7 @@ func TestMavenHandler_DownloadCacheHit(t *testing.T) { proxy, db, store, _ := setupTestProxy(t) seedPackageWithPURL(t, db, store, "maven", "com.google.guava:guava", "32.1.3-jre", "guava-32.1.3-jre.jar", "jar content") - h := NewMavenHandler(proxy, "http://localhost") + h := NewMavenHandler(proxy, "http://localhost", "", "") srv := httptest.NewServer(h.Routes()) defer srv.Close() @@ -730,7 +730,7 @@ func TestMavenHandler_MetadataProxied(t *testing.T) { func TestMavenHandler_EmptyPathNotFound(t *testing.T) { proxy, _, _, _ := setupTestProxy(t) - h := NewMavenHandler(proxy, "http://localhost") + h := NewMavenHandler(proxy, "http://localhost", "", "") srv := httptest.NewServer(h.Routes()) defer srv.Close() @@ -748,7 +748,7 @@ func TestMavenHandler_EmptyPathNotFound(t *testing.T) { func TestMavenHandler_ArtifactExtensions(t *testing.T) { proxy, _, _, fetcher := setupTestProxy(t) - extensions := []string{".jar", ".war", ".ear", ".pom", ".aar", ".klib"} + extensions := []string{".jar", ".war", ".ear", ".pom", ".aar", ".klib", ".module"} for _, ext := range extensions { fetcher.artifact = &fetch.Artifact{ Body: io.NopCloser(strings.NewReader("artifact")), @@ -756,7 +756,7 @@ func TestMavenHandler_ArtifactExtensions(t *testing.T) { } fetcher.fetchCalled = false - h := NewMavenHandler(proxy, "http://localhost") + h := NewMavenHandler(proxy, "http://localhost", "", "") upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { t.Errorf("should not proxy artifact file %s to upstream", ext) @@ -789,7 +789,7 @@ func TestMavenHandler_CacheMiss(t *testing.T) { ContentType: "application/java-archive", } - h := NewMavenHandler(proxy, "http://localhost") + h := NewMavenHandler(proxy, "http://localhost", "", "") srv := httptest.NewServer(h.Routes()) defer srv.Close() @@ -809,6 +809,274 @@ func TestMavenHandler_CacheMiss(t *testing.T) { } } +func TestMavenHandler_GradlePluginMarkerFallbackAndCache(t *testing.T) { + tests := []struct { + name string + markerPath string + }{ + { + name: "Spotless", + markerPath: "/com/diffplug/spotless/com.diffplug.spotless.gradle.plugin/8.4.0/com.diffplug.spotless.gradle.plugin-8.4.0.pom", + }, + { + name: "BenManes", + markerPath: "/com/github/ben-manes/versions/com.github.ben-manes.versions.gradle.plugin/0.54.0/com.github.ben-manes.versions.gradle.plugin-0.54.0.pom", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + proxy, _, _, fetcher := setupTestProxy(t) + + primaryUpstream := "https://repo1.maven.org/maven2" + pluginPortalUpstream := "https://plugins.gradle.org/m2" + primaryURL := primaryUpstream + tt.markerPath + + fetcher.fetchErrByURL = map[string]error{ + primaryURL: ErrUpstreamNotFound, + } + fetcher.artifact = &fetch.Artifact{ + Body: io.NopCloser(strings.NewReader("")), + ContentType: "application/xml", + } + + h := NewMavenHandler(proxy, "http://localhost", primaryUpstream, pluginPortalUpstream) + srv := httptest.NewServer(h.Routes()) + defer srv.Close() + + resp, err := http.Get(srv.URL + tt.markerPath) + if err != nil { + t.Fatalf("request failed: %v", err) + } + body, _ := io.ReadAll(resp.Body) + _ = resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusOK) + } + if string(body) != "" { + t.Fatalf("body = %q, want %q", body, "") + } + + wantFallbackURL := pluginPortalUpstream + tt.markerPath + if fetcher.fetchedURL != wantFallbackURL { + t.Fatalf("fallback URL = %q, want %q", fetcher.fetchedURL, wantFallbackURL) + } + + fetcher.fetchCalled = false + resp, err = http.Get(srv.URL + tt.markerPath) + if err != nil { + t.Fatalf("second request failed: %v", err) + } + _ = resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + t.Fatalf("second status = %d, want %d", resp.StatusCode, http.StatusOK) + } + if fetcher.fetchCalled { + t.Fatal("expected plugin marker POM to be served from cache on second request") + } + }) + } +} + +func TestMavenHandler_GradlePluginMarkerMetadataFallback(t *testing.T) { + paths := map[string]string{ + "/com/diffplug/spotless/com.diffplug.spotless.gradle.plugin/8.4.0/com.diffplug.spotless.gradle.plugin-8.4.0.pom.sha1": "sha1", + "/com/diffplug/spotless/com.diffplug.spotless.gradle.plugin/8.4.0/com.diffplug.spotless.gradle.plugin-8.4.0.pom.sha256": "sha256", + "/com/diffplug/spotless/com.diffplug.spotless.gradle.plugin/8.4.0/com.diffplug.spotless.gradle.plugin-8.4.0.pom.md5": "md5", + "/com/diffplug/spotless/com.diffplug.spotless.gradle.plugin/maven-metadata.xml": "", + } + + primaryHits := map[string]int{} + pluginHits := map[string]int{} + + primary := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + primaryHits[r.URL.Path]++ + if _, ok := paths[r.URL.Path]; ok { + http.NotFound(w, r) + return + } + t.Fatalf("unexpected path to primary upstream: %s", r.URL.Path) + })) + defer primary.Close() + + pluginPortal := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + pluginHits[r.URL.Path]++ + body, ok := paths[r.URL.Path] + if !ok { + http.NotFound(w, r) + return + } + _, _ = io.WriteString(w, body) + })) + defer pluginPortal.Close() + + proxy, _, _, _ := setupTestProxy(t) + proxy.HTTPClient = primary.Client() + + h := NewMavenHandler(proxy, "http://localhost", primary.URL, pluginPortal.URL) + srv := httptest.NewServer(h.Routes()) + defer srv.Close() + + for reqPath, wantBody := range paths { + resp, err := http.Get(srv.URL + reqPath) + if err != nil { + t.Fatalf("GET %s failed: %v", reqPath, err) + } + body, _ := io.ReadAll(resp.Body) + _ = resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + t.Fatalf("GET %s: status = %d, want %d", reqPath, resp.StatusCode, http.StatusOK) + } + if string(body) != wantBody { + t.Fatalf("GET %s: body = %q, want %q", reqPath, body, wantBody) + } + + if primaryHits[reqPath] == 0 { + t.Fatalf("GET %s did not hit primary upstream", reqPath) + } + if pluginHits[reqPath] == 0 { + t.Fatalf("GET %s did not hit plugin portal fallback", reqPath) + } + } +} + +func TestMavenHandler_GradlePluginImplementationMetadataFallback(t *testing.T) { + paths := map[string]string{ + "/com/diffplug/spotless/spotless-plugin-gradle/8.4.0/spotless-plugin-gradle-8.4.0.jar.sha1": "impl-sha1", + "/com/diffplug/spotless/spotless-plugin-gradle/8.4.0/spotless-plugin-gradle-8.4.0.jar.sha256": "impl-sha256", + } + + primaryHits := map[string]int{} + pluginHits := map[string]int{} + + primary := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + primaryHits[r.URL.Path]++ + if _, ok := paths[r.URL.Path]; ok { + http.NotFound(w, r) + return + } + t.Fatalf("unexpected path to primary upstream: %s", r.URL.Path) + })) + defer primary.Close() + + pluginPortal := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + pluginHits[r.URL.Path]++ + body, ok := paths[r.URL.Path] + if !ok { + http.NotFound(w, r) + return + } + _, _ = io.WriteString(w, body) + })) + defer pluginPortal.Close() + + proxy, _, _, _ := setupTestProxy(t) + proxy.HTTPClient = primary.Client() + + h := NewMavenHandler(proxy, "http://localhost", primary.URL, pluginPortal.URL) + srv := httptest.NewServer(h.Routes()) + defer srv.Close() + + for reqPath, wantBody := range paths { + resp, err := http.Get(srv.URL + reqPath) + if err != nil { + t.Fatalf("GET %s failed: %v", reqPath, err) + } + body, _ := io.ReadAll(resp.Body) + _ = resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + t.Fatalf("GET %s: status = %d, want %d", reqPath, resp.StatusCode, http.StatusOK) + } + if string(body) != wantBody { + t.Fatalf("GET %s: body = %q, want %q", reqPath, body, wantBody) + } + + if primaryHits[reqPath] == 0 { + t.Fatalf("GET %s did not hit primary upstream", reqPath) + } + if pluginHits[reqPath] == 0 { + t.Fatalf("GET %s did not hit plugin portal fallback", reqPath) + } + } +} + +func TestMavenHandler_GradlePluginImplementation_FallbackToPluginPortal(t *testing.T) { + proxy, _, _, fetcher := setupTestProxy(t) + + primaryUpstream := "https://repo1.maven.org/maven2" + pluginPortalUpstream := "https://plugins.gradle.org/m2" + implPath := "/com/diffplug/spotless/spotless-plugin-gradle/8.4.0/spotless-plugin-gradle-8.4.0.jar" + primaryURL := primaryUpstream + implPath + pluginPortalURL := pluginPortalUpstream + implPath + + fetcher.fetchErrByURL = map[string]error{ + primaryURL: ErrUpstreamNotFound, + } + fetcher.artifact = &fetch.Artifact{ + Body: io.NopCloser(strings.NewReader("plugin impl jar")), + ContentType: "application/java-archive", + } + + h := NewMavenHandler(proxy, "http://localhost", primaryUpstream, pluginPortalUpstream) + srv := httptest.NewServer(h.Routes()) + defer srv.Close() + + resp, err := http.Get(srv.URL + implPath) + if err != nil { + t.Fatalf("request failed: %v", err) + } + body, _ := io.ReadAll(resp.Body) + _ = resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusOK) + } + if string(body) != "plugin impl jar" { + t.Fatalf("body = %q, want %q", body, "plugin impl jar") + } + + if fetcher.fetchedURL != pluginPortalURL { + t.Fatalf("implementation artifact should fallback to plugin portal; fetched URL = %q, want %q", fetcher.fetchedURL, pluginPortalURL) + } +} + +func TestMavenHandler_GradlePluginImplementation_NotFoundInBothUpstreams(t *testing.T) { + proxy, _, _, fetcher := setupTestProxy(t) + + primaryUpstream := "https://repo1.maven.org/maven2" + pluginPortalUpstream := "https://plugins.gradle.org/m2" + implPath := "/com/diffplug/spotless/spotless-plugin-gradle/8.4.0/spotless-plugin-gradle-8.4.0.jar" + primaryURL := primaryUpstream + implPath + pluginPortalURL := pluginPortalUpstream + implPath + + fetcher.fetchErrByURL = map[string]error{ + primaryURL: ErrUpstreamNotFound, + pluginPortalURL: ErrUpstreamNotFound, + } + + h := NewMavenHandler(proxy, "http://localhost", primaryUpstream, pluginPortalUpstream) + srv := httptest.NewServer(h.Routes()) + defer srv.Close() + + resp, err := http.Get(srv.URL + implPath) + if err != nil { + t.Fatalf("request failed: %v", err) + } + _ = resp.Body.Close() + + if resp.StatusCode != http.StatusNotFound { + t.Fatalf("status = %d, want %d", resp.StatusCode, http.StatusNotFound) + } + + if fetcher.fetchedURL != pluginPortalURL { + t.Fatalf("expected fallback attempt to plugin portal; fetched URL = %q, want %q", fetcher.fetchedURL, pluginPortalURL) + } +} + func TestNuGetHandler_DownloadCacheMiss(t *testing.T) { proxy, _, _, fetcher := setupTestProxy(t) fetcher.artifact = &fetch.Artifact{ diff --git a/internal/handler/gem.go b/internal/handler/gem.go index bdb4bb9..9ec57e3 100644 --- a/internal/handler/gem.go +++ b/internal/handler/gem.go @@ -182,7 +182,7 @@ func (h *GemHandler) fetchCompactIndex(r *http.Request, name string) (*http.Resp if err != nil { return nil, err } - for _, hdr := range []string{"Accept", "Accept-Encoding", "If-None-Match", "If-Modified-Since"} { + for _, hdr := range []string{"Accept", headerAcceptEncoding, "If-None-Match", "If-Modified-Since"} { if v := r.Header.Get(hdr); v != "" { req.Header.Set(hdr, v) } @@ -311,7 +311,7 @@ func (h *GemHandler) proxyUpstream(w http.ResponseWriter, r *http.Request) { } // Copy relevant headers - for _, h := range []string{"Accept", "Accept-Encoding", "If-None-Match", "If-Modified-Since"} { + for _, h := range []string{"Accept", headerAcceptEncoding, "If-None-Match", "If-Modified-Since"} { if v := r.Header.Get(h); v != "" { req.Header.Set(h, v) } diff --git a/internal/handler/gem_test.go b/internal/handler/gem_test.go index 6dce324..7d90946 100644 --- a/internal/handler/gem_test.go +++ b/internal/handler/gem_test.go @@ -10,7 +10,7 @@ import ( "testing" "time" - "github.com/git-pkgs/proxy/internal/cooldown" + "github.com/git-pkgs/cooldown" ) func TestGemParseFilename(t *testing.T) { diff --git a/internal/handler/gradle.go b/internal/handler/gradle.go index 41c9f76..3b703be 100644 --- a/internal/handler/gradle.go +++ b/internal/handler/gradle.go @@ -7,7 +7,9 @@ import ( "regexp" "strconv" "strings" + "time" + "github.com/git-pkgs/proxy/internal/metrics" "github.com/git-pkgs/proxy/internal/storage" ) @@ -74,7 +76,7 @@ func (h *GradleBuildCacheHandler) parseCacheKey(urlPath string) (string, int) { return "", http.StatusBadRequest } - if keyPath == "" || strings.Contains(keyPath, "/") { + if strings.Contains(keyPath, "/") { return "", http.StatusNotFound } @@ -94,18 +96,28 @@ func (h *GradleBuildCacheHandler) handleGetOrHead(w http.ResponseWriter, r *http w.Header().Set("Content-Type", gradleBuildCacheContentType) if r.Method == http.MethodHead { + existsStart := time.Now() exists, err := h.proxy.Storage.Exists(r.Context(), storagePath) + metrics.RecordStorageOperation("read", time.Since(existsStart)) if err != nil { + metrics.RecordStorageError("read") h.proxy.Logger.Error("failed to check gradle build cache entry", "key", key, "error", err) http.Error(w, "failed to read cache entry", http.StatusInternalServerError) return } if !exists { + metrics.RecordCacheMiss("gradle") http.NotFound(w, r) return } + metrics.RecordCacheHit("gradle") - if size, err := h.proxy.Storage.Size(r.Context(), storagePath); err == nil && size >= 0 { + sizeStart := time.Now() + size, err := h.proxy.Storage.Size(r.Context(), storagePath) + metrics.RecordStorageOperation("read", time.Since(sizeStart)) + if err != nil { + metrics.RecordStorageError("read") + } else if size >= 0 { w.Header().Set("Content-Length", strconv.FormatInt(size, 10)) } @@ -113,17 +125,22 @@ func (h *GradleBuildCacheHandler) handleGetOrHead(w http.ResponseWriter, r *http return } + readStart := time.Now() reader, err := h.proxy.Storage.Open(r.Context(), storagePath) + metrics.RecordStorageOperation("read", time.Since(readStart)) if err != nil { if errors.Is(err, storage.ErrNotFound) { + metrics.RecordCacheMiss("gradle") http.NotFound(w, r) return } + metrics.RecordStorageError("read") h.proxy.Logger.Error("failed to open gradle build cache entry", "key", key, "error", err) http.Error(w, "failed to read cache entry", http.StatusInternalServerError) return } defer func() { _ = reader.Close() }() + metrics.RecordCacheHit("gradle") w.WriteHeader(http.StatusOK) _, _ = io.Copy(w, reader) @@ -138,7 +155,9 @@ func (h *GradleBuildCacheHandler) handlePut(w http.ResponseWriter, r *http.Reque r.Body = http.MaxBytesReader(w, r.Body, maxUploadSize) + storeStart := time.Now() _, hash, err := h.proxy.Storage.Store(r.Context(), storagePath, r.Body) + metrics.RecordStorageOperation("write", time.Since(storeStart)) if err != nil { var maxBytesErr *http.MaxBytesError if errors.As(err, &maxBytesErr) { @@ -146,6 +165,7 @@ func (h *GradleBuildCacheHandler) handlePut(w http.ResponseWriter, r *http.Reque return } + metrics.RecordStorageError("write") h.proxy.Logger.Error("failed to store gradle build cache entry", "key", key, "error", err) http.Error(w, "failed to write cache entry", http.StatusInternalServerError) return diff --git a/internal/handler/gradle_test.go b/internal/handler/gradle_test.go index f002a51..a05d07e 100644 --- a/internal/handler/gradle_test.go +++ b/internal/handler/gradle_test.go @@ -6,6 +6,9 @@ import ( "net/http/httptest" "strings" "testing" + + "github.com/git-pkgs/proxy/internal/metrics" + "github.com/prometheus/client_golang/prometheus/testutil" ) func TestGradleBuildCacheHandler_PutGetHead(t *testing.T) { @@ -227,3 +230,56 @@ func TestGradleBuildCacheHandler_PutTooLarge(t *testing.T) { t.Fatalf("PUT status = %d, want %d", resp.StatusCode, http.StatusRequestEntityTooLarge) } } + +func TestGradleBuildCacheHandler_RecordsMetrics(t *testing.T) { + proxy, _, _, _ := setupTestProxy(t) + h := NewGradleBuildCacheHandler(proxy) + srv := httptest.NewServer(h.Routes()) + defer srv.Close() + + hitsBefore := testutil.ToFloat64(metrics.CacheHits.WithLabelValues("gradle")) + missesBefore := testutil.ToFloat64(metrics.CacheMisses.WithLabelValues("gradle")) + + key := "metrics-key" + putReq, err := http.NewRequest(http.MethodPut, srv.URL+"/"+key, strings.NewReader("payload")) + if err != nil { + t.Fatalf("failed to create PUT request: %v", err) + } + putResp, err := http.DefaultClient.Do(putReq) + if err != nil { + t.Fatalf("PUT request failed: %v", err) + } + _ = putResp.Body.Close() + + getResp, err := http.Get(srv.URL + "/" + key) + if err != nil { + t.Fatalf("GET request failed: %v", err) + } + _ = getResp.Body.Close() + + headReq, err := http.NewRequest(http.MethodHead, srv.URL+"/"+key, nil) + if err != nil { + t.Fatalf("failed to create HEAD request: %v", err) + } + headResp, err := http.DefaultClient.Do(headReq) + if err != nil { + t.Fatalf("HEAD request failed: %v", err) + } + _ = headResp.Body.Close() + + missResp, err := http.Get(srv.URL + "/missing-key") + if err != nil { + t.Fatalf("GET miss request failed: %v", err) + } + _ = missResp.Body.Close() + + hitsAfter := testutil.ToFloat64(metrics.CacheHits.WithLabelValues("gradle")) + missesAfter := testutil.ToFloat64(metrics.CacheMisses.WithLabelValues("gradle")) + + if diff := hitsAfter - hitsBefore; diff != 2 { + t.Fatalf("cache hits delta = %.0f, want 2", diff) + } + if diff := missesAfter - missesBefore; diff != 1 { + t.Fatalf("cache misses delta = %.0f, want 1", diff) + } +} diff --git a/internal/handler/handler.go b/internal/handler/handler.go index d40a1dd..d06ca83 100644 --- a/internal/handler/handler.go +++ b/internal/handler/handler.go @@ -15,7 +15,7 @@ import ( "strings" "time" - "github.com/git-pkgs/proxy/internal/cooldown" + "github.com/git-pkgs/cooldown" "github.com/git-pkgs/proxy/internal/database" "github.com/git-pkgs/proxy/internal/metrics" "github.com/git-pkgs/proxy/internal/storage" @@ -50,23 +50,27 @@ const defaultHTTPTimeout = 30 * time.Second const contentTypeJSON = "application/json" -// maxMetadataSize is the maximum size of upstream metadata responses (100 MB). -// Package metadata (e.g. npm with many versions) can be large, but unbounded -// reads risk OOM if an upstream misbehaves. -const maxMetadataSize = 100 << 20 +const headerAcceptEncoding = "Accept-Encoding" -// ErrMetadataTooLarge is returned when upstream metadata exceeds maxMetadataSize. +// defaultMetadataMaxSize is used when Proxy.MetadataMaxSize is unset. +const defaultMetadataMaxSize = 100 << 20 + +// ErrMetadataTooLarge is returned when upstream metadata exceeds the configured limit. var ErrMetadataTooLarge = errors.New("metadata response exceeds size limit") // ReadMetadata reads an upstream response body with a size limit to prevent OOM // from unexpectedly large responses. Returns ErrMetadataTooLarge if the response // is truncated by the limit. -func ReadMetadata(r io.Reader) ([]byte, error) { - data, err := io.ReadAll(io.LimitReader(r, maxMetadataSize+1)) +func (p *Proxy) ReadMetadata(r io.Reader) ([]byte, error) { + limit := p.MetadataMaxSize + if limit <= 0 { + limit = defaultMetadataMaxSize + } + data, err := io.ReadAll(io.LimitReader(r, limit+1)) if err != nil { return nil, err } - if int64(len(data)) > maxMetadataSize { + if int64(len(data)) > limit { return nil, ErrMetadataTooLarge } return data, nil @@ -82,6 +86,7 @@ type Proxy struct { Cooldown *cooldown.Config CacheMetadata bool MetadataTTL time.Duration + MetadataMaxSize int64 GradleReadOnly bool GradleMaxUploadSize int64 DirectServe bool @@ -472,7 +477,7 @@ func (p *Proxy) FetchOrCacheMetadata(ctx context.Context, ecosystem, cacheKey, u cached, readErr := p.Storage.Open(ctx, entry.StoragePath) if readErr == nil { defer func() { _ = cached.Close() }() - data, readErr := ReadMetadata(cached) + data, readErr := p.ReadMetadata(cached) if readErr == nil { ct := contentTypeJSON if entry.ContentType.Valid { @@ -517,7 +522,7 @@ func (p *Proxy) FetchOrCacheMetadata(ctx context.Context, ecosystem, cacheKey, u } defer func() { _ = cached.Close() }() - data, readErr := ReadMetadata(cached) + data, readErr := p.ReadMetadata(cached) if readErr != nil { return nil, "", fmt.Errorf("upstream failed and cached read error: %w", err) } @@ -559,7 +564,7 @@ func (p *Proxy) fetchUpstreamMetadata(ctx context.Context, upstreamURL string, e return nil, "", "", zeroTime, errStale304 } defer func() { _ = cached.Close() }() - data, readErr := ReadMetadata(cached) + data, readErr := p.ReadMetadata(cached) if readErr != nil { return nil, "", "", zeroTime, errStale304 } @@ -581,7 +586,7 @@ func (p *Proxy) fetchUpstreamMetadata(ctx context.Context, upstreamURL string, e return nil, "", "", zeroTime, fmt.Errorf("upstream returned %d", resp.StatusCode) } - body, err := ReadMetadata(resp.Body) + body, err := p.ReadMetadata(resp.Body) if err != nil { return nil, "", "", zeroTime, fmt.Errorf("reading response: %w", err) } @@ -678,9 +683,14 @@ func (p *Proxy) ProxyCached(w http.ResponseWriter, r *http.Request, upstreamURL, return } + p.writeMetadataCachedResponse(w, r, ecosystem, cacheKey, body, contentType) +} + +// writeMetadataCachedResponse writes a cached metadata response and handles +// conditional request headers using metadata cache validators. +func (p *Proxy) writeMetadataCachedResponse(w http.ResponseWriter, r *http.Request, ecosystem, cacheKey string, body []byte, contentType string) { cm := p.lookupCachedMeta(ecosystem, cacheKey) - // Honor client conditional request headers if cm.etag != "" { if match := r.Header.Get("If-None-Match"); match != "" && match == cm.etag { w.WriteHeader(http.StatusNotModified) @@ -726,7 +736,7 @@ func (p *Proxy) proxyMetadataStream(w http.ResponseWriter, r *http.Request, upst } req.Header.Set("Accept", accept) - for _, header := range []string{"Accept-Encoding", "If-Modified-Since", "If-None-Match"} { + for _, header := range []string{headerAcceptEncoding, "If-Modified-Since", "If-None-Match"} { if v := r.Header.Get(header); v != "" { req.Header.Set(header, v) } diff --git a/internal/handler/handler_test.go b/internal/handler/handler_test.go index 3a1d2ab..bbcab72 100644 --- a/internal/handler/handler_test.go +++ b/internal/handler/handler_test.go @@ -97,10 +97,11 @@ func (s *mockStorage) Close() error { return nil } // mockFetcher implements fetch.FetcherInterface for testing. type mockFetcher struct { - artifact *fetch.Artifact - fetchErr error - fetchCalled bool - fetchedURL string + artifact *fetch.Artifact + fetchErr error + fetchErrByURL map[string]error + fetchCalled bool + fetchedURL string } func (f *mockFetcher) Fetch(ctx context.Context, url string) (*fetch.Artifact, error) { @@ -110,6 +111,11 @@ func (f *mockFetcher) Fetch(ctx context.Context, url string) (*fetch.Artifact, e func (f *mockFetcher) FetchWithHeaders(_ context.Context, url string, _ http.Header) (*fetch.Artifact, error) { f.fetchCalled = true f.fetchedURL = url + if f.fetchErrByURL != nil { + if err, ok := f.fetchErrByURL[url]; ok { + return nil, err + } + } if f.fetchErr != nil { return nil, f.fetchErr } diff --git a/internal/handler/hex_test.go b/internal/handler/hex_test.go index 19d34b4..b02540a 100644 --- a/internal/handler/hex_test.go +++ b/internal/handler/hex_test.go @@ -11,7 +11,7 @@ import ( "testing" "time" - "github.com/git-pkgs/proxy/internal/cooldown" + "github.com/git-pkgs/cooldown" "google.golang.org/protobuf/encoding/protowire" ) diff --git a/internal/handler/julia.go b/internal/handler/julia.go new file mode 100644 index 0000000..08b1fdf --- /dev/null +++ b/internal/handler/julia.go @@ -0,0 +1,347 @@ +package handler + +import ( + "archive/tar" + "bufio" + "bytes" + "compress/gzip" + "context" + "fmt" + "io" + "net/http" + "regexp" + "strings" + "sync" + + "github.com/BurntSushi/toml" +) + +const ( + juliaUpstream = "https://pkg.julialang.org" + juliaGeneralRegistryUUID = "23338594-aafe-5451-b93e-139f81909106" + juliaArtifactName = "_artifact" + juliaRegistryName = "_registry" +) + +var ( + juliaHexPattern = regexp.MustCompile(`^[0-9a-f]{40,64}$`) + juliaUUIDPattern = regexp.MustCompile(`^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$`) +) + +// JuliaHandler handles Julia Pkg server protocol requests. +// +// See https://pkgdocs.julialang.org/v1/registries/ and the PkgServer.jl +// reference implementation. The protocol is content-addressed: registry, +// package and artifact resources are all identified by git tree hashes +// and are immutable once published. +type JuliaHandler struct { + proxy *Proxy + upstreamURL string + + mu sync.RWMutex + names map[string]string + namesHash string + loadMu sync.Mutex +} + +// NewJuliaHandler creates a new Julia Pkg server handler. +func NewJuliaHandler(proxy *Proxy, _ string) *JuliaHandler { + return &JuliaHandler{ + proxy: proxy, + upstreamURL: juliaUpstream, + names: make(map[string]string), + } +} + +// Routes returns the HTTP handler for Julia requests. +func (h *JuliaHandler) Routes() http.Handler { + mux := http.NewServeMux() + + mux.HandleFunc("GET /registries", h.handleRegistries) + mux.HandleFunc("GET /registries.eager", h.handleRegistries) + mux.HandleFunc("GET /registries.conservative", h.handleRegistries) + mux.HandleFunc("GET /registry/{uuid}/{hash}", h.handleRegistry) + mux.HandleFunc("GET /package/{uuid}/{hash}", h.handlePackage) + mux.HandleFunc("GET /artifact/{hash}", h.handleArtifact) + mux.HandleFunc("GET /meta", h.proxyUpstream) + + return mux +} + +// handleRegistries serves the list of available registries. This is the only +// mutable endpoint in the protocol so it goes through the metadata cache. +func (h *JuliaHandler) handleRegistries(w http.ResponseWriter, r *http.Request) { + cacheKey := strings.TrimPrefix(r.URL.Path, "/") + h.proxy.ProxyCached(w, r, h.upstreamURL+r.URL.Path, "julia", cacheKey, "*/*") +} + +// handleRegistry serves an immutable registry tarball and refreshes the +// UUID→name map from its Registry.toml. +func (h *JuliaHandler) handleRegistry(w http.ResponseWriter, r *http.Request) { + uuid := r.PathValue("uuid") + hash := r.PathValue("hash") + if !validJuliaUUID(uuid) || !juliaHexPattern.MatchString(hash) { + http.Error(w, "invalid registry reference", http.StatusBadRequest) + return + } + + h.proxy.Logger.Info("julia registry request", "uuid", uuid, "hash", hash) + + upstreamURL := h.upstreamURL + r.URL.Path + result, err := h.proxy.GetOrFetchArtifactFromURL(r.Context(), "julia", juliaRegistryName, hash, hash+".tar.gz", upstreamURL) + if err != nil { + h.proxy.Logger.Error("failed to get registry", "error", err) + http.Error(w, "failed to fetch registry", http.StatusBadGateway) + return + } + + go h.refreshNamesFromRegistry(uuid, hash) + + ServeArtifact(w, result) +} + +// handlePackage serves an immutable package source tarball. +func (h *JuliaHandler) handlePackage(w http.ResponseWriter, r *http.Request) { + uuid := r.PathValue("uuid") + hash := r.PathValue("hash") + if !validJuliaUUID(uuid) || !juliaHexPattern.MatchString(hash) { + http.Error(w, "invalid package reference", http.StatusBadRequest) + return + } + + if err := h.ensureNames(r.Context()); err != nil { + h.proxy.Logger.Warn("julia name map unavailable, using uuid", "error", err) + } + name := h.resolveName(uuid) + + h.proxy.Logger.Info("julia package request", "name", name, "uuid", uuid, "hash", hash) + + upstreamURL := h.upstreamURL + r.URL.Path + result, err := h.proxy.GetOrFetchArtifactFromURL(r.Context(), "julia", name, hash, hash+".tar.gz", upstreamURL) + if err != nil { + h.proxy.Logger.Error("failed to get package", "error", err) + http.Error(w, "failed to fetch package", http.StatusBadGateway) + return + } + + ServeArtifact(w, result) +} + +// handleArtifact serves an immutable binary artifact tarball. Artifacts are +// anonymous content-addressed blobs with no associated package name. +func (h *JuliaHandler) handleArtifact(w http.ResponseWriter, r *http.Request) { + hash := r.PathValue("hash") + if !juliaHexPattern.MatchString(hash) { + http.Error(w, "invalid artifact hash", http.StatusBadRequest) + return + } + + h.proxy.Logger.Info("julia artifact request", "hash", hash) + + upstreamURL := h.upstreamURL + r.URL.Path + result, err := h.proxy.GetOrFetchArtifactFromURL(r.Context(), "julia", juliaArtifactName, hash, hash+".tar.gz", upstreamURL) + if err != nil { + h.proxy.Logger.Error("failed to get artifact", "error", err) + http.Error(w, "failed to fetch artifact", http.StatusBadGateway) + return + } + + ServeArtifact(w, result) +} + +// proxyUpstream forwards a request to the upstream Pkg server without caching. +func (h *JuliaHandler) proxyUpstream(w http.ResponseWriter, r *http.Request) { + h.proxy.ProxyUpstream(w, r, h.upstreamURL+r.URL.Path, nil) +} + +// resolveName returns the human-readable package name for a UUID, falling +// back to the UUID itself if it is not present in the loaded registry. +func (h *JuliaHandler) resolveName(uuid string) string { + h.mu.RLock() + defer h.mu.RUnlock() + if name, ok := h.names[uuid]; ok { + return name + } + return uuid +} + +// ensureNames lazily populates the UUID→name map from the General registry. +// Returns immediately if the map is already populated; otherwise blocks until +// a single in-flight load completes. Failed loads are retried on the next call. +func (h *JuliaHandler) ensureNames(ctx context.Context) error { + if h.namesLoaded() { + return nil + } + + h.loadMu.Lock() + defer h.loadMu.Unlock() + + if h.namesLoaded() { + return nil + } + return h.loadNamesFromUpstream(ctx) +} + +func (h *JuliaHandler) namesLoaded() bool { + h.mu.RLock() + defer h.mu.RUnlock() + return len(h.names) > 0 +} + +// loadNamesFromUpstream fetches the current /registries listing, downloads the +// General registry tarball at its current hash, and parses Registry.toml. +func (h *JuliaHandler) loadNamesFromUpstream(ctx context.Context) error { + hash, err := h.fetchGeneralRegistryHash(ctx) + if err != nil { + return err + } + return h.loadRegistryTarball(ctx, juliaGeneralRegistryUUID, hash) +} + +// fetchGeneralRegistryHash reads /registries and returns the current tree hash +// for the General registry. +func (h *JuliaHandler) fetchGeneralRegistryHash(ctx context.Context) (string, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, h.upstreamURL+"/registries", nil) + if err != nil { + return "", err + } + resp, err := h.proxy.HTTPClient.Do(req) + if err != nil { + return "", err + } + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("upstream /registries returned %d", resp.StatusCode) + } + + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + uuid, hash, ok := parseRegistryLine(scanner.Text()) + if ok && uuid == juliaGeneralRegistryUUID { + return hash, nil + } + } + if err := scanner.Err(); err != nil { + return "", err + } + return "", fmt.Errorf("general registry not listed in /registries") +} + +// refreshNamesFromRegistry reloads the UUID→name map from a registry tarball +// that has just been cached. Errors are logged but do not affect the response. +func (h *JuliaHandler) refreshNamesFromRegistry(uuid, hash string) { + if uuid != juliaGeneralRegistryUUID { + return + } + h.mu.RLock() + current := h.namesHash + h.mu.RUnlock() + if current == hash { + return + } + if err := h.loadRegistryTarball(context.Background(), uuid, hash); err != nil { + h.proxy.Logger.Warn("failed to refresh julia name map", "error", err) + } +} + +// loadRegistryTarball downloads a registry tarball and replaces the name map +// with the contents of its Registry.toml. +func (h *JuliaHandler) loadRegistryTarball(ctx context.Context, uuid, hash string) error { + url := fmt.Sprintf("%s/registry/%s/%s", h.upstreamURL, uuid, hash) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return err + } + resp, err := h.proxy.HTTPClient.Do(req) + if err != nil { + return err + } + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("upstream registry returned %d", resp.StatusCode) + } + + names, err := extractRegistryNames(resp.Body) + if err != nil { + return err + } + + h.mu.Lock() + h.names = names + h.namesHash = hash + h.mu.Unlock() + + h.proxy.Logger.Info("loaded julia registry name map", "packages", len(names), "hash", hash) + return nil +} + +// extractRegistryNames reads a gzipped registry tarball, finds Registry.toml +// at the root, and returns its [packages] table as a UUID→name map. +func extractRegistryNames(r io.Reader) (map[string]string, error) { + gz, err := gzip.NewReader(r) + if err != nil { + return nil, fmt.Errorf("opening gzip stream: %w", err) + } + defer func() { _ = gz.Close() }() + + tr := tar.NewReader(gz) + for { + hdr, err := tr.Next() + if err == io.EOF { + return nil, fmt.Errorf("no Registry.toml in tarball") + } + if err != nil { + return nil, err + } + if strings.TrimPrefix(hdr.Name, "./") != "Registry.toml" { + continue + } + + data, err := io.ReadAll(tr) + if err != nil { + return nil, err + } + return parseRegistryToml(data) + } +} + +type juliaRegistryFile struct { + Packages map[string]struct { + Name string `toml:"name"` + } `toml:"packages"` +} + +// parseRegistryToml decodes the [packages] table of a Registry.toml file. +func parseRegistryToml(data []byte) (map[string]string, error) { + var reg juliaRegistryFile + if _, err := toml.NewDecoder(bytes.NewReader(data)).Decode(®); err != nil { + return nil, fmt.Errorf("parsing Registry.toml: %w", err) + } + + names := make(map[string]string, len(reg.Packages)) + for uuid, pkg := range reg.Packages { + if pkg.Name != "" { + names[uuid] = pkg.Name + } + } + return names, nil +} + +// parseRegistryLine parses a single line from /registries of the form +// "/registry/{uuid}/{hash}" and returns the uuid and hash. +func parseRegistryLine(line string) (uuid, hash string, ok bool) { + line = strings.TrimSpace(line) + line = strings.TrimPrefix(line, "/registry/") + uuid, hash, found := strings.Cut(line, "/") + if !found || !validJuliaUUID(uuid) || !juliaHexPattern.MatchString(hash) { + return "", "", false + } + return uuid, hash, true +} + +// validJuliaUUID reports whether s looks like a lowercase RFC 4122 UUID. +func validJuliaUUID(s string) bool { + return juliaUUIDPattern.MatchString(s) +} diff --git a/internal/handler/julia_test.go b/internal/handler/julia_test.go new file mode 100644 index 0000000..68fb975 --- /dev/null +++ b/internal/handler/julia_test.go @@ -0,0 +1,167 @@ +package handler + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "log/slog" + "net/http" + "net/http/httptest" + "testing" +) + +func TestJuliaParseRegistryLine(t *testing.T) { + tests := []struct { + line string + wantUUID string + wantHash string + wantOK bool + }{ + { + "/registry/23338594-aafe-5451-b93e-139f81909106/342327538ed6c1ec54c69fa145e7b6bf5934201e", + "23338594-aafe-5451-b93e-139f81909106", + "342327538ed6c1ec54c69fa145e7b6bf5934201e", + true, + }, + { + " /registry/23338594-aafe-5451-b93e-139f81909106/342327538ed6c1ec54c69fa145e7b6bf5934201e\n", + "23338594-aafe-5451-b93e-139f81909106", + "342327538ed6c1ec54c69fa145e7b6bf5934201e", + true, + }, + {"/registry/not-a-uuid/0000", "", "", false}, + {"junk", "", "", false}, + {"", "", "", false}, + } + + for _, tt := range tests { + uuid, hash, ok := parseRegistryLine(tt.line) + if uuid != tt.wantUUID || hash != tt.wantHash || ok != tt.wantOK { + t.Errorf("parseRegistryLine(%q) = (%q, %q, %v), want (%q, %q, %v)", + tt.line, uuid, hash, ok, tt.wantUUID, tt.wantHash, tt.wantOK) + } + } +} + +func TestJuliaValidUUID(t *testing.T) { + tests := []struct { + s string + want bool + }{ + {"23338594-aafe-5451-b93e-139f81909106", true}, + {"295af30f-e4ad-537b-8983-00126c2a3abe", true}, + {"23338594-AAFE-5451-b93e-139f81909106", false}, + {"23338594aafe5451b93e139f81909106", false}, + {"23338594-aafe-5451-b93e-139f8190910", false}, + {"23338594-aafe-5451-b93e-139f81909106-", false}, + {"23338594-gafe-5451-b93e-139f81909106", false}, + {"", false}, + } + + for _, tt := range tests { + if got := validJuliaUUID(tt.s); got != tt.want { + t.Errorf("validJuliaUUID(%q) = %v, want %v", tt.s, got, tt.want) + } + } +} + +func TestJuliaParseRegistryToml(t *testing.T) { + data := []byte(`name = "General" +uuid = "23338594-aafe-5451-b93e-139f81909106" + +[packages] +295af30f-e4ad-537b-8983-00126c2a3abe = { name = "Revise", path = "R/Revise" } +91a5bcdd-55d7-5caf-9e0b-520d859cae80 = { name = "Plots", path = "P/Plots" } +`) + + names, err := parseRegistryToml(data) + if err != nil { + t.Fatalf("parseRegistryToml: %v", err) + } + if got := names["295af30f-e4ad-537b-8983-00126c2a3abe"]; got != "Revise" { + t.Errorf("names[Revise uuid] = %q, want Revise", got) + } + if got := names["91a5bcdd-55d7-5caf-9e0b-520d859cae80"]; got != "Plots" { + t.Errorf("names[Plots uuid] = %q, want Plots", got) + } + if len(names) != 2 { + t.Errorf("len(names) = %d, want 2", len(names)) + } +} + +func TestJuliaExtractRegistryNames(t *testing.T) { + registryToml := `name = "General" +[packages] +295af30f-e4ad-537b-8983-00126c2a3abe = { name = "Revise", path = "R/Revise" } +` + var buf bytes.Buffer + gw := gzip.NewWriter(&buf) + tw := tar.NewWriter(gw) + + for _, f := range []struct{ name, body string }{ + {"R/Revise/Package.toml", "name = \"Revise\"\n"}, + {"Registry.toml", registryToml}, + } { + if err := tw.WriteHeader(&tar.Header{Name: f.name, Mode: 0o644, Size: int64(len(f.body))}); err != nil { + t.Fatalf("WriteHeader: %v", err) + } + if _, err := tw.Write([]byte(f.body)); err != nil { + t.Fatalf("Write: %v", err) + } + } + if err := tw.Close(); err != nil { + t.Fatalf("tar Close: %v", err) + } + if err := gw.Close(); err != nil { + t.Fatalf("gzip Close: %v", err) + } + + names, err := extractRegistryNames(bytes.NewReader(buf.Bytes())) + if err != nil { + t.Fatalf("extractRegistryNames: %v", err) + } + if got := names["295af30f-e4ad-537b-8983-00126c2a3abe"]; got != "Revise" { + t.Errorf("names[Revise uuid] = %q, want Revise", got) + } +} + +func TestJuliaResolveName(t *testing.T) { + h := &JuliaHandler{ + proxy: &Proxy{Logger: slog.Default()}, + names: map[string]string{ + "295af30f-e4ad-537b-8983-00126c2a3abe": "Revise", + }, + } + + if got := h.resolveName("295af30f-e4ad-537b-8983-00126c2a3abe"); got != "Revise" { + t.Errorf("resolveName(known) = %q, want Revise", got) + } + if got := h.resolveName("00000000-0000-0000-0000-000000000000"); got != "00000000-0000-0000-0000-000000000000" { + t.Errorf("resolveName(unknown) = %q, want uuid fallback", got) + } +} + +func TestJuliaRoutesValidation(t *testing.T) { + h := NewJuliaHandler(&Proxy{Logger: slog.Default()}, "") + routes := h.Routes() + + tests := []struct { + path string + want int + }{ + {"/package/not-a-uuid/342327538ed6c1ec54c69fa145e7b6bf5934201e", http.StatusBadRequest}, + {"/package/295af30f-e4ad-537b-8983-00126c2a3abe/short", http.StatusBadRequest}, + {"/registry/295af30f-e4ad-537b-8983-00126c2a3abe/zzzz", http.StatusBadRequest}, + {"/artifact/nothex", http.StatusBadRequest}, + {"/nope", http.StatusNotFound}, + } + + for _, tt := range tests { + req := httptest.NewRequest(http.MethodGet, tt.path, nil) + rr := httptest.NewRecorder() + routes.ServeHTTP(rr, req) + if rr.Code != tt.want { + t.Errorf("GET %s = %d, want %d", tt.path, rr.Code, tt.want) + } + } +} diff --git a/internal/handler/maven.go b/internal/handler/maven.go index 86664a2..c423645 100644 --- a/internal/handler/maven.go +++ b/internal/handler/maven.go @@ -1,6 +1,7 @@ package handler import ( + "errors" "fmt" "net/http" "path" @@ -8,23 +9,33 @@ import ( ) const ( - mavenUpstream = "https://repo1.maven.org/maven2" - minMavenParts = 4 // group path segments + artifact + version + filename + mavenCentralUpstream = "https://repo1.maven.org/maven2" + gradlePluginPortalUpstream = "https://plugins.gradle.org/m2" + minMavenParts = 4 // group path segments + artifact + version + filename ) // MavenHandler handles Maven repository protocol requests. type MavenHandler struct { - proxy *Proxy - upstreamURL string - proxyURL string + proxy *Proxy + upstreamURL string + pluginPortalUpstreamURL string + proxyURL string } // NewMavenHandler creates a new Maven repository handler. -func NewMavenHandler(proxy *Proxy, proxyURL string) *MavenHandler { +func NewMavenHandler(proxy *Proxy, proxyURL, upstreamURL, pluginPortalUpstreamURL string) *MavenHandler { + if strings.TrimSpace(upstreamURL) == "" { + upstreamURL = mavenCentralUpstream + } + if strings.TrimSpace(pluginPortalUpstreamURL) == "" { + pluginPortalUpstreamURL = gradlePluginPortalUpstream + } + return &MavenHandler{ - proxy: proxy, - upstreamURL: mavenUpstream, - proxyURL: strings.TrimSuffix(proxyURL, "/"), + proxy: proxy, + upstreamURL: strings.TrimSuffix(upstreamURL, "/"), + pluginPortalUpstreamURL: strings.TrimSuffix(pluginPortalUpstreamURL, "/"), + proxyURL: strings.TrimSuffix(proxyURL, "/"), } } @@ -51,8 +62,7 @@ func (h *MavenHandler) handleRequest(w http.ResponseWriter, r *http.Request) { filename := path.Base(urlPath) if h.isMetadataFile(filename) { - cacheKey := strings.ReplaceAll(urlPath, "/", "_") - h.proxy.ProxyCached(w, r, h.upstreamURL+r.URL.Path, "maven", cacheKey, "*/*") + h.handleMetadata(w, r, urlPath) return } @@ -66,6 +76,32 @@ func (h *MavenHandler) handleRequest(w http.ResponseWriter, r *http.Request) { h.proxyUpstream(w, r) } +func (h *MavenHandler) handleMetadata(w http.ResponseWriter, r *http.Request, urlPath string) { + cacheKey := strings.ReplaceAll(urlPath, "/", "_") + upstreamURL := fmt.Sprintf("%s/%s", h.upstreamURL, urlPath) + + body, contentType, err := h.proxy.FetchOrCacheMetadata(r.Context(), "maven", cacheKey, upstreamURL, "*/*") + if err != nil { + if errors.Is(err, ErrUpstreamNotFound) { + pluginPortalURL := fmt.Sprintf("%s/%s", h.pluginPortalUpstreamURL, urlPath) + h.proxy.Logger.Info("maven metadata unavailable in primary upstream, trying Gradle Plugin Portal", + "path", urlPath) + body, contentType, err = h.proxy.FetchOrCacheMetadata(r.Context(), "maven", cacheKey, pluginPortalURL, "*/*") + } + } + if err != nil { + if errors.Is(err, ErrUpstreamNotFound) { + http.Error(w, "not found", http.StatusNotFound) + return + } + h.proxy.Logger.Error("metadata fetch failed", "error", err) + http.Error(w, "failed to fetch from upstream", http.StatusBadGateway) + return + } + + h.proxy.writeMetadataCachedResponse(w, r, "maven", cacheKey, body, contentType) +} + // handleDownload serves an artifact file, fetching and caching from upstream if needed. func (h *MavenHandler) handleDownload(w http.ResponseWriter, r *http.Request, urlPath string) { // Parse Maven path: group/artifact/version/filename @@ -86,6 +122,18 @@ func (h *MavenHandler) handleDownload(w http.ResponseWriter, r *http.Request, ur result, err := h.proxy.GetOrFetchArtifactFromURL(r.Context(), "maven", name, version, filename, upstreamURL) if err != nil { + if errors.Is(err, ErrUpstreamNotFound) { + pluginPortalURL := fmt.Sprintf("%s/%s", h.pluginPortalUpstreamURL, urlPath) + h.proxy.Logger.Info("maven artifact not found in primary upstream, trying Gradle Plugin Portal", + "group", group, "artifact", artifact, "version", version, "filename", filename) + result, err = h.proxy.GetOrFetchArtifactFromURL(r.Context(), "maven", name, version, filename, pluginPortalURL) + } + } + if err != nil { + if errors.Is(err, ErrUpstreamNotFound) { + http.Error(w, "not found", http.StatusNotFound) + return + } h.proxy.Logger.Error("failed to get artifact", "error", err) http.Error(w, "failed to fetch artifact", http.StatusBadGateway) return @@ -115,7 +163,7 @@ func (h *MavenHandler) parsePath(urlPath string) (group, artifact, version, file // isArtifactFile returns true if the filename looks like a Maven artifact. func (h *MavenHandler) isArtifactFile(filename string) bool { // Common artifact extensions - extensions := []string{".jar", ".war", ".ear", ".pom", ".aar", ".klib"} + extensions := []string{".jar", ".war", ".ear", ".pom", ".aar", ".klib", ".module"} for _, ext := range extensions { if strings.HasSuffix(filename, ext) { return true diff --git a/internal/handler/maven_test.go b/internal/handler/maven_test.go index df6917c..9ca5eb6 100644 --- a/internal/handler/maven_test.go +++ b/internal/handler/maven_test.go @@ -52,6 +52,7 @@ func TestMavenIsArtifactFile(t *testing.T) { }{ {"guava-32.1.3-jre.jar", true}, {"guava-32.1.3-jre.pom", true}, + {"guava-32.1.3-jre.module", true}, {"app-1.0.war", true}, {"lib-1.0.aar", true}, {"maven-metadata.xml", false}, @@ -65,3 +66,63 @@ func TestMavenIsArtifactFile(t *testing.T) { } } } + +func TestMavenIsMetadataFile(t *testing.T) { + h := &MavenHandler{} + + tests := []struct { + name string + filename string + want bool + }{ + { + name: "pom is artifact, not metadata", + filename: "com.diffplug.spotless.gradle.plugin-8.4.0.pom", + want: false, + }, + { + name: "pom checksum is metadata", + filename: "com.diffplug.spotless.gradle.plugin-8.4.0.pom.sha1", + want: true, + }, + { + name: "metadata file", + filename: "maven-metadata.xml", + want: true, + }, + { + name: "metadata checksum", + filename: "maven-metadata.xml.sha256", + want: true, + }, + { + name: "jar checksum is metadata", + filename: "guava-32.1.3-jre.jar.sha1", + want: true, + }, + { + name: "asc signature is metadata", + filename: "guava-32.1.3-jre.jar.asc", + want: true, + }, + { + name: "regular jar is not metadata", + filename: "guava-32.1.3-jre.jar", + want: false, + }, + { + name: "pom checksum is metadata", + filename: "guava-32.1.3-jre.pom.sha1", + want: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := h.isMetadataFile(tt.filename) + if got != tt.want { + t.Errorf("isMetadataFile(%q) = %v, want %v", tt.filename, got, tt.want) + } + }) + } +} diff --git a/internal/handler/npm_test.go b/internal/handler/npm_test.go index 7db3539..bc1edde 100644 --- a/internal/handler/npm_test.go +++ b/internal/handler/npm_test.go @@ -8,7 +8,7 @@ import ( "testing" "time" - "github.com/git-pkgs/proxy/internal/cooldown" + "github.com/git-pkgs/cooldown" ) const testVersion100 = "1.0.0" diff --git a/internal/handler/nuget.go b/internal/handler/nuget.go index 1c022fb..40b8b5f 100644 --- a/internal/handler/nuget.go +++ b/internal/handler/nuget.go @@ -172,7 +172,7 @@ func (h *NuGetHandler) handleRegistration(w http.ResponseWriter, r *http.Request http.Error(w, "failed to create request", http.StatusInternalServerError) return } - req.Header.Set("Accept-Encoding", "gzip") + req.Header.Set(headerAcceptEncoding, "gzip") resp, err := h.proxy.HTTPClient.Do(req) if err != nil { @@ -193,7 +193,7 @@ func (h *NuGetHandler) handleRegistration(w http.ResponseWriter, r *http.Request return } - body, err := ReadMetadata(resp.Body) + body, err := h.proxy.ReadMetadata(resp.Body) if err != nil { http.Error(w, "failed to read response", http.StatusInternalServerError) return @@ -338,8 +338,8 @@ func (h *NuGetHandler) proxyUpstream(w http.ResponseWriter, r *http.Request) { } // Copy accept-encoding for compression - if ae := r.Header.Get("Accept-Encoding"); ae != "" { - req.Header.Set("Accept-Encoding", ae) + if ae := r.Header.Get(headerAcceptEncoding); ae != "" { + req.Header.Set(headerAcceptEncoding, ae) } resp, err := h.proxy.HTTPClient.Do(req) diff --git a/internal/handler/nuget_test.go b/internal/handler/nuget_test.go index 68c9d22..b2164e5 100644 --- a/internal/handler/nuget_test.go +++ b/internal/handler/nuget_test.go @@ -10,7 +10,7 @@ import ( "testing" "time" - "github.com/git-pkgs/proxy/internal/cooldown" + "github.com/git-pkgs/cooldown" ) func nugetTestProxy() *Proxy { diff --git a/internal/handler/pub_test.go b/internal/handler/pub_test.go index 2788714..8a4c098 100644 --- a/internal/handler/pub_test.go +++ b/internal/handler/pub_test.go @@ -6,7 +6,7 @@ import ( "testing" "time" - "github.com/git-pkgs/proxy/internal/cooldown" + "github.com/git-pkgs/cooldown" ) func TestPubRewriteMetadata(t *testing.T) { diff --git a/internal/handler/pypi_test.go b/internal/handler/pypi_test.go index 9e2ade0..2b58960 100644 --- a/internal/handler/pypi_test.go +++ b/internal/handler/pypi_test.go @@ -10,7 +10,7 @@ import ( "testing" "time" - "github.com/git-pkgs/proxy/internal/cooldown" + "github.com/git-pkgs/cooldown" "github.com/git-pkgs/registries/fetch" ) diff --git a/internal/handler/read_metadata_test.go b/internal/handler/read_metadata_test.go index 60c1cf2..b13bddb 100644 --- a/internal/handler/read_metadata_test.go +++ b/internal/handler/read_metadata_test.go @@ -7,9 +7,12 @@ import ( ) func TestReadMetadata(t *testing.T) { + const limit = 1024 + p := &Proxy{MetadataMaxSize: limit} + t.Run("small body", func(t *testing.T) { data := []byte("hello world") - got, err := ReadMetadata(bytes.NewReader(data)) + got, err := p.ReadMetadata(bytes.NewReader(data)) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -19,27 +22,39 @@ func TestReadMetadata(t *testing.T) { }) t.Run("exactly at limit", func(t *testing.T) { - data := make([]byte, maxMetadataSize) + data := make([]byte, limit) for i := range data { data[i] = 'x' } - got, err := ReadMetadata(bytes.NewReader(data)) + got, err := p.ReadMetadata(bytes.NewReader(data)) if err != nil { t.Fatalf("unexpected error: %v", err) } - if len(got) != int(maxMetadataSize) { - t.Errorf("got length %d, want %d", len(got), maxMetadataSize) + if len(got) != limit { + t.Errorf("got length %d, want %d", len(got), limit) } }) t.Run("over limit returns error", func(t *testing.T) { - data := make([]byte, maxMetadataSize+100) + data := make([]byte, limit+100) for i := range data { data[i] = 'x' } - _, err := ReadMetadata(bytes.NewReader(data)) + _, err := p.ReadMetadata(bytes.NewReader(data)) if !errors.Is(err, ErrMetadataTooLarge) { t.Errorf("got error %v, want ErrMetadataTooLarge", err) } }) + + t.Run("zero limit uses default", func(t *testing.T) { + p := &Proxy{} + data := make([]byte, 1<<20) + got, err := p.ReadMetadata(bytes.NewReader(data)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(got) != len(data) { + t.Errorf("got length %d, want %d", len(got), len(data)) + } + }) } diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index 18ebe88..f23a5a9 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -128,6 +128,14 @@ var ( }, []string{"ecosystem"}, ) + + HealthProbeFailures = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "proxy_health_probe_failures_total", + Help: "Total number of storage health probe failures, by step (write|size|read|verify|delete).", + }, + []string{"step"}, + ) ) func init() { @@ -147,6 +155,7 @@ func init() { StorageErrors, ActiveRequests, IntegrityFailures, + HealthProbeFailures, ) } @@ -192,6 +201,12 @@ func RecordIntegrityFailure(ecosystem string) { IntegrityFailures.WithLabelValues(ecosystem).Inc() } +// RecordHealthProbeFailure increments the health probe failure counter. +// step is one of: "write", "size", "read", "verify", "delete". +func RecordHealthProbeFailure(step string) { + HealthProbeFailures.WithLabelValues(step).Inc() +} + // RecordStorageError increments storage error counter. func RecordStorageError(operation string) { StorageErrors.WithLabelValues(operation).Inc() diff --git a/internal/server/dashboard.go b/internal/server/dashboard.go index 1fe5388..1de294c 100644 --- a/internal/server/dashboard.go +++ b/internal/server/dashboard.go @@ -127,6 +127,7 @@ func supportedEcosystems() []string { "gem", "golang", "hex", + "julia", "maven", "npm", "nuget", @@ -176,6 +177,8 @@ func ecosystemBadgeClasses(ecosystem string) string { return base + " bg-green-100 text-green-700 dark:bg-green-900/50 dark:text-green-300" case "cran": return base + " bg-slate-100 text-slate-700 dark:bg-slate-800 dark:text-slate-300" + case "julia": + return base + " bg-emerald-100 text-emerald-700 dark:bg-emerald-900/50 dark:text-emerald-300" case "oci": return base + " bg-sky-100 text-sky-700 dark:bg-sky-900/50 dark:text-sky-300" case "deb": @@ -377,6 +380,17 @@ local({ r["CRAN"] <- "` + baseURL + `/cran" options(repos = r) })`), + }, + { + ID: "julia", + Name: "Julia", + Language: "Julia", + Endpoint: "/julia/", + Instructions: template.HTML(`

Set the Pkg server before starting Julia:

+
export JULIA_PKG_SERVER=` + baseURL + `/julia
+

Or inside a running session:

+
ENV["JULIA_PKG_SERVER"] = "` + baseURL + `/julia"
+using Pkg; Pkg.update()
`), }, { ID: "oci", diff --git a/internal/server/gradle_cache_eviction.go b/internal/server/gradle_cache_eviction.go index 1f5e95d..7f546d1 100644 --- a/internal/server/gradle_cache_eviction.go +++ b/internal/server/gradle_cache_eviction.go @@ -81,23 +81,7 @@ func sweepGradleBuildCache( return 0, 0, nil } - sort.Slice(entries, func(i, j int) bool { - iTime := entries[i].ModTime - jTime := entries[j].ModTime - - switch { - case iTime.IsZero() && jTime.IsZero(): - return entries[i].Path < entries[j].Path - case iTime.IsZero(): - return true - case jTime.IsZero(): - return false - case iTime.Equal(jTime): - return entries[i].Path < entries[j].Path - default: - return iTime.Before(jTime) - } - }) + sortOldestFirst(entries) deletedCount := 0 freedBytes := int64(0) @@ -154,3 +138,12 @@ func sweepGradleBuildCache( return deletedCount, freedBytes, nil } + +func sortOldestFirst(entries []storage.ObjectInfo) { + sort.Slice(entries, func(i, j int) bool { + if entries[i].ModTime.Equal(entries[j].ModTime) { + return entries[i].Path < entries[j].Path + } + return entries[i].ModTime.Before(entries[j].ModTime) + }) +} diff --git a/internal/server/health.go b/internal/server/health.go new file mode 100644 index 0000000..f4e4847 --- /dev/null +++ b/internal/server/health.go @@ -0,0 +1,182 @@ +// Package server implements the proxy HTTP server. +package server + +import ( + "bytes" + "context" + "crypto/rand" + "encoding/hex" + "errors" + "fmt" + "io" + "log/slog" + "strconv" + "sync" + "time" + + "github.com/git-pkgs/proxy/internal/metrics" + "github.com/git-pkgs/proxy/internal/storage" +) + +const ( + probePathPrefix = ".healthcheck/" + probeMarker = "proxy-healthcheck:" + probeSuffixBytes = 8 + defaultProbeTTL = 30 * time.Second + defaultProbeTimeout = 10 * time.Second +) + +// HealthResponse is the JSON payload returned by /health. +type HealthResponse struct { + Status string `json:"status"` + Checks map[string]HealthCheck `json:"checks"` +} + +// HealthCheck reports the status of a single subsystem check. +type HealthCheck struct { + Status string `json:"status"` + Error string `json:"error,omitempty"` + Step string `json:"step,omitempty"` +} + +// probeError tags a storage probe failure with the step that failed. +type probeError struct { + step string + err error +} + +func (e *probeError) Error() string { return e.step + ": " + e.err.Error() } +func (e *probeError) Unwrap() error { return e.err } + +// storageProbe runs a write → size-check → read → verify → delete round-trip +// against the storage backend. Returns nil on success or a *probeError on failure. +func storageProbe(ctx context.Context, s storage.Storage) (err error) { + suffix, suffixErr := randomSuffix() + if suffixErr != nil { + return &probeError{step: "write", err: fmt.Errorf("generating random suffix: %w", suffixErr)} + } + path := probePathPrefix + strconv.FormatInt(time.Now().UnixNano(), 10) + "-" + suffix + payload := []byte(probeMarker + suffix) + + // 1. Store + size, _, storeErr := s.Store(ctx, path, bytes.NewReader(payload)) + if storeErr != nil { + return &probeError{step: "write", err: storeErr} + } + // After Store succeeds, always attempt to delete on the way out so probe + // objects don't accumulate when a later step (size/open/read/verify) fails. + // Delete is reported as the primary error only if no earlier failure + // already set one. + defer func() { + if delErr := s.Delete(ctx, path); delErr != nil && err == nil { + err = &probeError{step: "delete", err: delErr} + } + }() + // 2. Size check + if size != int64(len(payload)) { + return &probeError{step: "size", err: fmt.Errorf("wrote %d bytes, expected %d", size, len(payload))} + } + // 3. Open + rc, openErr := s.Open(ctx, path) + if openErr != nil { + return &probeError{step: "read", err: openErr} + } + // 4. Read all (classify mid-stream errors as read, not verify). + // Close explicitly (not deferred) so the file handle is released before + // Delete — on Windows, an open handle prevents deletion. + data, readErr := io.ReadAll(rc) + _ = rc.Close() + if readErr != nil { + return &probeError{step: "read", err: readErr} + } + // 5. Verify + if !bytes.Equal(data, payload) { + return &probeError{step: "verify", err: fmt.Errorf("content mismatch")} + } + // 6. Delete is handled via the deferred cleanup above. + return nil +} + +// randomSuffix returns 8 cryptographically random bytes hex-encoded. +func randomSuffix() (string, error) { + b := make([]byte, probeSuffixBytes) + if _, err := rand.Read(b); err != nil { + return "", err + } + return hex.EncodeToString(b), nil +} + +// healthCache memoizes the result of storageProbe for a configurable TTL. +// It is safe for concurrent use. +type healthCache struct { + storage storage.Storage + interval time.Duration + probeTimeout time.Duration + logger *slog.Logger + + mu sync.Mutex + lastAt time.Time + lastErr error +} + +// newHealthCache builds a cache, parsing the interval from a duration string. +// Empty interval string defaults to 30s. "0" or "0s" disables caching. +func newHealthCache(s storage.Storage, intervalStr string, logger *slog.Logger) (*healthCache, error) { + interval := defaultProbeTTL + if intervalStr != "" { + d, err := time.ParseDuration(intervalStr) + if err != nil { + return nil, fmt.Errorf("parsing storage_probe_interval %q: %w", intervalStr, err) + } + interval = d + } + return &healthCache{ + storage: s, + interval: interval, + probeTimeout: defaultProbeTimeout, + logger: logger, + }, nil +} + +// Check returns the cached probe result if still fresh, otherwise runs a fresh probe. +// The probe runs under a context derived from context.Background() with a fixed +// timeout so that caller cancellation (e.g. client disconnect) cannot poison the +// cache with context.Canceled. +func (c *healthCache) Check() error { + c.mu.Lock() + defer c.mu.Unlock() + + // Cache hit + if c.interval > 0 && !c.lastAt.IsZero() && time.Since(c.lastAt) < c.interval { + return c.lastErr + } + + // Fresh probe under a detached context + probeCtx, cancel := context.WithTimeout(context.Background(), c.probeTimeout) + defer cancel() + err := storageProbe(probeCtx, c.storage) + + // Transition logging and metric increment happen only on the fresh-probe path. + c.logTransition(c.lastErr, err) + if err != nil { + var pe *probeError + if errors.As(err, &pe) { + metrics.RecordHealthProbeFailure(pe.step) + } else { + metrics.RecordHealthProbeFailure("unknown") + } + } + + c.lastErr = err + c.lastAt = time.Now() + return err +} + +func (c *healthCache) logTransition(prev, curr error) { + switch { + case prev != nil && curr == nil: + c.logger.Info("storage probe recovered") + case prev == nil && curr != nil: + c.logger.Error("storage probe failed", "error", curr.Error()) + } +} diff --git a/internal/server/health_test.go b/internal/server/health_test.go new file mode 100644 index 0000000..c0f70c9 --- /dev/null +++ b/internal/server/health_test.go @@ -0,0 +1,448 @@ +package server + +import ( + "bytes" + "context" + "errors" + "io" + "log/slog" + "strings" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/git-pkgs/proxy/internal/metrics" + "github.com/git-pkgs/proxy/internal/storage" + "github.com/prometheus/client_golang/prometheus/testutil" +) + +// fakeStorage is a minimal storage.Storage for probe tests with per-step failure injection. +type fakeStorage struct { + mu sync.Mutex + + storeCalls atomic.Int64 + openCalls atomic.Int64 + closeCalls atomic.Int64 + deleteCalls atomic.Int64 + + paths []string + payloads [][]byte + + // Failure injection. + storeErr error + openErr error + readErr error // returned by the io.ReadCloser.Read after partial bytes + deleteErr error + + // Misbehavior knobs. + sizeDelta int64 // added to the reported size from Store + readOverride []byte // if non-nil, Open returns a reader yielding these bytes instead of stored content + + // storeBlock, if non-nil, causes Store to block until the channel is closed or ctx is done. + storeBlock chan struct{} + + stored map[string][]byte +} + +func newFakeStorage() *fakeStorage { return &fakeStorage{stored: map[string][]byte{}} } + +func (f *fakeStorage) Store(ctx context.Context, path string, r io.Reader) (int64, string, error) { + f.storeCalls.Add(1) + if f.storeErr != nil { + return 0, "", f.storeErr + } + if f.storeBlock != nil { + select { + case <-f.storeBlock: + case <-ctx.Done(): + return 0, "", ctx.Err() + } + } + data, err := io.ReadAll(r) + if err != nil { + return 0, "", err + } + f.mu.Lock() + f.stored[path] = data + f.paths = append(f.paths, path) + f.payloads = append(f.payloads, data) + f.mu.Unlock() + return int64(len(data)) + f.sizeDelta, "fakehash", nil +} + +type fakeReadCloser struct { + data []byte + pos int + readErr error + closed *atomic.Int64 +} + +func (rc *fakeReadCloser) Read(p []byte) (int, error) { + if rc.pos >= len(rc.data) { + if rc.readErr != nil { + return 0, rc.readErr + } + return 0, io.EOF + } + n := copy(p, rc.data[rc.pos:]) + rc.pos += n + if rc.pos >= len(rc.data) && rc.readErr != nil { + return n, rc.readErr + } + return n, nil +} + +func (rc *fakeReadCloser) Close() error { rc.closed.Add(1); return nil } + +func (f *fakeStorage) Open(ctx context.Context, path string) (io.ReadCloser, error) { + f.openCalls.Add(1) + if f.openErr != nil { + return nil, f.openErr + } + f.mu.Lock() + data := f.stored[path] + f.mu.Unlock() + if f.readOverride != nil { + data = f.readOverride + } + return &fakeReadCloser{data: data, readErr: f.readErr, closed: &f.closeCalls}, nil +} + +func (f *fakeStorage) Exists(ctx context.Context, path string) (bool, error) { + f.mu.Lock() + defer f.mu.Unlock() + _, ok := f.stored[path] + return ok, nil +} + +func (f *fakeStorage) Delete(ctx context.Context, path string) error { + f.deleteCalls.Add(1) + if f.deleteErr != nil { + return f.deleteErr + } + f.mu.Lock() + delete(f.stored, path) + f.mu.Unlock() + return nil +} + +func (f *fakeStorage) Size(ctx context.Context, path string) (int64, error) { return 0, nil } +func (f *fakeStorage) SignedURL(ctx context.Context, path string, expiry time.Duration) (string, error) { + return "", storage.ErrSignedURLUnsupported +} +func (f *fakeStorage) UsedSpace(ctx context.Context) (int64, error) { return 0, nil } +func (f *fakeStorage) URL() string { return "fake://" } +func (f *fakeStorage) Close() error { return nil } + +// --- Tests follow. First test: happy path --- + +func TestStorageProbe_HappyPath(t *testing.T) { + fs := newFakeStorage() + if err := storageProbe(context.Background(), fs); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got := fs.storeCalls.Load(); got != 1 { + t.Errorf("Store calls = %d, want 1", got) + } + if got := fs.openCalls.Load(); got != 1 { + t.Errorf("Open calls = %d, want 1", got) + } + if got := fs.closeCalls.Load(); got != 1 { + t.Errorf("Close calls = %d, want 1", got) + } + if got := fs.deleteCalls.Load(); got != 1 { + t.Errorf("Delete calls = %d, want 1", got) + } + if len(fs.paths) != 1 || !strings.HasPrefix(fs.paths[0], ".healthcheck/") { + t.Errorf("unexpected probe path: %v", fs.paths) + } +} + +func TestStorageProbe_WriteFails(t *testing.T) { + fs := newFakeStorage() + fs.storeErr = errors.New("disk full") + err := storageProbe(context.Background(), fs) + var pe *probeError + if !errors.As(err, &pe) { + t.Fatalf("expected *probeError, got %T: %v", err, err) + } + if pe.step != "write" { + t.Errorf("step = %q, want write", pe.step) + } + if fs.openCalls.Load() != 0 { + t.Errorf("Open should not be called after write failure") + } +} + +func TestStorageProbe_SizeMismatch(t *testing.T) { + fs := newFakeStorage() + fs.sizeDelta = -1 // Report 1 byte fewer than actually written + err := storageProbe(context.Background(), fs) + var pe *probeError + if !errors.As(err, &pe) || pe.step != "size" { + t.Fatalf("step = %v, want size; err = %v", pe, err) + } + if fs.openCalls.Load() != 0 { + t.Errorf("Open should not be called after size mismatch") + } +} + +func TestStorageProbe_OpenFails(t *testing.T) { + fs := newFakeStorage() + fs.openErr = errors.New("access denied") + err := storageProbe(context.Background(), fs) + var pe *probeError + if !errors.As(err, &pe) || pe.step != "read" { + t.Fatalf("step = %v, want read; err = %v", pe, err) + } +} + +func TestStorageProbe_ReadMidStreamFails(t *testing.T) { + fs := newFakeStorage() + fs.readErr = errors.New("connection reset") + err := storageProbe(context.Background(), fs) + var pe *probeError + if !errors.As(err, &pe) || pe.step != "read" { + t.Fatalf("step = %v, want read (NOT verify); err = %v", pe, err) + } +} + +func TestStorageProbe_ContentMismatch(t *testing.T) { + fs := newFakeStorage() + fs.readOverride = []byte("wrong content") + err := storageProbe(context.Background(), fs) + var pe *probeError + if !errors.As(err, &pe) || pe.step != "verify" { + t.Fatalf("step = %v, want verify; err = %v", pe, err) + } +} + +func TestStorageProbe_DeleteFails(t *testing.T) { + fs := newFakeStorage() + fs.deleteErr = errors.New("permission denied") + err := storageProbe(context.Background(), fs) + var pe *probeError + if !errors.As(err, &pe) || pe.step != "delete" { + t.Fatalf("step = %v, want delete; err = %v", pe, err) + } +} + +// TestStorageProbe_CleanupOnNonDeleteFailure asserts that the probe object is +// deleted even when a step after Store (size/open/read/verify) fails, so +// probe artifacts don't accumulate in the storage backend. +func TestStorageProbe_CleanupOnNonDeleteFailure(t *testing.T) { + cases := []struct { + name string + inject func(*fakeStorage) + wantErr string + }{ + {"size mismatch", func(fs *fakeStorage) { fs.sizeDelta = -1 }, "size"}, + {"open fails", func(fs *fakeStorage) { fs.openErr = errors.New("open boom") }, "read"}, + {"read mid-stream", func(fs *fakeStorage) { fs.readErr = errors.New("mid-stream boom") }, "read"}, + {"content mismatch", func(fs *fakeStorage) { fs.readOverride = []byte("wrong") }, "verify"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + fs := newFakeStorage() + tc.inject(fs) + err := storageProbe(context.Background(), fs) + var pe *probeError + if !errors.As(err, &pe) || pe.step != tc.wantErr { + t.Fatalf("step = %v, want %q; err = %v", pe, tc.wantErr, err) + } + if got := fs.deleteCalls.Load(); got != 1 { + t.Errorf("deleteCalls = %d, want 1 (cleanup should run on non-delete failures)", got) + } + }) + } +} + +func TestStorageProbe_ReaderClosedOnReadFailure(t *testing.T) { + fs := newFakeStorage() + fs.readErr = errors.New("read error") + _ = storageProbe(context.Background(), fs) + if got := fs.closeCalls.Load(); got != fs.openCalls.Load() { + t.Errorf("closeCalls = %d, openCalls = %d (should match)", got, fs.openCalls.Load()) + } +} + +func TestStorageProbe_PathUniqueness(t *testing.T) { + fs := newFakeStorage() + for i := 0; i < 100; i++ { + if err := storageProbe(context.Background(), fs); err != nil { + t.Fatalf("probe %d: %v", i, err) + } + } + seen := make(map[string]bool) + for _, p := range fs.paths { + if !strings.HasPrefix(p, ".healthcheck/") { + t.Errorf("path missing prefix: %q", p) + } + if seen[p] { + t.Errorf("duplicate path: %q", p) + } + seen[p] = true + } +} + +// helper: a healthCache wired to a fakeStorage and a discard logger. +func newTestCache(fs *fakeStorage, interval time.Duration) *healthCache { + return &healthCache{ + storage: fs, + interval: interval, + probeTimeout: 5 * time.Second, + logger: discardLogger(), + } +} + +func discardLogger() *slog.Logger { + return slog.New(slog.NewTextHandler(io.Discard, nil)) +} + +func TestHealthCache_CacheHit(t *testing.T) { + fs := newFakeStorage() + c := newTestCache(fs, 30*time.Second) + if err := c.Check(); err != nil { + t.Fatalf("first check: %v", err) + } + if err := c.Check(); err != nil { + t.Fatalf("second check: %v", err) + } + if got := fs.storeCalls.Load(); got != 1 { + t.Errorf("storeCalls = %d, want 1 (second call should be cached)", got) + } +} + +func TestHealthCache_MissAfterTTL(t *testing.T) { + fs := newFakeStorage() + c := newTestCache(fs, 10*time.Millisecond) + _ = c.Check() + time.Sleep(20 * time.Millisecond) + _ = c.Check() + if got := fs.storeCalls.Load(); got != 2 { + t.Errorf("storeCalls = %d, want 2", got) + } +} + +func TestHealthCache_Disabled(t *testing.T) { + fs := newFakeStorage() + c := newTestCache(fs, 0) // interval = 0 means probe every call + _ = c.Check() + _ = c.Check() + if got := fs.storeCalls.Load(); got != 2 { + t.Errorf("storeCalls = %d, want 2", got) + } +} + +func TestHealthCache_LastAtNotAdvancedOnHit(t *testing.T) { + fs := newFakeStorage() + c := newTestCache(fs, 30*time.Second) + for i := 0; i < 100; i++ { + _ = c.Check() + } + if got := fs.storeCalls.Load(); got != 1 { + t.Errorf("storeCalls = %d, want 1 across 100 hits", got) + } +} + +func TestHealthCache_ConcurrentSingleFlight(t *testing.T) { + fs := newFakeStorage() + c := newTestCache(fs, 30*time.Second) + var wg sync.WaitGroup + for i := 0; i < 20; i++ { + wg.Add(1) + go func() { defer wg.Done(); _ = c.Check() }() + } + wg.Wait() + if got := fs.storeCalls.Load(); got != 1 { + t.Errorf("storeCalls = %d, want 1 with 20 concurrent callers", got) + } +} + +func TestHealthCache_FailureCounterIncrement(t *testing.T) { + fs := newFakeStorage() + fs.storeErr = errors.New("boom") + c := newTestCache(fs, 30*time.Second) + + before := testutil.ToFloat64(metrics.HealthProbeFailures.WithLabelValues("write")) + + // First call: fresh probe → counter +1 + _ = c.Check() + afterFirst := testutil.ToFloat64(metrics.HealthProbeFailures.WithLabelValues("write")) + if afterFirst-before != 1 { + t.Errorf("counter delta after first call = %v, want 1", afterFirst-before) + } + + // Second call: cache hit → counter NOT re-incremented + _ = c.Check() + afterSecond := testutil.ToFloat64(metrics.HealthProbeFailures.WithLabelValues("write")) + if afterSecond != afterFirst { + t.Errorf("counter changed on cache hit: %v → %v", afterFirst, afterSecond) + } +} + +func TestHealthCache_ProbeTimeout(t *testing.T) { + fs := newFakeStorage() + fs.storeBlock = make(chan struct{}) // Store will block until channel is closed (or never) + t.Cleanup(func() { close(fs.storeBlock) }) + + c := &healthCache{ + storage: fs, + interval: 30 * time.Second, + probeTimeout: 50 * time.Millisecond, + logger: discardLogger(), + } + start := time.Now() + err := c.Check() + elapsed := time.Since(start) + + if err == nil { + t.Fatal("expected timeout error, got nil") + } + if elapsed > 500*time.Millisecond { + t.Errorf("probe took %v, expected ~50ms (timeout not respected)", elapsed) + } +} + +func TestHealthCache_TransitionLogging(t *testing.T) { + fs := newFakeStorage() + var buf bytes.Buffer + logger := slog.New(slog.NewTextHandler(&buf, &slog.HandlerOptions{Level: slog.LevelInfo})) + c := &healthCache{ + storage: fs, + interval: 0, // probe every call + probeTimeout: 5 * time.Second, + logger: logger, + } + + // Steady ok state — should not log + _ = c.Check() + _ = c.Check() + if got := strings.Count(buf.String(), "storage probe"); got != 0 { + t.Errorf("steady-state logs = %d, want 0; output: %s", got, buf.String()) + } + + // ok → err transition: exactly one Error log + buf.Reset() + fs.storeErr = errors.New("boom") + _ = c.Check() + if !strings.Contains(buf.String(), "storage probe failed") { + t.Errorf("missing failure log on transition; output: %s", buf.String()) + } + + // err steady state — should not log again + buf.Reset() + _ = c.Check() + if buf.Len() != 0 { + t.Errorf("steady-err logs = %q, want empty", buf.String()) + } + + // err → ok transition: exactly one Info log + buf.Reset() + fs.storeErr = nil + _ = c.Check() + if !strings.Contains(buf.String(), "storage probe recovered") { + t.Errorf("missing recovery log on transition; output: %s", buf.String()) + } +} diff --git a/internal/server/server.go b/internal/server/server.go index a0983e5..7de5041 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -15,6 +15,7 @@ // - /conan/* - Conan C/C++ protocol // - /conda/* - Conda/Anaconda protocol // - /cran/* - CRAN (R) protocol +// - /julia/* - Julia Pkg server protocol // - /v2/* - OCI/Docker container registry protocol // - /debian/* - Debian/APT repository protocol // - /rpm/* - RPM/Yum repository protocol @@ -40,6 +41,7 @@ import ( "context" "database/sql" "encoding/json" + "errors" "fmt" "log/slog" "net/http" @@ -49,7 +51,7 @@ import ( swaggerdoc "github.com/git-pkgs/proxy/docs/swagger" "github.com/git-pkgs/proxy/internal/config" - "github.com/git-pkgs/proxy/internal/cooldown" + "github.com/git-pkgs/cooldown" "github.com/git-pkgs/proxy/internal/database" "github.com/git-pkgs/proxy/internal/enrichment" "github.com/git-pkgs/proxy/internal/handler" @@ -79,7 +81,8 @@ type Server struct { logger *slog.Logger http *http.Server templates *Templates - cancel context.CancelFunc + cancel context.CancelFunc + healthCache *healthCache } // New creates a new Server with the given configuration. @@ -125,12 +128,20 @@ func New(cfg *config.Config, logger *slog.Logger) (*Server, error) { return nil, fmt.Errorf("verifying storage connectivity: %w", err) } + hc, err := newHealthCache(store, cfg.Health.StorageProbeInterval, logger) + if err != nil { + _ = store.Close() + _ = db.Close() + return nil, fmt.Errorf("initializing health cache: %w", err) + } + return &Server{ - cfg: cfg, - db: db, - storage: store, - logger: logger, - templates: &Templates{}, + cfg: cfg, + db: db, + storage: store, + logger: logger, + templates: &Templates{}, + healthCache: hc, }, nil } @@ -149,6 +160,7 @@ func (s *Server) Start() error { proxy.Cooldown = cd proxy.CacheMetadata = s.cfg.CacheMetadata proxy.MetadataTTL = s.cfg.ParseMetadataTTL() + proxy.MetadataMaxSize = s.cfg.ParseMetadataMaxSize() proxy.GradleReadOnly = s.cfg.Gradle.BuildCache.ReadOnly proxy.GradleMaxUploadSize = s.cfg.ParseGradleBuildCacheMaxUploadSize() proxy.DirectServe = s.cfg.Storage.DirectServe @@ -182,13 +194,19 @@ func (s *Server) Start() error { hexHandler := handler.NewHexHandler(proxy, s.cfg.BaseURL) pubHandler := handler.NewPubHandler(proxy, s.cfg.BaseURL) pypiHandler := handler.NewPyPIHandler(proxy, s.cfg.BaseURL) - mavenHandler := handler.NewMavenHandler(proxy, s.cfg.BaseURL) + mavenHandler := handler.NewMavenHandler( + proxy, + s.cfg.BaseURL, + s.cfg.Upstream.Maven, + s.cfg.Upstream.GradlePluginPortal, + ) gradleHandler := handler.NewGradleBuildCacheHandler(proxy) nugetHandler := handler.NewNuGetHandler(proxy, s.cfg.BaseURL) composerHandler := handler.NewComposerHandler(proxy, s.cfg.BaseURL) conanHandler := handler.NewConanHandler(proxy, s.cfg.BaseURL) condaHandler := handler.NewCondaHandler(proxy, s.cfg.BaseURL) cranHandler := handler.NewCRANHandler(proxy, s.cfg.BaseURL) + juliaHandler := handler.NewJuliaHandler(proxy, s.cfg.BaseURL) containerHandler := handler.NewContainerHandler(proxy, s.cfg.BaseURL) debianHandler := handler.NewDebianHandler(proxy, s.cfg.BaseURL) rpmHandler := handler.NewRPMHandler(proxy, s.cfg.BaseURL) @@ -207,6 +225,7 @@ func (s *Server) Start() error { r.Mount("/conan", http.StripPrefix("/conan", conanHandler.Routes())) r.Mount("/conda", http.StripPrefix("/conda", condaHandler.Routes())) r.Mount("/cran", http.StripPrefix("/cran", cranHandler.Routes())) + r.Mount("/julia", http.StripPrefix("/julia", juliaHandler.Routes())) r.Mount("/v2", http.StripPrefix("/v2", containerHandler.Routes())) r.Mount("/debian", http.StripPrefix("/debian", debianHandler.Routes())) r.Mount("/rpm", http.StripPrefix("/rpm", rpmHandler.Routes())) @@ -799,23 +818,49 @@ func (s *Server) showComparePage(w http.ResponseWriter, ecosystem, name, version } } -// handleHealth responds with a simple health check. +// handleHealth responds with a structured JSON health report. +// // @Summary Health check // @Tags meta -// @Produce plain -// @Success 200 {string} string -// @Failure 503 {string} string +// @Produce json +// @Success 200 {object} HealthResponse +// @Failure 503 {object} HealthResponse // @Router /health [get] func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) { - // Check database connectivity + w.Header().Set("Content-Type", "application/json") + + resp := HealthResponse{Status: "ok", Checks: map[string]HealthCheck{}} + + // Database check (short-circuit; do not waste a storage probe call when DB is down). + // On DB failure the storage entry reports "skipped" rather than being omitted so + // the response always carries the same key set for monitors that expect it. if _, err := s.db.SchemaVersion(); err != nil { + resp.Status = "error" + resp.Checks["database"] = HealthCheck{Status: "error", Error: err.Error()} + resp.Checks["storage"] = HealthCheck{Status: "skipped"} w.WriteHeader(http.StatusServiceUnavailable) - _, _ = fmt.Fprint(w, "database error") + _ = json.NewEncoder(w).Encode(resp) return } + resp.Checks["database"] = HealthCheck{Status: "ok"} + + // Storage probe (via cache). + if err := s.healthCache.Check(); err != nil { + resp.Status = "error" + sc := HealthCheck{Status: "error", Error: err.Error()} + var pe *probeError + if errors.As(err, &pe) { + sc.Step = pe.step + } + resp.Checks["storage"] = sc + w.WriteHeader(http.StatusServiceUnavailable) + _ = json.NewEncoder(w).Encode(resp) + return + } + resp.Checks["storage"] = HealthCheck{Status: "ok"} w.WriteHeader(http.StatusOK) - _, _ = fmt.Fprint(w, "ok") + _ = json.NewEncoder(w).Encode(resp) } // StatsResponse contains cache statistics. diff --git a/internal/server/server_test.go b/internal/server/server_test.go index 574b6ba..e2dc1c2 100644 --- a/internal/server/server_test.go +++ b/internal/server/server_test.go @@ -81,13 +81,21 @@ func newTestServer(t *testing.T) *testServer { r.Mount("/pypi", http.StripPrefix("/pypi", pypiHandler.Routes())) r.Mount("/gradle", http.StripPrefix("/gradle", gradleHandler.Routes())) + hc, err := newHealthCache(store, "30s", logger) + if err != nil { + _ = db.Close() + _ = os.RemoveAll(tempDir) + t.Fatalf("failed to create health cache: %v", err) + } + // Create a minimal server struct for the handlers s := &Server{ - cfg: cfg, - db: db, - storage: store, - logger: logger, - templates: &Templates{}, + cfg: cfg, + db: db, + storage: store, + logger: logger, + templates: &Templates{}, + healthCache: hc, } r.Get("/health", s.handleHealth) @@ -179,12 +187,55 @@ func TestHealthEndpoint(t *testing.T) { ts.handler.ServeHTTP(w, req) if w.Code != http.StatusOK { - t.Errorf("expected status 200, got %d", w.Code) + t.Fatalf("status = %d, want 200; body: %s", w.Code, w.Body.String()) } + if got := w.Header().Get("Content-Type"); got != "application/json" { + t.Errorf("Content-Type = %q, want application/json", got) + } + var resp HealthResponse + if err := json.NewDecoder(w.Body).Decode(&resp); err != nil { + t.Fatalf("decoding response: %v", err) + } + if resp.Status != "ok" { + t.Errorf("status = %q, want ok", resp.Status) + } + if resp.Checks["database"].Status != "ok" { + t.Errorf("database check = %+v, want ok", resp.Checks["database"]) + } + if resp.Checks["storage"].Status != "ok" { + t.Errorf("storage check = %+v, want ok", resp.Checks["storage"]) + } +} - body := w.Body.String() - if body != "ok" { - t.Errorf("expected body 'ok', got %q", body) +func TestHealthEndpoint_DBFailureShortCircuits(t *testing.T) { + ts := newTestServer(t) + defer ts.close() + + // Force DB failure by closing the connection. + _ = ts.db.Close() + + req := httptest.NewRequest("GET", "/health", nil) + w := httptest.NewRecorder() + ts.handler.ServeHTTP(w, req) + + if w.Code != http.StatusServiceUnavailable { + t.Fatalf("status = %d, want 503; body: %s", w.Code, w.Body.String()) + } + var resp HealthResponse + if err := json.NewDecoder(w.Body).Decode(&resp); err != nil { + t.Fatalf("decoding: %v", err) + } + if resp.Status != "error" { + t.Errorf("status = %q, want error", resp.Status) + } + if resp.Checks["database"].Status != "error" { + t.Errorf("database check = %+v, want error", resp.Checks["database"]) + } + storage, present := resp.Checks["storage"] + if !present { + t.Error("storage key should be present (with status=skipped) on DB short-circuit") + } else if storage.Status != "skipped" { + t.Errorf("storage check = %+v, want status=skipped", storage) } }