pkg-proxy/internal/handler/npm.go
Andrew Nesbitt d62c42b8d7
Add mirror command and API for selective package mirroring
Add a `proxy mirror` CLI command and `/api/mirror` API endpoints that
pre-populate the cache from various input sources: individual PURLs,
SBOM files (CycloneDX and SPDX), or full registry enumeration.

The mirror reuses the existing handler.Proxy.GetOrFetchArtifact()
pipeline so cached artifacts are identical to those fetched on demand.
A bounded worker pool controls download parallelism.

Metadata caching is opt-in via `cache_metadata: true` in config (or
PROXY_CACHE_METADATA=true). The mirror command always enables it. When
enabled, upstream metadata responses are stored for offline fallback
with ETag-based conditional revalidation.

New internal/mirror package with Source interface, PURLSource,
SBOMSource, RegistrySource, and async JobStore. New metadata_cache
database table for offline metadata serving.
2026-04-13 09:01:04 +01:00

340 lines
9.2 KiB
Go

package handler
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"net/url"
"sort"
"strings"
"time"
"github.com/git-pkgs/purl"
)
// Constants shared by the npm handler.
const (
// npmUpstream is the upstream registry base URL that NewNPMHandler configures.
npmUpstream = "https://registry.npmjs.org"
// npmAbbreviatedCT is the Accept value used to request the abbreviated
// (install-oriented) metadata document from upstream when cooldown is off.
npmAbbreviatedCT = "application/vnd.npm.install-v1+json"
// scopedParts is the SplitN limit used to split "@scope/name" into two parts.
scopedParts = 2 // scope + name in scoped packages
)
// NPMHandler handles npm registry protocol requests.
// It serves package metadata (with tarball URLs rewritten to point at this
// proxy) and tarball downloads, delegating fetching, caching, logging and
// cooldown decisions to the shared Proxy.
type NPMHandler struct {
// proxy provides metadata/artifact fetching, the logger and the cooldown policy.
proxy *Proxy
// upstreamURL is the base URL of the upstream registry; NewNPMHandler sets it to npmUpstream.
upstreamURL string
proxyURL string // URL where this proxy is hosted; NewNPMHandler trims one trailing "/"
}
// NewNPMHandler builds an npm protocol handler on top of the given proxy.
// The handler talks to the default upstream registry (npmUpstream). proxyURL
// is the base URL under which this proxy is reachable; a single trailing "/"
// is removed so that paths can later be appended with a "/" separator.
func NewNPMHandler(proxy *Proxy, proxyURL string) *NPMHandler {
	h := new(NPMHandler)
	h.proxy = proxy
	h.upstreamURL = npmUpstream
	h.proxyURL = strings.TrimSuffix(proxyURL, "/")
	return h
}
// Routes returns the HTTP handler for npm requests.
// Mount this at /npm on your router.
//
// Only GET is accepted. A path containing "/-/" (after the leading slash is
// dropped) is treated as a tarball download; everything else is treated as a
// package metadata request.
func (h *NPMHandler) Routes() http.Handler {
	dispatch := func(w http.ResponseWriter, r *http.Request) {
		switch {
		case r.Method != http.MethodGet:
			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
		case strings.Contains(strings.TrimPrefix(r.URL.Path, "/"), "/-/"):
			// Tarball download.
			h.handleDownload(w, r)
		default:
			// Package metadata.
			h.handlePackageMetadata(w, r)
		}
	}
	return http.HandlerFunc(dispatch)
}
// handlePackageMetadata serves the registry document for one package.
// The document is obtained via Proxy.FetchOrCacheMetadata and returned with
// its tarball URLs rewritten to point at this proxy. If rewriting fails the
// upstream document is passed through unchanged.
//
// Responses: 400 for an empty package name, 404 when upstream reports the
// package as not found, 502 for any other fetch error, 200 otherwise.
func (h *NPMHandler) handlePackageMetadata(w http.ResponseWriter, r *http.Request) {
	name := h.extractPackageName(r)
	if name == "" {
		JSONError(w, http.StatusBadRequest, "invalid package name")
		return
	}

	h.proxy.Logger.Info("npm metadata request", "package", name)

	// The abbreviated document is much smaller (e.g. drizzle-orm: 4MB vs 92MB)
	// but has no time map, so the full document is requested whenever
	// cooldown is enabled.
	accept := contentTypeJSON
	if h.proxy.Cooldown == nil || !h.proxy.Cooldown.Enabled() {
		accept = npmAbbreviatedCT
	}

	target := fmt.Sprintf("%s/%s", h.upstreamURL, url.PathEscape(name))
	body, _, err := h.proxy.FetchOrCacheMetadata(r.Context(), "npm", name, target, accept)
	switch {
	case err == nil:
		// Fetched (or served from cache); continue below.
	case errors.Is(err, ErrUpstreamNotFound):
		JSONError(w, http.StatusNotFound, "package not found")
		return
	default:
		h.proxy.Logger.Error("failed to fetch npm metadata", "error", err)
		JSONError(w, http.StatusBadGateway, "failed to fetch from upstream")
		return
	}

	payload, rewriteErr := h.rewriteMetadata(name, body)
	if rewriteErr != nil {
		// Best effort: fall back to the untouched upstream document.
		h.proxy.Logger.Warn("failed to rewrite metadata, proxying original", "error", rewriteErr)
		payload = body
	}

	w.Header().Set("Content-Type", contentTypeJSON)
	w.WriteHeader(http.StatusOK)
	_, _ = w.Write(payload) // a failed write to the client cannot be reported anywhere useful
}
// rewriteMetadata decodes an npm package document, applies cooldown filtering
// (when enabled) and rewrites every version's tarball URL to point at this
// proxy, then re-encodes the document.
//
// A document without a "versions" object is returned byte-for-byte unchanged.
// An error is returned only when the body is not valid JSON for an object or
// when re-encoding fails.
func (h *NPMHandler) rewriteMetadata(packageName string, body []byte) ([]byte, error) {
	var doc map[string]any
	err := json.Unmarshal(body, &doc)
	if err != nil {
		return nil, err
	}

	if versions, found := doc["versions"].(map[string]any); found {
		// Filter first so that URLs are only rewritten for surviving versions.
		h.applyCooldownFiltering(doc, versions, packageName)
		h.rewriteTarballURLs(versions, packageName)
		return json.Marshal(doc)
	}

	// Nothing to rewrite.
	return body, nil
}
// applyCooldownFiltering drops versions the cooldown policy does not yet
// allow, based on the publish timestamps in the document's "time" map, and
// then repairs dist-tags.latest if it pointed at a dropped version.
//
// It is a no-op when cooldown is unset or disabled, or when the document has
// no "time" map. Versions with a missing or unparsable timestamp are kept.
// Both versions and the time map are modified in place.
func (h *NPMHandler) applyCooldownFiltering(metadata map[string]any, versions map[string]any, packageName string) {
	cooldown := h.proxy.Cooldown
	if cooldown == nil || !cooldown.Enabled() {
		return
	}

	timeMap, _ := metadata["time"].(map[string]any)
	if timeMap == nil {
		return
	}

	pkgPURL := purl.MakePURLString("npm", packageName, "")

	// Deleting the current key while ranging over a map is well defined in Go.
	for ver := range versions {
		stamp, isString := timeMap[ver].(string)
		if !isString {
			continue
		}
		published, parseErr := time.Parse(time.RFC3339, stamp)
		if parseErr != nil || cooldown.IsAllowed("npm", pkgPURL, published) {
			continue
		}

		h.proxy.Logger.Info("cooldown: filtering npm version",
			"package", packageName, "version", ver,
			"published", stamp)
		delete(versions, ver)
		delete(timeMap, ver)
	}

	h.updateDistTagsLatest(metadata, versions, timeMap)
}
// updateDistTagsLatest repoints dist-tags.latest when the version it names is
// no longer present in versions. The replacement is whatever
// findNewestVersion selects; if it selects nothing, the tag is left as is.
// Documents without a dist-tags object or a string "latest" tag are untouched.
func (h *NPMHandler) updateDistTagsLatest(metadata, versions, timeMap map[string]any) {
	// A failed assertion yields a nil map; indexing it below is safe and
	// simply fails the string assertion.
	tags, _ := metadata["dist-tags"].(map[string]any)
	current, isString := tags["latest"].(string)
	if !isString {
		return
	}

	if _, kept := versions[current]; kept {
		return
	}

	replacement := h.findNewestVersion(versions, timeMap)
	if replacement == "" {
		return
	}
	tags["latest"] = replacement
}
// rewriteTarballURLs rewrites, in place, the dist.tarball URL of every version
// entry so that it points at this proxy:
//
//	{proxyURL}/npm/{escaped package name}/-/{original filename}
//
// The filename is whatever follows the last "/" of the upstream URL (or the
// whole value when it has no "/"). Entries that are not objects, or that lack
// a dist object or a string tarball, are skipped.
func (h *NPMHandler) rewriteTarballURLs(versions map[string]any, packageName string) {
	// The prefix depends only on the package, not on the version, so build it
	// once instead of escaping and formatting it for every version entry.
	prefix := fmt.Sprintf("%s/npm/%s/-/", h.proxyURL, url.PathEscape(packageName))

	for version, vdata := range versions {
		vmap, ok := vdata.(map[string]any)
		if !ok {
			continue
		}
		dist, ok := vmap["dist"].(map[string]any)
		if !ok {
			continue
		}
		tarball, ok := dist["tarball"].(string)
		if !ok {
			continue
		}

		// LastIndex returns -1 when there is no "/", which makes the slice
		// start at 0 and keeps the whole string.
		filename := tarball[strings.LastIndex(tarball, "/")+1:]
		newTarball := prefix + filename
		dist["tarball"] = newTarball

		h.proxy.Logger.Debug("rewrote tarball URL",
			"package", packageName, "version", version,
			"old", tarball, "new", newTarball)
	}
}
// findNewestVersion returns the key of versions whose RFC 3339 timestamp in
// timeMap is the most recent. Versions with a missing, non-string or
// unparsable timestamp are ignored. It returns "" when timeMap is nil or no
// version has a usable timestamp.
//
// When several versions share the same publish instant, the one with the
// greatest version string (byte-wise comparison) wins. Without this tie-break
// the result would depend on map iteration order and on sort.Slice, which is
// not stable, and could differ between two requests for the same document.
func (h *NPMHandler) findNewestVersion(versions map[string]any, timeMap map[string]any) string {
	if timeMap == nil {
		return ""
	}

	type versionTime struct {
		version string
		t       time.Time
	}

	vts := make([]versionTime, 0, len(versions))
	for v := range versions {
		ts, ok := timeMap[v].(string)
		if !ok {
			continue
		}
		t, err := time.Parse(time.RFC3339, ts)
		if err != nil {
			continue
		}
		vts = append(vts, versionTime{version: v, t: t})
	}
	if len(vts) == 0 {
		return ""
	}

	// Newest first; version strings are unique map keys, so this is a total
	// order and the outcome is deterministic.
	sort.Slice(vts, func(i, j int) bool {
		if !vts[i].t.Equal(vts[j].t) {
			return vts[i].t.After(vts[j].t)
		}
		return vts[i].version > vts[j].version
	})
	return vts[0].version
}
// handleDownload serves a package tarball. The package name and filename are
// taken from the request path, the version is derived from the filename, and
// the artifact is obtained through Proxy.GetOrFetchArtifact.
//
// Responses: 400 when the path or filename cannot be interpreted, 502 when
// the artifact cannot be obtained; otherwise the artifact is handed to
// ServeArtifact.
func (h *NPMHandler) handleDownload(w http.ResponseWriter, r *http.Request) {
	pkg, file := h.parseDownloadPath(r.URL.Path)
	if pkg == "" || file == "" {
		JSONError(w, http.StatusBadRequest, "invalid request")
		return
	}

	// The version is encoded in the filename, e.g. "lodash-4.17.21.tgz" -> "4.17.21".
	ver := h.extractVersionFromFilename(pkg, file)
	if ver == "" {
		JSONError(w, http.StatusBadRequest, "could not determine version from filename")
		return
	}

	h.proxy.Logger.Info("npm download request",
		"package", pkg, "version", ver, "filename", file)

	artifact, fetchErr := h.proxy.GetOrFetchArtifact(r.Context(), "npm", pkg, ver, file)
	if fetchErr != nil {
		h.proxy.Logger.Error("failed to get artifact", "error", fetchErr)
		JSONError(w, http.StatusBadGateway, "failed to fetch package")
		return
	}

	ServeArtifact(w, artifact)
}
// extractPackageName derives the package name from the request path.
// The leading "/" and anything from the first "/-/" onwards are dropped, and
// the remainder is percent-decoded (%40 -> @, %2f -> /), so both scoped
// (@scope/name) and unscoped (name) packages are handled. If decoding fails
// the undecoded remainder is returned.
func (h *NPMHandler) extractPackageName(r *http.Request) string {
	raw := strings.TrimPrefix(r.URL.Path, "/")

	// Keep only what precedes "/-/"; Cut returns the whole string when the
	// separator is absent.
	raw, _, _ = strings.Cut(raw, "/-/")

	if name, err := url.PathUnescape(raw); err == nil {
		return name
	}
	return raw
}
// parseDownloadPath extracts package name and filename from a download path.
// Path format: /@scope/name/-/filename.tgz or /name/-/filename.tgz
//
// The path is split at the first "/-/". The package name is percent-decoded
// (left as is when decoding fails); the filename is returned verbatim.
// Both results are empty when the path has no "/-/" separator or when the
// filename contains a path separator.
func (h *NPMHandler) parseDownloadPath(path string) (packageName, filename string) {
	rawName, file, found := strings.Cut(strings.TrimPrefix(path, "/"), "/-/")
	if !found {
		return "", ""
	}

	// The path comes straight from the client. A tarball filename is a single
	// path segment, so anything containing "/" or "\" is rejected here rather
	// than letting "../"-style segments end up in the version string that is
	// derived from the filename.
	if strings.ContainsAny(file, `/\`) {
		return "", ""
	}

	packageName = rawName
	if decoded, err := url.PathUnescape(rawName); err == nil {
		packageName = decoded
	}
	return packageName, file
}
// extractVersionFromFilename pulls the version out of an npm tarball filename
// of the form "{short name}-{version}.tgz", where the short name is the
// package name without its scope:
//
//	"lodash-4.17.21.tgz" for lodash      -> "4.17.21"
//	"core-7.23.0.tgz"    for @babel/core -> "7.23.0"
//
// It returns "" when the filename does not end in ".tgz" or does not start
// with "{short name}-".
func (h *NPMHandler) extractVersionFromFilename(packageName, filename string) string {
	const ext = ".tgz"

	// TrimSuffix returns its input unchanged when the suffix is absent.
	stem := strings.TrimSuffix(filename, ext)
	if stem == filename {
		return ""
	}

	// The last of at most two "/"-separated parts is the short name: the part
	// after the scope for "@scope/name", or the whole name when unscoped.
	segments := strings.SplitN(packageName, "/", scopedParts)
	lead := segments[len(segments)-1] + "-"

	// lead is never empty, so an unchanged result means the prefix was absent.
	if version := strings.TrimPrefix(stem, lead); version != stem {
		return version
	}
	return ""
}