forked from mirrors/pkg-proxy
Add a `proxy mirror` CLI command and `/api/mirror` API endpoints that pre-populate the cache from various input sources: individual PURLs, SBOM files (CycloneDX and SPDX), or full registry enumeration. The mirror reuses the existing handler.Proxy.GetOrFetchArtifact() pipeline so cached artifacts are identical to those fetched on demand. A bounded worker pool controls download parallelism. Metadata caching is opt-in via `cache_metadata: true` in config (or PROXY_CACHE_METADATA=true). The mirror command always enables it. When enabled, upstream metadata responses are stored for offline fallback with ETag-based conditional revalidation. New internal/mirror package with Source interface, PURLSource, SBOMSource, RegistrySource, and async JobStore. New metadata_cache database table for offline metadata serving.
340 lines
9.2 KiB
Go
340 lines
9.2 KiB
Go
package handler
|
|
|
|
import (
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"net/http"
|
|
"net/url"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/git-pkgs/purl"
|
|
)
|
|
|
|
// Defaults and protocol constants used by the npm handler.
const (
	// npmUpstream is the base URL of the upstream npm registry that
	// NewNPMHandler configures by default.
	npmUpstream = "https://registry.npmjs.org"

	// npmAbbreviatedCT is the media type used to request npm's
	// abbreviated ("install") package metadata.
	npmAbbreviatedCT = "application/vnd.npm.install-v1+json"

	// scopedParts is the number of segments in a scoped package name
	// (scope + name).
	scopedParts = 2
)
|
|
|
|
// NPMHandler handles npm registry protocol requests.
|
|
type NPMHandler struct {
|
|
proxy *Proxy
|
|
upstreamURL string
|
|
proxyURL string // URL where this proxy is hosted
|
|
}
|
|
|
|
// NewNPMHandler creates a new npm protocol handler.
|
|
func NewNPMHandler(proxy *Proxy, proxyURL string) *NPMHandler {
|
|
return &NPMHandler{
|
|
proxy: proxy,
|
|
upstreamURL: npmUpstream,
|
|
proxyURL: strings.TrimSuffix(proxyURL, "/"),
|
|
}
|
|
}
|
|
|
|
// Routes returns the HTTP handler for npm requests.
|
|
// Mount this at /npm on your router.
|
|
func (h *NPMHandler) Routes() http.Handler {
|
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
if r.Method != http.MethodGet {
|
|
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
|
|
return
|
|
}
|
|
|
|
path := strings.TrimPrefix(r.URL.Path, "/")
|
|
|
|
// Check if this is a tarball download (contains /-/)
|
|
if strings.Contains(path, "/-/") {
|
|
h.handleDownload(w, r)
|
|
return
|
|
}
|
|
|
|
// Otherwise it's a metadata request
|
|
h.handlePackageMetadata(w, r)
|
|
})
|
|
}
|
|
|
|
// handlePackageMetadata proxies package metadata from upstream and rewrites tarball URLs.
|
|
func (h *NPMHandler) handlePackageMetadata(w http.ResponseWriter, r *http.Request) {
|
|
packageName := h.extractPackageName(r)
|
|
if packageName == "" {
|
|
JSONError(w, http.StatusBadRequest, "invalid package name")
|
|
return
|
|
}
|
|
|
|
h.proxy.Logger.Info("npm metadata request", "package", packageName)
|
|
|
|
upstreamURL := fmt.Sprintf("%s/%s", h.upstreamURL, url.PathEscape(packageName))
|
|
|
|
// Use abbreviated metadata when cooldown is disabled — it's much smaller
|
|
// (e.g. drizzle-orm: 4MB vs 92MB) but lacks the time map needed for cooldown.
|
|
accept := npmAbbreviatedCT
|
|
if h.proxy.Cooldown != nil && h.proxy.Cooldown.Enabled() {
|
|
accept = contentTypeJSON
|
|
}
|
|
|
|
body, _, err := h.proxy.FetchOrCacheMetadata(r.Context(), "npm", packageName, upstreamURL, accept)
|
|
if err != nil {
|
|
if errors.Is(err, ErrUpstreamNotFound) {
|
|
JSONError(w, http.StatusNotFound, "package not found")
|
|
return
|
|
}
|
|
h.proxy.Logger.Error("failed to fetch npm metadata", "error", err)
|
|
JSONError(w, http.StatusBadGateway, "failed to fetch from upstream")
|
|
return
|
|
}
|
|
|
|
rewritten, err := h.rewriteMetadata(packageName, body)
|
|
if err != nil {
|
|
// If rewriting fails, just proxy the original
|
|
h.proxy.Logger.Warn("failed to rewrite metadata, proxying original", "error", err)
|
|
w.Header().Set("Content-Type", contentTypeJSON)
|
|
w.WriteHeader(http.StatusOK)
|
|
_, _ = w.Write(body)
|
|
return
|
|
}
|
|
|
|
w.Header().Set("Content-Type", contentTypeJSON)
|
|
w.WriteHeader(http.StatusOK)
|
|
_, _ = w.Write(rewritten)
|
|
}
|
|
|
|
// rewriteMetadata rewrites tarball URLs in npm package metadata to point at this proxy.
|
|
// If cooldown is enabled, versions published too recently are filtered out.
|
|
func (h *NPMHandler) rewriteMetadata(packageName string, body []byte) ([]byte, error) {
|
|
var metadata map[string]any
|
|
if err := json.Unmarshal(body, &metadata); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Rewrite tarball URLs in versions
|
|
versions, ok := metadata["versions"].(map[string]any)
|
|
if !ok {
|
|
return body, nil // No versions to rewrite
|
|
}
|
|
|
|
h.applyCooldownFiltering(metadata, versions, packageName)
|
|
h.rewriteTarballURLs(versions, packageName)
|
|
|
|
return json.Marshal(metadata)
|
|
}
|
|
|
|
// applyCooldownFiltering removes versions that are too recently published,
|
|
// and updates dist-tags.latest if the current latest was filtered out.
|
|
func (h *NPMHandler) applyCooldownFiltering(metadata map[string]any, versions map[string]any, packageName string) {
|
|
if h.proxy.Cooldown == nil || !h.proxy.Cooldown.Enabled() {
|
|
return
|
|
}
|
|
|
|
timeMap, _ := metadata["time"].(map[string]any)
|
|
if timeMap == nil {
|
|
return
|
|
}
|
|
|
|
packagePURL := purl.MakePURLString("npm", packageName, "")
|
|
|
|
for version := range versions {
|
|
publishedStr, ok := timeMap[version].(string)
|
|
if !ok {
|
|
continue
|
|
}
|
|
publishedAt, err := time.Parse(time.RFC3339, publishedStr)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
if !h.proxy.Cooldown.IsAllowed("npm", packagePURL, publishedAt) {
|
|
h.proxy.Logger.Info("cooldown: filtering npm version",
|
|
"package", packageName, "version", version,
|
|
"published", publishedStr)
|
|
delete(versions, version)
|
|
delete(timeMap, version)
|
|
}
|
|
}
|
|
|
|
h.updateDistTagsLatest(metadata, versions, timeMap)
|
|
}
|
|
|
|
// updateDistTagsLatest updates the dist-tags.latest field if the current latest
|
|
// version was removed by cooldown filtering.
|
|
func (h *NPMHandler) updateDistTagsLatest(metadata, versions, timeMap map[string]any) {
|
|
distTags, ok := metadata["dist-tags"].(map[string]any)
|
|
if !ok {
|
|
return
|
|
}
|
|
|
|
latest, ok := distTags["latest"].(string)
|
|
if !ok {
|
|
return
|
|
}
|
|
|
|
if _, exists := versions[latest]; exists {
|
|
return
|
|
}
|
|
|
|
if newLatest := h.findNewestVersion(versions, timeMap); newLatest != "" {
|
|
distTags["latest"] = newLatest
|
|
}
|
|
}
|
|
|
|
// rewriteTarballURLs rewrites all tarball URLs in version entries to point at this proxy.
|
|
func (h *NPMHandler) rewriteTarballURLs(versions map[string]any, packageName string) {
|
|
for version, vdata := range versions {
|
|
vmap, ok := vdata.(map[string]any)
|
|
if !ok {
|
|
continue
|
|
}
|
|
|
|
dist, ok := vmap["dist"].(map[string]any)
|
|
if !ok {
|
|
continue
|
|
}
|
|
|
|
tarball, ok := dist["tarball"].(string)
|
|
if !ok {
|
|
continue
|
|
}
|
|
|
|
filename := tarball
|
|
if idx := strings.LastIndex(tarball, "/"); idx >= 0 {
|
|
filename = tarball[idx+1:]
|
|
}
|
|
|
|
escapedName := url.PathEscape(packageName)
|
|
newTarball := fmt.Sprintf("%s/npm/%s/-/%s", h.proxyURL, escapedName, filename)
|
|
dist["tarball"] = newTarball
|
|
|
|
h.proxy.Logger.Debug("rewrote tarball URL",
|
|
"package", packageName, "version", version,
|
|
"old", tarball, "new", newTarball)
|
|
}
|
|
}
|
|
|
|
// findNewestVersion returns the version string with the most recent timestamp
|
|
// from the remaining versions, using the time map.
|
|
func (h *NPMHandler) findNewestVersion(versions map[string]any, timeMap map[string]any) string {
|
|
if timeMap == nil {
|
|
return ""
|
|
}
|
|
|
|
type versionTime struct {
|
|
version string
|
|
t time.Time
|
|
}
|
|
|
|
var vts []versionTime
|
|
for v := range versions {
|
|
if ts, ok := timeMap[v].(string); ok {
|
|
if t, err := time.Parse(time.RFC3339, ts); err == nil {
|
|
vts = append(vts, versionTime{v, t})
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(vts) == 0 {
|
|
return ""
|
|
}
|
|
|
|
sort.Slice(vts, func(i, j int) bool {
|
|
return vts[i].t.After(vts[j].t)
|
|
})
|
|
|
|
return vts[0].version
|
|
}
|
|
|
|
// handleDownload serves a package tarball, fetching and caching from upstream if needed.
|
|
func (h *NPMHandler) handleDownload(w http.ResponseWriter, r *http.Request) {
|
|
packageName, filename := h.parseDownloadPath(r.URL.Path)
|
|
|
|
if packageName == "" || filename == "" {
|
|
JSONError(w, http.StatusBadRequest, "invalid request")
|
|
return
|
|
}
|
|
|
|
// Extract version from filename (e.g., "lodash-4.17.21.tgz" -> "4.17.21")
|
|
version := h.extractVersionFromFilename(packageName, filename)
|
|
if version == "" {
|
|
JSONError(w, http.StatusBadRequest, "could not determine version from filename")
|
|
return
|
|
}
|
|
|
|
h.proxy.Logger.Info("npm download request",
|
|
"package", packageName, "version", version, "filename", filename)
|
|
|
|
result, err := h.proxy.GetOrFetchArtifact(r.Context(), "npm", packageName, version, filename)
|
|
if err != nil {
|
|
h.proxy.Logger.Error("failed to get artifact", "error", err)
|
|
JSONError(w, http.StatusBadGateway, "failed to fetch package")
|
|
return
|
|
}
|
|
|
|
ServeArtifact(w, result)
|
|
}
|
|
|
|
// extractPackageName extracts the package name from the request path.
|
|
// Handles both scoped (@scope/name) and unscoped (name) packages.
|
|
func (h *NPMHandler) extractPackageName(r *http.Request) string {
|
|
path := strings.TrimPrefix(r.URL.Path, "/")
|
|
|
|
// Remove /-/filename suffix if present
|
|
if idx := strings.Index(path, "/-/"); idx >= 0 {
|
|
path = path[:idx]
|
|
}
|
|
|
|
// URL decode the path (handles %40 -> @, %2f -> /)
|
|
decoded, err := url.PathUnescape(path)
|
|
if err != nil {
|
|
return path
|
|
}
|
|
|
|
return decoded
|
|
}
|
|
|
|
// parseDownloadPath extracts package name and filename from a download path.
|
|
// Path format: /@scope/name/-/filename.tgz or /name/-/filename.tgz
|
|
func (h *NPMHandler) parseDownloadPath(path string) (packageName, filename string) {
|
|
path = strings.TrimPrefix(path, "/")
|
|
|
|
idx := strings.Index(path, "/-/")
|
|
if idx < 0 {
|
|
return "", ""
|
|
}
|
|
|
|
packageName = path[:idx]
|
|
filename = path[idx+3:] // skip "/-/"
|
|
|
|
// URL decode package name
|
|
if decoded, err := url.PathUnescape(packageName); err == nil {
|
|
packageName = decoded
|
|
}
|
|
|
|
return packageName, filename
|
|
}
|
|
|
|
// extractVersionFromFilename extracts version from npm tarball filename.
|
|
// e.g., "lodash-4.17.21.tgz" -> "4.17.21"
|
|
// e.g., "core-7.23.0.tgz" for @babel/core -> "7.23.0"
|
|
func (h *NPMHandler) extractVersionFromFilename(packageName, filename string) string {
|
|
// Remove .tgz extension
|
|
if !strings.HasSuffix(filename, ".tgz") {
|
|
return ""
|
|
}
|
|
base := strings.TrimSuffix(filename, ".tgz")
|
|
|
|
// For scoped packages, the filename uses the short name
|
|
shortName := packageName
|
|
if strings.Contains(packageName, "/") {
|
|
parts := strings.SplitN(packageName, "/", scopedParts)
|
|
shortName = parts[1]
|
|
}
|
|
|
|
// Expected format: {shortName}-{version}
|
|
prefix := shortName + "-"
|
|
if !strings.HasPrefix(base, prefix) {
|
|
return ""
|
|
}
|
|
|
|
return strings.TrimPrefix(base, prefix)
|
|
}
|