diff --git a/.woodpecker/debug.yaml b/.woodpecker/debug.yaml index 4dc7d3c9..6cfbab7a 100644 --- a/.woodpecker/debug.yaml +++ b/.woodpecker/debug.yaml @@ -16,6 +16,16 @@ steps: commands: - nix-build -j4 --attr flakePackages.fmt + - name: check typos + image: nixpkgs/nix:nixos-24.05 + commands: + - nix-shell --attr ci --run typos + + - name: check lints with clippy + image: nixpkgs/nix:nixos-24.05 + commands: + - nix-build -j4 --attr flakePackages.clippy + - name: build image: nixpkgs/nix:nixos-24.05 commands: diff --git a/.woodpecker/release.yaml b/.woodpecker/release.yaml index a94a9ccf..4133b92d 100644 --- a/.woodpecker/release.yaml +++ b/.woodpecker/release.yaml @@ -38,7 +38,15 @@ steps: - matrix: ARCH: i386 - - name: upgrade tests + - name: upgrade tests from v1.0.0 + image: nixpkgs/nix:nixos-24.05 + commands: + - nix-shell --attr ci --run "./script/test-upgrade.sh v1.0.0 x86_64-unknown-linux-musl" || (cat /tmp/garage.log; false) + when: + - matrix: + ARCH: amd64 + + - name: upgrade tests from v0.8.4 image: nixpkgs/nix:nixos-24.05 commands: - nix-shell --attr ci --run "./script/test-upgrade.sh v0.8.4 x86_64-unknown-linux-musl" || (cat /tmp/garage.log; false) diff --git a/Cargo.lock b/Cargo.lock index 7473d9af..968126e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -301,9 +301,9 @@ dependencies = [ [[package]] name = "aws-sdk-s3" -version = "1.120.0" +version = "1.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06673901e961f20fa8d7da907da48f7ad6c1b383e3726c22bd418900f015abe1" +checksum = "75ddb925e840f49446aa6338b67abdbec04b4ebf923b7da038ec4c35afb916cd" dependencies = [ "aws-credential-types", "aws-runtime", @@ -313,7 +313,6 @@ dependencies = [ "aws-smithy-eventstream", "aws-smithy-http", "aws-smithy-json", - "aws-smithy-observability", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -370,9 +369,9 @@ dependencies = [ [[package]] name = "aws-smithy-checksums" -version = "0.63.13" +version = "0.63.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23374b9170cbbcc6f5df8dc5ebb9b6c5c28a3c8f599f0e8b8b10eb6f4a5c6e74" +checksum = "9054b4cc5eda331cde3096b1576dec45365c5cbbca61d1fffa5f236e251dfce7" dependencies = [ "aws-smithy-http", "aws-smithy-types", @@ -865,42 +864,16 @@ dependencies = [ "libc", ] -[[package]] -name = "crc" -version = "3.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" -dependencies = [ - "crc-catalog", -] - -[[package]] -name = "crc-catalog" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" - [[package]] name = "crc-fast" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd92aca2c6001b1bf5ba0ff84ee74ec8501b52bbef0cac80bf25a6c1d87a83d" +checksum = "e75b2483e97a5a7da73ac68a05b629f9c53cff58d8ed1c77866079e18b00dba5" dependencies = [ - "crc", "digest", - "rustversion", "spin 0.10.0", ] -[[package]] -name = "crc32c" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" -dependencies = [ - "rustc_version", -] - [[package]] name = "crc32fast" version = "1.5.0" @@ -1201,12 +1174,6 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" -[[package]] -name = "foldhash" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" - [[package]] name = "form_urlencoded" version = "1.2.2" @@ -1311,7 +1278,7 @@ dependencies = [ [[package]] name = "garage" -version = "1.3.1" +version = "2.2.0" dependencies = [ "assert-json-diff", "async-trait", @@ -1322,7 +1289,7 @@ dependencies = [ "bytes", "bytesize", "chrono", - "crc32fast", + "crc-fast", "format_table", "futures", "garage_api_admin", @@ -1351,7 +1318,6 @@ dependencies = [ "opentelemetry-otlp", "opentelemetry-prometheus", "parse_duration", - "serde", "serde_json", "sha1", "sha2", @@ -1363,16 +1329,21 @@ dependencies = [ "tracing", "tracing-journald", "tracing-subscriber", + "utoipa", ] [[package]] name = "garage_api_admin" -version = "1.3.1" +version = "2.2.0" dependencies = [ "argon2", "async-trait", + "bytesize", + "chrono", + "format_table", "futures", "garage_api_common", + "garage_block", "garage_model", "garage_rpc", "garage_table", @@ -1382,6 +1353,7 @@ dependencies = [ "hyper 1.8.1", "opentelemetry", "opentelemetry-prometheus", + "paste", "prometheus", "serde", "serde_json", @@ -1389,17 +1361,17 @@ dependencies = [ "tokio", "tracing", "url", + "utoipa", ] [[package]] name = "garage_api_common" -version = "1.3.1" +version = "2.2.0" dependencies = [ "base64 0.21.7", "bytes", "chrono", - "crc32c", - "crc32fast", + "crc-fast", "crypto-common", "futures", "garage_model", @@ -1427,7 +1399,7 @@ dependencies = [ [[package]] name = "garage_api_k2v" -version = "1.3.1" +version = "2.2.0" dependencies = [ "base64 0.21.7", "futures", @@ -1450,15 +1422,14 @@ dependencies = [ [[package]] name = "garage_api_s3" -version = "1.3.1" +version = "2.2.0" dependencies = [ "aes-gcm", "async-compression", "base64 0.21.7", "bytes", "chrono", - "crc32c", - "crc32fast", + "crc-fast", "form_urlencoded", "futures", "garage_api_common", @@ -1469,6 +1440,7 @@ dependencies = [ "garage_table", "garage_util", "hex", + "hmac", "http 1.4.0", "http-body-util", "http-range", @@ -1495,7 +1467,7 @@ dependencies = [ [[package]] name = "garage_block" -version = "1.3.1" +version = "2.2.0" dependencies = [ "arc-swap", "async-compression", @@ -1506,7 +1478,6 @@ dependencies = [ "garage_db", "garage_net", "garage_rpc", - "garage_table", "garage_util", "hex", "opentelemetry", @@ -1520,7 +1491,7 @@ dependencies = [ [[package]] name = "garage_db" -version = "1.3.1" +version = "2.2.0" dependencies = [ "fjall", "heed", @@ -1535,8 +1506,9 @@ dependencies = [ [[package]] name = "garage_model" -version = "1.3.1" +version = "2.2.0" dependencies = [ + "argon2", "async-trait", "base64 0.21.7", "blake2", @@ -1562,7 +1534,7 @@ dependencies = [ [[package]] name = "garage_net" -version = "1.3.1" +version = "2.2.0" dependencies = [ "arc-swap", "bytes", @@ -1587,7 +1559,7 @@ dependencies = [ [[package]] name = "garage_rpc" -version = "1.3.1" +version = "2.2.0" dependencies = [ "arc-swap", "async-trait", @@ -1619,7 +1591,7 @@ dependencies = [ [[package]] name = "garage_table" -version = "1.3.1" +version = "2.2.0" dependencies = [ "arc-swap", "async-trait", @@ -1640,7 +1612,7 @@ dependencies = [ [[package]] name = "garage_util" -version = "1.3.1" +version = "2.2.0" dependencies = [ "arc-swap", "async-trait", @@ -1672,7 +1644,7 @@ dependencies = [ [[package]] name = "garage_web" -version = "1.3.1" +version = "2.2.0" dependencies = [ "garage_api_common", "garage_api_s3", @@ -1834,7 +1806,9 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ - "foldhash 0.1.5", + "allocator-api2", + "equivalent", + "foldhash", ] [[package]] @@ -1842,11 +1816,6 @@ name = "hashbrown" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" -dependencies = [ - "allocator-api2", - "equivalent", - "foldhash 0.2.0", -] [[package]] name = "hashlink" @@ -2304,6 +2273,8 @@ checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", "hashbrown 0.16.1", + "serde", + "serde_core", ] [[package]] @@ -2682,11 +2653,11 @@ checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "lru" -version = "0.16.3" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1dc47f592c06f33f8e3aea9591776ec7c9f9e4124778ff8a3c3b87159f7e593" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" dependencies = [ - "hashbrown 0.16.1", + "hashbrown 0.15.5", ] [[package]] @@ -3108,6 +3079,12 @@ dependencies = [ "subtle", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "path-absolutize" version = "3.1.1" @@ -4832,6 +4809,29 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "utoipa" +version = "5.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fcc29c80c21c31608227e0912b2d7fddba57ad76b606890627ba8ee7964e993" +dependencies = [ + "indexmap 2.13.0", + "serde", + "serde_json", + "utoipa-gen", +] + +[[package]] +name = "utoipa-gen" +version = "5.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d79d08d92ab8af4c5e8a6da20c47ae3f61a0f1dabc1997cdf2d082b757ca08b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + [[package]] name = "uuid" version = "1.4.1" diff --git a/Cargo.toml b/Cargo.toml index df4005a3..7e4a3f64 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,22 +24,22 @@ default-members = ["src/garage"] # Internal Garage crates format_table = { version = "0.1.1", path = "src/format-table" } -garage_api_common = { version = "1.3.1", path = "src/api/common" } -garage_api_admin = { version = "1.3.1", path = "src/api/admin" } -garage_api_s3 = { version = "1.3.1", path = "src/api/s3" } -garage_api_k2v = { version = "1.3.1", path = "src/api/k2v" } -garage_block = { version = "1.3.1", path = "src/block" } -garage_db = { version = "1.3.1", path = "src/db", default-features = false } -garage_model = { version = "1.3.1", path = "src/model", default-features = false } -garage_net = { version = "1.3.1", path = "src/net" } -garage_rpc = { version = "1.3.1", path = "src/rpc" } -garage_table = { version = "1.3.1", path = "src/table" } -garage_util = { version = "1.3.1", path = "src/util" } -garage_web = { version = "1.3.1", path = "src/web" } +garage_api_common = { version = "2.2.0", path = "src/api/common" } +garage_api_admin = { version = "2.2.0", path = "src/api/admin" } +garage_api_s3 = { version = "2.2.0", path = "src/api/s3" } +garage_api_k2v = { version = "2.2.0", path = "src/api/k2v" } +garage_block = { version = "2.2.0", path = "src/block" } +garage_db = { version = "2.2.0", path = "src/db", default-features = false } +garage_model = { version = "2.2.0", path = "src/model", default-features = false } +garage_net = { version = "2.2.0", path = "src/net" } +garage_rpc = { version = "2.2.0", path = "src/rpc" } +garage_table = { version = "2.2.0", path = "src/table" } +garage_util = { version = "2.2.0", path = "src/util" } +garage_web = { version = "2.2.0", path = "src/web" } k2v-client = { version = "0.0.4", path = "src/k2v-client" } # External crates from crates.io -arc-swap = "1.0" +arc-swap = "1.1" argon2 = "0.5" async-trait = "0.1.7" backtrace = "0.3" @@ -48,9 +48,8 @@ blake2 = "0.10" bytes = "1.0" bytesize = "1.1" cfg-if = "1.0" -chrono = "0.4" -crc32fast = "1.4" -crc32c = "0.6" +chrono = { version = "0.4", features = ["serde"] } +crc-fast = "1.6" crypto-common = "0.1" gethostname = "0.4" git-version = "0.3.4" @@ -66,6 +65,7 @@ nix = { version = "0.29", default-features = false, features = ["fs"] } nom = "7.1" parking_lot = "0.12" parse_duration = "2.1" +paste = "1.0" pin-project = "1.0.12" pnet_datalink = "0.34" rand = "0.8" @@ -95,12 +95,13 @@ fjall = "2.4" async-compression = { version = "0.4", features = ["tokio", "zstd"] } zstd = { version = "0.13", default-features = false } -quick-xml = { version = "0.26", features = [ "serialize" ] } +quick-xml = { version = "0.26", features = ["serialize"] } rmp-serde = "1.1.2" serde = { version = "1.0", default-features = false, features = ["derive", "rc"] } serde_bytes = "0.11" serde_json = "1.0" toml = { version = "0.8", default-features = false, features = ["parse"] } +utoipa = { version = "5.3.1", features = ["chrono"] } # newer version requires rust edition 2021 k8s-openapi = { version = "0.21", features = ["v1_24"] } @@ -114,7 +115,7 @@ httpdate = "1.0" http-range = "0.1" http-body-util = "0.1" hyper = { version = "1.0", default-features = false } -hyper-util = { version = "0.1", features = [ "full" ] } +hyper-util = { version = "0.1", features = ["full"] } multer = "3.0" percent-encoding = "2.2" roxmltree = "0.19" @@ -122,11 +123,11 @@ url = "2.3" futures = "0.3" futures-util = "0.3" -tokio = { version = "1.0", default-features = false, features = ["net", "rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] } +tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] } tokio-util = { version = "0.7", features = ["compat", "io"] } tokio-stream = { version = "0.1", features = ["net"] } -opentelemetry = { version = "0.17", features = [ "rt-tokio", "metrics", "trace" ] } +opentelemetry = { version = "0.17", features = ["rt-tokio", "metrics", "trace"] } opentelemetry-prometheus = "0.10" opentelemetry-otlp = "0.10" opentelemetry-contrib = "0.9" @@ -146,8 +147,12 @@ aws-smithy-runtime = { version = "1.8", default-features = false, features = ["t aws-sdk-config = { version = "1.62", default-features = false } aws-sdk-s3 = { version = "1.79", default-features = false, features = ["rt-tokio"] } +[profile.dev] +#lto = "thin" # disabled for now, adds 2-4 min to each CI build +lto = "off" + [profile.release] -lto = "thin" -codegen-units = 16 +lto = true +codegen-units = 1 opt-level = 3 -strip = "debuginfo" +strip = true diff --git a/doc/api/garage-admin-v0.html b/doc/api/garage-admin-v0.html index dbdd9e1c..7eb11f25 100644 --- a/doc/api/garage-admin-v0.html +++ b/doc/api/garage-admin-v0.html @@ -1,7 +1,7 @@ - Garage Adminstration API v0 + Garage administration API v0 diff --git a/doc/api/garage-admin-v0.yml b/doc/api/garage-admin-v0.yml index 83316d93..d2e05a42 100644 --- a/doc/api/garage-admin-v0.yml +++ b/doc/api/garage-admin-v0.yml @@ -3,10 +3,10 @@ info: version: v0.8.0 title: Garage Administration API v0+garage-v0.8.0 description: | - Administrate your Garage cluster programatically, including status, layout, keys, buckets, and maintainance tasks. - - *Disclaimer: The API is not stable yet, hence its v0 tag. The API can change at any time, and changes can include breaking backward compatibility. Read the changelog and upgrade your scripts before upgrading. Additionnaly, this specification is very early stage and can contain bugs, especially on error return codes/types that are not tested yet. Do not expect a well finished and polished product!* -paths: + Administrate your Garage cluster programmatically, including status, layout, keys, buckets, and maintenance tasks. + + *Disclaimer: The API is not stable yet, hence its v0 tag. The API can change at any time, and changes can include breaking backward compatibility. Read the changelog and upgrade your scripts before upgrading. Additionally, this specification is very early stage and can contain bugs, especially on error return codes/types that are not tested yet. Do not expect a well finished and polished product!* +paths: /status: get: tags: diff --git a/doc/api/garage-admin-v1.html b/doc/api/garage-admin-v1.html index 783d459e..e98306b8 100644 --- a/doc/api/garage-admin-v1.html +++ b/doc/api/garage-admin-v1.html @@ -1,7 +1,7 @@ - Garage Adminstration API v0 + Garage administration API v1 diff --git a/doc/api/garage-admin-v1.yml b/doc/api/garage-admin-v1.yml index a70dc97b..90465890 100644 --- a/doc/api/garage-admin-v1.yml +++ b/doc/api/garage-admin-v1.yml @@ -3,10 +3,10 @@ info: version: v0.9.0 title: Garage Administration API v0+garage-v0.9.0 description: | - Administrate your Garage cluster programatically, including status, layout, keys, buckets, and maintainance tasks. - - *Disclaimer: The API is not stable yet, hence its v0 tag. The API can change at any time, and changes can include breaking backward compatibility. Read the changelog and upgrade your scripts before upgrading. Additionnaly, this specification is very early stage and can contain bugs, especially on error return codes/types that are not tested yet. Do not expect a well finished and polished product!* -paths: + Administrate your Garage cluster programmatically, including status, layout, keys, buckets, and maintenance tasks. + + *Disclaimer: The API is not stable yet, hence its v0 tag. The API can change at any time, and changes can include breaking backward compatibility. Read the changelog and upgrade your scripts before upgrading. Additionally, this specification is very early stage and can contain bugs, especially on error return codes/types that are not tested yet. Do not expect a well finished and polished product!* +paths: /health: get: tags: @@ -440,7 +440,7 @@ paths: - "false" example: "true" required: false - description: "Wether or not the secret key should be returned in the response" + description: "Whether or not the secret key should be returned in the response" responses: '500': description: "The server can not handle your request. Check your connectivity with the rest of the cluster." diff --git a/doc/api/garage-admin-v2.html b/doc/api/garage-admin-v2.html new file mode 100644 index 00000000..b079e760 --- /dev/null +++ b/doc/api/garage-admin-v2.html @@ -0,0 +1,24 @@ + + + + Garage administration API v2 + + + + + + + + + + + + + diff --git a/doc/api/garage-admin-v2.json b/doc/api/garage-admin-v2.json new file mode 100644 index 00000000..15059ce4 --- /dev/null +++ b/doc/api/garage-admin-v2.json @@ -0,0 +1,4430 @@ +{ + "openapi": "3.1.0", + "info": { + "title": "Garage administration API", + "description": "Administrate your Garage cluster programmatically, including status, layout, keys, buckets, and maintenance tasks.\n\n*Disclaimer: This API may change in future Garage versions. Read the changelog and upgrade your scripts before upgrading. Additionally, this specification is early stage and can contain bugs, so be careful and please report any issues on our issue tracker.*", + "contact": { + "name": "The Garage team", + "url": "https://garagehq.deuxfleurs.fr/", + "email": "garagehq@deuxfleurs.fr" + }, + "license": { + "name": "AGPL-3.0", + "identifier": "AGPL-3.0" + }, + "version": "v2.2.0" + }, + "servers": [ + { + "url": "http://localhost:3903/", + "description": "A local server" + } + ], + "paths": { + "/check": { + "get": { + "tags": [ + "Special endpoints" + ], + "description": "\nStatic website domain name check. Checks whether a bucket is configured to serve\na static website for the requested domain. This is used by reverse proxies such\nas Caddy or Tricot, to avoid requesting TLS certificates for domain names that\ndo not correspond to an actual website.\n ", + "operationId": "CheckDomain", + "parameters": [ + { + "name": "domain", + "in": "query", + "description": "The domain name to check for", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "The domain name redirects to a static website bucket" + }, + "400": { + "description": "No static website bucket exists for this domain" + } + }, + "security": [ + {} + ] + } + }, + "/health": { + "get": { + "tags": [ + "Special endpoints" + ], + "description": "\nCheck cluster health. The status code returned by this function indicates\nwhether this Garage daemon can answer API requests.\nGarage will return `200 OK` even if some storage nodes are disconnected,\nas long as it is able to have a quorum of nodes for read and write operations.\n ", + "operationId": "Health", + "responses": { + "200": { + "description": "Garage is able to answer requests" + }, + "503": { + "description": "This Garage daemon is not able to handle requests" + } + }, + "security": [ + {} + ] + } + }, + "/metrics": { + "get": { + "tags": [ + "Special endpoints" + ], + "description": "Prometheus metrics endpoint", + "operationId": "Metrics", + "responses": { + "200": { + "description": "Garage daemon metrics exported in Prometheus format" + } + }, + "security": [ + {}, + { + "bearerAuth": [] + } + ] + } + }, + "/v2/AddBucketAlias": { + "post": { + "tags": [ + "Bucket alias" + ], + "description": "Add an alias for the target bucket. This can be either a global or a local alias, depending on which fields are specified.", + "operationId": "AddBucketAlias", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BucketAliasEnum" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Returns exhaustive information about the bucket", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AddBucketAliasResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/AllowBucketKey": { + "post": { + "tags": [ + "Permission" + ], + "description": "\n⚠️ **DISCLAIMER**: Garage's developers are aware that this endpoint has an unconventional semantic. Be extra careful when implementing it, its behavior is not obvious.\n\nAllows a key to do read/write/owner operations on a bucket.\n\nFlags in permissions which have the value true will be activated. Other flags will remain unchanged (ie. they will keep their internal value).\n\nFor example, if you set read to true, the key will be allowed to read the bucket.\nIf you set it to false, the key will keeps its previous read permission.\nIf you want to disallow read for the key, check the DenyBucketKey operation.\n ", + "operationId": "AllowBucketKey", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AllowBucketKeyRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Returns exhaustive information about the bucket", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AllowBucketKeyResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/ApplyClusterLayout": { + "post": { + "tags": [ + "Cluster layout" + ], + "description": "\nApplies to the cluster the layout changes currently registered as staged layout changes.\n\n*Note: do not try to parse the `message` field of the response, it is given as an array of string specifically because its format is not stable.*\n ", + "operationId": "ApplyClusterLayout", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApplyClusterLayoutRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "The updated cluster layout has been applied in the cluster", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApplyClusterLayoutResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/CleanupIncompleteUploads": { + "post": { + "tags": [ + "Bucket" + ], + "description": "Removes all incomplete multipart uploads that are older than the specified number of seconds.", + "operationId": "CleanupIncompleteUploads", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CleanupIncompleteUploadsRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "The bucket was cleaned up successfully", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CleanupIncompleteUploadsResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/ClusterLayoutSkipDeadNodes": { + "post": { + "tags": [ + "Cluster layout" + ], + "description": "Force progress in layout update trackers", + "operationId": "ClusterLayoutSkipDeadNodes", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ClusterLayoutSkipDeadNodesRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Request has been taken into account", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ClusterLayoutSkipDeadNodesResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/ConnectClusterNodes": { + "post": { + "tags": [ + "Cluster" + ], + "description": "Instructs this Garage node to connect to other Garage nodes at specified `@`. `node_id` is generated automatically on node start.", + "operationId": "ConnectClusterNodes", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ConnectClusterNodesRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "The request has been handled correctly but it does not mean that all connection requests succeeded; some might have fail, you need to check the body!", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ConnectClusterNodesResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/CreateAdminToken": { + "post": { + "tags": [ + "Admin API token" + ], + "description": "Creates a new admin API token", + "operationId": "CreateAdminToken", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateAdminTokenRequestBody" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Admin token has been created", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateAdminTokenResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/CreateBucket": { + "post": { + "tags": [ + "Bucket" + ], + "description": "\nCreates a new bucket, either with a global alias, a local one, or no alias at all.\nTechnically, you can also specify both `globalAlias` and `localAlias` and that would create two aliases.\n ", + "operationId": "CreateBucket", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateBucketRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Returns exhaustive information about the bucket", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateBucketResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/CreateKey": { + "post": { + "tags": [ + "Access key" + ], + "description": "Creates a new API access key.", + "operationId": "CreateKey", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateKeyRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Access key has been created", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateKeyResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/CreateMetadataSnapshot": { + "post": { + "tags": [ + "Node" + ], + "description": "\nInstruct one or several nodes to take a snapshot of their metadata databases.\n ", + "operationId": "CreateMetadataSnapshot", + "parameters": [ + { + "name": "node", + "in": "query", + "description": "Node ID to query, or `*` for all nodes, or `self` for the node responding to the request", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Responses from individual cluster nodes", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MultiResponse_LocalCreateMetadataSnapshotResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/DeleteAdminToken": { + "post": { + "tags": [ + "Admin API token" + ], + "description": "Delete an admin API token from the cluster, revoking all its permissions.", + "operationId": "DeleteAdminToken", + "parameters": [ + { + "name": "id", + "in": "query", + "description": "Admin API token ID", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Admin token has been deleted" + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/DeleteBucket": { + "post": { + "tags": [ + "Bucket" + ], + "description": "\nDeletes a storage bucket. A bucket cannot be deleted if it is not empty.\n\n**Warning:** this will delete all aliases associated with the bucket!\n ", + "operationId": "DeleteBucket", + "parameters": [ + { + "name": "id", + "in": "query", + "description": "ID of the bucket to delete", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Bucket has been deleted" + }, + "400": { + "description": "Bucket is not empty" + }, + "404": { + "description": "Bucket not found" + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/DeleteKey": { + "post": { + "tags": [ + "Access key" + ], + "description": "Delete a key from the cluster. Its access will be removed from all the buckets. Buckets are not automatically deleted and can be dangling. You should manually delete them before. ", + "operationId": "DeleteKey", + "parameters": [ + { + "name": "id", + "in": "query", + "description": "Access key ID", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Access key has been deleted" + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/DenyBucketKey": { + "post": { + "tags": [ + "Permission" + ], + "description": "\n⚠️ **DISCLAIMER**: Garage's developers are aware that this endpoint has an unconventional semantic. Be extra careful when implementing it, its behavior is not obvious.\n\nDenies a key from doing read/write/owner operations on a bucket.\n\nFlags in permissions which have the value true will be deactivated. Other flags will remain unchanged.\n\nFor example, if you set read to true, the key will be denied from reading.\nIf you set read to false, the key will keep its previous permissions.\nIf you want the key to have the reading permission, check the AllowBucketKey operation.\n ", + "operationId": "DenyBucketKey", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DenyBucketKeyRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Returns exhaustive information about the bucket", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DenyBucketKeyResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/GetAdminTokenInfo": { + "get": { + "tags": [ + "Admin API token" + ], + "description": "\nReturn information about a specific admin API token.\nYou can search by specifying the exact token identifier (`id`) or by specifying a pattern (`search`).\n ", + "operationId": "GetAdminTokenInfo", + "parameters": [ + { + "name": "id", + "in": "query", + "description": "Admin API token ID", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "search", + "in": "query", + "description": "Partial token ID or name to search for", + "required": false, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Information about the admin token", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetAdminTokenInfoResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/GetBlockInfo": { + "post": { + "tags": [ + "Block" + ], + "description": "\nGet detailed information about a data block stored on a Garage node, including all object versions and in-progress multipart uploads that contain a reference to this block.\n ", + "operationId": "GetBlockInfo", + "parameters": [ + { + "name": "node", + "in": "query", + "description": "Node ID to query, or `*` for all nodes, or `self` for the node responding to the request", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LocalGetBlockInfoRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Detailed block information", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MultiResponse_LocalGetBlockInfoResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/GetBucketInfo": { + "get": { + "tags": [ + "Bucket" + ], + "description": "\nGiven a bucket identifier (`id`) or a global alias (`alias`), get its information.\nIt includes its aliases, its web configuration, keys that have some permissions\non it, some statistics (number of objects, size), number of dangling multipart uploads,\nand its quotas (if any).\n ", + "operationId": "GetBucketInfo", + "parameters": [ + { + "name": "id", + "in": "query", + "description": "Exact bucket ID to look up", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "globalAlias", + "in": "query", + "description": "Global alias of bucket to look up", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "search", + "in": "query", + "description": "Partial ID or alias to search for", + "required": false, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Returns exhaustive information about the bucket", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetBucketInfoResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/GetClusterHealth": { + "get": { + "tags": [ + "Cluster" + ], + "description": "Returns the global status of the cluster, the number of connected nodes (over the number of known ones), the number of healthy storage nodes (over the declared ones), and the number of healthy partitions (over the total).", + "operationId": "GetClusterHealth", + "responses": { + "200": { + "description": "Cluster health report", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetClusterHealthResponse" + } + } + } + } + } + } + }, + "/v2/GetClusterLayout": { + "get": { + "tags": [ + "Cluster layout" + ], + "description": "\nReturns the cluster's current layout, including:\n\n- Currently configured cluster layout\n- Staged changes to the cluster layout\n\n*Capacity is given in bytes*\n ", + "operationId": "GetClusterLayout", + "responses": { + "200": { + "description": "Current cluster layout", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetClusterLayoutResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/GetClusterLayoutHistory": { + "get": { + "tags": [ + "Cluster layout" + ], + "description": "\nReturns the history of layouts in the cluster\n ", + "operationId": "GetClusterLayoutHistory", + "responses": { + "200": { + "description": "Cluster layout history", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetClusterLayoutHistoryResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/GetClusterStatistics": { + "get": { + "tags": [ + "Cluster" + ], + "description": "\nFetch global cluster statistics.\n\n*Note: do not try to parse the `freeform` field of the response, it is given as a string specifically because its format is not stable.*\n ", + "operationId": "GetClusterStatistics", + "responses": { + "200": { + "description": "Global cluster statistics", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetClusterStatisticsResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/GetClusterStatus": { + "get": { + "tags": [ + "Cluster" + ], + "description": "\nReturns the cluster's current status, including:\n\n- ID of the node being queried and its version of the Garage daemon\n- Live nodes\n- Currently configured cluster layout\n- Staged changes to the cluster layout\n\n*Capacity is given in bytes*\n ", + "operationId": "GetClusterStatus", + "responses": { + "200": { + "description": "Cluster status report", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetClusterStatusResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/GetCurrentAdminTokenInfo": { + "get": { + "tags": [ + "Admin API token" + ], + "description": "\nReturn information about the calling admin API token.\n ", + "operationId": "GetCurrentAdminTokenInfo", + "responses": { + "200": { + "description": "Information about the admin token", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetCurrentAdminTokenInfoResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/GetKeyInfo": { + "get": { + "tags": [ + "Access key" + ], + "description": "\nReturn information about a specific key like its identifiers, its permissions and buckets on which it has permissions.\nYou can search by specifying the exact key identifier (`id`) or by specifying a pattern (`search`).\n\nFor confidentiality reasons, the secret key is not returned by default: you must pass the `showSecretKey` query parameter to get it.\n ", + "operationId": "GetKeyInfo", + "parameters": [ + { + "name": "id", + "in": "query", + "description": "Access key ID", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "search", + "in": "query", + "description": "Partial key ID or name to search for", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "showSecretKey", + "in": "query", + "description": "Whether to return the secret access key", + "required": false, + "schema": { + "type": "boolean" + } + } + ], + "responses": { + "200": { + "description": "Information about the access key", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetKeyInfoResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/GetNodeInfo": { + "get": { + "tags": [ + "Node" + ], + "description": "\nReturn information about the Garage daemon running on one or several nodes.\n ", + "operationId": "GetNodeInfo", + "parameters": [ + { + "name": "node", + "in": "query", + "description": "Node ID to query, or `*` for all nodes, or `self` for the node responding to the request", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Responses from individual cluster nodes", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MultiResponse_LocalGetNodeInfoResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/GetNodeStatistics": { + "get": { + "tags": [ + "Node" + ], + "description": "\nFetch statistics for one or several Garage nodes.\n\n*Note: do not try to parse the `freeform` field of the response, it is given as a string specifically because its format is not stable.*\n ", + "operationId": "GetNodeStatistics", + "parameters": [ + { + "name": "node", + "in": "query", + "description": "Node ID to query, or `*` for all nodes, or `self` for the node responding to the request", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Responses from individual cluster nodes", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MultiResponse_LocalGetNodeStatisticsResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/GetWorkerInfo": { + "post": { + "tags": [ + "Worker" + ], + "description": "\nGet information about the specified background worker on one or several cluster nodes.\n ", + "operationId": "GetWorkerInfo", + "parameters": [ + { + "name": "node", + "in": "query", + "description": "Node ID to query, or `*` for all nodes, or `self` for the node responding to the request", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LocalGetWorkerInfoRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Responses from individual cluster nodes", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MultiResponse_LocalGetWorkerInfoResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/GetWorkerVariable": { + "post": { + "tags": [ + "Worker" + ], + "description": "\nFetch values of one or several worker variables, from one or several cluster nodes.\n ", + "operationId": "GetWorkerVariable", + "parameters": [ + { + "name": "node", + "in": "query", + "description": "Node ID to query, or `*` for all nodes, or `self` for the node responding to the request", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LocalGetWorkerVariableRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Responses from individual cluster nodes", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MultiResponse_LocalGetWorkerVariableResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/ImportKey": { + "post": { + "tags": [ + "Access key" + ], + "description": "\nImports an existing API key. This feature must only be used for migrations and backup restore.\n\n**Do not use it to generate custom key identifiers or you will break your Garage cluster.**\n ", + "operationId": "ImportKey", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ImportKeyRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Access key has been imported", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ImportKeyResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/InspectObject": { + "get": { + "tags": [ + "Bucket" + ], + "description": "\nReturns detailed information about an object in a bucket, including its internal state in Garage.\n\nThis API call can be used to list the data blocks referenced by an object,\nas well as to view metadata associated to the object.\n\nThis call may return a list of more than one version for the object, for instance in the\ncase where there is a currently stored version of the object, and a newer version whose\nupload is in progress and not yet finished.\n ", + "operationId": "InspectObject", + "parameters": [ + { + "name": "bucketId", + "in": "query", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "key", + "in": "query", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Returns exhaustive information about the object", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InspectObjectResponse" + } + } + } + }, + "404": { + "description": "Object not found" + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/LaunchRepairOperation": { + "post": { + "tags": [ + "Node" + ], + "description": "\nLaunch a repair operation on one or several cluster nodes.\n ", + "operationId": "LaunchRepairOperation", + "parameters": [ + { + "name": "node", + "in": "query", + "description": "Node ID to query, or `*` for all nodes, or `self` for the node responding to the request", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LocalLaunchRepairOperationRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Responses from individual cluster nodes", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MultiResponse_LocalLaunchRepairOperationResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/ListAdminTokens": { + "get": { + "tags": [ + "Admin API token" + ], + "description": "Returns all admin API tokens in the cluster.", + "operationId": "ListAdminTokens", + "responses": { + "200": { + "description": "Returns info about all admin API tokens", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListAdminTokensResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/ListBlockErrors": { + "get": { + "tags": [ + "Block" + ], + "description": "\nList data blocks that are currently in an errored state on one or several Garage nodes.\n ", + "operationId": "ListBlockErrors", + "parameters": [ + { + "name": "node", + "in": "query", + "description": "Node ID to query, or `*` for all nodes, or `self` for the node responding to the request", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Responses from individual cluster nodes", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MultiResponse_LocalListBlockErrorsResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/ListBuckets": { + "get": { + "tags": [ + "Bucket" + ], + "description": "List all the buckets on the cluster with their UUID and their global and local aliases.", + "operationId": "ListBuckets", + "responses": { + "200": { + "description": "Returns the UUID of all the buckets and all their aliases", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListBucketsResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/ListKeys": { + "get": { + "tags": [ + "Access key" + ], + "description": "Returns all API access keys in the cluster.", + "operationId": "ListKeys", + "responses": { + "200": { + "description": "Returns the key identifier (aka `AWS_ACCESS_KEY_ID`) and its associated, human friendly, name if any (otherwise return an empty string)", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListKeysResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/ListWorkers": { + "post": { + "tags": [ + "Worker" + ], + "description": "\nList background workers currently running on one or several cluster nodes.\n ", + "operationId": "ListWorkers", + "parameters": [ + { + "name": "node", + "in": "query", + "description": "Node ID to query, or `*` for all nodes, or `self` for the node responding to the request", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LocalListWorkersRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Responses from individual cluster nodes", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MultiResponse_LocalListWorkersResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/PreviewClusterLayoutChanges": { + "post": { + "tags": [ + "Cluster layout" + ], + "description": "\nComputes a new layout taking into account the staged parameters, and returns it with detailed statistics. The new layout is not applied in the cluster.\n\n*Note: do not try to parse the `message` field of the response, it is given as an array of string specifically because its format is not stable.*\n ", + "operationId": "PreviewClusterLayoutChanges", + "responses": { + "200": { + "description": "Information about the new layout", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PreviewClusterLayoutChangesResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/PurgeBlocks": { + "post": { + "tags": [ + "Block" + ], + "description": "\nPurge references to one or several missing data blocks.\n\nThis will remove all objects and in-progress multipart uploads that contain the specified data block(s). The objects will be permanently deleted from the buckets in which they appear. Use with caution.\n ", + "operationId": "PurgeBlocks", + "parameters": [ + { + "name": "node", + "in": "query", + "description": "Node ID to query, or `*` for all nodes, or `self` for the node responding to the request", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LocalPurgeBlocksRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Responses from individual cluster nodes", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MultiResponse_LocalPurgeBlocksResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/RemoveBucketAlias": { + "post": { + "tags": [ + "Bucket alias" + ], + "description": "Remove an alias for the target bucket. This can be either a global or a local alias, depending on which fields are specified.", + "operationId": "RemoveBucketAlias", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BucketAliasEnum" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Returns exhaustive information about the bucket", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RemoveBucketAliasResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/RetryBlockResync": { + "post": { + "tags": [ + "Block" + ], + "description": "\nInstruct Garage node(s) to retry the resynchronization of one or several missing data block(s).\n ", + "operationId": "RetryBlockResync", + "parameters": [ + { + "name": "node", + "in": "query", + "description": "Node ID to query, or `*` for all nodes, or `self` for the node responding to the request", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LocalRetryBlockResyncRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Responses from individual cluster nodes", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MultiResponse_LocalRetryBlockResyncResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/RevertClusterLayout": { + "post": { + "tags": [ + "Cluster layout" + ], + "description": "Clear staged layout changes", + "operationId": "RevertClusterLayout", + "responses": { + "200": { + "description": "All pending changes to the cluster layout have been erased", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RevertClusterLayoutResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/SetWorkerVariable": { + "post": { + "tags": [ + "Worker" + ], + "description": "\nSet the value for a worker variable, on one or several cluster nodes.\n ", + "operationId": "SetWorkerVariable", + "parameters": [ + { + "name": "node", + "in": "query", + "description": "Node ID to query, or `*` for all nodes, or `self` for the node responding to the request", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LocalSetWorkerVariableRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Responses from individual cluster nodes", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MultiResponse_LocalSetWorkerVariableResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/UpdateAdminToken": { + "post": { + "tags": [ + "Admin API token" + ], + "description": "\nUpdates information about the specified admin API token.\n ", + "operationId": "UpdateAdminToken", + "parameters": [ + { + "name": "id", + "in": "query", + "description": "Admin API token ID", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateAdminTokenRequestBody" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Admin token has been updated", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateAdminTokenResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/UpdateBucket": { + "post": { + "tags": [ + "Bucket" + ], + "description": "\nAll fields (`websiteAccess` and `quotas`) are optional.\nIf they are present, the corresponding modifications are applied to the bucket, otherwise nothing is changed.\n\nIn `websiteAccess`: if `enabled` is `true`, `indexDocument` must be specified.\nThe field `errorDocument` is optional, if no error document is set a generic\nerror message is displayed when errors happen. Conversely, if `enabled` is\n`false`, neither `indexDocument` nor `errorDocument` must be specified.\n\nIn `quotas`: new values of `maxSize` and `maxObjects` must both be specified, or set to `null`\nto remove the quotas. An absent value will be considered the same as a `null`. It is not possible\nto change only one of the two quotas.\n ", + "operationId": "UpdateBucket", + "parameters": [ + { + "name": "id", + "in": "query", + "description": "ID of the bucket to update", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateBucketRequestBody" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Bucket has been updated", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateBucketResponse" + } + } + } + }, + "404": { + "description": "Bucket not found" + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/UpdateClusterLayout": { + "post": { + "tags": [ + "Cluster layout" + ], + "description": "\nSend modifications to the cluster layout. These modifications will be included in the staged role changes, visible in subsequent calls of `GET /GetClusterHealth`. Once the set of staged changes is satisfactory, the user may call `POST /ApplyClusterLayout` to apply the changed changes, or `POST /RevertClusterLayout` to clear all of the staged changes in the layout.\n\nSetting the capacity to `null` will configure the node as a gateway.\nOtherwise, capacity must be now set in bytes (before Garage 0.9 it was arbitrary weights).\nFor example to declare 100GB, you must set `capacity: 100000000000`.\n\nGarage uses internally the International System of Units (SI), it assumes that 1kB = 1000 bytes, and displays storage as kB, MB, GB (and not KiB, MiB, GiB that assume 1KiB = 1024 bytes).\n ", + "operationId": "UpdateClusterLayout", + "requestBody": { + "description": "\nTo add a new node to the layout or to change the configuration of an existing node, simply set the values you want (`zone`, `capacity`, and `tags`).\nTo remove a node, simply pass the `remove: true` field.\nThis logic is represented in OpenAPI with a 'One Of' object.\n\nContrary to the CLI that may update only a subset of the fields capacity, zone and tags, when calling this API all of these values must be specified.\n ", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateClusterLayoutRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Proposed changes have been added to the list of pending changes", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateClusterLayoutResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + }, + "/v2/UpdateKey": { + "post": { + "tags": [ + "Access key" + ], + "description": "\nUpdates information about the specified API access key.\n\n*Note: the secret key is not returned in the response, `null` is sent instead.*\n ", + "operationId": "UpdateKey", + "parameters": [ + { + "name": "id", + "in": "query", + "description": "Access key ID", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateKeyRequestBody" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Access key has been updated", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateKeyResponse" + } + } + } + }, + "500": { + "description": "Internal server error" + } + } + } + } + }, + "components": { + "schemas": { + "AddBucketAliasResponse": { + "$ref": "#/components/schemas/GetBucketInfoResponse" + }, + "AllowBucketKeyRequest": { + "$ref": "#/components/schemas/BucketKeyPermChangeRequest" + }, + "AllowBucketKeyResponse": { + "$ref": "#/components/schemas/GetBucketInfoResponse" + }, + "ApiBucketKeyPerm": { + "type": "object", + "properties": { + "owner": { + "type": "boolean" + }, + "read": { + "type": "boolean" + }, + "write": { + "type": "boolean" + } + } + }, + "ApiBucketQuotas": { + "type": "object", + "properties": { + "maxObjects": { + "type": [ + "integer", + "null" + ], + "format": "int64", + "minimum": 0 + }, + "maxSize": { + "type": [ + "integer", + "null" + ], + "format": "int64", + "minimum": 0 + } + } + }, + "ApplyClusterLayoutRequest": { + "type": "object", + "required": [ + "version" + ], + "properties": { + "version": { + "type": "integer", + "format": "int64", + "description": "As a safety measure, the new version number of the layout must\nbe specified here", + "minimum": 0 + } + } + }, + "ApplyClusterLayoutResponse": { + "type": "object", + "required": [ + "message", + "layout" + ], + "properties": { + "layout": { + "$ref": "#/components/schemas/GetClusterLayoutResponse", + "description": "Details about the new cluster layout" + }, + "message": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Plain-text information about the layout computation\n(do not try to parse this)" + } + } + }, + "BlockError": { + "type": "object", + "required": [ + "blockHash", + "refcount", + "errorCount", + "lastTrySecsAgo", + "nextTryInSecs" + ], + "properties": { + "blockHash": { + "type": "string" + }, + "errorCount": { + "type": "integer", + "format": "int64", + "minimum": 0 + }, + "lastTrySecsAgo": { + "type": "integer", + "format": "int64", + "minimum": 0 + }, + "nextTryInSecs": { + "type": "integer", + "format": "int64", + "minimum": 0 + }, + "refcount": { + "type": "integer", + "format": "int64", + "minimum": 0 + } + } + }, + "BlockVersion": { + "type": "object", + "required": [ + "versionId", + "refDeleted", + "versionDeleted", + "garbageCollected" + ], + "properties": { + "backlink": { + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "#/components/schemas/BlockVersionBacklink" + } + ] + }, + "garbageCollected": { + "type": "boolean" + }, + "refDeleted": { + "type": "boolean" + }, + "versionDeleted": { + "type": "boolean" + }, + "versionId": { + "type": "string" + } + } + }, + "BlockVersionBacklink": { + "oneOf": [ + { + "type": "object", + "required": [ + "object" + ], + "properties": { + "object": { + "type": "object", + "required": [ + "bucketId", + "key" + ], + "properties": { + "bucketId": { + "type": "string" + }, + "key": { + "type": "string" + } + } + } + } + }, + { + "type": "object", + "required": [ + "upload" + ], + "properties": { + "upload": { + "type": "object", + "required": [ + "uploadId", + "uploadDeleted", + "uploadGarbageCollected" + ], + "properties": { + "bucketId": { + "type": [ + "string", + "null" + ] + }, + "key": { + "type": [ + "string", + "null" + ] + }, + "uploadDeleted": { + "type": "boolean" + }, + "uploadGarbageCollected": { + "type": "boolean" + }, + "uploadId": { + "type": "string" + } + } + } + } + } + ] + }, + "BucketAliasEnum": { + "oneOf": [ + { + "type": "object", + "required": [ + "bucketId", + "globalAlias" + ], + "properties": { + "bucketId": { + "type": "string" + }, + "globalAlias": { + "type": "string" + } + } + }, + { + "type": "object", + "required": [ + "bucketId", + "localAlias", + "accessKeyId" + ], + "properties": { + "accessKeyId": { + "type": "string" + }, + "bucketId": { + "type": "string" + }, + "localAlias": { + "type": "string" + } + } + } + ] + }, + "BucketKeyPermChangeRequest": { + "type": "object", + "required": [ + "bucketId", + "accessKeyId", + "permissions" + ], + "properties": { + "accessKeyId": { + "type": "string" + }, + "bucketId": { + "type": "string" + }, + "permissions": { + "$ref": "#/components/schemas/ApiBucketKeyPerm" + } + } + }, + "BucketLocalAlias": { + "type": "object", + "required": [ + "accessKeyId", + "alias" + ], + "properties": { + "accessKeyId": { + "type": "string" + }, + "alias": { + "type": "string" + } + } + }, + "CleanupIncompleteUploadsRequest": { + "type": "object", + "required": [ + "bucketId", + "olderThanSecs" + ], + "properties": { + "bucketId": { + "type": "string" + }, + "olderThanSecs": { + "type": "integer", + "format": "int64", + "minimum": 0 + } + } + }, + "CleanupIncompleteUploadsResponse": { + "type": "object", + "required": [ + "uploadsDeleted" + ], + "properties": { + "uploadsDeleted": { + "type": "integer", + "format": "int64", + "minimum": 0 + } + } + }, + "ClusterLayoutSkipDeadNodesRequest": { + "type": "object", + "required": [ + "version", + "allowMissingData" + ], + "properties": { + "allowMissingData": { + "type": "boolean", + "description": "Allow the skip even if a quorum of nodes could not be found for\nthe data among the remaining nodes" + }, + "version": { + "type": "integer", + "format": "int64", + "description": "Version number of the layout to assume is currently up-to-date.\nThis will generally be the current layout version.", + "minimum": 0 + } + } + }, + "ClusterLayoutSkipDeadNodesResponse": { + "type": "object", + "required": [ + "ackUpdated", + "syncUpdated" + ], + "properties": { + "ackUpdated": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Nodes for which the ACK update tracker has been updated to `version`" + }, + "syncUpdated": { + "type": "array", + "items": { + "type": "string" + }, + "description": "If `allow_missing_data` is set,\nnodes for which the SYNC update tracker has been updated to `version`" + } + } + }, + "ClusterLayoutVersion": { + "type": "object", + "required": [ + "version", + "status", + "storageNodes", + "gatewayNodes" + ], + "properties": { + "gatewayNodes": { + "type": "integer", + "format": "int64", + "description": "Number of nodes with a gateway role in this layout version", + "minimum": 0 + }, + "status": { + "$ref": "#/components/schemas/ClusterLayoutVersionStatus", + "description": "Status of this layout version" + }, + "storageNodes": { + "type": "integer", + "format": "int64", + "description": "Number of nodes with an assigned storage capacity in this layout version", + "minimum": 0 + }, + "version": { + "type": "integer", + "format": "int64", + "description": "Version number of this layout version", + "minimum": 0 + } + } + }, + "ClusterLayoutVersionStatus": { + "type": "string", + "enum": [ + "Current", + "Draining", + "Historical" + ] + }, + "ConnectClusterNodesRequest": { + "type": "array", + "items": { + "type": "string" + } + }, + "ConnectClusterNodesResponse": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ConnectNodeResponse" + } + }, + "ConnectNodeResponse": { + "type": "object", + "required": [ + "success" + ], + "properties": { + "error": { + "type": [ + "string", + "null" + ], + "description": "An error message if Garage did not manage to connect to this node" + }, + "success": { + "type": "boolean", + "description": "`true` if Garage managed to connect to this node" + } + } + }, + "CreateAdminTokenResponse": { + "allOf": [ + { + "$ref": "#/components/schemas/GetAdminTokenInfoResponse" + }, + { + "type": "object", + "required": [ + "secretToken" + ], + "properties": { + "secretToken": { + "type": "string", + "description": "The secret bearer token. **CAUTION:** This token will be shown only\nONCE, so this value MUST be remembered somewhere, or the token\nwill be unusable." + } + } + } + ] + }, + "CreateBucketLocalAlias": { + "type": "object", + "required": [ + "accessKeyId", + "alias" + ], + "properties": { + "accessKeyId": { + "type": "string" + }, + "alias": { + "type": "string" + }, + "allow": { + "$ref": "#/components/schemas/ApiBucketKeyPerm" + } + } + }, + "CreateBucketRequest": { + "type": "object", + "properties": { + "globalAlias": { + "type": [ + "string", + "null" + ] + }, + "localAlias": { + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "#/components/schemas/CreateBucketLocalAlias" + } + ] + } + } + }, + "CreateBucketResponse": { + "$ref": "#/components/schemas/GetBucketInfoResponse" + }, + "CreateKeyRequest": { + "$ref": "#/components/schemas/UpdateKeyRequestBody" + }, + "CreateKeyResponse": { + "$ref": "#/components/schemas/GetKeyInfoResponse" + }, + "DenyBucketKeyRequest": { + "$ref": "#/components/schemas/BucketKeyPermChangeRequest" + }, + "DenyBucketKeyResponse": { + "$ref": "#/components/schemas/GetBucketInfoResponse" + }, + "FreeSpaceResp": { + "type": "object", + "required": [ + "available", + "total" + ], + "properties": { + "available": { + "type": "integer", + "format": "int64", + "description": "Number of bytes available", + "minimum": 0 + }, + "total": { + "type": "integer", + "format": "int64", + "description": "Total number of bytes", + "minimum": 0 + } + } + }, + "GetAdminTokenInfoResponse": { + "type": "object", + "required": [ + "name", + "expired", + "scope" + ], + "properties": { + "created": { + "type": [ + "string", + "null" + ], + "format": "date-time", + "description": "Creation date" + }, + "expiration": { + "type": [ + "string", + "null" + ], + "format": "date-time", + "description": "Expiration time and date, formatted according to RFC 3339" + }, + "expired": { + "type": "boolean", + "description": "Whether this admin token is expired already" + }, + "id": { + "type": [ + "string", + "null" + ], + "description": "Identifier of the admin token (which is also a prefix of the full bearer token)" + }, + "name": { + "type": "string", + "description": "Name of the admin API token" + }, + "scope": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Scope of the admin API token, a list of admin endpoint names (such as\n`GetClusterStatus`, etc), or the special value `*` to allow all\nadmin endpoints" + } + } + }, + "GetBucketInfoKey": { + "type": "object", + "required": [ + "accessKeyId", + "name", + "permissions", + "bucketLocalAliases" + ], + "properties": { + "accessKeyId": { + "type": "string" + }, + "bucketLocalAliases": { + "type": "array", + "items": { + "type": "string" + } + }, + "name": { + "type": "string" + }, + "permissions": { + "$ref": "#/components/schemas/ApiBucketKeyPerm" + } + } + }, + "GetBucketInfoResponse": { + "type": "object", + "required": [ + "id", + "created", + "globalAliases", + "websiteAccess", + "keys", + "objects", + "bytes", + "unfinishedUploads", + "unfinishedMultipartUploads", + "unfinishedMultipartUploadParts", + "unfinishedMultipartUploadBytes", + "quotas" + ], + "properties": { + "bytes": { + "type": "integer", + "format": "int64", + "description": "Total number of bytes used by objects in this bucket" + }, + "created": { + "type": "string", + "format": "date-time", + "description": "Bucket creation date" + }, + "globalAliases": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of global aliases for this bucket" + }, + "id": { + "type": "string", + "description": "Identifier of the bucket" + }, + "keys": { + "type": "array", + "items": { + "$ref": "#/components/schemas/GetBucketInfoKey" + }, + "description": "List of access keys that have permissions granted on this bucket" + }, + "objects": { + "type": "integer", + "format": "int64", + "description": "Number of objects in this bucket" + }, + "quotas": { + "$ref": "#/components/schemas/ApiBucketQuotas", + "description": "Quotas that apply to this bucket" + }, + "unfinishedMultipartUploadBytes": { + "type": "integer", + "format": "int64", + "description": "Total number of bytes used by unfinished multipart uploads in this bucket" + }, + "unfinishedMultipartUploadParts": { + "type": "integer", + "format": "int64", + "description": "Number of parts in unfinished multipart uploads in this bucket" + }, + "unfinishedMultipartUploads": { + "type": "integer", + "format": "int64", + "description": "Number of unfinished multipart uploads in this bucket" + }, + "unfinishedUploads": { + "type": "integer", + "format": "int64", + "description": "Number of unfinished uploads in this bucket" + }, + "websiteAccess": { + "type": "boolean", + "description": "Whether website access is enabled for this bucket" + }, + "websiteConfig": { + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "#/components/schemas/GetBucketInfoWebsiteResponse", + "description": "Website configuration for this bucket" + } + ] + } + } + }, + "GetBucketInfoWebsiteResponse": { + "type": "object", + "required": [ + "indexDocument" + ], + "properties": { + "errorDocument": { + "type": [ + "string", + "null" + ] + }, + "indexDocument": { + "type": "string" + } + } + }, + "GetClusterHealthResponse": { + "type": "object", + "required": [ + "status", + "knownNodes", + "connectedNodes", + "storageNodes", + "storageNodesUp", + "partitions", + "partitionsQuorum", + "partitionsAllOk" + ], + "properties": { + "connectedNodes": { + "type": "integer", + "description": "the number of nodes this Garage node currently has an open connection to", + "minimum": 0 + }, + "knownNodes": { + "type": "integer", + "description": "the number of nodes this Garage node has had a TCP connection to since the daemon started", + "minimum": 0 + }, + "partitions": { + "type": "integer", + "description": "the total number of partitions of the data (currently always 256)", + "minimum": 0 + }, + "partitionsAllOk": { + "type": "integer", + "description": "the number of partitions for which we are connected to all storage nodes responsible of storing it", + "minimum": 0 + }, + "partitionsQuorum": { + "type": "integer", + "description": "the number of partitions for which a quorum of write nodes is available", + "minimum": 0 + }, + "status": { + "type": "string", + "description": "One of `healthy`, `degraded` or `unavailable`:\n- `healthy`: Garage node is connected to all storage nodes\n- `degraded`: Garage node is not connected to all storage nodes, but a quorum of write nodes is available for all partitions\n- `unavailable`: a quorum of write nodes is not available for some partitions" + }, + "storageNodes": { + "type": "integer", + "description": "the number of storage nodes currently registered in the cluster layout", + "minimum": 0 + }, + "storageNodesUp": { + "type": "integer", + "description": "the number of storage nodes to which a connection is currently open", + "minimum": 0 + } + } + }, + "GetClusterLayoutHistoryResponse": { + "type": "object", + "required": [ + "currentVersion", + "minAck", + "versions" + ], + "properties": { + "currentVersion": { + "type": "integer", + "format": "int64", + "description": "The current version number of the cluster layout", + "minimum": 0 + }, + "minAck": { + "type": "integer", + "format": "int64", + "description": "All nodes in the cluster are aware of layout versions up to\nthis version number (at least)", + "minimum": 0 + }, + "updateTrackers": { + "type": [ + "object", + "null" + ], + "description": "Detailed update trackers for nodes (see\n`https://garagehq.deuxfleurs.fr/blog/2023-12-preserving-read-after-write-consistency/`)", + "additionalProperties": { + "$ref": "#/components/schemas/NodeUpdateTrackers" + }, + "propertyNames": { + "type": "string" + } + }, + "versions": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ClusterLayoutVersion" + }, + "description": "Layout version history" + } + } + }, + "GetClusterLayoutResponse": { + "type": "object", + "required": [ + "version", + "roles", + "parameters", + "partitionSize", + "stagedRoleChanges" + ], + "properties": { + "parameters": { + "$ref": "#/components/schemas/LayoutParameters", + "description": "Layout parameters used when the current layout was computed" + }, + "partitionSize": { + "type": "integer", + "format": "int64", + "description": "The size, in bytes, of one Garage partition (= a shard)", + "minimum": 0 + }, + "roles": { + "type": "array", + "items": { + "$ref": "#/components/schemas/LayoutNodeRole" + }, + "description": "List of nodes that currently have a role in the cluster layout" + }, + "stagedParameters": { + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "#/components/schemas/LayoutParameters", + "description": "Layout parameters to use when computing the next version of\nthe cluster layout" + } + ] + }, + "stagedRoleChanges": { + "type": "array", + "items": { + "$ref": "#/components/schemas/NodeRoleChange" + }, + "description": "List of nodes that will have a new role or whose role will be\nremoved in the next version of the cluster layout" + }, + "version": { + "type": "integer", + "format": "int64", + "description": "The current version number of the cluster layout", + "minimum": 0 + } + } + }, + "GetClusterStatisticsResponse": { + "type": "object", + "required": [ + "freeform" + ], + "properties": { + "freeform": { + "type": "string" + } + } + }, + "GetClusterStatusResponse": { + "type": "object", + "required": [ + "layoutVersion", + "nodes" + ], + "properties": { + "layoutVersion": { + "type": "integer", + "format": "int64", + "description": "Current version number of the cluster layout", + "minimum": 0 + }, + "nodes": { + "type": "array", + "items": { + "$ref": "#/components/schemas/NodeResp" + }, + "description": "List of nodes that are either currently connected, part of the\ncurrent cluster layout, or part of an older cluster layout that\nis still active in the cluster (being drained)." + } + } + }, + "GetCurrentAdminTokenInfoResponse": { + "$ref": "#/components/schemas/GetAdminTokenInfoResponse" + }, + "GetKeyInfoResponse": { + "type": "object", + "required": [ + "accessKeyId", + "name", + "expired", + "permissions", + "buckets" + ], + "properties": { + "accessKeyId": { + "type": "string" + }, + "buckets": { + "type": "array", + "items": { + "$ref": "#/components/schemas/KeyInfoBucketResponse" + } + }, + "created": { + "type": [ + "string", + "null" + ], + "format": "date-time" + }, + "expiration": { + "type": [ + "string", + "null" + ], + "format": "date-time" + }, + "expired": { + "type": "boolean" + }, + "name": { + "type": "string" + }, + "permissions": { + "$ref": "#/components/schemas/KeyPerm" + }, + "secretAccessKey": { + "type": [ + "string", + "null" + ] + } + } + }, + "ImportKeyRequest": { + "type": "object", + "required": [ + "accessKeyId", + "secretAccessKey" + ], + "properties": { + "accessKeyId": { + "type": "string" + }, + "name": { + "type": [ + "string", + "null" + ] + }, + "secretAccessKey": { + "type": "string" + } + } + }, + "ImportKeyResponse": { + "$ref": "#/components/schemas/GetKeyInfoResponse" + }, + "InspectObjectBlock": { + "type": "object", + "required": [ + "partNumber", + "offset", + "hash", + "size" + ], + "properties": { + "hash": { + "type": "string", + "description": "Hash (blake2 sum) of the block's data" + }, + "offset": { + "type": "integer", + "format": "int64", + "description": "Offset of this block within the part", + "minimum": 0 + }, + "partNumber": { + "type": "integer", + "format": "int64", + "description": "Part number of the part containing this block, for multipart uploads", + "minimum": 0 + }, + "size": { + "type": "integer", + "format": "int64", + "description": "Length of the blocks's data", + "minimum": 0 + } + } + }, + "InspectObjectResponse": { + "type": "object", + "required": [ + "bucketId", + "key", + "versions" + ], + "properties": { + "bucketId": { + "type": "string", + "description": "ID of the bucket containing the inspected object" + }, + "key": { + "type": "string", + "description": "Key of the inspected object" + }, + "versions": { + "type": "array", + "items": { + "$ref": "#/components/schemas/InspectObjectVersion" + }, + "description": "List of versions currently stored for this object" + } + } + }, + "InspectObjectVersion": { + "type": "object", + "required": [ + "uuid", + "timestamp", + "encrypted", + "uploading", + "aborted", + "deleteMarker", + "inline" + ], + "properties": { + "aborted": { + "type": "boolean", + "description": "Whether this is an aborted upload" + }, + "blocks": { + "type": "array", + "items": { + "$ref": "#/components/schemas/InspectObjectBlock" + }, + "description": "List of data blocks for this object version" + }, + "deleteMarker": { + "type": "boolean", + "description": "Whether this version is a delete marker (a tombstone indicating that a previous version of\nthe object has been deleted)" + }, + "encrypted": { + "type": "boolean", + "description": "Whether this object version was created with SSE-C encryption" + }, + "etag": { + "type": [ + "string", + "null" + ], + "description": "Etag of this object version" + }, + "headers": { + "type": "array", + "items": { + "type": "array", + "items": false, + "prefixItems": [ + { + "type": "string" + }, + { + "type": "string" + } + ] + }, + "description": "Metadata (HTTP headers) associated with this object version" + }, + "inline": { + "type": "boolean", + "description": "Whether the object's data is stored inline (for small objects)" + }, + "size": { + "type": [ + "integer", + "null" + ], + "format": "int64", + "description": "Size of the object, in bytes", + "minimum": 0 + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "Creation timestamp of this object version" + }, + "uploading": { + "type": "boolean", + "description": "Whether this object version is still uploading" + }, + "uuid": { + "type": "string", + "description": "Version ID" + } + } + }, + "KeyInfoBucketResponse": { + "type": "object", + "required": [ + "id", + "globalAliases", + "localAliases", + "permissions" + ], + "properties": { + "globalAliases": { + "type": "array", + "items": { + "type": "string" + } + }, + "id": { + "type": "string" + }, + "localAliases": { + "type": "array", + "items": { + "type": "string" + } + }, + "permissions": { + "$ref": "#/components/schemas/ApiBucketKeyPerm" + } + } + }, + "KeyPerm": { + "type": "object", + "properties": { + "createBucket": { + "type": "boolean" + } + } + }, + "LayoutNodeRole": { + "type": "object", + "required": [ + "id", + "zone", + "tags" + ], + "properties": { + "capacity": { + "type": [ + "integer", + "null" + ], + "format": "int64", + "description": "Capacity (in bytes) assigned by the cluster administrator,\nabsent for gateway nodes", + "minimum": 0 + }, + "id": { + "type": "string", + "description": "Identifier of the node" + }, + "storedPartitions": { + "type": [ + "integer", + "null" + ], + "format": "int64", + "description": "Number of partitions stored on this node\n(a result of the layout computation)", + "minimum": 0 + }, + "tags": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of tags assigned by the cluster administrator" + }, + "usableCapacity": { + "type": [ + "integer", + "null" + ], + "format": "int64", + "description": "Capacity (in bytes) that is actually usable on this node in the current\nlayout, which is equal to `stored_partitions` × `partition_size`", + "minimum": 0 + }, + "zone": { + "type": "string", + "description": "Zone name assigned by the cluster administrator" + } + } + }, + "LayoutParameters": { + "type": "object", + "required": [ + "zoneRedundancy" + ], + "properties": { + "zoneRedundancy": { + "$ref": "#/components/schemas/ZoneRedundancy", + "description": "Minimum number of zones in which a data partition must be replicated" + } + } + }, + "ListAdminTokensResponse": { + "type": "array", + "items": { + "$ref": "#/components/schemas/GetAdminTokenInfoResponse" + } + }, + "ListBucketsResponse": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ListBucketsResponseItem" + } + }, + "ListBucketsResponseItem": { + "type": "object", + "required": [ + "id", + "created", + "globalAliases", + "localAliases" + ], + "properties": { + "created": { + "type": "string", + "format": "date-time" + }, + "globalAliases": { + "type": "array", + "items": { + "type": "string" + } + }, + "id": { + "type": "string" + }, + "localAliases": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BucketLocalAlias" + } + } + } + }, + "ListKeysResponse": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ListKeysResponseItem" + } + }, + "ListKeysResponseItem": { + "type": "object", + "required": [ + "id", + "name", + "expired" + ], + "properties": { + "created": { + "type": [ + "string", + "null" + ], + "format": "date-time" + }, + "expiration": { + "type": [ + "string", + "null" + ], + "format": "date-time" + }, + "expired": { + "type": "boolean" + }, + "id": { + "type": "string" + }, + "name": { + "type": "string" + } + } + }, + "LocalCreateMetadataSnapshotResponse": { + "default": null + }, + "LocalGetBlockInfoRequest": { + "type": "object", + "required": [ + "blockHash" + ], + "properties": { + "blockHash": { + "type": "string" + } + } + }, + "LocalGetBlockInfoResponse": { + "type": "object", + "required": [ + "blockHash", + "refcount", + "versions" + ], + "properties": { + "blockHash": { + "type": "string" + }, + "refcount": { + "type": "integer", + "format": "int64", + "minimum": 0 + }, + "versions": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BlockVersion" + } + } + } + }, + "LocalGetNodeInfoResponse": { + "type": "object", + "required": [ + "nodeId", + "garageVersion", + "rustVersion", + "dbEngine" + ], + "properties": { + "dbEngine": { + "type": "string" + }, + "garageFeatures": { + "type": [ + "array", + "null" + ], + "items": { + "type": "string" + } + }, + "garageVersion": { + "type": "string" + }, + "nodeId": { + "type": "string" + }, + "rustVersion": { + "type": "string" + } + } + }, + "LocalGetNodeStatisticsResponse": { + "type": "object", + "required": [ + "freeform" + ], + "properties": { + "freeform": { + "type": "string" + } + } + }, + "LocalGetWorkerInfoRequest": { + "type": "object", + "required": [ + "id" + ], + "properties": { + "id": { + "type": "integer", + "format": "int64", + "minimum": 0 + } + } + }, + "LocalGetWorkerInfoResponse": { + "$ref": "#/components/schemas/WorkerInfoResp" + }, + "LocalGetWorkerVariableRequest": { + "type": "object", + "properties": { + "variable": { + "type": [ + "string", + "null" + ] + } + } + }, + "LocalGetWorkerVariableResponse": { + "type": "object", + "additionalProperties": { + "type": "string" + }, + "propertyNames": { + "type": "string" + } + }, + "LocalLaunchRepairOperationRequest": { + "type": "object", + "required": [ + "repairType" + ], + "properties": { + "repairType": { + "$ref": "#/components/schemas/RepairType" + } + } + }, + "LocalLaunchRepairOperationResponse": { + "default": null + }, + "LocalListBlockErrorsResponse": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BlockError" + } + }, + "LocalListWorkersRequest": { + "type": "object", + "properties": { + "busyOnly": { + "type": "boolean" + }, + "errorOnly": { + "type": "boolean" + } + } + }, + "LocalListWorkersResponse": { + "type": "array", + "items": { + "$ref": "#/components/schemas/WorkerInfoResp" + } + }, + "LocalPurgeBlocksRequest": { + "type": "array", + "items": { + "type": "string" + } + }, + "LocalPurgeBlocksResponse": { + "type": "object", + "required": [ + "blocksPurged", + "objectsDeleted", + "uploadsDeleted", + "versionsDeleted", + "blockRefsPurged" + ], + "properties": { + "blockRefsPurged": { + "type": "integer", + "format": "int64", + "minimum": 0 + }, + "blocksPurged": { + "type": "integer", + "format": "int64", + "minimum": 0 + }, + "objectsDeleted": { + "type": "integer", + "format": "int64", + "minimum": 0 + }, + "uploadsDeleted": { + "type": "integer", + "format": "int64", + "minimum": 0 + }, + "versionsDeleted": { + "type": "integer", + "format": "int64", + "minimum": 0 + } + } + }, + "LocalRetryBlockResyncRequest": { + "oneOf": [ + { + "type": "object", + "required": [ + "all" + ], + "properties": { + "all": { + "type": "boolean" + } + } + }, + { + "type": "object", + "required": [ + "blockHashes" + ], + "properties": { + "blockHashes": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + ] + }, + "LocalRetryBlockResyncResponse": { + "type": "object", + "required": [ + "count" + ], + "properties": { + "count": { + "type": "integer", + "format": "int64", + "minimum": 0 + } + } + }, + "LocalSetWorkerVariableRequest": { + "type": "object", + "required": [ + "variable", + "value" + ], + "properties": { + "value": { + "type": "string" + }, + "variable": { + "type": "string" + } + } + }, + "LocalSetWorkerVariableResponse": { + "type": "object", + "required": [ + "variable", + "value" + ], + "properties": { + "value": { + "type": "string" + }, + "variable": { + "type": "string" + } + } + }, + "MultiResponse_LocalCreateMetadataSnapshotResponse": { + "type": "object", + "required": [ + "success", + "error" + ], + "properties": { + "error": { + "type": "object", + "description": "Map of node id to error message, for nodes that were unable to complete the API\ncall", + "additionalProperties": { + "type": "string" + }, + "propertyNames": { + "type": "string" + } + }, + "success": { + "type": "object", + "description": "Map of node id to response returned by this node, for nodes that were able to\nsuccessfully complete the API call", + "additionalProperties": { + "default": null + }, + "propertyNames": { + "type": "string" + } + } + } + }, + "MultiResponse_LocalGetBlockInfoResponse": { + "type": "object", + "required": [ + "success", + "error" + ], + "properties": { + "error": { + "type": "object", + "description": "Map of node id to error message, for nodes that were unable to complete the API\ncall", + "additionalProperties": { + "type": "string" + }, + "propertyNames": { + "type": "string" + } + }, + "success": { + "type": "object", + "description": "Map of node id to response returned by this node, for nodes that were able to\nsuccessfully complete the API call", + "additionalProperties": { + "type": "object", + "required": [ + "blockHash", + "refcount", + "versions" + ], + "properties": { + "blockHash": { + "type": "string" + }, + "refcount": { + "type": "integer", + "format": "int64", + "minimum": 0 + }, + "versions": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BlockVersion" + } + } + } + }, + "propertyNames": { + "type": "string" + } + } + } + }, + "MultiResponse_LocalGetNodeInfoResponse": { + "type": "object", + "required": [ + "success", + "error" + ], + "properties": { + "error": { + "type": "object", + "description": "Map of node id to error message, for nodes that were unable to complete the API\ncall", + "additionalProperties": { + "type": "string" + }, + "propertyNames": { + "type": "string" + } + }, + "success": { + "type": "object", + "description": "Map of node id to response returned by this node, for nodes that were able to\nsuccessfully complete the API call", + "additionalProperties": { + "type": "object", + "required": [ + "nodeId", + "garageVersion", + "rustVersion", + "dbEngine" + ], + "properties": { + "dbEngine": { + "type": "string" + }, + "garageFeatures": { + "type": [ + "array", + "null" + ], + "items": { + "type": "string" + } + }, + "garageVersion": { + "type": "string" + }, + "nodeId": { + "type": "string" + }, + "rustVersion": { + "type": "string" + } + } + }, + "propertyNames": { + "type": "string" + } + } + } + }, + "MultiResponse_LocalGetNodeStatisticsResponse": { + "type": "object", + "required": [ + "success", + "error" + ], + "properties": { + "error": { + "type": "object", + "description": "Map of node id to error message, for nodes that were unable to complete the API\ncall", + "additionalProperties": { + "type": "string" + }, + "propertyNames": { + "type": "string" + } + }, + "success": { + "type": "object", + "description": "Map of node id to response returned by this node, for nodes that were able to\nsuccessfully complete the API call", + "additionalProperties": { + "type": "object", + "required": [ + "freeform" + ], + "properties": { + "freeform": { + "type": "string" + } + } + }, + "propertyNames": { + "type": "string" + } + } + } + }, + "MultiResponse_LocalGetWorkerInfoResponse": { + "type": "object", + "required": [ + "success", + "error" + ], + "properties": { + "error": { + "type": "object", + "description": "Map of node id to error message, for nodes that were unable to complete the API\ncall", + "additionalProperties": { + "type": "string" + }, + "propertyNames": { + "type": "string" + } + }, + "success": { + "type": "object", + "description": "Map of node id to response returned by this node, for nodes that were able to\nsuccessfully complete the API call", + "additionalProperties": { + "$ref": "#/components/schemas/WorkerInfoResp" + }, + "propertyNames": { + "type": "string" + } + } + } + }, + "MultiResponse_LocalGetWorkerVariableResponse": { + "type": "object", + "required": [ + "success", + "error" + ], + "properties": { + "error": { + "type": "object", + "description": "Map of node id to error message, for nodes that were unable to complete the API\ncall", + "additionalProperties": { + "type": "string" + }, + "propertyNames": { + "type": "string" + } + }, + "success": { + "type": "object", + "description": "Map of node id to response returned by this node, for nodes that were able to\nsuccessfully complete the API call", + "additionalProperties": { + "type": "object", + "additionalProperties": { + "type": "string" + }, + "propertyNames": { + "type": "string" + } + }, + "propertyNames": { + "type": "string" + } + } + } + }, + "MultiResponse_LocalLaunchRepairOperationResponse": { + "type": "object", + "required": [ + "success", + "error" + ], + "properties": { + "error": { + "type": "object", + "description": "Map of node id to error message, for nodes that were unable to complete the API\ncall", + "additionalProperties": { + "type": "string" + }, + "propertyNames": { + "type": "string" + } + }, + "success": { + "type": "object", + "description": "Map of node id to response returned by this node, for nodes that were able to\nsuccessfully complete the API call", + "additionalProperties": { + "default": null + }, + "propertyNames": { + "type": "string" + } + } + } + }, + "MultiResponse_LocalListBlockErrorsResponse": { + "type": "object", + "required": [ + "success", + "error" + ], + "properties": { + "error": { + "type": "object", + "description": "Map of node id to error message, for nodes that were unable to complete the API\ncall", + "additionalProperties": { + "type": "string" + }, + "propertyNames": { + "type": "string" + } + }, + "success": { + "type": "object", + "description": "Map of node id to response returned by this node, for nodes that were able to\nsuccessfully complete the API call", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BlockError" + } + }, + "propertyNames": { + "type": "string" + } + } + } + }, + "MultiResponse_LocalListWorkersResponse": { + "type": "object", + "required": [ + "success", + "error" + ], + "properties": { + "error": { + "type": "object", + "description": "Map of node id to error message, for nodes that were unable to complete the API\ncall", + "additionalProperties": { + "type": "string" + }, + "propertyNames": { + "type": "string" + } + }, + "success": { + "type": "object", + "description": "Map of node id to response returned by this node, for nodes that were able to\nsuccessfully complete the API call", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/components/schemas/WorkerInfoResp" + } + }, + "propertyNames": { + "type": "string" + } + } + } + }, + "MultiResponse_LocalPurgeBlocksResponse": { + "type": "object", + "required": [ + "success", + "error" + ], + "properties": { + "error": { + "type": "object", + "description": "Map of node id to error message, for nodes that were unable to complete the API\ncall", + "additionalProperties": { + "type": "string" + }, + "propertyNames": { + "type": "string" + } + }, + "success": { + "type": "object", + "description": "Map of node id to response returned by this node, for nodes that were able to\nsuccessfully complete the API call", + "additionalProperties": { + "type": "object", + "required": [ + "blocksPurged", + "objectsDeleted", + "uploadsDeleted", + "versionsDeleted", + "blockRefsPurged" + ], + "properties": { + "blockRefsPurged": { + "type": "integer", + "format": "int64", + "minimum": 0 + }, + "blocksPurged": { + "type": "integer", + "format": "int64", + "minimum": 0 + }, + "objectsDeleted": { + "type": "integer", + "format": "int64", + "minimum": 0 + }, + "uploadsDeleted": { + "type": "integer", + "format": "int64", + "minimum": 0 + }, + "versionsDeleted": { + "type": "integer", + "format": "int64", + "minimum": 0 + } + } + }, + "propertyNames": { + "type": "string" + } + } + } + }, + "MultiResponse_LocalRetryBlockResyncResponse": { + "type": "object", + "required": [ + "success", + "error" + ], + "properties": { + "error": { + "type": "object", + "description": "Map of node id to error message, for nodes that were unable to complete the API\ncall", + "additionalProperties": { + "type": "string" + }, + "propertyNames": { + "type": "string" + } + }, + "success": { + "type": "object", + "description": "Map of node id to response returned by this node, for nodes that were able to\nsuccessfully complete the API call", + "additionalProperties": { + "type": "object", + "required": [ + "count" + ], + "properties": { + "count": { + "type": "integer", + "format": "int64", + "minimum": 0 + } + } + }, + "propertyNames": { + "type": "string" + } + } + } + }, + "MultiResponse_LocalSetWorkerVariableResponse": { + "type": "object", + "required": [ + "success", + "error" + ], + "properties": { + "error": { + "type": "object", + "description": "Map of node id to error message, for nodes that were unable to complete the API\ncall", + "additionalProperties": { + "type": "string" + }, + "propertyNames": { + "type": "string" + } + }, + "success": { + "type": "object", + "description": "Map of node id to response returned by this node, for nodes that were able to\nsuccessfully complete the API call", + "additionalProperties": { + "type": "object", + "required": [ + "variable", + "value" + ], + "properties": { + "value": { + "type": "string" + }, + "variable": { + "type": "string" + } + } + }, + "propertyNames": { + "type": "string" + } + } + } + }, + "NodeAssignedRole": { + "type": "object", + "required": [ + "zone", + "tags" + ], + "properties": { + "capacity": { + "type": [ + "integer", + "null" + ], + "format": "int64", + "description": "Capacity (in bytes) assigned by the cluster administrator,\nabsent for gateway nodes", + "minimum": 0 + }, + "tags": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of tags assigned by the cluster administrator" + }, + "zone": { + "type": "string", + "description": "Zone name assigned by the cluster administrator" + } + } + }, + "NodeResp": { + "type": "object", + "required": [ + "id", + "isUp", + "draining" + ], + "properties": { + "addr": { + "type": [ + "string", + "null" + ], + "description": "Socket address used by other nodes to connect to this node for RPC" + }, + "dataPartition": { + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "#/components/schemas/FreeSpaceResp", + "description": "Total and available space on the disk partition(s) containing the data\ndirectory(ies)" + } + ] + }, + "draining": { + "type": "boolean", + "description": "Whether this node is part of an older layout version and is draining data." + }, + "garageVersion": { + "type": [ + "string", + "null" + ], + "description": "Garage version" + }, + "hostname": { + "type": [ + "string", + "null" + ], + "description": "Hostname of the node" + }, + "id": { + "type": "string", + "description": "Full-length node identifier" + }, + "isUp": { + "type": "boolean", + "description": "Whether this node is connected in the cluster" + }, + "lastSeenSecsAgo": { + "type": [ + "integer", + "null" + ], + "format": "int64", + "description": "For disconnected nodes, the number of seconds since last contact,\nor `null` if no contact was established since Garage restarted.", + "minimum": 0 + }, + "metadataPartition": { + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "#/components/schemas/FreeSpaceResp", + "description": "Total and available space on the disk partition containing the\nmetadata directory" + } + ] + }, + "role": { + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "#/components/schemas/NodeAssignedRole", + "description": "Role assigned to this node in the current cluster layout" + } + ] + } + } + }, + "NodeRoleChange": { + "allOf": [ + { + "$ref": "#/components/schemas/NodeRoleChangeEnum" + }, + { + "type": "object", + "required": [ + "id" + ], + "properties": { + "id": { + "type": "string", + "description": "ID of the node for which this change applies" + } + } + } + ] + }, + "NodeRoleChangeEnum": { + "oneOf": [ + { + "type": "object", + "required": [ + "remove" + ], + "properties": { + "remove": { + "type": "boolean", + "description": "Set `remove` to `true` to remove the node from the layout" + } + } + }, + { + "$ref": "#/components/schemas/NodeAssignedRole" + } + ] + }, + "NodeRoleChangeRequest": { + "oneOf": [ + { + "type": "object", + "required": [ + "id", + "remove" + ], + "properties": { + "id": { + "type": "string", + "description": "ID of the node for which this change applies" + }, + "remove": { + "type": "boolean", + "description": "Set `remove` to `true` to remove the node from the layout" + } + } + }, + { + "allOf": [ + { + "$ref": "#/components/schemas/NodeAssignedRole" + }, + { + "type": "object", + "required": [ + "id" + ], + "properties": { + "id": { + "type": "string", + "description": "ID of the node for which this change applies" + } + } + } + ] + } + ] + }, + "NodeUpdateTrackers": { + "type": "object", + "required": [ + "ack", + "sync", + "syncAck" + ], + "properties": { + "ack": { + "type": "integer", + "format": "int64", + "minimum": 0 + }, + "sync": { + "type": "integer", + "format": "int64", + "minimum": 0 + }, + "syncAck": { + "type": "integer", + "format": "int64", + "minimum": 0 + } + } + }, + "PreviewClusterLayoutChangesResponse": { + "oneOf": [ + { + "type": "object", + "required": [ + "error" + ], + "properties": { + "error": { + "type": "string", + "description": "Error message indicating that the layout could not be computed\nwith the provided configuration" + } + } + }, + { + "type": "object", + "required": [ + "message", + "newLayout" + ], + "properties": { + "message": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Plain-text information about the layout computation\n(do not try to parse this)" + }, + "newLayout": { + "$ref": "#/components/schemas/GetClusterLayoutResponse", + "description": "Details about the new cluster layout" + } + } + } + ] + }, + "RemoveBucketAliasResponse": { + "$ref": "#/components/schemas/GetBucketInfoResponse" + }, + "RepairType": { + "oneOf": [ + { + "type": "string", + "enum": [ + "tables" + ] + }, + { + "type": "string", + "enum": [ + "blocks" + ] + }, + { + "type": "string", + "enum": [ + "versions" + ] + }, + { + "type": "string", + "enum": [ + "multipartUploads" + ] + }, + { + "type": "string", + "enum": [ + "blockRefs" + ] + }, + { + "type": "string", + "enum": [ + "blockRc" + ] + }, + { + "type": "string", + "enum": [ + "rebalance" + ] + }, + { + "type": "object", + "required": [ + "scrub" + ], + "properties": { + "scrub": { + "$ref": "#/components/schemas/ScrubCommand" + } + } + }, + { + "type": "string", + "enum": [ + "aliases" + ] + }, + { + "type": "string", + "enum": [ + "clearResyncQueue" + ] + } + ] + }, + "RevertClusterLayoutResponse": { + "$ref": "#/components/schemas/GetClusterLayoutResponse" + }, + "ScrubCommand": { + "type": "string", + "enum": [ + "start", + "pause", + "resume", + "cancel" + ] + }, + "UpdateAdminTokenRequestBody": { + "type": "object", + "properties": { + "expiration": { + "type": [ + "string", + "null" + ], + "format": "date-time", + "description": "Expiration time and date, formatted according to RFC 3339" + }, + "name": { + "type": [ + "string", + "null" + ], + "description": "Name of the admin API token" + }, + "neverExpires": { + "type": "boolean", + "description": "Set the admin token to never expire" + }, + "scope": { + "type": [ + "array", + "null" + ], + "items": { + "type": "string" + }, + "description": "Scope of the admin API token, a list of admin endpoint names (such as\n`GetClusterStatus`, etc), or the special value `*` to allow all\nadmin endpoints. **WARNING:** Granting a scope of `CreateAdminToken` or\n`UpdateAdminToken` trivially allows for privilege escalation, and is thus\nfunctionnally equivalent to granting a scope of `*`." + } + } + }, + "UpdateAdminTokenResponse": { + "$ref": "#/components/schemas/GetAdminTokenInfoResponse" + }, + "UpdateBucketRequestBody": { + "type": "object", + "properties": { + "quotas": { + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "#/components/schemas/ApiBucketQuotas" + } + ] + }, + "websiteAccess": { + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "#/components/schemas/UpdateBucketWebsiteAccess" + } + ] + } + } + }, + "UpdateBucketResponse": { + "$ref": "#/components/schemas/GetBucketInfoResponse" + }, + "UpdateBucketWebsiteAccess": { + "type": "object", + "required": [ + "enabled" + ], + "properties": { + "enabled": { + "type": "boolean" + }, + "errorDocument": { + "type": [ + "string", + "null" + ] + }, + "indexDocument": { + "type": [ + "string", + "null" + ] + } + } + }, + "UpdateClusterLayoutRequest": { + "type": "object", + "properties": { + "parameters": { + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "#/components/schemas/LayoutParameters", + "description": "New layout computation parameters to use" + } + ] + }, + "roles": { + "type": "array", + "items": { + "$ref": "#/components/schemas/NodeRoleChangeRequest" + }, + "description": "New node roles to assign or remove in the cluster layout" + } + } + }, + "UpdateClusterLayoutResponse": { + "$ref": "#/components/schemas/GetClusterLayoutResponse" + }, + "UpdateKeyRequestBody": { + "type": "object", + "properties": { + "allow": { + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "#/components/schemas/KeyPerm", + "description": "Permissions to allow for the key" + } + ] + }, + "deny": { + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "#/components/schemas/KeyPerm", + "description": "Permissions to deny for the key" + } + ] + }, + "expiration": { + "type": [ + "string", + "null" + ], + "format": "date-time", + "description": "Expiration time and date, formatted according to RFC 3339" + }, + "name": { + "type": [ + "string", + "null" + ], + "description": "Name of the API key" + }, + "neverExpires": { + "type": "boolean", + "description": "Set the access key to never expire" + } + } + }, + "UpdateKeyResponse": { + "$ref": "#/components/schemas/GetKeyInfoResponse" + }, + "WorkerInfoResp": { + "type": "object", + "required": [ + "id", + "name", + "state", + "errors", + "consecutiveErrors", + "freeform" + ], + "properties": { + "consecutiveErrors": { + "type": "integer", + "format": "int64", + "minimum": 0 + }, + "errors": { + "type": "integer", + "format": "int64", + "minimum": 0 + }, + "freeform": { + "type": "array", + "items": { + "type": "string" + } + }, + "id": { + "type": "integer", + "format": "int64", + "minimum": 0 + }, + "lastError": { + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "#/components/schemas/WorkerLastError" + } + ] + }, + "name": { + "type": "string" + }, + "persistentErrors": { + "type": [ + "integer", + "null" + ], + "format": "int64", + "minimum": 0 + }, + "progress": { + "type": [ + "string", + "null" + ] + }, + "queueLength": { + "type": [ + "integer", + "null" + ], + "format": "int64", + "minimum": 0 + }, + "state": { + "$ref": "#/components/schemas/WorkerStateResp" + }, + "tranquility": { + "type": [ + "integer", + "null" + ], + "format": "int32", + "minimum": 0 + } + } + }, + "WorkerLastError": { + "type": "object", + "required": [ + "message", + "secsAgo" + ], + "properties": { + "message": { + "type": "string" + }, + "secsAgo": { + "type": "integer", + "format": "int64", + "minimum": 0 + } + } + }, + "WorkerStateResp": { + "oneOf": [ + { + "type": "string", + "enum": [ + "busy" + ] + }, + { + "type": "object", + "required": [ + "throttled" + ], + "properties": { + "throttled": { + "type": "object", + "required": [ + "durationSecs" + ], + "properties": { + "durationSecs": { + "type": "number", + "format": "float" + } + } + } + } + }, + { + "type": "string", + "enum": [ + "idle" + ] + }, + { + "type": "string", + "enum": [ + "done" + ] + } + ] + }, + "ZoneRedundancy": { + "oneOf": [ + { + "type": "object", + "description": "Partitions must be replicated in at least this number of\ndistinct zones.", + "required": [ + "atLeast" + ], + "properties": { + "atLeast": { + "type": "integer", + "description": "Partitions must be replicated in at least this number of\ndistinct zones.", + "minimum": 0 + } + } + }, + { + "type": "string", + "description": "Partitions must be replicated in as many zones as possible:\nas many zones as there are replicas, if there are enough distinct\nzones, or at least one in each zone otherwise.", + "enum": [ + "maximum" + ] + } + ] + } + }, + "securitySchemes": { + "bearerAuth": { + "type": "http", + "scheme": "bearer" + } + } + }, + "security": [ + { + "bearerAuth": [] + } + ] +} diff --git a/doc/book/build/_index.md b/doc/book/build/_index.md index 021045aa..6a01ef57 100644 --- a/doc/book/build/_index.md +++ b/doc/book/build/_index.md @@ -51,4 +51,4 @@ We are currently building this SDK for [Python](@/documentation/build/python.md# More information: - [In the reference manual](@/documentation/reference-manual/admin-api.md) - - [Full specifiction](https://garagehq.deuxfleurs.fr/api/garage-admin-v0.html) + - [Full specification](https://garagehq.deuxfleurs.fr/api/garage-admin-v0.html) diff --git a/doc/book/build/others.md b/doc/book/build/others.md index 341e82d5..df055e79 100644 --- a/doc/book/build/others.md +++ b/doc/book/build/others.md @@ -5,13 +5,13 @@ weight = 99 ## S3 -If you are developping a new application, you may want to use Garage to store your user's media. +If you are developing a new application, you may want to use Garage to store your user's media. The S3 API that Garage uses is a standard REST API, so as long as you can make HTTP requests, you can query it. You can check the [S3 REST API Reference](https://docs.aws.amazon.com/AmazonS3/latest/API/API_Operations_Amazon_Simple_Storage_Service.html) from Amazon to learn more. -Developping your own wrapper around the REST API is time consuming and complicated. -Instead, there are some libraries already avalaible. +Developing your own wrapper around the REST API is time consuming and complicated. +Instead, there are some libraries already available. Some of them are maintained by Amazon, some by Minio, others by the community. diff --git a/doc/book/connect/_index.md b/doc/book/connect/_index.md index 7d8e686c..497f97a9 100644 --- a/doc/book/connect/_index.md +++ b/doc/book/connect/_index.md @@ -23,7 +23,7 @@ To configure S3-compatible software to interact with Garage, you will need the following parameters: - An **API endpoint**: this corresponds to the HTTP or HTTPS address - used to contact the Garage server. When runing Garage locally this will usually + used to contact the Garage server. When running Garage locally this will usually be `http://127.0.0.1:3900`. In a real-world setting, you would usually have a reverse-proxy that adds TLS support and makes your Garage server available under a public hostname such as `https://garage.example.com`. diff --git a/doc/book/connect/apps/index.md b/doc/book/connect/apps/index.md index f52d434b..627f1842 100644 --- a/doc/book/connect/apps/index.md +++ b/doc/book/connect/apps/index.md @@ -12,8 +12,9 @@ In this section, we cover the following web applications: | [Mastodon](#mastodon) | ✅ | Natively supported | | [Matrix](#matrix) | ✅ | Tested with `synapse-s3-storage-provider` | | [ejabberd](#ejabberd) | ✅ | `mod_s3_upload` | -| [Pixelfed](#pixelfed) | ✅ | Natively supported | -| [Pleroma](#pleroma) | ❓ | Not yet tested | +| [Ente](#ente) | ✅ | Natively supported | +| [Pixelfed](#pixelfed) | ❓ | Natively supported | +| [Pleroma](#pleroma) | ✅ | Natively supported | | [Lemmy](#lemmy) | ✅ | Supported with pict-rs | | [Funkwhale](#funkwhale) | ❓ | Not yet tested | | [Misskey](#misskey) | ❓ | Not yet tested | @@ -53,7 +54,7 @@ garage bucket allow nextcloud --read --write --key nextcloud-key Now edit your Nextcloud configuration file to enable object storage. On my installation, the config. file is located at the following path: `/var/www/nextcloud/config/config.php`. -We will add a new root key to the `$CONFIG` dictionnary named `objectstore`: +We will add a new root key to the `$CONFIG` dictionary named `objectstore`: ```php + user: + password: + +s3: + # Override the primary and secondary hot storage. The commented out values + # are the defaults. + # + hot_storage: + primary: b2-eu-cen + # secondary: wasabi-eu-central-2-v3 + + # If true, enable some workarounds to allow us to use a local minio instance + # for object storage. + # + # 1. Disable SSL. + # 2. Use "path" style S3 URLs (see `use_path_style_urls` below). + # 3. Directly download the file during replication instead of going via the + # Cloudflare worker. + # 4. Do not specify storage classes when uploading objects (since minio does + # not support them, specifically it doesn't support GLACIER). + are_local_buckets: true + + # To use "path" style S3 URLs instead of DNS-based bucket access + # default to true if you set "are_local_buckets: true" + # use_path_style_urls: true + + b2-eu-cen: # Don't change this key, it is hardcoded + key: + secret: + endpoint: garage:3900 # publicly accessible endpoint of your garage instance + region: garage + bucket: + use_path_style: true + # you can specify secondary locations, names are hardcoded as well + # wasabi-eu-central-2-v3: + # scw-eu-fr-v3: + + # and you can also specify a bucket to be used for embeddings, preview etc.. + # default to the first bucket + # derived-storage: wasabi-eu-central-2-derived +``` + +Finally you can run it with Docker : + +```bash +docker run -d --name ente-server --restart unless-stopped -v /path/to/museum.yaml:/museum.yaml -v /path/to/credentials.yaml:/credentials.yaml -p 8080:8080 ghcr.io/ente-io/ente-server +``` + +For more information on deployment you can check the [ente documentation](https://help.ente.io/self-hosting/) + ## Pixelfed [Pixelfed Technical Documentation > Configuration](https://docs.pixelfed.org/technical-documentation/env.html#filesystem) ## Pleroma -[Pleroma Documentation > Pleroma.Uploaders.S3](https://docs-develop.pleroma.social/backend/configuration/cheatsheet/#pleromauploaderss3) +### Creating your bucket + +This is the usual Garage setup: + +```bash +garage key new --name pleroma-key +garage bucket create pleroma +garage bucket allow pleroma --read --write --owner --key pleroma-key +``` + +We also need to expose these buckets publicly to serve their content to users: + +```bash +garage bucket website --allow pleroma +``` + +Note the Key ID and Secret Key. + +### Configure Pleroma + +Update your Pleroma configuration like that in `/etc/pleroma/config.exs`. + +``` +config :pleroma, Pleroma.Upload, + uploader: Pleroma.Uploaders.S3, + base_url: "https://pleroma.garage.example.tld" + +config :ex_aws, :s3, + access_key_id: "GW...", + secret_access_key: "XXX", + region: "garage", + host: "api.garage.example.tld" +``` + +And restart Pleroma. + +You can found more information in [Pleroma Documentation > Pleroma.Uploaders.S3](https://docs-develop.pleroma.social/backend/configuration/cheatsheet/#pleromauploaderss3) + +### Migrating your data + +Pleroma have an internal migration tool that can encounter some fatal error + +``` +** (EXIT from #PID<0.98.0>) an exception was raised: + ** (File.Error) could not stream "/var/lib/pleroma/uploads/09/f8": illegal operation on a directory + (elixir 1.17.3) lib/file/stream.ex:100: anonymous fn/3 in Enumerable.File.Stream.reduce/3 + (elixir 1.17.3) lib/stream.ex:1675: anonymous fn/5 in Stream.resource/3 + (elixir 1.17.3) lib/stream.ex:1891: Enumerable.Stream.do_each/4 + (elixir 1.17.3) lib/task/supervised.ex:370: Task.Supervised.stream_reduce/7 + (elixir 1.17.3) lib/enum.ex:4423: Enum.map/2 + (ex_aws_s3 2.5.8) lib/ex_aws/s3/upload.ex:141: ExAws.Operation.ExAws.S3.Upload.perform/2 + (pleroma 2.10.0) lib/pleroma/uploaders/s3.ex:60: Pleroma.Uploaders.S3.put_file/1 + (pleroma 2.10.0) lib/pleroma/uploaders/uploader.ex:49: Pleroma.Uploaders.Uploader.put_file/2 +``` + +So, use [your best tool](https://garagehq.deuxfleurs.fr/documentation/connect/cli/) to sync `/var/lib/pleroma/uploads/` in your S3. + +Then, to avoid some non existent problem (just in case of), run this command + +```bash +while true +do + rm -vr $(./bin/pleroma_ctl uploads migrate_local S3 2>&1 | grep "could not stream" | awk -F '"' '{print $2}') + sleep 5 +done +``` + +If you have many files, stop this command sometime and the command bellow (interactive) to delete local +file after upload. Then restart the loop. + +```bash +./bin/pleroma_ctl uploads migrate_local S3 --delete +``` + +And *voilà* ## Lemmy diff --git a/doc/book/connect/backup.md b/doc/book/connect/backup.md index 7e97d777..dba6900d 100644 --- a/doc/book/connect/backup.md +++ b/doc/book/connect/backup.md @@ -207,3 +207,13 @@ $ plakar at @garageS3 ls ``` More information in Plakar documentation: https://www.plakar.io/docs/main/quickstart/ + +## Synology HyperBackup + +HyperBackup can be configured to upload backups to garage using a custom S3 destination. However, the HyperBackup client hardcodes the `us-east-1` region that is a critical input to the v4 signature process. If garage is not set to `us-east-1`, HyperBackup will recognize available buckets, but fail during the final setup stage. + +In garage.toml: +```toml +[s3_api] +s3_region = "us-east-1" +``` diff --git a/doc/book/connect/cli.md b/doc/book/connect/cli.md index 6529e4b2..f52cc205 100644 --- a/doc/book/connect/cli.md +++ b/doc/book/connect/cli.md @@ -41,7 +41,7 @@ Some commands: # list buckets mc ls garage/ -# list objets in a bucket +# list objects in a bucket mc ls garage/my_files # copy from your filesystem to garage @@ -149,6 +149,15 @@ rclone help This will tremendously accelerate operations such as `rclone sync` or `rclone ncdu` by reducing the number of ListObjects calls that are made. +**Garage behind Cloudflare proxy:** when running Garage behind Cloudflare proxy, you might see `Response: error 403 Forbidden, Forbidden: Invalid signature` error in your garage logs or `AccessDenied: Forbidden: Invalid signature` error in rclone logs. Try adding `--s3-sign-accept-encoding=false` flag to your rclone command and see if the issue is resolved. + +```bash +# this throws an error +rclone lsd garage: + +# this should work +rclone lsd --s3-sign-accept-encoding=false garage: +``` ## `s3cmd` @@ -209,7 +218,7 @@ Within Cyberduck, a available within the `Preferences -> Profiles` section. This can enabled and then connections to Garage may be configured. -### Instuctions for the CLI +### Instructions for the CLI To configure duck (Cyberduck's CLI tool), start by creating its folder hierarchy: @@ -314,4 +323,3 @@ ls ``` And through the web interface at http://[::1]:8080/web/client - diff --git a/doc/book/connect/repositories.md b/doc/book/connect/repositories.md index 537b02e7..0ae79b1e 100644 --- a/doc/book/connect/repositories.md +++ b/doc/book/connect/repositories.md @@ -201,11 +201,9 @@ on the binary cache, the client will download the result from the cache instead ### Channels -Channels additionnaly serve Nix definitions, ie. a `.nix` file referencing +Channels additionally serve Nix definitions, ie. a `.nix` file referencing all the derivations you want to serve. ## Gitlab *External link:* [Gitlab Documentation > Object storage](https://docs.gitlab.com/ee/administration/object_storage.html) - - diff --git a/doc/book/cookbook/ansible.md b/doc/book/cookbook/ansible.md index 8b0d2969..8d86a7d1 100644 --- a/doc/book/cookbook/ansible.md +++ b/doc/book/cookbook/ansible.md @@ -8,12 +8,12 @@ have published Ansible roles. We list them and compare them below. ## Comparison of Ansible roles -| Feature | [ansible-role-garage](#zorun-ansible-role-garage) | [garage-docker-ansible-deploy](#moan0s-garage-docker-ansible-deploy) | [eddster ansible-role-garage](#eddster-ansible-role-garage) | +| Feature | [ansible-role-garage](#zorun-ansible-role-garage) | [garage-docker-ansible-deploy](#moan0s-garage-docker-ansible-deploy) | [eddster2309 ansible-role-garage](#eddster2309-ansible-role-garage) | |------------------------------------|---------------------------------------------|---------------------------------------------------------------|---------------------------------| | **Runtime** | Systemd | Docker | Systemd | | **Target OS** | Any Linux | Any Linux | Any Linux | | **Architecture** | amd64, arm64, i686 | amd64, arm64 | arm64, arm, 386, amd64 | -| **Additional software** | None | Traefik | Ngnix and Keepalived (optional) | +| **Additional software** | None | Traefik | Nginx and Keepalived (optional) | | **Automatic node connection** | ❌ | ✅ | ✅ | | **Layout management** | ❌ | ✅ | ✅ | | **Manage buckets & keys** | ❌ | ✅ (basic) | ✅ | diff --git a/doc/book/cookbook/binary-packages.md b/doc/book/cookbook/binary-packages.md index ce6beb7b..1e399764 100644 --- a/doc/book/cookbook/binary-packages.md +++ b/doc/book/cookbook/binary-packages.md @@ -29,6 +29,10 @@ it's stable). Garage is available in the official repositories under [extra](https://archlinux.org/packages/extra/x86_64/garage). +```bash +pacman -S garage +``` + ## FreeBSD ```bash @@ -40,3 +44,9 @@ pkg install garage ```bash nix-shell -p garage ``` + +## conda-forge + +```bash +pixi global install garage +``` diff --git a/doc/book/cookbook/encryption.md b/doc/book/cookbook/encryption.md index bfbea0ec..13da4bd7 100644 --- a/doc/book/cookbook/encryption.md +++ b/doc/book/cookbook/encryption.md @@ -33,7 +33,7 @@ by adding encryption at different levels. We would be very curious to know your needs and thougs about ideas such as encryption practices and things like key management, as we want Garage to be a -serious base platform for the developpment of secure, encrypted applications. +serious base platform for the development of secure, encrypted applications. Do not hesitate to come talk to us if you have any thoughts or questions on the subject. @@ -59,7 +59,7 @@ For standard S3 API requests, Garage does not encrypt data at rest by itself. For the most generic at rest encryption of data, we recommend setting up your storage partitions on encrypted LUKS devices. -If you are developping your own client software that makes use of S3 storage, +If you are developing your own client software that makes use of S3 storage, we recommend implementing data encryption directly on the client side and never transmitting plaintext data to Garage. This makes it easy to use an external untrusted storage provider if necessary. @@ -108,14 +108,14 @@ Protects against the following threats: - Stolen HDD -Crucially, does not protect againt malicious sysadmins or remote attackers that +Crucially, does not protect against malicious sysadmins or remote attackers that might gain access to your servers. Methods include full-disk encryption with tools such as LUKS. ## Encrypting data on the client side -Protects againt the following threats: +Protects against the following threats: - A honest-but-curious administrator - A malicious administrator that tries to corrupt your data diff --git a/doc/book/cookbook/exposing-websites.md b/doc/book/cookbook/exposing-websites.md index 9382a541..74a5613d 100644 --- a/doc/book/cookbook/exposing-websites.md +++ b/doc/book/cookbook/exposing-websites.md @@ -9,7 +9,7 @@ There are three methods to expose buckets as website: 1. using the PutBucketWebsite S3 API call, which is allowed for access keys that have the owner permission bit set -2. from the Garage CLI, by an adminstrator of the cluster +2. from the Garage CLI, by an administrator of the cluster 3. using the Garage administration API diff --git a/doc/book/cookbook/from-source.md b/doc/book/cookbook/from-source.md index 7105c999..04b84aef 100644 --- a/doc/book/cookbook/from-source.md +++ b/doc/book/cookbook/from-source.md @@ -20,12 +20,12 @@ sudo apt-get update sudo apt-get install build-essential ``` -## Building from source from the Gitea repository +## Building from source from the Forgejo repository The primary location for Garage's source code is the -[Gitea repository](https://git.deuxfleurs.fr/Deuxfleurs/garage), +[Forgejo repository](https://git.deuxfleurs.fr/Deuxfleurs/garage), which contains all of the released versions as well as the code -for the developpement of the next version. +for the development of the next version. Clone the repository and enter it as follows: @@ -41,7 +41,7 @@ git tag # List available tags git checkout v0.8.0 # Change v0.8.0 with the version you wish to build ``` -Otherwise you will be building a developpement build from the `main` branch +Otherwise you will be building a development build from the `main` branch that includes all of the changes to be released in the next version. Be careful that such a build might be unstable or contain bugs, and could be incompatible with nodes that run stable versions of Garage. @@ -85,11 +85,14 @@ The following feature flags are available in v0.8.0: | Feature flag | Enabled | Description | | ------------ | ------- | ----------- | | `bundled-libs` | *by default* | Use bundled version of sqlite3, zstd, lmdb and libsodium | -| `system-libs` | optional | Use system version of sqlite3, zstd, lmdb and libsodium
if available (exclusive with `bundled-libs`, build using
`cargo build --no-default-features --features system-libs`) | +| `consul-discovery` | optional | Enable automatic registration and discovery
of cluster nodes through the Consul API | +| `fjall` | experimental | Enable using Fjall to store Garage's metadata | +| `journald` | optional | Enable logging to systemd-journald with
`GARAGE_LOG_TO_JOURNALD=true` environment variable set | | `k2v` | optional | Enable the experimental K2V API (if used, all nodes on your
Garage cluster must have it enabled as well) | | `kubernetes-discovery` | optional | Enable automatic registration and discovery
of cluster nodes through the Kubernetes API | -| `metrics` | *by default* | Enable collection of metrics in Prometheus format on the admin API | -| `telemetry-otlp` | optional | Enable collection of execution traces using OpenTelemetry | -| `syslog` | optional | Enable logging to Syslog | | `lmdb` | *by default* | Enable using LMDB to store Garage's metadata | +| `metrics` | *by default* | Enable collection of metrics in Prometheus format on the admin API | | `sqlite` | *by default* | Enable using Sqlite3 to store Garage's metadata | +| `syslog` | optional | Enable logging to Syslog with
`GARAGE_LOG_TO_SYSLOG=true` environment variable set | +| `system-libs` | optional | Use system version of sqlite3, zstd, lmdb and libsodium
if available (exclusive with `bundled-libs`, build using
`cargo build --no-default-features --features system-libs`) | +| `telemetry-otlp` | optional | Enable collection of execution traces using OpenTelemetry | diff --git a/doc/book/cookbook/kubernetes.md b/doc/book/cookbook/kubernetes.md index f5bceec8..c1db742f 100644 --- a/doc/book/cookbook/kubernetes.md +++ b/doc/book/cookbook/kubernetes.md @@ -26,7 +26,7 @@ Or deploy with custom values: helm install --create-namespace --namespace garage garage ./garage -f values.override.yaml ``` -If you want to manage the CustomRessourceDefinition used by garage for its `kubernetes_discovery` outside of the helm chart, add `garage.kubernetesSkipCrd: true` to your custom values and use the kustomization before deploying the helm chart: +If you want to manage the CustomResourceDefinition used by garage for its `kubernetes_discovery` outside of the helm chart, add `garage.kubernetesSkipCrd: true` to your custom values and use the kustomization before deploying the helm chart: ```bash kubectl apply -k ../k8s/crd @@ -47,12 +47,12 @@ All possible configuration values can be found with: helm show values ./garage ``` -This is an example `values.overrride.yaml` for deploying in a microk8s cluster with a https s3 api ingress route: +This is an example `values.override.yaml` for deploying in a microk8s cluster with a https s3 api ingress route: ```yaml garage: # Use only 2 replicas per object - replicationMode: "2" + replicationFactor: 2 # Start 4 instances (StatefulSets) of garage deployment: diff --git a/doc/book/cookbook/real-world.md b/doc/book/cookbook/real-world.md index b9927c06..681346cb 100644 --- a/doc/book/cookbook/real-world.md +++ b/doc/book/cookbook/real-world.md @@ -96,14 +96,14 @@ to store 2 TB of data in total. ## Get a Docker image Our docker image is currently named `dxflrs/garage` and is stored on the [Docker Hub](https://hub.docker.com/r/dxflrs/garage/tags?page=1&ordering=last_updated). -We encourage you to use a fixed tag (eg. `v1.3.0`) and not the `latest` tag. -For this example, we will use the latest published version at the time of the writing which is `v1.3.0` but it's up to you +We encourage you to use a fixed tag (eg. `v2.2.0`) and not the `latest` tag. +For this example, we will use the latest published version at the time of the writing which is `v2.2.0` but it's up to you to check [the most recent versions on the Docker Hub](https://hub.docker.com/r/dxflrs/garage/tags?page=1&ordering=last_updated). For example: ``` -sudo docker pull dxflrs/garage:v1.3.0 +sudo docker pull dxflrs/garage:v2.2.0 ``` ## Deploying and configuring Garage @@ -171,7 +171,7 @@ docker run \ -v /etc/garage.toml:/etc/garage.toml \ -v /var/lib/garage/meta:/var/lib/garage/meta \ -v /var/lib/garage/data:/var/lib/garage/data \ - dxflrs/garage:v1.3.0 + dxflrs/garage:v2.2.0 ``` With this command line, Garage should be started automatically at each boot. @@ -185,7 +185,7 @@ If you want to use `docker-compose`, you may use the following `docker-compose.y version: "3" services: garage: - image: dxflrs/garage:v1.3.0 + image: dxflrs/garage:v2.2.0 network_mode: "host" restart: unless-stopped volumes: diff --git a/doc/book/cookbook/reverse-proxy.md b/doc/book/cookbook/reverse-proxy.md index bdc1c549..ffa8252c 100644 --- a/doc/book/cookbook/reverse-proxy.md +++ b/doc/book/cookbook/reverse-proxy.md @@ -7,7 +7,7 @@ The main reason to add a reverse proxy in front of Garage is to provide TLS to y In production you will likely need your certificates signed by a certificate authority. The most automated way is to use a provider supporting the [ACME protocol](https://datatracker.ietf.org/doc/html/rfc8555) -such as [Let's Encrypt](https://letsencrypt.org/), [ZeroSSL](https://zerossl.com/) or [Buypass Go SSL](https://www.buypass.com/ssl/products/acme). +such as [Let's Encrypt](https://letsencrypt.org/) or [ZeroSSL](https://zerossl.com/). If you are only testing Garage, you can generate a self-signed certificate to follow the documentation: @@ -97,7 +97,7 @@ server { location / { proxy_pass http://s3_backend; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header Host $host; + proxy_set_header Host $http_host; # Disable buffering to a temporary file. proxy_max_temp_file_size 0; } @@ -272,7 +272,7 @@ Add the following configuration section [to compress response](https://doc.traef ### Add caching response -Traefik's caching middleware is only available on [entreprise version](https://doc.traefik.io/traefik-enterprise/middlewares/http-cache/), however the freely-available [Souin plugin](https://github.com/darkweak/souin#tr%C3%A6fik-container) can also do the job. (section to be completed) +Traefik's caching middleware is only available on [enterprise version](https://doc.traefik.io/traefik-enterprise/middlewares/http-cache/), however the freely-available [Souin plugin](https://github.com/darkweak/souin#tr%C3%A6fik-container) can also do the job. (section to be completed) ### Complete example diff --git a/doc/book/cookbook/systemd.md b/doc/book/cookbook/systemd.md index ebff8c15..820a47bf 100644 --- a/doc/book/cookbook/systemd.md +++ b/doc/book/cookbook/systemd.md @@ -38,7 +38,7 @@ WantedBy=multi-user.target id is dynamically allocated by systemd (set with `DynamicUser=true`). It cannot access (read or write) home folders (`/home`, `/root` and `/run/user`), the rest of the filesystem can only be read but not written, only the path seen as -`/var/lib/garage` is writable as seen by the service. Additionnaly, the process +`/var/lib/garage` is writable as seen by the service. Additionally, the process can not gain new privileges over time. For this to work correctly, your `garage.toml` must be set with diff --git a/doc/book/design/_index.md b/doc/book/design/_index.md index 5881ab8f..e7098dc8 100644 --- a/doc/book/design/_index.md +++ b/doc/book/design/_index.md @@ -10,7 +10,7 @@ perspective. It will allow you to understand if Garage is a good fit for you, how to better use it, how to contribute to it, what can Garage could and could not do, etc. -- **[Goals and use cases](@/documentation/design/goals.md):** This page explains why Garage was concieved and what practical use cases it targets. +- **[Goals and use cases](@/documentation/design/goals.md):** This page explains why Garage was conceived and what practical use cases it targets. - **[Related work](@/documentation/design/related-work.md):** This pages presents the theoretical background on which Garage is built, and describes other software storage solutions and why they didn't work for us. @@ -31,5 +31,3 @@ We love to talk and hear about Garage, that's why we keep a log here: - [(en, 2021-04-28) Distributed object storage is centralised](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/commit/b1f60579a13d3c5eba7f74b1775c84639ea9b51a/doc/talks/2021-04-28_spirals-team/talk.pdf) - [(fr, 2020-12-02) Garage : jouer dans la cour des grands quand on est un hébergeur associatif](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/commit/b1f60579a13d3c5eba7f74b1775c84639ea9b51a/doc/talks/2020-12-02_wide-team/talk.pdf) - - diff --git a/doc/book/design/benchmarks/index.md b/doc/book/design/benchmarks/index.md index 79cc5d62..2df916e0 100644 --- a/doc/book/design/benchmarks/index.md +++ b/doc/book/design/benchmarks/index.md @@ -15,14 +15,14 @@ The more a user request will require intra-cluster requests to complete, the mor This is especially true for sequential requests: requests that must wait the result of another request to be sent. We designed Garage without consensus algorithms (eg. Paxos or Raft) to minimize the number of sequential and parallel requests. -This serie of benchmarks quantifies the impact of this design choice. +This series of benchmarks quantifies the impact of this design choice. ### On a simple simulated network We start with a controlled environment, all the instances are running on the same (powerful enough) machine. -To control the network latency, we simulate the network with [mknet](https://git.deuxfleurs.fr/trinity-1686a/mknet) (a tool we developped, based on `tc` and the linux network stack). -To mesure S3 endpoints latency, we use our own tool [s3lat](https://git.deuxfleurs.fr/quentin/s3lat/) to observe only the intra-cluster latency and not some contention on the nodes (CPU, RAM, disk I/O, network bandwidth, etc.). +To control the network latency, we simulate the network with [mknet](https://git.deuxfleurs.fr/trinity-1686a/mknet) (a tool we developed, based on `tc` and the linux network stack). +To measure S3 endpoints latency, we use our own tool [s3lat](https://git.deuxfleurs.fr/quentin/s3lat/) to observe only the intra-cluster latency and not some contention on the nodes (CPU, RAM, disk I/O, network bandwidth, etc.). Compared to other benchmark tools, S3Lat sends only one (small) request at the same time and measures its latency. We selected 5 standard endpoints that are often in the critical path: ListBuckets, ListObjects, GetObject, PutObject and RemoveObject. @@ -32,7 +32,7 @@ In this first benchmark, we consider 5 instances that are located in a different Compared to garage, minio latency drastically increases on 3 endpoints: GetObject, PutObject, RemoveObject. -We suppose that these requests on minio make transactions over Raft, involving 4 sequential requests: 1) sending the message to the leader, 2) having the leader dispatch it to the other nodes, 3) waiting for the confirmation of followers and finally 4) commiting it. With our current configuration, one Raft transaction will take around 400 ms. GetObject seems to correlate to 1 transaction while PutObject and RemoveObject seems to correlate to 2 or 3. Reviewing minio code would be required to confirm this hypothesis. +We suppose that these requests on minio make transactions over Raft, involving 4 sequential requests: 1) sending the message to the leader, 2) having the leader dispatch it to the other nodes, 3) waiting for the confirmation of followers and finally 4) committing it. With our current configuration, one Raft transaction will take around 400 ms. GetObject seems to correlate to 1 transaction while PutObject and RemoveObject seems to correlate to 2 or 3. Reviewing minio code would be required to confirm this hypothesis. Conversely, garage uses an architecture similar to DynamoDB and never require global cluster coordination to answer a request. Instead, garage can always contact the right node in charge of the requested data, and can answer in as low as one request in the case of GetObject and PutObject. We also observed that Garage latency, while often lower to minio, is more dispersed: garage is still in beta and has not received any performance optimization yet. @@ -50,7 +50,7 @@ We plot a similar graph as before: This new graph is very similar to the one before, neither minio or garage seems to benefit from this new topology, but they also do not suffer from it. -Considering garage, this is expected: nodes in the same DC are put in the same zone, and then data are spread on different zones for data resiliency and availaibility. +Considering garage, this is expected: nodes in the same DC are put in the same zone, and then data are spread on different zones for data resiliency and availability. Then, in the default mode, requesting data requires to query at least 2 zones to be sure that we have the most up to date information. These requests will involve at least one inter-DC communication. In other words, we prioritize data availability and synchronization over raw performances. diff --git a/doc/book/design/goals.md b/doc/book/design/goals.md index efa3cd33..3fe80e8f 100644 --- a/doc/book/design/goals.md +++ b/doc/book/design/goals.md @@ -59,11 +59,13 @@ Garage themselves for the following tasks: - Hosting of their homepage, [privacyguides.org](https://www.privacyguides.org/), and various other static sites -- As a Mastodon object storage backend for [mstdn.party](https://mstdn.party/) and [mstdn.plus](https://mstdn.plus/) +- As a PowerDNS authoritative zone backend through [Lightning Stream](https://doc.powerdns.com/lightningstream/latest/index.html) and [LMDB](https://doc.powerdns.com/authoritative/backends/lmdb.html) + +- As a Mastodon media storage backend for [mstdn.party](https://mstdn.party/) and [mstdn.plus](https://mstdn.plus/) - As a PeerTube storage backend for [neat.tube](https://neat.tube/) - As a [Matrix media backend](https://github.com/matrix-org/synapse-s3-storage-provider) Triplebit's Garage cluster is a multi-site cluster currently composed of -10 nodes in 3 physical locations. +15 storage nodes in 3 physical locations. diff --git a/doc/book/design/internals.md b/doc/book/design/internals.md index 8e3c214e..81a11854 100644 --- a/doc/book/design/internals.md +++ b/doc/book/design/internals.md @@ -94,7 +94,7 @@ delete a tombstone, the following condition has to be met: - All nodes responsible for storing this entry are aware of the existence of the tombstone, i.e. they cannot hold another version of the entry that is - superseeded by the tombstone. This ensures that deleting the tombstone is + superseded by the tombstone. This ensures that deleting the tombstone is safe and that no deleted value will come back in the system. Garage uses atomic database operations (such as compare-and-swap and @@ -141,4 +141,3 @@ rebalance of data, this would have led to the disk utilization to explode during the rebalancing, only to shrink again after 24 hours. The 10-minute delay is a compromise that gives good security while not having this problem of disk space explosion on rebalance. - diff --git a/doc/book/design/related-work.md b/doc/book/design/related-work.md index 84e66c4e..a8461803 100644 --- a/doc/book/design/related-work.md +++ b/doc/book/design/related-work.md @@ -37,7 +37,7 @@ However, Amazon S3 source code is not open but alternatives were proposed. We identified Minio, Pithos, Swift and Ceph. Minio/Ceph enforces a total order, so properties similar to a (relaxed) filesystem. Swift and Pithos are probably the most similar to AWS S3 with their consistent hashing ring. -However Pithos is not maintained anymore. More precisely the company that published Pithos version 1 has developped a second version 2 but has not open sourced it. +However Pithos is not maintained anymore. More precisely the company that published Pithos version 1 has developed a second version 2 but has not open sourced it. Some tests conducted by the [ACIDES project](https://acides.org/) have shown that Openstack Swift consumes way more resources (CPU+RAM) that we can afford. Furthermore, people developing Swift have not designed their software for geo-distribution. There were many attempts in research too. I am only thinking to [LBFS](https://pdos.csail.mit.edu/papers/lbfs:sosp01/lbfs.pdf) that was used as a basis for Seafile. But none of them have been effectively implemented yet. @@ -63,7 +63,7 @@ Due to its industry oriented design, Ceph is also far from being *Simple* to ope In a certain way, Ceph and MinIO are closer together than they are from Garage or OpenStack Swift. **[Pithos](https://github.com/exoscale/pithos):** -Pithos has been abandonned and should probably not used yet, in the following we explain why we did not pick their design. +Pithos has been abandoned and should probably not used yet, in the following we explain why we did not pick their design. Pithos was relying as a S3 proxy in front of Cassandra (and was working with Scylla DB too). From its designers' mouth, storing data in Cassandra has shown its limitations justifying the project abandonment. They built a closed-source version 2 that does not store blobs in the database (only metadata) but did not communicate further on it. diff --git a/doc/book/development/release-process.md b/doc/book/development/release-process.md index 0c6701c0..476404f3 100644 --- a/doc/book/development/release-process.md +++ b/doc/book/development/release-process.md @@ -23,7 +23,7 @@ This logic is defined in `nix/build_index.nix`. For each commit, we first pass the code to a formatter (rustfmt) and a linter (clippy). Then we try to build it in debug mode and run both unit tests and our integration tests. -Additionnaly, when releasing, our integration tests are run on the release build for amd64 and i686. +Additionally, when releasing, our integration tests are run on the release build for amd64 and i686. ## Generated Artifacts @@ -32,7 +32,7 @@ We generate the following binary artifacts for now: - **os**: linux - **format**: static binary, docker container -Additionnaly we also build two web pages and one JSON document: +Additionally we also build two web pages and one JSON document: - the documentation (this website) - [the release page](https://garagehq.deuxfleurs.fr/_releases.html) - [the release list in JSON format](https://garagehq.deuxfleurs.fr/_releases.json) @@ -67,7 +67,7 @@ nix copy --to 's3://nix?endpoint=garage.deuxfleurs.fr®ion=garage&secret-key=/ The previous command will only send the built package and not its dependencies. In the case of our CI pipeline, we want to cache all intermediate build steps as well. This can be done using this quite involved command (here as an example -for the `pkgs.amd64.relase` package): +for the `pkgs.amd64.release` package): ```bash nix copy -j8 \ @@ -174,5 +174,3 @@ drone sign --save Deuxfleurs/garage ``` Looking at the file, you will see that most of the commands are `nix-shell` and `nix-build` commands with various parameters. - - diff --git a/doc/book/operations/durability-repairs.md b/doc/book/operations/durability-repairs.md index fdf163e2..8a307c84 100644 --- a/doc/book/operations/durability-repairs.md +++ b/doc/book/operations/durability-repairs.md @@ -42,7 +42,7 @@ You may pause an ongoing scrub using `garage repair scrub pause`, but note that the scrub will resume automatically 24 hours later as Garage will not let your cluster run without a regular scrub. If the scrub procedure is too intensive for your servers and is slowing down your workload, the recommended solution -is to increase the "scrub tranquility" using `garage repair scrub set-tranquility`. +is to increase the "scrub tranquility" using `garage worker set scrub-tranquility`. A higher tranquility value will make Garage take longer pauses between two block verifications. Of course, scrubbing the entire data store will also take longer. diff --git a/doc/book/operations/layout.md b/doc/book/operations/layout.md index 667e89d2..a0b2f31d 100644 --- a/doc/book/operations/layout.md +++ b/doc/book/operations/layout.md @@ -242,7 +242,7 @@ dc3 Tags Partitions Capacity Usable capacity TOTAL 256 (256 unique) 2.0 GB 1000.0 MB (50.0%) ``` -As we can see, the node that was moved to `dc3` (node4) is only used at 25% (approximatively), +As we can see, the node that was moved to `dc3` (node4) is only used at 25% (approximately), whereas the node that was already in `dc3` (node3) is used at 75%. This can be explained by the following: @@ -260,7 +260,7 @@ This can be explained by the following: data can be removed to be moved to node1. - Garage will move data in equal proportions from all possible sources, in this - case it means that it will tranfer 25% of the entire data set from node3 to + case it means that it will transfer 25% of the entire data set from node3 to node1 and another 25% from node4 to node1. This explains why node3 ends with 75% utilization (100% from before minus 25% diff --git a/doc/book/operations/multi-hdd.md b/doc/book/operations/multi-hdd.md index 1cbcd805..c21b73d7 100644 --- a/doc/book/operations/multi-hdd.md +++ b/doc/book/operations/multi-hdd.md @@ -40,7 +40,7 @@ First of all, Garage divides the set of all possible block hashes in a fixed number of slices (currently 1024), and assigns to each slice a primary storage location among the specified data directories. The number of slices having their primary location in each data directory -is proportionnal to the capacity specified in the config file. +is proportional to the capacity specified in the config file. When Garage receives a block to write, it will always write it in the primary directory of the slice that contains its hash. diff --git a/doc/book/operations/recovering.md b/doc/book/operations/recovering.md index 05322b67..fb20656d 100644 --- a/doc/book/operations/recovering.md +++ b/doc/book/operations/recovering.md @@ -161,4 +161,7 @@ your recovery options are as follows: - **Option 3: restoring a filesystem-level snapshot.** If you are using ZFS or BTRFS to snapshot your metadata partition, refer to their specific - documentation on rolling back or copying files from an old snapshot. + documentation on rolling back or copying files from an old snapshot. + Note that, depending on the properties of the filesystem and of the DB engine, + if these snapshots were taken during a write operation to the database, they may + also be corrupted and thus unfit for recovery. diff --git a/doc/book/operations/upgrading.md b/doc/book/operations/upgrading.md index a3d2bcf5..26007dbe 100644 --- a/doc/book/operations/upgrading.md +++ b/doc/book/operations/upgrading.md @@ -56,7 +56,7 @@ From a high level perspective, a major upgrade looks like this: 10. Enable API access (reverse step 1) 11. Monitor your cluster while load comes back, check that all your applications are happy with this new version -### Major upgarades with minimal downtime +### Major upgrades with minimal downtime There is only one operation that has to be coordinated cluster-wide: the switch of one version of the internal RPC protocol to the next. This means that an upgrade with very limited downtime can simply be performed from one major version to the next by restarting all nodes diff --git a/doc/book/quick-start/_index.md b/doc/book/quick-start/_index.md index 633b785a..d0ebf778 100644 --- a/doc/book/quick-start/_index.md +++ b/doc/book/quick-start/_index.md @@ -132,7 +132,7 @@ docker run \ -v /path/to/garage.toml:/etc/garage.toml \ -v /path/to/garage/meta:/var/lib/garage/meta \ -v /path/to/garage/data:/var/lib/garage/data \ - dxflrs/garage:v1.3.0 + dxflrs/garage:v2.2.0 ``` Under Linux, you can substitute `--network host` for `-p 3900:3900 -p 3901:3901 -p 3902:3902 -p 3903:3903` diff --git a/doc/book/reference-manual/admin-api.md b/doc/book/reference-manual/admin-api.md index fcf49e8c..e96fcaff 100644 --- a/doc/book/reference-manual/admin-api.md +++ b/doc/book/reference-manual/admin-api.md @@ -6,41 +6,167 @@ weight = 40 The Garage administration API is accessible through a dedicated server whose listen address is specified in the `[admin]` section of the configuration file (see [configuration file -reference](@/documentation/reference-manual/configuration.md)) +reference](@/documentation/reference-manual/configuration.md)). -**WARNING.** At this point, there is no commitment to the stability of the APIs described in this document. -We will bump the version numbers prefixed to each API endpoint each time the syntax -or semantics change, meaning that code that relies on these endpoint will break -when changes are introduced. - -Versions: - - Before Garage 0.7.2 - no admin API - - Garage 0.7.2 - admin APIv0 - - Garage 0.9.0 - admin APIv1, deprecate admin APIv0 +The current version of the admin API is v2. No breaking changes to the Garage +administration API will be published outside of a major release. +History of previous versions: + - Before Garage v0.7.2 - no admin API + - Garage v0.7.2 - admin API v0 + - Garage v0.9.0 - admin API v1, deprecate admin API v0 + - Garage v2.0.0 - admin API v2, deprecate admin API v1 ## Access control -The admin API uses two different tokens for access control, that are specified in the config file's `[admin]` section: +### Using an API token -- `metrics_token`: the token for accessing the Metrics endpoint (if this token - is not set in the config file, the Metrics endpoint can be accessed without - access control); - -- `admin_token`: the token for accessing all of the other administration - endpoints (if this token is not set in the config file, access to these - endpoints is disabled entirely). - -These tokens are used as simple HTTP bearer tokens. In other words, to -authenticate access to an admin API endpoint, add the following HTTP header -to your request: +Administration API tokens tokens are used as simple HTTP bearer tokens. In +other words, to authenticate access to an admin API endpoint, add the following +HTTP header to your request: ``` Authorization: Bearer ``` -## Administration API endpoints +### User-defined API tokens + +Cluster administrators may dynamically define administration tokens using the CLI commands under `garage admin-token`. +Such tokens may be limited in scope, meaning that they may enable access to only a subset of API calls. +They may also have an expiration date to limit their use in time. + +Here is an example to create an administration token that is valid for 30 days +and gives access to only a subset of API calls, allowing it to create buckets +and access keys and give keys permissions on buckets: + +```bash +$ garage admin-token create --expires-in 30d \ + --scope ListBuckets,GetBucketInfo,ListKeys,GetKeyInfo,CreateBucket,CreateKey,AllowBucketKey,DenyBucketKey \ + my-token +This is your secret bearer token, it will not be shown again by Garage: + + 8ed1830b10a276ff57061950.kOSIpxWK9zSGbTO9Xadpv3YndSFWma0_snXcYHaORXk + +==== ADMINISTRATION TOKEN INFORMATION ==== +Token ID: 8ed1830b10a276ff57061950 +Token name: my-token +Created: 2025-06-15 15:12:44.160 +02:00 +Validity: valid +Expiration: 2025-07-15 15:12:44.117 +02:00 + +Scope: ListBuckets + GetBucketInfo + ListKeys + GetKeyInfo + CreateBucket + CreateKey + AllowBucketKey + DenyBucketKey +``` + +When running this command, your token will be shown only once and **will never +be shown again by Garage**, so make sure to save it directly. The token is +hashed internally, and is identified by its prefix (32 hex digits followed by a +dot) which is saved in clear. + +When running `garage admin-token list`, you might see something like this: + +``` +ID Created Name Expiration Scope +- - metrics_token (from daemon configuration) never Metrics +8ed1830b10a276ff57061950 2025-06-15 my-token 2025-07-15 15:12:44.117 +02:00 ListBuckets, ... (8) +``` + +### Master API tokens + +The admin API can also use two different master tokens for access control, +specified in the config file's `[admin]` section: + +- `metrics_token`: the token for accessing the Metrics endpoint. If this token + is not set in the config file, the Metrics endpoint can be accessed without + access control. + +- `admin_token`: the token for accessing all of the other administration + endpoints. If this token is not set in the config file, access to these + endpoints is only possible with a user-defined admin token. + +With the introduction of multiple user-defined admin tokens, the use of master +API tokens is now discouraged. + + +## Using the admin API + +All of the admin API endpoints are described in the OpenAPI specification: + + - APIv2 - [HTML spec](https://garagehq.deuxfleurs.fr/api/garage-admin-v2.html) - [OpenAPI JSON](https://garagehq.deuxfleurs.fr/api/garage-admin-v2.json) + - APIv1 (deprecated) - [HTML spec](https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html) - [OpenAPI YAML](https://garagehq.deuxfleurs.fr/api/garage-admin-v1.yml) + - APIv0 (deprecated) - [HTML spec](https://garagehq.deuxfleurs.fr/api/garage-admin-v0.html) - [OpenAPI YAML](https://garagehq.deuxfleurs.fr/api/garage-admin-v0.yml) + +Making a request to the API from the command line can be as simple as running: + +```bash +curl -H 'Authorization: Bearer s3cr3t' http://localhost:3903/v2/GetClusterStatus | jq +``` + +For more advanced use cases, we recommend using an SDK. +[Go to the "Build your own app" section to know how to use our SDKs](@/documentation/build/_index.md) + +### Making API calls from the `garage` CLI + +Since v2.0.0, the `garage` binary provides a subcommand `garage json-api` that +allows you to invoke the API without making an HTTP request. This can be +useful for scripting Garage deployments. + +`garage json-api` proxies API calls through Garage's internal RPC protocol, +therefore it does not require any form of authentication: RPC connection +parameters are discovered automatically to contact the locally-running Garage +instance (as when running any other `garage` CLI command). + +For simple calls that take no parameters, usage is as follows: + +``` +$ garage json-api GetClusterHealth +{ + "connectedNodes": 3, + "knownNodes": 3, + "partitions": 256, + "partitionsAllOk": 256, + "partitionsQuorum": 256, + "status": "healthy", + "storageNodes": 3, + "storageNodesOk": 3 +} +``` + +If you need to specify a JSON body for your call, you can add it directly after +the name of the function you are calling: + +``` +$ garage json-api CreateAdminToken '{"name": "test"}' +``` + +Or you can feed it through stdin by adding a `-` as the last command parameter: + +``` +$ garage json-api CreateAdminToken - +{"name": "test"} + +``` + +For admin API calls that would have taken query parameters in their HTTP version, these parameters can be passed in the JSON body object: + +``` +$ garage json-api GetAdminTokenInfo '{"id":"b0e6e0ace2c0b2aca4cdb2de"}' +``` + +For admin API calls that take both query parameters and a JSON body, combine them in the following fashion: + +``` +$ garage json-api UpdateAdminToken '{"id":"b0e6e0ace2c0b2aca4cdb2de", "body":{"name":"not a test"}}' +``` + +## Special administration API endpoints ### Metrics `GET /metrics` @@ -83,7 +209,7 @@ content-length: 102 date: Tue, 08 Aug 2023 07:22:38 GMT Garage is fully operational -Consult the full health check API endpoint at /v0/health for more details +Consult the full health check API endpoint at /v2/GetClusterHealth for more details ``` ### On-demand TLS `GET /check` @@ -126,23 +252,7 @@ $ curl -so /dev/null -w "%{http_code}" http://localhost:3903/check?domain=exampl 200 ``` - **References:** - [Using On-Demand TLS](https://caddyserver.com/docs/automatic-https#using-on-demand-tls) - [Add option for a backend check to approve use of on-demand TLS](https://github.com/caddyserver/caddy/pull/1939) - [Serving tens of thousands of domains over HTTPS with Caddy](https://caddy.community/t/serving-tens-of-thousands-of-domains-over-https-with-caddy/11179) - -### Cluster operations - -These endpoints have a dedicated OpenAPI spec. - - APIv1 - [HTML spec](https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html) - [OpenAPI YAML](https://garagehq.deuxfleurs.fr/api/garage-admin-v1.yml) - - APIv0 (deprecated) - [HTML spec](https://garagehq.deuxfleurs.fr/api/garage-admin-v0.html) - [OpenAPI YAML](https://garagehq.deuxfleurs.fr/api/garage-admin-v0.yml) - -Requesting the API from the command line can be as simple as running: - -```bash -curl -H 'Authorization: Bearer s3cr3t' http://localhost:3903/v0/status | jq -``` - -For more advanced use cases, we recommend using a SDK. -[Go to the "Build your own app" section to know how to use our SDKs](@/documentation/build/_index.md) diff --git a/doc/book/reference-manual/configuration.md b/doc/book/reference-manual/configuration.md index 1f583fe6..adc3286b 100644 --- a/doc/book/reference-manual/configuration.md +++ b/doc/book/reference-manual/configuration.md @@ -51,17 +51,20 @@ allow_punycode = false [consul_discovery] api = "catalog" -consul_http_addr = "http://127.0.0.1:8500" +consul_http_addr = "https://127.0.0.1:8500" +tls_skip_verify = false service_name = "garage-daemon" + ca_cert = "/etc/consul/consul-ca.crt" client_cert = "/etc/consul/consul-client.crt" client_key = "/etc/consul/consul-key.crt" + # for `agent` API mode, unset client_cert and client_key, and optionally enable `token` # token = "abcdef-01234-56789" -tls_skip_verify = false + tags = [ "dns-enabled" ] meta = { dns-acl = "allow trusted" } - +datacenters = ["dc1", "dc2", "dc3"] [kubernetes_discovery] namespace = "garage" @@ -82,6 +85,7 @@ add_host_to_metrics = true [admin] api_bind_addr = "0.0.0.0:3903" metrics_token = "BCAdFjoa9G0KJR0WXnHHm7fs1ZAbfpI8iIZ+Z/a2NgI=" +metrics_require_token = true admin_token = "UkLeGWEvHnXBqnueR3ISEMWpOnm40jH2tM2HnnL/0F4=" trace_sink = "http://localhost:4317" ``` @@ -97,9 +101,9 @@ The following gives details about each available configuration option. Top-level configuration options, in alphabetical order: [`allow_punycode`](#allow_punycode), [`allow_world_readable_secrets`](#allow_world_readable_secrets), -[`block_max_concurrent_reads`](`block_max_concurrent_reads), -[`block_ram_buffer_max`](#block_ram_buffer_max), +[`block_max_concurrent_reads`](#block_max_concurrent_reads), [`block_max_concurrent_writes_per_request`](#block_max_concurrent_writes_per_request), +[`block_ram_buffer_max`](#block_ram_buffer_max), [`block_size`](#block_size), [`bootstrap_peers`](#bootstrap_peers), [`compression_level`](#compression_level), @@ -127,12 +131,14 @@ The `[consul_discovery]` section: [`client_cert`](#consul_client_cert_and_key), [`client_key`](#consul_client_cert_and_key), [`consul_http_addr`](#consul_http_addr), +[`datacenters`](#consul_datacenters) [`meta`](#consul_tags_and_meta), [`service_name`](#consul_service_name), [`tags`](#consul_tags_and_meta), [`tls_skip_verify`](#consul_tls_skip_verify), [`token`](#consul_token). + The `[kubernetes_discovery]` section: [`namespace`](#kube_namespace), [`service_name`](#kube_service_name), @@ -150,6 +156,7 @@ The `[s3_web]` section: The `[admin]` section: [`api_bind_addr`](#admin_api_bind_addr), +[`metrics_require_token`](#admin_metrics_require_token), [`metrics_token`/`metrics_token_file`](#admin_metrics_token), [`admin_token`/`admin_token_file`](#admin_token), [`trace_sink`](#admin_trace_sink), @@ -336,7 +343,7 @@ Since `v0.8.0`, Garage can use alternative storage backends as follows: | --------- | ----------------- | ------------- | | [LMDB](https://www.symas.com/lmdb) (since `v0.8.0`, default since `v0.9.0`) | `"lmdb"` | `/db.lmdb/` | | [Sqlite](https://sqlite.org) (since `v0.8.0`) | `"sqlite"` | `/db.sqlite` | -| [Fjall](https://github.com/fjall-rs/fjall) (**experimental support** since `v1.3.0`) | `"fjall"` | `/db.fjall/` | +| [Fjall](https://github.com/fjall-rs/fjall) (**experimental support** since `v1.3.0`/`v2.1.0`) | `"fjall"` | `/db.fjall/` | | [Sled](https://sled.rs) (old default, removed since `v1.0`) | `"sled"` | `/db/` | Sled was supported until Garage v0.9.x, and was removed in Garage v1.0. @@ -345,8 +352,16 @@ old Sled metadata databases to another engine. Performance characteristics of the different DB engines are as follows: -- LMDB: the recommended database engine for high-performance distributed clusters. -LMDB works very well, but is known to have the following limitations: +- **LMDB:** the recommended database engine for high-performance distributed clusters + with `replication_factor` ≥ 2. + LMDB works well, but is known to have the following limitations: + + - LMDB is prone to database corruption after an unclean shutdown (e.g. a process kill + or a power outage). It is recommended to configure + [`metadata_auto_snapshot_interval`](#metadata_auto_snapshot_interval) to be + able to easily recover from this situation. With `replication_factor` ≥ 2, + metadata can also be reconstructed from remote nodes upon corruption + (see [Recovering from failures](@/documentation/operations/recovering.md#corrupted_meta)). - The data format of LMDB is not portable between architectures, so for instance the Garage database of an x86-64 node cannot be moved to an ARM64 @@ -356,30 +371,21 @@ LMDB works very well, but is known to have the following limitations: node to very small database sizes due to how LMDB works; it is therefore not recommended. - - Several users have reported corrupted LMDB database files after an unclean - shutdown (e.g. a power outage). This situation can generally be recovered - from if your cluster is geo-replicated (by rebuilding your metadata db from - other nodes), or if you have saved regular snapshots at the filesystem - level. - - Keys in LMDB are limited to 511 bytes. This limit translates to limits on - object keys in S3 and sort keys in K2V that are limted to 479 bytes. + object keys in S3 and sort keys in K2V that are limited to 479 bytes. -- Sqlite: Garage supports Sqlite as an alternative storage backend for - metadata, which does not have the issues listed above for LMDB. - On versions 0.8.x and earlier, Sqlite should be avoided due to abysmal - performance, which was fixed with the addition of `metadata_fsync`. - Sqlite is still probably slower than LMDB due to the way we use it, - so it is not the best choice for high-performance storage clusters, - but it should work fine in many cases. +- **Sqlite:** Garage supports Sqlite as an alternative storage backend for + metadata, which does not have the issues listed above for LMDB. Sqlite is + slower than LMDB, so it is not the best choice for high-performance storage + clusters. -- Fjall: a storage engine based on LSM trees, which theoretically allow for +- **Fjall:** a storage engine based on LSM trees, which theoretically allow for higher write throughput than other storage engines that are based on B-trees. Using Fjall could potentially improve Garage's performance significantly in write-heavy workloads. **Support for Fjall is experimental at this point**, - we have added it to Garage for evaluation purposes only. **Do not use it for - production-critical workloads.** - + we have added it to Garage for evaluation purposes only. **Use it only with + test data, and report any issues to our bug tracker. Do not use it for + production workloads.** It is possible to convert Garage's metadata directory from one format to another using the `garage convert-db` command, which should be used as follows: @@ -390,7 +396,7 @@ garage convert-db -a -i \ ``` Make sure to specify the full database path as presented in the table above -(third colummn), and not just the path to the metadata directory. +(third column), and not just the path to the metadata directory. #### `metadata_fsync` {#metadata_fsync} @@ -432,13 +438,14 @@ This might reduce the risk that a data block is lost in rare situations such as simultaneous node losing power, at the cost of a moderate drop in write performance. -Similarly to `metatada_fsync`, this is likely not necessary +Similarly to `metadata_fsync`, this is likely not necessary if geographical replication is used. #### `metadata_auto_snapshot_interval` (since `v0.9.4`) {#metadata_auto_snapshot_interval} If this value is set, Garage will automatically take a snapshot of the metadata -DB file at a regular interval and save it in the metadata directory. +DB file at a regular interval and save it in the metadata directory, +or in [`metadata_snapshots_dir`](#metadata_snapshots_dir) if it is set. This parameter can take any duration string that can be parsed by the [`parse_duration`](https://docs.rs/parse_duration/latest/parse_duration/#syntax) crate. @@ -447,14 +454,19 @@ corrupted, for instance after an unclean shutdown. See [this page](@/documentation/operations/recovering.md#corrupted_meta) for details. Garage keeps only the two most recent snapshots of the metadata DB and deletes older ones automatically. +You can also create metadata snapshots manually at any point using the +`garage meta snapshot` command. + +Using snapshots created by Garage is the best option to make snapshots of your +node's metadata for potential recovery, as they are guaranteed to be clean and +consistent, contrarily to filesystem-level snapshots that may be taken while +some writes are in-flight and thus might be corrupted. Note that taking a metadata snapshot is a relatively intensive operation as the entire data file is copied. A snapshot being taken might have performance impacts on the Garage node while it is running. If the cluster is under heavy write load when a snapshot operation is running, this might also cause the database file to grow in size significantly as pages cannot be recycled easily. -For this reason, it might be better to use filesystem-level snapshots instead -if possible. #### `disable_scrub` {#disable_scrub} @@ -542,19 +554,19 @@ awaits for one of the `block_max_concurrent_reads` slots to be available slot, it reads the entire block file to RAM and frees the slot as soon as the block file is finished reading. Only after the slot is released will the block's data start being transferred over the network. If the request fails to -acquire a reading slot wihtin 15 seconds, it fails with a timeout error. +acquire a reading slot within 15 seconds, it fails with a timeout error. Timeout events can be monitored through the `block_read_semaphore_timeouts` metric in Prometheus: a non-zero number of such events indicates an I/O bottleneck on HDD read speed. -#### `block_max_concurrent_writes_per_request` (since `v2.1.0`) {#block_max_concurrent_writes_per_request} +#### `block_max_concurrent_writes_per_request` (since `v1.3.1` / `v2.2.0`) {#block_max_concurrent_writes_per_request} This parameter is designed to adapt to the concurrent write performance of -different storage media.Maximum number of parallel block writes per put request -Higher values improve throughput but increase memory usage. +different storage media. Maximum number of parallel block writes per put request. +Higher values may improve throughput but increase memory usage. -Default: 3, Recommended: 10-30 for NVMe, 3-10 for HDD +Default value: 3. Recommended values: 10-30 for NVMe, 3-10 for spinning HDD. #### `lmdb_map_size` {#lmdb_map_size} @@ -605,11 +617,11 @@ storing the secret as the `GARAGE_RPC_SECRET_FILE` environment variable. #### `rpc_bind_addr` {#rpc_bind_addr} -The address and port on which to bind for inter-cluster communcations -(reffered to as RPC for remote procedure calls). +The address and port on which to bind for inter-cluster communications +(referred to as RPC for remote procedure calls). The port specified here should be the same one that other nodes will used to contact the node, even in the case of a NAT: the NAT should be configured to forward the external -port number to the same internal port nubmer. This means that if you have several nodes running +port number to the same internal port number. This means that if you have several nodes running behind a NAT, they should each use a different RPC port number. #### `rpc_bind_outgoing` (since `v0.9.2`) {#rpc_bind_outgoing} @@ -728,6 +740,18 @@ node_prefix "" { } ``` + +#### `datacenters` {#consul_datacenters} + +Optional list of datacenters that allow garage to do service discovery when Consul is configured in WAN federation. + +Example: `datacenters = ["dc1", "dc2", "dc3"]` + +In a WAN configuration, by default the Consul services API only responds with +local LAN services. When a list of datacenters is specified using this option, +Garage will query the consul server API by datacenter directly, allowing for +Garage to discover nodes across the Consul WAN. + #### `tags` and `meta` {#consul_tags_and_meta} Additional list of tags and map of service meta to add during service registration. @@ -760,14 +784,14 @@ manually. #### `api_bind_addr` {#s3_api_bind_addr} The IP and port on which to bind for accepting S3 API calls. -This endpoint does not suport TLS: a reverse proxy should be used to provide it. +This endpoint does not support TLS: a reverse proxy should be used to provide it. Alternatively, since `v0.8.5`, a path can be used to create a unix socket with 0222 mode. #### `s3_region` {#s3_region} -Garage will accept S3 API calls that are targetted to the S3 region defined here. -API calls targetted to other regions will fail with a AuthorizationHeaderMalformed error +Garage will accept S3 API calls that are targeted to the S3 region defined here. +API calls targeted to other regions will fail with a AuthorizationHeaderMalformed error message that redirects the client to the correct region. #### `root_domain` {#s3_root_domain} @@ -775,7 +799,7 @@ message that redirects the client to the correct region. The optional suffix to access bucket using vhost-style in addition to path-style request. Note path-style requests are always enabled, whether or not vhost-style is configured. Configuring vhost-style S3 required a wildcard DNS entry, and possibly a wildcard TLS certificate, -but might be required by softwares not supporting path-style requests. +but might be required by software not supporting path-style requests. If `root_domain` is `s3.garage.eu`, a bucket called `my-bucket` can be interacted with using the hostname `my-bucket.s3.garage.eu`. @@ -791,7 +815,7 @@ behaviour of this module. The IP and port on which to bind for accepting HTTP requests to buckets configured for website access. -This endpoint does not suport TLS: a reverse proxy should be used to provide it. +This endpoint does not support TLS: a reverse proxy should be used to provide it. Alternatively, since `v0.8.5`, a path can be used to create a unix socket with 0222 mode. @@ -824,10 +848,34 @@ See [administration API reference](@/documentation/reference-manual/admin-api.md Alternatively, since `v0.8.5`, a path can be used to create a unix socket. Note that for security reasons, the socket will have 0220 mode. Make sure to set user and group permissions accordingly. +#### `admin_token`, `admin_token_file` or `GARAGE_ADMIN_TOKEN`, `GARAGE_ADMIN_TOKEN_FILE` (env) {#admin_token} + +The token for accessing all administration functions on the admin endpoint, +with the exception of the metrics endpoint (see `metrics_token`). + +You can use any random string for this value. We recommend generating a random +token with `openssl rand -base64 32`. + +For Garage version earlier than `v2.0`, if this token is not set, +access to these endpoints is disabled entirely. + +Since Garage `v2.0`, additional admin API tokens can be defined dynamically +in your Garage cluster using administration commands. This new admin token system +is more flexible since it allows admin tokens to have an expiration date, +and to have a scope restricted to certain admin API functions. If `admin_token` +is set, it behaves as an admin token without expiration and with full scope. +Otherwise, only admin API tokens defined dynamically can be used. + +`admin_token` was introduced in Garage `v0.7.2`. +`admin_token_file` and the `GARAGE_ADMIN_TOKEN` environment variable are supported since Garage `v0.8.2`. + +`GARAGE_ADMIN_TOKEN_FILE` is supported since `v0.8.5` / `v0.9.1`. + #### `metrics_token`, `metrics_token_file` or `GARAGE_METRICS_TOKEN`, `GARAGE_METRICS_TOKEN_FILE` (env) {#admin_metrics_token} -The token for accessing the Metrics endpoint. If this token is not set, the -Metrics endpoint can be accessed without access control. +The token for accessing the Prometheus metrics endpoint (`/metrics`). +If this token is not set, and unless `metrics_require_token` is set to `true`, +the metrics endpoint can be accessed without access control. You can use any random string for this value. We recommend generating a random token with `openssl rand -base64 32`. @@ -836,17 +884,12 @@ You can use any random string for this value. We recommend generating a random t `GARAGE_METRICS_TOKEN_FILE` is supported since `v0.8.5` / `v0.9.1`. -#### `admin_token`, `admin_token_file` or `GARAGE_ADMIN_TOKEN`, `GARAGE_ADMIN_TOKEN_FILE` (env) {#admin_token} +#### `metrics_require_token` (since `v2.0.0`) {#admin_metrics_require_token} -The token for accessing all of the other administration endpoints. If this -token is not set, access to these endpoints is disabled entirely. - -You can use any random string for this value. We recommend generating a random token with `openssl rand -base64 32`. - -`admin_token` was introduced in Garage `v0.7.2`. -`admin_token_file` and the `GARAGE_ADMIN_TOKEN` environment variable are supported since Garage `v0.8.2`. - -`GARAGE_ADMIN_TOKEN_FILE` is supported since `v0.8.5` / `v0.9.1`. +If this is set to `true`, accessing the metrics endpoint will always require +an access token. Valid tokens include the `metrics_token` if it is set, +and admin API token defined dynamically in Garage which have +the `Metrics` endpoint in their scope. #### `trace_sink` {#admin_trace_sink} diff --git a/doc/book/reference-manual/features.md b/doc/book/reference-manual/features.md index 481aef01..aa801704 100644 --- a/doc/book/reference-manual/features.md +++ b/doc/book/reference-manual/features.md @@ -46,7 +46,7 @@ to select the replication mode best suited to your use case (hint: in most cases ### Compression and deduplication -All data stored in Garage is deduplicated, and optionnally compressed using +All data stored in Garage is deduplicated, and optionally compressed using Zstd. Objects uploaded to Garage are chunked in blocks of constant sizes (see [`block_size`](@/documentation/reference-manual/configuration.md#block_size)), and the hashes of individual blocks are used to dispatch them to storage nodes @@ -84,13 +84,13 @@ exposing the same content under different domain names. Garage also supports bucket aliases which are local to a single user: this allows different users to have different buckets with the same name, thus avoiding naming collisions. -This can be helpfull for instance if you want to write an application that creates per-user buckets with always the same name. +This can be helpful for instance if you want to write an application that creates per-user buckets with always the same name. This feature is totally invisible to S3 clients and does not break compatibility with AWS. ### Cluster administration API -Garage provides a fully-fledged REST API to administer your cluster programatically. +Garage provides a fully-fledged REST API to administer your cluster programmatically. Functionality included in the admin API include: setting up and monitoring cluster nodes, managing access credentials, and managing storage buckets and bucket aliases. A full reference of the administration API is available [here](@/documentation/reference-manual/admin-api.md). @@ -100,7 +100,7 @@ A full reference of the administration API is available [here](@/documentation/r Garage makes some internal metrics available in the Prometheus data format, which allows you to build interactive dashboards to visualize the load and internal state of your storage cluster. -For developpers and performance-savvy administrators, +For developers and performance-savvy administrators, Garage also supports exporting traces of what it does internally in OpenTelemetry format. This allows to monitor the time spent at various steps of the processing of requests, in order to detect potential performance bottlenecks. @@ -129,5 +129,5 @@ related to objects stored in an S3 bucket. In the context of our research project, [Aérogramme](https://aerogramme.deuxfleurs.fr), K2V is used to provide metadata and log storage for operations on encrypted e-mail storage. -Learn more on the specification of K2V [here](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/k2v/doc/drafts/k2v-spec.md) +Learn more on the specification of K2V [here](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/commit/f8be15c37db857e177d543de7be863692628d567/doc/drafts/k2v-spec.md) and on how to enable it in Garage [here](@/documentation/reference-manual/k2v.md). diff --git a/doc/book/reference-manual/k2v.md b/doc/book/reference-manual/k2v.md index c01f641e..a0eaf064 100644 --- a/doc/book/reference-manual/k2v.md +++ b/doc/book/reference-manual/k2v.md @@ -16,10 +16,10 @@ the `k2v` feature flag enabled can be obtained from our download page under with `-k2v` (example: `v0.7.2-k2v`). The specification of the K2V API can be found -[here](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/main/doc/drafts/k2v-spec.md). +[here](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/commit/f8be15c37db857e177d543de7be863692628d567/doc/drafts/k2v-spec.md). This document also includes a high-level overview of K2V's design. -The K2V API uses AWSv4 signatures for authentification, same as the S3 API. +The K2V API uses AWSv4 signatures for authentication, same as the S3 API. The AWS region used for signature calculation is always the same as the one defined for the S3 API in the config file. @@ -55,4 +55,3 @@ cargo build --features cli --bin k2v-cli The CLI utility is self-documented, run `k2v-cli --help` to learn how to use it. There is also a short README.md in the `src/k2v-client` folder with some instructions. - diff --git a/doc/book/reference-manual/s3-compatibility.md b/doc/book/reference-manual/s3-compatibility.md index b869b6f4..c44a7b1a 100644 --- a/doc/book/reference-manual/s3-compatibility.md +++ b/doc/book/reference-manual/s3-compatibility.md @@ -45,7 +45,7 @@ we suppose that OpenIO supports presigned URLs. All endpoints that are missing on Garage will return a 501 Not Implemented. Some `x-amz-` headers are not implemented. -### Core endoints +### Core endpoints | Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) | |------------------------------|----------------------------------|-----------------|---------------|---------|-----| @@ -135,12 +135,12 @@ If you need this feature, please [share your use case in our dedicated issue](ht **PutBucketLifecycleConfiguration:** The only actions supported are `AbortIncompleteMultipartUpload` and `Expiration` (without the `ExpiredObjectDeleteMarker` field). All other operations are dependent on -either bucket versionning or storage classes which Garage currently does not +either bucket versioning or storage classes which Garage currently does not implement. The deprecated `Prefix` member directly in the the `Rule` structure/XML tag is not supported, specified prefixes must be inside the `Filter` structure/XML tag. -**GetBucketVersioning:** Stub implementation which always returns "versionning not enabled", since Garage does not yet support bucket versionning. +**GetBucketVersioning:** Stub implementation which always returns "versioning not enabled", since Garage does not yet support bucket versioning. ### Replication endpoints @@ -155,7 +155,7 @@ Please open an issue if you have a use case for replication. *Note: Ceph documentation briefly says that Ceph supports [replication through the S3 API](https://docs.ceph.com/en/latest/radosgw/multisite-sync-policy/#s3-replication-api) but with some limitations. -Additionaly, replication endpoints are not documented in the S3 compatibility page so I don't know what kind of support we can expect.* +Additionally, replication endpoints are not documented in the S3 compatibility page so I don't know what kind of support we can expect.* ### Locking objects @@ -197,7 +197,7 @@ Please open an issue if you have a use case. ### Vendor specific endpoints -
Display Amazon specifc endpoints +
Display Amazon specific endpoints | Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) | @@ -234,4 +234,3 @@ Please open an issue if you have a use case. | [SelectObjectContent](https://docs.aws.amazon.com/AmazonS3/latest/API/API_SelectObjectContent.html) | ❌ Missing | ❌| ❌| ❌| ❌|
- diff --git a/doc/book/working-documents/compatibility-target.md b/doc/book/working-documents/compatibility-target.md index 630d15a5..2ed9dad4 100644 --- a/doc/book/working-documents/compatibility-target.md +++ b/doc/book/working-documents/compatibility-target.md @@ -3,7 +3,7 @@ title = "S3 compatibility target" weight = 5 +++ -If there is a specific S3 functionnality you have a need for, feel free to open +If there is a specific S3 functionality you have a need for, feel free to open a PR to put the corresponding endpoints higher in the list. Please explain your motivations for doing so in the PR message. diff --git a/doc/book/working-documents/design-draft.md b/doc/book/working-documents/design-draft.md index 8d3a31f0..de31ba0f 100644 --- a/doc/book/working-documents/design-draft.md +++ b/doc/book/working-documents/design-draft.md @@ -68,7 +68,7 @@ Workflow for DELETE: 1. Check write permission (LDAP) 2. Get current version (or versions) in object table 3. Do the deletion of those versions NOT IN A BACKGROUND JOB THIS TIME -4. Return succes to the user if we were able to delete blocks from the blocks table and entries from the object table +4. Return success to the user if we were able to delete blocks from the blocks table and entries from the object table To delete a version: @@ -92,7 +92,7 @@ Known issue: if someone is reading from a version that we want to delete and the - file path = /meta/(first 3 hex digits of hash)/(rest of hash) - map block hash -> set of version UUIDs where it is referenced -Usefull metadata: +Useful metadata: - list of versions that reference this block in the Casandra table, so that we can do GC by checking in Cassandra that the lines still exist - list of other nodes that we know have acknowledged a write of this block, useful in the rebalancing algorithm diff --git a/doc/book/working-documents/load-balancing.md b/doc/book/working-documents/load-balancing.md index 1a65fdd2..d6cbf4cc 100644 --- a/doc/book/working-documents/load-balancing.md +++ b/doc/book/working-documents/load-balancing.md @@ -49,12 +49,12 @@ The ring construction that selects `n_token` random positions for each nodes giv is not well-balanced: the space between the tokens varies a lot, and some partitions are thus bigger than others. This problem was demonstrated in the original Dynamo DB paper. -To solve this, we want to apply a better second method for partitionning our dataset: +To solve this, we want to apply a better second method for partitioning our dataset: 1. fix an initially large number of partitions (say 1024) with evenly-spaced delimiters, 2. attribute each partition randomly to a node, with a probability - proportionnal to its capacity (which `n_tokens` represented in the first + proportional to its capacity (which `n_tokens` represented in the first method) For now we continue using the multi-DC ring walking described above. @@ -66,7 +66,7 @@ I have studied two ways to do the attribution of partitions to nodes, in a way t MagLev provided significantly better balancing, as it guarantees that the exact same number of partitions is attributed to all nodes that have the same -capacity (and that this number is proportionnal to the node's capacity, except +capacity (and that this number is proportional to the node's capacity, except for large values), however in both cases: - the distribution is still bad, because we use the naive multi-DC ring walking diff --git a/doc/book/working-documents/migration-04.md b/doc/book/working-documents/migration-04.md index 52c56737..5aae2a42 100644 --- a/doc/book/working-documents/migration-04.md +++ b/doc/book/working-documents/migration-04.md @@ -1,6 +1,6 @@ +++ title = "Migrating from 0.3 to 0.4" -weight = 20 +weight = 80 +++ **Migrating from 0.3 to 0.4 is unsupported. This document is only intended to diff --git a/doc/book/working-documents/migration-06.md b/doc/book/working-documents/migration-06.md index 006b036b..5fa29120 100644 --- a/doc/book/working-documents/migration-06.md +++ b/doc/book/working-documents/migration-06.md @@ -1,6 +1,6 @@ +++ title = "Migrating from 0.5 to 0.6" -weight = 15 +weight = 75 +++ **This guide explains how to migrate to 0.6 if you have an existing 0.5 cluster. diff --git a/doc/book/working-documents/migration-07.md b/doc/book/working-documents/migration-07.md index 03cdfedc..8631fa99 100644 --- a/doc/book/working-documents/migration-07.md +++ b/doc/book/working-documents/migration-07.md @@ -1,6 +1,6 @@ +++ title = "Migrating from 0.6 to 0.7" -weight = 14 +weight = 74 +++ **This guide explains how to migrate to 0.7 if you have an existing 0.6 cluster. We don't recommend trying to migrate to 0.7 directly from 0.5 or older.** @@ -19,7 +19,7 @@ The migration steps are as follows: 2. Disable API and web access. Garage does not support disabling these endpoints but you can change the port number or stop your reverse proxy for instance. -3. Check once again that your cluster is healty. Run again `garage repair --all-nodes --yes tables` which is quick. +3. Check once again that your cluster is healthy. Run again `garage repair --all-nodes --yes tables` which is quick. Also check your queues are empty, run `garage stats` to query them. 4. Turn off Garage v0.6 5. Backup the metadata folder of all your nodes: `cd /var/lib/garage ; tar -acf meta-v0.6.tar.zst meta/` diff --git a/doc/book/working-documents/migration-08.md b/doc/book/working-documents/migration-08.md index b7c4c783..17fe078b 100644 --- a/doc/book/working-documents/migration-08.md +++ b/doc/book/working-documents/migration-08.md @@ -1,6 +1,6 @@ +++ title = "Migrating from 0.7 to 0.8" -weight = 13 +weight = 73 +++ **This guide explains how to migrate to 0.8 if you have an existing 0.7 cluster. diff --git a/doc/book/working-documents/migration-09.md b/doc/book/working-documents/migration-09.md index ba758093..cf5f309c 100644 --- a/doc/book/working-documents/migration-09.md +++ b/doc/book/working-documents/migration-09.md @@ -1,6 +1,6 @@ +++ title = "Migrating from 0.8 to 0.9" -weight = 12 +weight = 72 +++ **This guide explains how to migrate to 0.9 if you have an existing 0.8 cluster. diff --git a/doc/book/working-documents/migration-1.md b/doc/book/working-documents/migration-1.md index b6c0bb85..9a04d101 100644 --- a/doc/book/working-documents/migration-1.md +++ b/doc/book/working-documents/migration-1.md @@ -1,6 +1,6 @@ +++ title = "Migrating from 0.9 to 1.0" -weight = 11 +weight = 71 +++ **This guide explains how to migrate to 1.0 if you have an existing 0.9 cluster. diff --git a/doc/book/working-documents/migration-2.md b/doc/book/working-documents/migration-2.md new file mode 100644 index 00000000..01d984b3 --- /dev/null +++ b/doc/book/working-documents/migration-2.md @@ -0,0 +1,70 @@ ++++ +title = "Migrating from 1.0 to 2.0" +weight = 70 ++++ + +**This guide explains how to migrate to v2.x if you have an existing v1.x.x cluster. +We don't recommend trying to migrate to v2.x directly from v0.9.x or older.** + +This migration procedure has been tested on several clusters without issues. +However, it is still a *critical procedure* that might cause issues. +**Make sure to back up all your data before attempting it!** + +You might also want to read our [general documentation on upgrading Garage](@/documentation/operations/upgrading.md). + +## Changes introduced in v2.0 + +The following are **breaking changes** in Garage v2.0 that require your attention when migrating: + +- The administration API has been completely reworked. + Some calls to the `/v1/` endpoints will still work but most will not. + New endpoints are prefixed by `/v2/`. **You will need to update all your code that makes use of the admin API.** + +- `replication_mode` is no longer a supported configuration parameter, + please use `replication_factor` and `consistency_mode` instead. + +## Migration procedure + +The migration to Garage v2.0 can be done with almost no downtime, +by restarting all nodes at once in the new version. + +The migration steps are as follows: + +1. Do a `garage repair --all-nodes --yes tables`, check the logs and check that + all data seems to be synced correctly between nodes. If you have time, do + additional `garage repair` procedures (`blocks`, `versions`, `block_refs`, + etc.) + +2. Ensure you have a snapshot of your Garage installation that you can restore + to in case the upgrade goes wrong, with one of the following options: + + - You may use the `garage meta snapshot --all` command + to make a backup snapshot of the metadata directories of your nodes + for backup purposes. Once this command has completed, copy the following + files and directories from the `metadata_dir` of all your nodes + to somewhere safe: `snapshots`, `cluster_layout`, `data_layout`, + `node_key`, `node_key.pub`. (If you have set the `metadata_snapshots_dir` + to a different value in your config file, back up that directory instead.) + + - If you are running a filesystem such as ZFS or BTRFS that support + snapshotting, you can create a filesystem-level snapshot of the `metadata_dir` + of all your nodes to be used as a restoration point if needed. + + - You may also make a back-up manually: turn off each node + individually; back up its metadata folder (for instance, use the following + command if your metadata directory is `/var/lib/garage/meta`: `cd + /var/lib/garage ; tar -acf meta-v1.0.tar.zst meta/`); turn it back on + again. This will allow you to take a backup of all nodes without + impacting global cluster availability. You can do all nodes of a single + zone at once as this does not impact the availability of Garage. + +3. Prepare your updated binaries and configuration files for Garage v2.0. + **Remember to update your configuration file to remove `replication_mode` and replace it by `replication_factor`.** + +4. Shut down all v1.0 nodes simultaneously, and restart them all simultaneously + in v2.0. Use your favorite deployment tool (Ansible, Kubernetes, Nomad) to + achieve this as fast as possible. Garage v2.0 should be in a working state + as soon as enough nodes have started. + +5. Monitor your cluster in the following hours to see if it works well under + your production load. diff --git a/doc/book/working-documents/testing-strategy.md b/doc/book/working-documents/testing-strategy.md index fff706d7..46550b81 100644 --- a/doc/book/working-documents/testing-strategy.md +++ b/doc/book/working-documents/testing-strategy.md @@ -1,6 +1,6 @@ +++ title = "Testing strategy" -weight = 30 +weight = 100 +++ @@ -28,11 +28,11 @@ We should try to test in least invasive ways, i.e. minimize the impact of the te - Not making `garage` a shared library (launch using `execve`, it's perfectly fine) Instead, we should focus on building a clean outer interface for the `garage` binary, -for example loading configuration using environnement variables instead of the configuration file if that's helpfull for writing the tests. +for example loading configuration using environment variables instead of the configuration file if that's helpful for writing the tests. There are two reasons for this: -- Keep the soure code clean and focused +- Keep the source code clean and focused - Test something that is as close as possible as the true garage that will actually be running Reminder: rules of simplicity, concerning changes to Garage's source code. @@ -71,5 +71,3 @@ Interesting blog posts on the blog of the Sled database: Misc: - [mutagen](https://github.com/llogiq/mutagen) - mutation testing is a way to assert our test quality by mutating the code and see if the mutation makes the tests fail - [fuzzing](https://rust-fuzz.github.io/book/) - cargo supports fuzzing, it could be a way to test our software reliability in presence of garbage data. - - diff --git a/doc/drafts/admin-api.md b/doc/drafts/admin-api.md index 3ee948cb..778b4fa8 100644 --- a/doc/drafts/admin-api.md +++ b/doc/drafts/admin-api.md @@ -13,8 +13,12 @@ We will bump the version numbers prefixed to each API endpoint each time the syn or semantics change, meaning that code that relies on these endpoints will break when changes are introduced. -The Garage administration API was introduced in version 0.7.2, this document -does not apply to older versions of Garage. +The Garage administration API was introduced in version 0.7.2, and was +changed several times. + +**THIS DOCUMENT IS DEPRECATED.** We now have an OpenAPI spec which is automatically generated +from Garage's source code and is always up-to-date. See `doc/api/garage-admin-v2.html`. +Text in this document is no longer kept in sync with the admin API's actual behavior. ## Access control @@ -52,34 +56,28 @@ Returns an HTTP status 200 if the node is ready to answer user's requests, and an HTTP status 503 (Service Unavailable) if there are some partitions for which a quorum of nodes is not available. A simple textual message is also returned in a body with content-type `text/plain`. -See `/v1/health` for an API that also returns JSON output. +See `/v2/GetClusterHealth` for an API that also returns JSON output. + +### Other special endpoints + +#### CheckDomain `GET /check?domain=` + +Checks whether this Garage cluster serves a website for domain ``. +Returns HTTP 200 Ok if yes, or HTTP 4xx if no website is available for this domain. ### Cluster operations -#### GetClusterStatus `GET /v1/status` +#### GetClusterStatus `GET /v2/GetClusterStatus` Returns the cluster's current status in JSON, including: -- ID of the node being queried and its version of the Garage daemon - Live nodes - Currently configured cluster layout -- Staged changes to the cluster layout Example response body: ```json { - "node": "b10c110e4e854e5aa3f4637681befac755154b20059ec163254ddbfae86b09df", - "garageVersion": "v1.3.0", - "garageFeatures": [ - "k2v", - "lmdb", - "sqlite", - "metrics", - "bundled-libs" - ], - "rustVersion": "1.68.0", - "dbEngine": "LMDB (using Heed crate)", "layoutVersion": 5, "nodes": [ { @@ -169,7 +167,7 @@ Example response body: } ``` -#### GetClusterHealth `GET /v1/health` +#### GetClusterHealth `GET /v2/GetClusterHealth` Returns the cluster's current health in JSON format, with the following variables: @@ -178,7 +176,7 @@ Returns the cluster's current health in JSON format, with the following variable - degraded: Garage node is not connected to all storage nodes, but a quorum of write nodes is available for all partitions - unavailable: a quorum of write nodes is not available for some partitions - `knownNodes`: the number of nodes this Garage node has had a TCP connection to since the daemon started -- `connectedNodes`: the nubmer of nodes this Garage node currently has an open connection to +- `connectedNodes`: the number of nodes this Garage node currently has an open connection to - `storageNodes`: the number of storage nodes currently registered in the cluster layout - `storageNodesOk`: the number of storage nodes to which a connection is currently open - `partitions`: the total number of partitions of the data (currently always 256) @@ -202,7 +200,7 @@ Example response body: } ``` -#### ConnectClusterNodes `POST /v1/connect` +#### ConnectClusterNodes `POST /v2/ConnectClusterNodes` Instructs this Garage node to connect to other Garage nodes at specified addresses. @@ -232,7 +230,7 @@ Example response: ] ``` -#### GetClusterLayout `GET /v1/layout` +#### GetClusterLayout `GET /v2/GetClusterLayout` Returns the cluster's current layout in JSON, including: @@ -293,7 +291,7 @@ Example response body: } ``` -#### UpdateClusterLayout `POST /v1/layout` +#### UpdateClusterLayout `POST /v2/UpdateClusterLayout` Send modifications to the cluster layout. These modifications will be included in the staged role changes, visible in subsequent calls @@ -330,7 +328,7 @@ This returns the new cluster layout with the proposed staged changes, as returned by GetClusterLayout. -#### ApplyClusterLayout `POST /v1/layout/apply` +#### ApplyClusterLayout `POST /v2/ApplyClusterLayout` Applies to the cluster the layout changes currently registered as staged layout changes. @@ -350,23 +348,11 @@ existing layout in the cluster. This returns the message describing all the calculations done to compute the new layout, as well as the description of the layout as returned by GetClusterLayout. -#### RevertClusterLayout `POST /v1/layout/revert` +#### RevertClusterLayout `POST /v2/RevertClusterLayout` Clears all of the staged layout changes. -Request body format: - -```json -{ - "version": 13 -} -``` - -Reverting the staged changes is done by incrementing the version number -and clearing the contents of the staged change list. -Similarly to the CLI, the body must include the incremented -version number, which MUST be 1 + the value of the currently -existing layout in the cluster. +This requests contains an empty body. This returns the new cluster layout with all changes reverted, as returned by GetClusterLayout. @@ -374,7 +360,7 @@ as returned by GetClusterLayout. ### Access key operations -#### ListKeys `GET /v1/key` +#### ListKeys `GET /v2/ListKeys` Returns all API access keys in the cluster. @@ -393,8 +379,8 @@ Example response: ] ``` -#### GetKeyInfo `GET /v1/key?id=` -#### GetKeyInfo `GET /v1/key?search=` +#### GetKeyInfo `GET /v2/GetKeyInfo?id=` +#### GetKeyInfo `GET /v2/GetKeyInfo?search=` Returns information about the requested API access key. @@ -402,7 +388,7 @@ If `id` is set, the key is looked up using its exact identifier (faster). If `search` is set, the key is looked up using its name or prefix of identifier (slower, all keys are enumerated to do this). -Optionnally, the query parameter `showSecretKey=true` can be set to reveal the +Optionally, the query parameter `showSecretKey=true` can be set to reveal the associated secret access key. Example response: @@ -468,7 +454,7 @@ Example response: } ``` -#### CreateKey `POST /v1/key` +#### CreateKey `POST /v2/CreateKey` Creates a new API access key. @@ -483,7 +469,7 @@ Request body format: This returns the key info, including the created secret key, in the same format as the result of GetKeyInfo. -#### ImportKey `POST /v1/key/import` +#### ImportKey `POST /v2/ImportKey` Imports an existing API key. This will check that the imported key is in the valid format, i.e. @@ -501,7 +487,7 @@ Request body format: This returns the key info in the same format as the result of GetKeyInfo. -#### UpdateKey `POST /v1/key?id=` +#### UpdateKey `POST /v2/UpdateKey?id=` Updates information about the specified API access key. @@ -523,14 +509,14 @@ The possible flags in `allow` and `deny` are: `createBucket`. This returns the key info in the same format as the result of GetKeyInfo. -#### DeleteKey `DELETE /v1/key?id=` +#### DeleteKey `POST /v2/DeleteKey?id=` Deletes an API access key. ### Bucket operations -#### ListBuckets `GET /v1/bucket` +#### ListBuckets `GET /v2/ListBuckets` Returns all storage buckets in the cluster. @@ -572,8 +558,8 @@ Example response: ] ``` -#### GetBucketInfo `GET /v1/bucket?id=` -#### GetBucketInfo `GET /v1/bucket?globalAlias=` +#### GetBucketInfo `GET /v2/GetBucketInfo?id=` +#### GetBucketInfo `GET /v2/GetBucketInfo?globalAlias=` Returns information about the requested storage bucket. @@ -616,7 +602,7 @@ Example response: } ``` -#### CreateBucket `POST /v1/bucket` +#### CreateBucket `POST /v2/CreateBucket` Creates a new storage bucket. @@ -656,7 +642,7 @@ or no alias at all. Technically, you can also specify both `globalAlias` and `localAlias` and that would create two aliases, but I don't see why you would want to do that. -#### UpdateBucket `PUT /v1/bucket?id=` +#### UpdateBucket `POST /v2/UpdateBucket?id=` Updates configuration of the given bucket. @@ -688,16 +674,38 @@ In `quotas`: new values of `maxSize` and `maxObjects` must both be specified, or to remove the quotas. An absent value will be considered the same as a `null`. It is not possible to change only one of the two quotas. -#### DeleteBucket `DELETE /v1/bucket?id=` +#### DeleteBucket `POST /v2/DeleteBucket?id=` Deletes a storage bucket. A bucket cannot be deleted if it is not empty. Warning: this will delete all aliases associated with the bucket! +#### CleanupIncompleteUploads `POST /v2/CleanupIncompleteUploads` + +Cleanup all incomplete uploads in a bucket that are older than a specified number +of seconds. + +Request body format: + +```json +{ + "bucketId": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b", + "olderThanSecs": 3600 +} +``` + +Response format + +```json +{ + "uploadsDeleted": 12 +} +``` + ### Operations on permissions for keys on buckets -#### BucketAllowKey `POST /v1/bucket/allow` +#### AllowBucketKey `POST /v2/AllowBucketKey` Allows a key to do read/write/owner operations on a bucket. @@ -718,7 +726,7 @@ Request body format: Flags in `permissions` which have the value `true` will be activated. Other flags will remain unchanged. -#### BucketDenyKey `POST /v1/bucket/deny` +#### DenyBucketKey `POST /v2/DenyBucketKey` Denies a key from doing read/write/owner operations on a bucket. @@ -742,19 +750,35 @@ Other flags will remain unchanged. ### Operations on bucket aliases -#### GlobalAliasBucket `PUT /v1/bucket/alias/global?id=&alias=` +#### AddBucketAlias `POST /v2/AddBucketAlias` -Empty body. Creates a global alias for a bucket. +Creates an alias for a bucket in the namespace of a specific access key. +To create a global alias, specify the `globalAlias` field. +To create a local alias, specify the `localAlias` and `accessKeyId` fields. -#### GlobalUnaliasBucket `DELETE /v1/bucket/alias/global?id=&alias=` +Request body format: -Removes a global alias for a bucket. +```json +{ + "bucketId": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b", + "globalAlias": "my-bucket" +} +``` -#### LocalAliasBucket `PUT /v1/bucket/alias/local?id=&accessKeyId=&alias=` +or: -Empty body. Creates a local alias for a bucket in the namespace of a specific access key. +```json +{ + "bucketId": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b", + "accessKeyId": "GK31c2f218a2e44f485b94239e", + "localAlias": "my-bucket" +} +``` -#### LocalUnaliasBucket `DELETE /v1/bucket/alias/local?id=&accessKeyId&alias=` +#### RemoveBucketAlias `POST /v2/RemoveBucketAlias` -Removes a local alias for a bucket in the namespace of a specific access key. +Removes an alias for a bucket in the namespace of a specific access key. +To remove a global alias, specify the `globalAlias` field. +To remove a local alias, specify the `localAlias` and `accessKeyId` fields. +Request body format: same as AddBucketAlias. diff --git a/doc/drafts/k2v-spec.md b/doc/drafts/k2v-spec.md index f9696717..b16628e2 100644 --- a/doc/drafts/k2v-spec.md +++ b/doc/drafts/k2v-spec.md @@ -35,7 +35,7 @@ Triples in K2V are constituted of three fields: partition key in which the client wants to read/delete lists of items - a sort key (`sk`), an utf8 string that defines the index of the triplet inside its - partition; triplets are uniquely idendified by their partition key + sort key + partition; triplets are uniquely identified by their partition key + sort key - a value (`v`), an opaque binary blob associated to the partition key + sort key; they are transmitted as binary when possible but in most case in the JSON API @@ -74,7 +74,7 @@ are obsoleted by the new write. **Basic insertion.** To insert a new value `v4` with context `[(node1, t2), (node2, t3)]`, in a simple case where there was no insertion in-between reading the value -mentionned above and writing `v4`, and supposing that node2 receives the +mentioned above and writing `v4`, and supposing that node2 receives the InsertItem query: - `node2` generates a timestamp `t4` such that `t4 > t3`. @@ -332,7 +332,7 @@ Inserts a single item. This request does not use JSON, the body is sent directly To supersede previous values, the HTTP header `X-Garage-Causality-Token` should be set to the causality token returned by a previous read on this key. This -header can be ommitted for the first writes to the key. +header can be omitted for the first writes to the key. Example query: @@ -397,7 +397,7 @@ smallest partition key that exists. It returns partition keys in increasing order, or decreasing order if `reverse` is set to `true`, and stops when either of the following conditions is met: -1. if `end` is specfied, the partition key `end` is reached or surpassed (if it +1. if `end` is specified, the partition key `end` is reached or surpassed (if it is reached exactly, it is not included in the result) 2. if `limit` is specified, `limit` partition keys have been listed @@ -491,7 +491,7 @@ the triplet is inserted for the first time, the causality token should be set to The value is expected to be a base64-encoded binary blob. The value `null` can also be used to delete the triplet while preserving causality information: this -allows to know if a delete has happenned concurrently with an insert, in which +allows to know if a delete has happened concurrently with an insert, in which case both are preserved and returned on reads (see below). Partition keys and sort keys are utf8 strings which are stored sorted by @@ -540,7 +540,7 @@ JSON struct with the following fields: For each of the searches, triplets are listed and returned separately. The semantics of `prefix`, `start`, `end`, `limit` and `reverse` are the same as for ReadIndex. The -additionnal parameter `singleItem` allows to get a single item, whose sort key +additional parameter `singleItem` allows to get a single item, whose sort key is the one given in `start`. Parameters `conflictsOnly` and `tombstones` control additional filters on the items that are returned. diff --git a/doc/optimal_layout_report/geodistrib.tex b/doc/optimal_layout_report/geodistrib.tex index bb6f0391..56d4c925 100644 --- a/doc/optimal_layout_report/geodistrib.tex +++ b/doc/optimal_layout_report/geodistrib.tex @@ -59,7 +59,7 @@ To link the effective storage capacity of the cluster to partition assignment, w \end{equation} This assumption is justified by the dispersion of the hashing function, when the number of partitions is small relative to the number of stored blocks. -Every node $n$ wille store some number $p_n$ of partitions (it is the number of partitions $p$ such that $n$ appears in the $\alpha_p$). Hence the partitions stored by $n$ (and hence all partitions by our assumption) have there size bounded by $c_n/p_n$. This remark leads us to define the optimal size that we will want to maximize: +Every node $n$ will store some number $p_n$ of partitions (it is the number of partitions $p$ such that $n$ appears in the $\alpha_p$). Hence the partitions stored by $n$ (and hence all partitions by our assumption) have there size bounded by $c_n/p_n$. This remark leads us to define the optimal size that we will want to maximize: \begin{equation} \label{eq:optimal} diff --git a/doc/optimal_layout_report/optimal_layout.tex b/doc/optimal_layout_report/optimal_layout.tex index 005e7b50..42c9d3fd 100644 --- a/doc/optimal_layout_report/optimal_layout.tex +++ b/doc/optimal_layout_report/optimal_layout.tex @@ -38,7 +38,7 @@ We would like to compute an assignment of nodes to partitions. We will impose so \end{equation} This assumption is justified by the dispersion of the hashing function, when the number of partitions is small relative to the number of stored large objects. -Every node $n$ wille store some number $k_n$ of partitions. Hence the partitions stored by $n$ (and hence all partitions by our assumption) have there size bounded by $c_n/k_n$. This remark leads us to define the optimal size that we will want to maximize: +Every node $n$ will store some number $k_n$ of partitions. Hence the partitions stored by $n$ (and hence all partitions by our assumption) have there size bounded by $c_n/k_n$. This remark leads us to define the optimal size that we will want to maximize: \begin{equation} \label{eq:optimal} @@ -62,7 +62,7 @@ For now, in the following, we ask the following redundancy constraint: \textbf{Mode 3:} every partition needs to be assignated to three nodes. We try to spread the three nodes over different zones as much as possible. -\textbf{Warning:} This is a working document written incrementaly. The last version of the algorithm is the \textbf{parametric assignment} described in the next section. +\textbf{Warning:} This is a working document written incrementally. The last version of the algorithm is the \textbf{parametric assignment} described in the next section. \section{Computation of a parametric assignment} @@ -318,7 +318,7 @@ $$ $$ which is the universal upper bound on $s^*$. Hence any optimal utilization $(n_v)$ can be modified to another optimal utilization such that $n_v\ge \hat{n}_v$ -Because $z_0$ cannot store more than $N$ partition occurences, in any assignment, at least $2N$ partitions must be assignated to the zones $Z\setminus\{z_0\}$. Let $C_0 = C-c_{z_0}$. Suppose that there exists a zone $z_1\neq z_0$ such that $c_{z_1}/C_0 \ge 1/2$. Then, with the same argument as for $z_0$, we can define +Because $z_0$ cannot store more than $N$ partition occurrences, in any assignment, at least $2N$ partitions must be assignated to the zones $Z\setminus\{z_0\}$. Let $C_0 = C-c_{z_0}$. Suppose that there exists a zone $z_1\neq z_0$ such that $c_{z_1}/C_0 \ge 1/2$. Then, with the same argument as for $z_0$, we can define $$\hat{n}_v = \left\lfloor\frac{c_v}{c_{z_1}}N\right\rfloor$$ for every $v\in z_1$. @@ -351,7 +351,7 @@ Define $3N$ tokens $t_1,\ldots, t_{3N}\in V$ as follows: Then for $1\le i \le N$, define the triplet $T_i$ to be $(t_i, t_{i+N}, t_{i+2N})$. Since the same nodes of a zone appear contiguously, the three nodes of a triplet must belong to three distinct zones. -However simple, this solution to go from an utilization to an assignment has the drawback of not spreading the triplets: a node will tend to be associated to the same two other nodes for many partitions. Hence, during data transfer, it will tend to use only two link, instead of spreading the bandwith use over many other links to other nodes. To achieve this goal, we will reframe the search of an assignment as a flow problem. and in the flow algorithm, we will introduce randomness in the order of exploration. This will be sufficient to obtain a good dispersion of the triplets. +However simple, this solution to go from an utilization to an assignment has the drawback of not spreading the triplets: a node will tend to be associated to the same two other nodes for many partitions. Hence, during data transfer, it will tend to use only two link, instead of spreading the bandwidth use over many other links to other nodes. To achieve this goal, we will reframe the search of an assignment as a flow problem. and in the flow algorithm, we will introduce randomness in the order of exploration. This will be sufficient to obtain a good dispersion of the triplets. \begin{figure} \centering @@ -436,7 +436,7 @@ T_3=(b,c,d'). $$ One can check that in this case, it is impossible to minimize both the number of zone and node changes. -Because of the redundancy constraint, we cannot use a greedy algorithm to just replace nodes in the triplets to try to get the new utilization rate: this could lead to blocking situation where there is still a hole to fill in a triplet but no available node satisfies the zone separation constraint. To circumvent this issue, we propose an algorithm based on finding cycles in a graph encoding of the assignment. As in section \ref{sec:opt_assign}, we can explore the neigbours in a random order in the graph algorithms, to spread the triplets distribution. +Because of the redundancy constraint, we cannot use a greedy algorithm to just replace nodes in the triplets to try to get the new utilization rate: this could lead to blocking situation where there is still a hole to fill in a triplet but no available node satisfies the zone separation constraint. To circumvent this issue, we propose an algorithm based on finding cycles in a graph encoding of the assignment. As in section \ref{sec:opt_assign}, we can explore the neighbours in a random order in the graph algorithms, to spread the triplets distribution. \subsubsection{Minimizing the zone discrepancy} @@ -550,8 +550,8 @@ We give some considerations of worst case complexity for these algorithms. In th Algorithm \ref{alg:util} can be implemented with complexity $O(\#V^2)$. The complexity of the function call at line \ref{lin:subutil} is $O(\#V)$. The difference between the sum of the subutilizations and $3N$ is at most the sum of the rounding errors when computing the $\hat{n}_v$. Hence it is bounded by $\#V$ and the loop at line \ref{lin:loopsub} is iterated at most $\#V$ times. Finding the minimizing $v$ at line \ref{lin:findmin} takes $O(\#V)$ operations (naively, we could also use a heap). Algorithm \ref{alg:opt} can be implemented with complexity $O(N^3\times \#Z)$. The flow graph has $O(N+\#Z)$ vertices and $O(N\times \#Z)$ edges. Dinic's algorithm has complexity $O(\#\mathrm{Vertices}^2\#\mathrm{Edges})$ hence in our case it is $O(N^3\times \#Z)$. - -Algorithm \ref{alg:mini} can be implented with complexity $O(N^3\# Z)$ under \eqref{hyp:A} and $O(N^3 \#Z \#V)$ under \eqref{hyp:B}. + +Algorithm \ref{alg:mini} can be implemented with complexity $O(N^3\# Z)$ under \eqref{hyp:A} and $O(N^3 \#Z \#V)$ under \eqref{hyp:B}. The graph $G_T$ has $O(N)$ vertices and $O(N\times \#Z)$ edges under assumption \eqref{hyp:A} and respectively $O(N\times \#Z)$ vertices and $O(N\times \#V)$ edges under assumption \eqref{hyp:B}. The loop at line \ref{lin:repeat} is iterated at most $N$ times since the distance between $T$ and $T'$ decreases at every iteration. Bellman-Ford algorithm has complexity $O(\#\mathrm{Vertices}\#\mathrm{Edges})$, which in our case amounts to $O(N^2\# Z)$ under \eqref{hyp:A} and $O(N^2 \#Z \#V)$ under \eqref{hyp:B}. \begin{algorithm} @@ -637,7 +637,7 @@ We try to maximize $s^*$ defined in \eqref{eq:optimal}. So we can compute the op \subsection{Computation of a candidate assignment} -To compute a candidate assignment (that does not optimize zone spreading nor distance to a previous assignment yet), we can use the folowing flow problem. +To compute a candidate assignment (that does not optimize zone spreading nor distance to a previous assignment yet), we can use the following flow problem. Define the oriented weighted graph $(X,E)$. The set of vertices $X$ contains the source $\mathbf{s}$, the sink $\mathbf{t}$, vertices $\mathbf{x}_p, \mathbf{u}^+_p, \mathbf{u}^-_p$ for every partition $p$, vertices $\mathbf{y}_{p,z}$ for every partition $p$ and zone $z$, and vertices $\mathbf{z}_v$ for every node $v$. @@ -680,14 +680,14 @@ Given the flow $f$, let $G_f=(X',E_f)$ be the multi-graph where $X' = X\setminus \end{itemize} To summarize, arcs are oriented left to right if they correspond to a presence of flow in $f$, and right to left if they correspond to an absence of flow. They are positively weighted if we want them to stay at their current state, and negatively if we want them to switch. Let us compute the weight of such graph. -\begin{multline*} +\begin{multiline*} w(G_f) = \sum_{e\in E_f} w(e_f) \\ = (\alpha - \beta -\gamma) N_1 + (\alpha +\beta - \gamma) N_2 + (\alpha+\beta+\gamma) N_3 \\ + \#V\times N - 4 \sum_p 3-\#(T_p\cap T'_p) \\ =(\#V-12+\alpha-\beta-\gamma)\times N + 4Q_V + 2\beta N_2 + 2(\beta+\gamma) N_3 \\ -\end{multline*} +\end{multiline*} As for the mode 3-strict, one can check that the difference of two such graphs corresponding to the same $(n_v)$ is always eulerian. Hence we can navigate in this class with the same greedy algorithm that discovers positive cycles and flips them. diff --git a/doc/talks/2025-10-06-josy/.gitignore b/doc/talks/2025-10-06-josy/.gitignore new file mode 100644 index 00000000..9f1f00e6 --- /dev/null +++ b/doc/talks/2025-10-06-josy/.gitignore @@ -0,0 +1,17 @@ +* + +!*.txt +!*.md + +!assets + +!.gitignore +!*.svg +!*.png +!*.jpg +!*.tex +!Makefile +!.gitignore +!assets/*.drawio.pdf + +!talk.pdf diff --git a/doc/talks/2025-10-06-josy/Makefile b/doc/talks/2025-10-06-josy/Makefile new file mode 100644 index 00000000..f0aae6a8 --- /dev/null +++ b/doc/talks/2025-10-06-josy/Makefile @@ -0,0 +1,19 @@ +ASSETS=../assets/lattice/lattice1.pdf_tex \ + ../assets/lattice/lattice2.pdf_tex \ + ../assets/lattice/lattice3.pdf_tex \ + ../assets/lattice/lattice4.pdf_tex \ + ../assets/lattice/lattice5.pdf_tex \ + ../assets/lattice/lattice6.pdf_tex \ + ../assets/lattice/lattice7.pdf_tex \ + ../assets/lattice/lattice8.pdf_tex \ + ../assets/logos/deuxfleurs.pdf \ + ../assets/timeline-22-24.pdf + +talk.pdf: talk.tex $(ASSETS) + pdflatex talk.tex + +%.pdf: %.svg + inkscape -D -z --file=$^ --export-pdf=$@ + +%.pdf_tex: %.svg + inkscape -D -z --file=$^ --export-pdf=$@ --export-latex diff --git a/doc/talks/2025-10-06-josy/talk.pdf b/doc/talks/2025-10-06-josy/talk.pdf new file mode 100644 index 00000000..2194908a Binary files /dev/null and b/doc/talks/2025-10-06-josy/talk.pdf differ diff --git a/doc/talks/2025-10-06-josy/talk.tex b/doc/talks/2025-10-06-josy/talk.tex new file mode 100644 index 00000000..aa483766 --- /dev/null +++ b/doc/talks/2025-10-06-josy/talk.tex @@ -0,0 +1,702 @@ +\nonstopmode +\documentclass[aspectratio=169,xcolor={svgnames}]{beamer} +\usepackage[utf8]{inputenc} +% \usepackage[frenchb]{babel} +\usepackage{amsmath} +\usepackage{mathtools} +\usepackage{breqn} +\usepackage{multirow} +\usetheme{boxes} +\usepackage{graphicx} +\usepackage{import} +\usepackage{adjustbox} +\usepackage[absolute,overlay]{textpos} +%\useoutertheme[footline=authortitle,subsection=false]{miniframes} +%\useoutertheme[footline=authorinstitute,subsection=false]{miniframes} +\useoutertheme{infolines} +\setbeamertemplate{headline}{} + +\beamertemplatenavigationsymbolsempty + +\definecolor{TitleOrange}{RGB}{255,137,0} +\setbeamercolor{title}{fg=TitleOrange} +\setbeamercolor{frametitle}{fg=TitleOrange} + +\definecolor{ListOrange}{RGB}{255,145,5} +\setbeamertemplate{itemize item}{\color{ListOrange}$\blacktriangleright$} + +\definecolor{verygrey}{RGB}{70,70,70} +\setbeamercolor{normal text}{fg=verygrey} + + +\usepackage{tabu} +\usepackage{multicol} +\usepackage{vwcol} +\usepackage{stmaryrd} +\usepackage{graphicx} + +\usepackage[normalem]{ulem} + +\AtBeginSection[]{ + \begin{frame} + \vfill + \centering + \begin{beamercolorbox}[sep=8pt,center,shadow=true,rounded=true]{title} + \usebeamerfont{title}\insertsectionhead\par% + \end{beamercolorbox} + \vfill + \end{frame} +} + +\title{Garage, an S3 backend as reliable as possible} +\author{Garage Authors} +\date{JoSy S3, 2025-10-08} + +\begin{document} + +\begin{frame} + \centering + \includegraphics[width=.3\linewidth]{../../sticker/Garage.png} + \vspace{1em} + + {\large\bf Garage, an S3 backend as reliable as possible} + \vspace{1em} + + \url{https://garagehq.deuxfleurs.fr/}\\ + \url{mailto:garagehq@deuxfleurs.fr}\\ + \texttt{\#garage:deuxfleurs.fr} on Matrix +\end{frame} + + +\section{Meet Garage} + +\begin{frame} + \frametitle{A non-profit initiative} + + + \begin{columns}[t] + \begin{column}{.2\textwidth} + \centering + \adjincludegraphics[width=.5\linewidth, valign=t]{../assets/logos/deuxfleurs.pdf} + \end{column} + \begin{column}{.8\textwidth} + \textbf{Part of a degrowth initiative}\\ + Garage has been created at Deuxfleurs where we experiment running Internet services without datacenter on commodity and refurbished hardware. + \end{column} + + \end{columns} + \vspace{2em} + \begin{columns}[t] + \begin{column}{.2\textwidth} + \centering + \adjincludegraphics[width=.5\linewidth, valign=t]{../assets/community.png} + \end{column} + \begin{column}{.8\textwidth} + \textbf{Developed by a community}\\ + {\small Some recent contributors: Arthur C, Charles H, dongdigua, Etienne L, Jonah A, Julien K, Lapineige, MagicRR, Milas B, Niklas M, RockWolf, Schwitzd, trinity-1686a, Xavier S, babykart, Baptiste J, eddster2309, James O'C, Joker9944, Maximilien R, Renjaya RZ, Yureka...} + \end{column} + + \end{columns} + \vspace{2em} + \begin{columns}[t] + \begin{column}{.2\textwidth} + \centering + \adjincludegraphics[width=.5\linewidth, valign=t]{../assets/logos/AGPLv3_Logo.png} + \end{column} + \begin{column}{.8\textwidth} + \textbf{Owned by nobody, open-core is impossible, zero VC money}\\ + AGPL + no Contributor License Agreement = Garage ownership spreads among hundredth of contributors. + \end{column} + + \end{columns} +\end{frame} + +\begin{frame} + \frametitle{Getting support for Garage} + \begin{columns}[t] + \begin{column}{.2\textwidth} + \centering + \adjincludegraphics[width=.4\linewidth, valign=t]{../assets/alex.jpg} + \end{column} + \begin{column}{.4\textwidth} + \textbf{Alex Auvolat}\\ + PhD; co-founder of Deuxfleurs\\ + Garage maintainer, Freelance + \end{column} + \begin{column}{.3\textwidth} + \centering + \adjincludegraphics[width=.4\linewidth, valign=t]{../assets/support.png} + \end{column} + \begin{column}{.1\textwidth} + ~ + \end{column} + \end{columns} + \vspace{2em} + \begin{columns}[t] + \begin{column}{.2\textwidth} + \centering + \adjincludegraphics[width=.4\linewidth, valign=t]{../assets/quentin.jpg} + \end{column} + \begin{column}{.4\textwidth} + \textbf{Quentin Dufour}\\ + PhD; co-founder of Deuxfleurs\\ + Garage contributor, Freelance + \end{column} + \begin{column}{.4\textwidth} + For support requests, write at: \\ + \url{garagehq@deuxfleurs.fr} + \end{column} + \end{columns} + \vspace{2em} + \begin{columns}[t] + \begin{column}{.2\textwidth} + \centering + \adjincludegraphics[width=.4\linewidth, valign=t]{../assets/armael.jpg} + \end{column} + \begin{column}{.4\textwidth} + \textbf{Armaël Guéneau}\\ + PhD; member of Deuxfleurs\\ + Garage contributor, Freelance + \end{column} + \begin{column}{.4\textwidth} + Eligible: email support, architecture design, specific feature development, etc. + \end{column} + \end{columns} + + +\end{frame} + +\begin{frame} + \frametitle{Our initial goal} + + \centering + \Large + + Being a self-sovereign community to be free of our degrowth choice + + $\big\downarrow$ + + As web citizens, datacenters are big black boxes. \\ + We want to leave them to autonoumously manage our servers. + + $\big\downarrow$ + + We want reliable services without relying on dedicated hardware or places. + +\end{frame} + +\begin{frame} + \frametitle{Building a resilient system with cheap stuff} + + \only<1,4-7>{ + \begin{itemize} + \item \textcolor<5->{gray}{Commodity hardware (e.g. old desktop PCs)\\ + \vspace{.5em} + \visible<4->{{\footnotesize (can die at any time)}}} + \vspace{1.5em} + \item<5-> \textcolor<7->{gray}{Regular Internet (e.g. FTTB, FTTH) and power grid connections\\ + \vspace{.5em} + \visible<6->{{\footnotesize (can be unavailable randomly)}}} + \vspace{1.5em} + \item<7-> \textbf{Geographical redundancy} (multi-site replication) + \end{itemize} + } + \only<2>{ + \begin{center} + \includegraphics[width=.8\linewidth]{../assets/neptune.jpg} + \end{center} + } + \only<3>{ + \begin{center} + \includegraphics[width=.8\linewidth]{../assets/atuin.jpg} + \end{center} + } + \only<8>{ + \begin{center} + \includegraphics[width=.8\linewidth]{../assets/inframap_jdll2023.pdf} + \end{center} + } +\end{frame} + +\begin{frame} + \frametitle{Object storage: a crucial component} + \begin{center} + \includegraphics[height=6em]{../assets/logos/Amazon-S3.jpg} + \hspace{3em} + \visible<2->{\includegraphics[height=5em]{../assets/logos/minio.png}} + \hspace{3em} + \visible<3>{\includegraphics[height=6em]{../../logo/garage_hires_crop.png}} + \end{center} + \vspace{1em} + S3: a de-facto standard, many compatible applications + + \vspace{1em} + \visible<2->{MinIO is self-hostable but not suited for geo-distributed deployments} + + \vspace{1em} + \visible<3->{\textbf{Garage is a self-hosted drop-in replacement for the Amazon S3 object store}} +\end{frame} + +\begin{frame} + \frametitle{CRDTs / weak consistency instead of consensus} + + \underline{Internally, Garage uses only CRDTs} (conflict-free replicated data types) + + \vspace{2em} + Why not Raft, Paxos, ...? Issues of consensus algorithms: + + \vspace{1em} + \begin{itemize} + \item<2-> \textbf{Software complexity} + \vspace{1em} + \item<3-> \textbf{Performance issues:} + \vspace{.5em} + \begin{itemize} + \item<4-> The leader is a \textbf{bottleneck} for all requests\\ + \vspace{.5em} + \item<5-> \textbf{Sensitive to higher latency} between nodes + \vspace{.5em} + \item<6-> \textbf{Takes time to reconverge} when disrupted (e.g. node going down) + \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{The data model of object storage} + Object storage is basically a \textbf{key-value store}: + \vspace{.5em} + + {\scriptsize + \begin{center} + \begin{tabular}{|l|p{7cm}|} + \hline + \textbf{Key: file path + name} & \textbf{Value: file data + metadata} \\ + \hline + \hline + \texttt{index.html} & + \texttt{Content-Type: text/html; charset=utf-8} \newline + \texttt{Content-Length: 24929} \newline + \texttt{} \\ + \hline + \texttt{img/logo.svg} & + \texttt{Content-Type: text/svg+xml} \newline + \texttt{Content-Length: 13429} \newline + \texttt{} \\ + \hline + \texttt{download/index.html} & + \texttt{Content-Type: text/html; charset=utf-8} \newline + \texttt{Content-Length: 26563} \newline + \texttt{} \\ + \hline + \end{tabular} + \end{center} + } + + \vspace{1em} + \begin{itemize} + \item<2> Maps well to CRDT data types + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Performance gains in practice} + \begin{center} + \includegraphics[width=.8\linewidth]{../assets/perf/endpoint_latency_0.7_0.8_minio.png} + \end{center} +\end{frame} + +% ======================================== OPERATING +% ======================================== OPERATING +% ======================================== OPERATING + + +\section{Production clusters} + +\begin{frame} + \frametitle{Deployment kinds} + + \includegraphics[width=.9\linewidth]{../assets/cluster_kind.png} + \vspace{1em} + +\end{frame} + +\begin{frame} + \frametitle{How big they are?} + + \includegraphics[width=.9\linewidth]{../assets/cluster_size.png} + \vspace{1em} + + \textit{"Petabyte storage setup for a video site. Nginx as CDN in-front using garage-s3-website feature. Each storage node has ~64TB storage with raid10, no replication within garage. 25gbit nic. haproxy to loadbalance across 5 nodes. mostly reads with very few writes."} + + \vspace{1em} + \textit{"We currently manage 7 Garage nodes, 28TB total storage, 6M blocks for 3M objects and 4TB of object data. We have been running Garage in production for 2.5 years."} + +\end{frame} + +\begin{frame} + \frametitle{Operating Garage} + \begin{center} + \only<1-2>{ + \includegraphics[width=.9\linewidth]{../assets/screenshots/garage_status_0.10.png} + \\\vspace{1em} + \visible<2>{\includegraphics[width=.9\linewidth]{../assets/screenshots/garage_status_unhealthy_0.10.png}} + } + \end{center} +\end{frame} + +\begin{frame} + \frametitle{Garage's architecture} + \begin{center} + \only<1>{\includegraphics[width=.45\linewidth]{../assets/garage.drawio.pdf}}% + \only<2>{\includegraphics[width=.6\linewidth]{../assets/garage_sync.drawio.pdf}}% + \end{center} +\end{frame} + +\begin{frame} + \frametitle{Digging deeper} + \begin{center} + \only<1>{\includegraphics[width=.9\linewidth]{../assets/screenshots/garage_stats_0.10.png}} + \only<2>{\includegraphics[width=.5\linewidth]{../assets/screenshots/garage_worker_list_0.10.png}} + \only<3>{\includegraphics[width=.6\linewidth]{../assets/screenshots/garage_worker_param_0.10.png}} + \end{center} +\end{frame} + +\begin{frame} + \frametitle{Potential limitations and bottlenecks} + \begin{itemize} + \item Global: + \begin{itemize} + \item Max. $\sim$100 nodes per cluster (excluding gateways) + \end{itemize} + \vspace{1em} + \item Metadata: + \begin{itemize} + \item One big bucket = bottleneck, object list on 3 nodes only + \end{itemize} + \vspace{1em} + \item Block manager: + \begin{itemize} + \item Lots of small files on disk + \item Processing the resync queue can be slow + \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Deployment advice for very large clusters} + \begin{itemize} + \item Metadata storage: + \begin{itemize} + \item ZFS mirror (x2) on fast NVMe + \item Use LMDB storage engine + \end{itemize} + \vspace{.5em} + \item Data block storage: + \begin{itemize} + \item Use Garage's native multi-HDD support + \item XFS on individual drives + \item Increase block size (1MB $\to$ 10MB, requires more RAM and good networking) + \item Tune \texttt{resync-tranquility} and \texttt{resync-worker-count} dynamically + \end{itemize} + \vspace{.5em} + \item Other : + \begin{itemize} + \item Split data over several buckets + \item Use less than 100 storage nodes + \item Use gateway nodes + \end{itemize} + \vspace{.5em} + \end{itemize} +\end{frame} + + +\begin{frame} + \frametitle{Focus on Deuxfleurs} + + Host institutional websites, partnership with a web agency. + Matrix media backend. + + Plan to use it as an email backend for an internally developed email server. + +\end{frame} + + +% ======================================== TIMELINE +% ======================================== TIMELINE +% ======================================== TIMELINE + +\section{Recent developments} + +% ====================== v0.7.0 =============================== + +\begin{frame} + \begin{center} + \includegraphics[width=.8\linewidth]{../assets/tl.drawio.png} + \end{center} +\end{frame} + +\begin{frame} + \frametitle{April 2022 - Garage v0.7.0} + Focus on \underline{observability and ecosystem integration} + \vspace{2em} + \begin{itemize} + \item \textbf{Monitoring:} metrics and traces, using OpenTelemetry + \vspace{1em} + \item Replication modes with 1 or 2 copies / weaker consistency + \vspace{1em} + \item Kubernetes integration for node discovery + \vspace{1em} + \item Admin API (v0.7.2) + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Metrics (Prometheus + Grafana)} + \begin{center} + \includegraphics[width=.9\linewidth]{../assets/screenshots/grafana_dashboard.png} + \end{center} +\end{frame} + +\begin{frame} + \frametitle{Traces (Jaeger)} + \begin{center} + \includegraphics[width=.8\linewidth]{../assets/screenshots/jaeger_listobjects.png} + \end{center} +\end{frame} + +% ====================== v0.8.0 =============================== + +\begin{frame} + \begin{center} + \includegraphics[width=.8\linewidth]{../assets/tl.drawio.png} + \end{center} +\end{frame} + +\begin{frame} + \frametitle{November 2022 - Garage v0.8.0} + Focus on \underline{performance} + \vspace{2em} + \begin{itemize} + \item \textbf{Alternative metadata DB engines} (LMDB, Sqlite) + \vspace{1em} + \item \textbf{Performance improvements:} block streaming, various optimizations... + \vspace{1em} + \item Bucket quotas (max size, max \#objects) + \vspace{1em} + \item Quality of life improvements, observability, etc. + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{About metadata DB engines} + \textbf{Issues with Sled:} + \vspace{1em} + \begin{itemize} + \item Huge files on disk + \vspace{.5em} + \item Unpredictable performance, especially on HDD + \vspace{.5em} + \item API limitations + \vspace{.5em} + \item Not actively maintained + \end{itemize} + + \vspace{2em} + \textbf{LMDB:} very stable, good performance, file size is reasonable\\ + \textbf{Sqlite} also available as a second choice + + \vspace{1em} + Sled will be removed in Garage v1.0 +\end{frame} + +\begin{frame} + \frametitle{DB engine performance comparison} + \begin{center} + \includegraphics[width=.6\linewidth]{../assets/perf/db_engine.png} + \end{center} + NB: Sqlite was slow due to synchronous mode, now configurable +\end{frame} + +\begin{frame} + \frametitle{Block streaming} + \begin{center} + \only<1>{\includegraphics[width=.8\linewidth]{../assets/schema-streaming-1.png}} + \only<2>{\includegraphics[width=.8\linewidth]{../assets/schema-streaming-2.png}} + \end{center} +\end{frame} + +\begin{frame} + \frametitle{TTFB benchmark} + \begin{center} + \includegraphics[width=.8\linewidth]{../assets/perf/ttfb.png} + \end{center} +\end{frame} + +\begin{frame} + \frametitle{Throughput benchmark} + \begin{center} + \includegraphics[width=.7\linewidth]{../assets/perf/io-0.7-0.8-minio.png} + \end{center} +\end{frame} + +% ====================== v0.9.0 =============================== + +\begin{frame} + \begin{center} + \includegraphics[width=.8\linewidth]{../assets/tl.drawio.png} + \end{center} +\end{frame} + +\begin{frame} + \frametitle{October 2023 - Garage v0.9.0} + Focus on \underline{streamlining \& usability} + \vspace{2em} + \begin{itemize} + \item Support multiple HDDs per node + \vspace{1em} + \item S3 compatibility: + \vspace{1em} + \begin{itemize} + \item support basic lifecycle configurations + \vspace{.5em} + \item allow for multipart upload part retries + \end{itemize} + \vspace{1em} + \item LMDB by default, deprecation of Sled + \vspace{1em} + \item New layout computation algorithm + \end{itemize} +\end{frame} + + +\begin{frame} + \frametitle{Layout computation} + \begin{overprint} + \onslide<1> + \begin{center} + \includegraphics[width=\linewidth, trim=0 0 0 -4cm]{../assets/screenshots/garage_status_0.9_prod_zonehl.png} + \end{center} + \onslide<2> + \begin{center} + \includegraphics[width=.7\linewidth]{../assets/map.png} + \end{center} + \end{overprint} + \vspace{1em} + Garage stores replicas on different zones when possible +\end{frame} + +\begin{frame} + \frametitle{What a "layout" is} + \textbf{A layout is a precomputed index table:} + \vspace{1em} + + {\footnotesize + \begin{center} + \begin{tabular}{|l|l|l|l|} + \hline + \textbf{Partition} & \textbf{Node 1} & \textbf{Node 2} & \textbf{Node 3} \\ + \hline + \hline + Partition 0 & df-ymk (bespin) & Abricot (scorpio) & Courgette (neptune) \\ + \hline + Partition 1 & Ananas (scorpio) & Courgette (neptune) & df-ykl (bespin) \\ + \hline + Partition 2 & df-ymf (bespin) & Celeri (neptune) & Abricot (scorpio) \\ + \hline + \hspace{1em}$\vdots$ & \hspace{1em}$\vdots$ & \hspace{1em}$\vdots$ & \hspace{1em}$\vdots$ \\ + \hline + Partition 255 & Concombre (neptune) & df-ykl (bespin) & Abricot (scorpio) \\ + \hline + \end{tabular} + \end{center} + } + + \vspace{2em} + \visible<2->{ + The index table is built centrally using an optimal algorithm,\\ + then propagated to all nodes + } + + \vspace{1em} + \visible<3->{ + \footnotesize + Oulamara, M., \& Auvolat, A. (2023). \emph{An algorithm for geo-distributed and redundant storage in Garage}.\\ arXiv preprint arXiv:2302.13798. + } +\end{frame} + + + +% ====================== v1.0.0 =============================== + +\begin{frame} + \begin{center} + \includegraphics[width=.8\linewidth]{../assets/tl.drawio.png} + \end{center} +\end{frame} + +\begin{frame} + \frametitle{April 2024 - Garage v1.0.0} + Focus on \underline{consistency, security \& stability} + \vspace{2em} + \begin{itemize} + \item Fix consistency issues when reshuffling data (Jepsen testing) + \vspace{1em} + \item \textbf{Security audit} by Radically Open Security + \vspace{1em} + \item Misc. S3 features (SSE-C, checksums, ...) and compatibility fixes + \end{itemize} +\end{frame} + +% ====================== v2.0.0 =============================== + +\begin{frame} + \begin{center} + \includegraphics[width=.8\linewidth]{../assets/tl.drawio.png} + \end{center} +\end{frame} + +\begin{frame} + \frametitle{Garage v2.0.0} + Focus on \underline{} + \vspace{2em} + \begin{itemize} + \item TODO + \end{itemize} +\end{frame} + + +\begin{frame} + \frametitle{Currently funding...} + + \textit{...} +\end{frame} + +\begin{frame} + \frametitle{We run community surveys} + \begin{center} + \includegraphics[width=.6\linewidth]{../assets/survey_requested_features.png} + \end{center} +\end{frame} + +% ======================================== END +% ======================================== END +% ======================================== END + +\begin{frame} + \frametitle{Where to find us} + \begin{center} + \includegraphics[width=.25\linewidth]{../../logo/garage_hires.png}\\ + \vspace{-1em} + \url{https://garagehq.deuxfleurs.fr/}\\ + \url{mailto:garagehq@deuxfleurs.fr}\\ + \texttt{\#garage:deuxfleurs.fr} on Matrix + + \vspace{1.5em} + \includegraphics[width=.06\linewidth]{../assets/logos/rust_logo.png} + \includegraphics[width=.13\linewidth]{../assets/logos/AGPLv3_Logo.png} + \end{center} +\end{frame} + +\end{document} + +%% vim: set ts=4 sw=4 tw=0 noet spelllang=en : diff --git a/doc/talks/assets/armael.jpg b/doc/talks/assets/armael.jpg new file mode 100644 index 00000000..54b97662 Binary files /dev/null and b/doc/talks/assets/armael.jpg differ diff --git a/doc/talks/assets/cluster_kind.png b/doc/talks/assets/cluster_kind.png new file mode 100644 index 00000000..80f8f4b5 Binary files /dev/null and b/doc/talks/assets/cluster_kind.png differ diff --git a/doc/talks/assets/cluster_size.png b/doc/talks/assets/cluster_size.png new file mode 100644 index 00000000..b4b0f5ce Binary files /dev/null and b/doc/talks/assets/cluster_size.png differ diff --git a/doc/talks/assets/community.png b/doc/talks/assets/community.png new file mode 100644 index 00000000..06c7a1af Binary files /dev/null and b/doc/talks/assets/community.png differ diff --git a/doc/talks/assets/quentin.jpg b/doc/talks/assets/quentin.jpg new file mode 100644 index 00000000..a68d9d7b Binary files /dev/null and b/doc/talks/assets/quentin.jpg differ diff --git a/doc/talks/assets/support.png b/doc/talks/assets/support.png new file mode 100644 index 00000000..c20d179b Binary files /dev/null and b/doc/talks/assets/support.png differ diff --git a/doc/talks/assets/tl.drawio.png b/doc/talks/assets/tl.drawio.png new file mode 100644 index 00000000..c60c310a Binary files /dev/null and b/doc/talks/assets/tl.drawio.png differ diff --git a/flake.lock b/flake.lock index 211b70e0..e265d0c3 100644 --- a/flake.lock +++ b/flake.lock @@ -12,16 +12,17 @@ "original": { "owner": "ipetkov", "repo": "crane", + "rev": "6fe74265bbb6d016d663b1091f015e2976c4a527", "type": "github" } }, "flake-compat": { "locked": { - "lastModified": 1717312683, - "narHash": "sha256-FrlieJH50AuvagamEvWMIE6D2OAnERuDboFDYAED/dE=", + "lastModified": 1761640442, + "narHash": "sha256-AtrEP6Jmdvrqiv4x2xa5mrtaIp3OEe8uBYCDZDS+hu8=", "owner": "nix-community", "repo": "flake-compat", - "rev": "38fd3954cf65ce6faf3d0d45cd26059e059f07ea", + "rev": "4a56054d8ffc173222d09dad23adf4ba946c8884", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index 01a077c4..81d94215 100644 --- a/flake.nix +++ b/flake.nix @@ -11,7 +11,8 @@ "github:oxalica/rust-overlay/ab726555a9a72e6dc80649809147823a813fa95b"; inputs.rust-overlay.inputs.nixpkgs.follows = "nixpkgs"; - inputs.crane.url = "github:ipetkov/crane"; + # Crane as of 2025-01-24 + inputs.crane.url = "github:ipetkov/crane/6fe74265bbb6d016d663b1091f015e2976c4a527"; inputs.flake-compat.url = "github:nix-community/flake-compat"; inputs.flake-utils.url = "github:numtide/flake-utils"; @@ -66,7 +67,7 @@ clippy = lints.garage-cargo-clippy; }; - # ---- developpment shell, for making native builds only ---- + # ---- development shell, for making native builds only ---- devShells = let targets = compile { @@ -89,6 +90,9 @@ cargo-outdated cargo-machete nixpkgs-fmt + openssl + socat + killall ]; }; }; diff --git a/nix/build_index.nix b/nix/build_index.nix index 7869566f..92931eea 100644 --- a/nix/build_index.nix +++ b/nix/build_index.nix @@ -167,7 +167,7 @@ let

Sources:

diff --git a/script/dev-bucket.sh b/script/dev-bucket.sh index 708c2c43..82e73652 100755 --- a/script/dev-bucket.sh +++ b/script/dev-bucket.sh @@ -17,13 +17,19 @@ else fi $GARAGE_BIN -c /tmp/config.1.toml bucket create eprouvette -if [ "$GARAGE_08" = "1" ]; then +if [ "$GARAGE_OLDVER" = "v08" ]; then KEY_INFO=$($GARAGE_BIN -c /tmp/config.1.toml key new --name opérateur) -else + ACCESS_KEY=`echo $KEY_INFO|grep -Po 'GK[a-f0-9]+'` + SECRET_KEY=`echo $KEY_INFO|grep -Po 'Secret key: [a-f0-9]+'|grep -Po '[a-f0-9]+$'` +elif [ "$GARAGE_OLDVER" = "v1" ]; then KEY_INFO=$($GARAGE_BIN -c /tmp/config.1.toml key create opérateur) + ACCESS_KEY=`echo $KEY_INFO|grep -Po 'GK[a-f0-9]+'` + SECRET_KEY=`echo $KEY_INFO|grep -Po 'Secret key: [a-f0-9]+'|grep -Po '[a-f0-9]+$'` +else + KEY_INFO=$($GARAGE_BIN -c /tmp/config.1.toml json-api CreateKey '{"name":"opérateur"}') + ACCESS_KEY=`echo $KEY_INFO|jq -r .accessKeyId` + SECRET_KEY=`echo $KEY_INFO|jq -r .secretAccessKey` fi -ACCESS_KEY=`echo $KEY_INFO|grep -Po 'GK[a-f0-9]+'` -SECRET_KEY=`echo $KEY_INFO|grep -Po 'Secret key: [a-f0-9]+'|grep -Po '[a-f0-9]+$'` $GARAGE_BIN -c /tmp/config.1.toml bucket allow eprouvette --read --write --owner --key $ACCESS_KEY echo "$ACCESS_KEY $SECRET_KEY" > /tmp/garage.s3 diff --git a/script/dev-cluster.sh b/script/dev-cluster.sh index 998ffdb9..81a37099 100755 --- a/script/dev-cluster.sh +++ b/script/dev-cluster.sh @@ -30,6 +30,12 @@ for count in $(seq 1 3); do CONF_PATH="/tmp/config.$count.toml" LABEL="\e[${FANCYCOLORS[$count]}[$count]\e[49m" +if [ "$GARAGE_OLDVER" == "v08" ]; then + REPLICATION_MODE="replication_mode = \"3\"" +else + REPLICATION_MODE="replication_factor = 3" +fi + cat > $CONF_PATH <&1|grep -q HEALTHY ; do sleep 1 done -if [ "$GARAGE_08" = "1" ]; then +if [ "$GARAGE_OLDVER" = "v08" ]; then $GARAGE_BIN -c /tmp/config.1.toml status \ | grep 'NO ROLE' \ | grep -Po '^[0-9a-f]+' \ diff --git a/script/dev-env-aws.sh b/script/dev-env-aws.sh index 41f1fdde..808f9cf1 100644 --- a/script/dev-env-aws.sh +++ b/script/dev-env-aws.sh @@ -1,7 +1,6 @@ export AWS_ACCESS_KEY_ID=`cat /tmp/garage.s3 |cut -d' ' -f1` export AWS_SECRET_ACCESS_KEY=`cat /tmp/garage.s3 |cut -d' ' -f2` export AWS_DEFAULT_REGION='garage' -export AWS_REQUEST_CHECKSUM_CALCULATION='when_required' # FUTUREWORK: set AWS_ENDPOINT_URL instead, once nixpkgs bumps awscli to >=2.13.0. function aws { command aws --endpoint-url http://127.0.0.1:3911 $@ ; } diff --git a/script/helm/garage/Chart.yaml b/script/helm/garage/Chart.yaml index b3a7b921..6c93b37f 100644 --- a/script/helm/garage/Chart.yaml +++ b/script/helm/garage/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: garage description: S3-compatible object store for small self-hosted geo-distributed deployments type: application -version: 0.7.3 -appVersion: "v1.3.1" +version: 0.9.2 +appVersion: "v2.2.0" home: https://garagehq.deuxfleurs.fr/ icon: https://garagehq.deuxfleurs.fr/images/garage-logo.svg diff --git a/script/helm/garage/README.md b/script/helm/garage/README.md index bdf69ec4..a16d05ba 100644 --- a/script/helm/garage/README.md +++ b/script/helm/garage/README.md @@ -1,6 +1,6 @@ # garage -![Version: 0.7.3](https://img.shields.io/badge/Version-0.7.3-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v1.3.1](https://img.shields.io/badge/AppVersion-v1.3.1-informational?style=flat-square) +![Version: 0.9.2](https://img.shields.io/badge/Version-0.9.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v2.2.0](https://img.shields.io/badge/AppVersion-v2.2.0-informational?style=flat-square) S3-compatible object store for small self-hosted geo-distributed deployments @@ -15,6 +15,7 @@ S3-compatible object store for small self-hosted geo-distributed deployments | Key | Type | Default | Description | |-----|------|---------|-------------| | affinity | object | `{}` | | +| commonLabels | object | `{}` | Extra labels for all resources | | deployment.kind | string | `"StatefulSet"` | Switchable to DaemonSet | | deployment.podManagementPolicy | string | `"OrderedReady"` | If using statefulset, allow Parallel or OrderedReady (default) | | deployment.replicaCount | int | `3` | Number of StatefulSet replicas/garage nodes to start | @@ -22,15 +23,16 @@ S3-compatible object store for small self-hosted geo-distributed deployments | extraVolumeMounts | object | `{}` | | | extraVolumes | object | `{}` | | | fullnameOverride | string | `""` | | -| garage.blockSize | string | `"1048576"` | Defaults is 1MB An increase can result in better performance in certain scenarios https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#block-size | +| garage.blockSize | string | `"1048576"` | Defaults is 1MB An increase can result in better performance in certain scenarios https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#block_size | | garage.bootstrapPeers | list | `[]` | This is not required if you use the integrated kubernetes discovery | -| garage.compressionLevel | string | `"1"` | zstd compression level of stored blocks https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#compression-level | -| garage.dbEngine | string | `"lmdb"` | Can be changed for better performance on certain systems https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#db-engine-since-v0-8-0 | +| garage.compressionLevel | string | `"1"` | zstd compression level of stored blocks https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#compression_level | +| garage.dbEngine | string | `"lmdb"` | Can be changed for better performance on certain systems https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#db_engine | | garage.existingConfigMap | string | `""` | if not empty string, allow using an existing ConfigMap for the garage.toml, if set, ignores garage.toml | | garage.garageTomlString | string | `""` | String Template for the garage configuration if set, ignores above values. Values can be templated, see https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/ | -| garage.kubernetesSkipCrd | bool | `false` | Set to true if you want to use k8s discovery but install the CRDs manually outside of the helm chart, for example if you operate at namespace level without cluster ressources | +| garage.kubernetesSkipCrd | bool | `false` | Set to true if you want to use k8s discovery but install the CRDs manually outside of the helm chart, for example if you operate at namespace level without cluster resources | +| garage.replicationFactor | string | `"3"` | Default to 3 replicas, see the replication_factor section at https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#replication_factor | +| garage.consistencyMode | string | `"consistent"` | Default to read-after-write consistency, see the consistency_mode section at https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#consistency_mode | | garage.metadataAutoSnapshotInterval | string | `""` | If this value is set, Garage will automatically take a snapshot of the metadata DB file at a regular interval and save it in the metadata directory. https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#metadata_auto_snapshot_interval | -| garage.replicationMode | string | `"3"` | Default to 3 replicas, see the replication_mode section at https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#replication-mode | | garage.rpcBindAddr | string | `"[::]:3901"` | | | garage.rpcSecret | string | `""` | If not given, a random secret will be generated and stored in a Secret object | | garage.s3.api.region | string | `"garage"` | | @@ -74,7 +76,7 @@ S3-compatible object store for small self-hosted geo-distributed deployments | persistence.enabled | bool | `true` | | | persistence.meta.hostPath | string | `"/var/lib/garage/meta"` | | | persistence.meta.size | string | `"100Mi"` | | -| podAnnotations | object | `{}` | additonal pod annotations | +| podAnnotations | object | `{}` | additional pod annotations | | podSecurityContext.fsGroup | int | `1000` | | | podSecurityContext.runAsGroup | int | `1000` | | | podSecurityContext.runAsNonRoot | bool | `true` | | diff --git a/script/helm/garage/templates/_helpers.tpl b/script/helm/garage/templates/_helpers.tpl index 037a5f1c..2ffb90c6 100644 --- a/script/helm/garage/templates/_helpers.tpl +++ b/script/helm/garage/templates/_helpers.tpl @@ -27,7 +27,7 @@ If release name contains chart name it will be used as a full name. Create the name of the rpc secret */}} {{- define "garage.rpcSecretName" -}} -{{- printf "%s-rpc-secret" (include "garage.fullname" .) -}} +{{- .Values.garage.existingRpcSecret | default (printf "%s-rpc-secret" (include "garage.fullname" .)) -}} {{- end }} {{/* @@ -47,6 +47,9 @@ helm.sh/chart: {{ include "garage.chart" . }} app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} {{- end }} app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- with .Values.commonLabels }} +{{- toYaml . | nindent 0 }} +{{- end }} {{- end }} {{/* diff --git a/script/helm/garage/templates/configmap.yaml b/script/helm/garage/templates/configmap.yaml index ab5b84db..4fc3e152 100644 --- a/script/helm/garage/templates/configmap.yaml +++ b/script/helm/garage/templates/configmap.yaml @@ -13,9 +13,10 @@ data: db_engine = "{{ .Values.garage.dbEngine }}" - block_size = {{ .Values.garage.blockSize }} + block_size = "{{ .Values.garage.blockSize }}" - replication_mode = "{{ .Values.garage.replicationMode }}" + replication_factor = {{ .Values.garage.replicationFactor }} + consistency_mode = "{{ .Values.garage.consistencyMode }}" compression_level = {{ .Values.garage.compressionLevel }} @@ -27,8 +28,16 @@ data: # rpc_secret will be populated by the init container from a k8s secret object rpc_secret = "__RPC_SECRET_REPLACE__" - bootstrap_peers = {{ .Values.garage.bootstrapPeers }} + bootstrap_peers = [ + {{- range $index, $peer := .Values.garage.bootstrapPeers }} + {{- if $index}}, {{ end }}{{ $peer | quote }} + {{ end }} + ] + {{- if .Values.garage.additionalTopLevelConfig }} + {{ .Values.garage.additionalTopLevelConfig | nindent 4 }} + {{- end }} + [kubernetes_discovery] namespace = "{{ .Release.Namespace }}" service_name = "{{ include "garage.fullname" . }}" diff --git a/script/helm/garage/templates/secret.yaml b/script/helm/garage/templates/secret.yaml index 54749424..c0c45b93 100644 --- a/script/helm/garage/templates/secret.yaml +++ b/script/helm/garage/templates/secret.yaml @@ -1,3 +1,4 @@ +{{- if not .Values.garage.existingRpcSecret }} apiVersion: v1 kind: Secret metadata: @@ -12,3 +13,4 @@ data: {{- $prevRpcSecret := $prevSecretData.rpcSecret | default "" | b64dec }} {{/* Priority is: 1. from values, 2. previous value, 3. generate random */}} rpcSecret: {{ .Values.garage.rpcSecret | default $prevRpcSecret | default (include "jupyterhub.randHex" 64) | b64enc | quote }} +{{- end }} diff --git a/script/helm/garage/templates/workload.yaml b/script/helm/garage/templates/workload.yaml index d144cb41..4264253e 100644 --- a/script/helm/garage/templates/workload.yaml +++ b/script/helm/garage/templates/workload.yaml @@ -21,7 +21,7 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} labels: - {{- include "garage.selectorLabels" . | nindent 8 }} + {{- include "garage.labels" . | nindent 8 }} spec: {{- with .Values.imagePullSecrets }} imagePullSecrets: diff --git a/script/helm/garage/values.yaml b/script/helm/garage/values.yaml index 5e419fe2..785aea22 100644 --- a/script/helm/garage/values.yaml +++ b/script/helm/garage/values.yaml @@ -2,23 +2,32 @@ # This is a YAML-formatted file. # Declare variables to be passed into your templates. +# -- Additional labels to add to all resources created by this chart +commonLabels: {} +# app.kubernetes.io/part-of: storage +# team: platform + # Garage configuration. These values go to garage.toml garage: # -- Can be changed for better performance on certain systems - # https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#db-engine-since-v0-8-0 + # https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#db_engine dbEngine: "lmdb" # -- Defaults is 1MB # An increase can result in better performance in certain scenarios - # https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#block-size + # https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#block_size blockSize: "1048576" - # -- Default to 3 replicas, see the replication_mode section at - # https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#replication-mode - replicationMode: "3" + # -- Default to 3 replicas, see the replication_factor section at + # https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#replication_factor + replicationFactor: "3" + + # -- By default, enable read-after-write consistency guarantees, see the consistency_mode section at + # https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#consistency_mode + consistencyMode: "consistent" # -- zstd compression level of stored blocks - # https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#compression-level + # https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#compression_level compressionLevel: "1" # -- If this value is set, Garage will automatically take a snapshot of the metadata DB file at a regular interval and save it in the metadata directory. @@ -28,10 +37,14 @@ garage: rpcBindAddr: "[::]:3901" # -- If not given, a random secret will be generated and stored in a Secret object rpcSecret: "" + # -- If you want to provide an rpcSecret within an existing k8s secret, + # specify the secret name here, and store the value under the secret key `rpcSecret` + # the default secret will not be created + existingRpcSecret: "" # -- This is not required if you use the integrated kubernetes discovery bootstrapPeers: [] # -- Set to true if you want to use k8s discovery but install the CRDs manually outside - # of the helm chart, for example if you operate at namespace level without cluster ressources + # of the helm chart, for example if you operate at namespace level without cluster resources kubernetesSkipCrd: false s3: api: @@ -41,6 +54,12 @@ garage: rootDomain: ".web.garage.tld" index: "index.html" + # -- Additional configuration to append to garage.toml. Use a multi-line string for custom config. + # Example: + # additionalTopLevelConfig: |- + # data_fsync = true + additionalTopLevelConfig: "" + # -- if not empty string, allow using an existing ConfigMap for the garage.toml, # if set, ignores garage.toml existingConfigMap: "" @@ -101,13 +120,14 @@ serviceAccount: # If not set and create is true, a name is generated using the fullname template name: "" -# -- additonal pod annotations +# -- additional pod annotations podAnnotations: {} podSecurityContext: runAsUser: 1000 runAsGroup: 1000 fsGroup: 1000 + fsGroupChangePolicy: "OnRootMismatch" runAsNonRoot: true securityContext: @@ -189,7 +209,7 @@ ingress: # - kubernetes.docker.internal resources: {} - # The following are indicative for a small-size deployement, for anything serious double them. + # The following are indicative for a small-size deployment, for anything serious double them. # limits: # cpu: 100m # memory: 1024Mi diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index 50c7eb38..4a74471d 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -127,7 +127,7 @@ They are due to the download being interrupted in the middle (^C during first la Add `:force?` to the `cached-wget!` call in `daemon.clj` to re-download the binary, or restar the VMs to clear temporary files. -### In `jepsen.garage`: prefix wierdness +### In `jepsen.garage`: prefix weirdness In `store/garage set1/20231019T163358.615+0200`: @@ -146,12 +146,12 @@ and passing all values that were previously in the context (creds and prefix) as The reg2 test is our custom checker for CRDT read-after-write on individual object keys, acting as registers which can be updated. The test fails without the timestamp fix, which is expected as the clock scrambler will prevent nodes from having a correct ordering of objects. -With the timestamp fix (`--patch tsfix1`), the happenned-before relationship should at least be respected, meaning that when a PutObject call starts +With the timestamp fix (`--patch tsfix1`), the happened-before relationship should at least be respected, meaning that when a PutObject call starts after another PutObject call has ended, the second call should overwrite the value of the first call, and that value should not be readable by future GetObject calls. However, we observed inconsistencies even with the timestamp fix. -The inconsistencies seemed to always happenned after writing a nil value, which translates to a DeleteObject call +The inconsistencies seemed to always happened after writing a nil value, which translates to a DeleteObject call instead of a PutObject. By removing the possibility of writing nil values, therefore only doing PutObject calls, the issue disappears. There is therefore an issue to fix in DeleteObject. diff --git a/script/jepsen.garage/src/jepsen/garage/daemon.clj b/script/jepsen.garage/src/jepsen/garage/daemon.clj index 0ea773fb..9267a03a 100644 --- a/script/jepsen.garage/src/jepsen/garage/daemon.clj +++ b/script/jepsen.garage/src/jepsen/garage/daemon.clj @@ -43,7 +43,7 @@ "rpc_bind_addr = \"0.0.0.0:3901\"\n" "rpc_public_addr = \"" node ":3901\"\n" "db_engine = \"lmdb\"\n" - "replication_mode = \"3\"\n" + "replication_factor = 3\n" "data_dir = \"" data-dir "\"\n" "metadata_dir = \"" meta-dir "\"\n" "[s3_api]\n" diff --git a/script/k8s/config.yaml b/script/k8s/config.yaml index 8cf40fc2..bfefd999 100644 --- a/script/k8s/config.yaml +++ b/script/k8s/config.yaml @@ -8,7 +8,7 @@ data: metadata_dir = "/tmp/meta" data_dir = "/tmp/data" - replication_mode = "3" + replication_factor = 3 rpc_bind_addr = "[::]:3901" rpc_secret = "1799bccfd7411eddcf9ebd316bc1f5287ad12a68094e1c6ac6abde7e6feae1ec" diff --git a/script/telemetry/grafana-garage-dashboard-prometheus.json b/script/telemetry/grafana-garage-dashboard-prometheus.json index 28ef1ec0..1e127f8a 100644 --- a/script/telemetry/grafana-garage-dashboard-prometheus.json +++ b/script/telemetry/grafana-garage-dashboard-prometheus.json @@ -694,32 +694,7 @@ ] } }, - "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "10.83.2.3:3903" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, diff --git a/script/test-skip-part.sh b/script/test-skip-part.sh index 20ae017d..bb9d5616 100644 --- a/script/test-skip-part.sh +++ b/script/test-skip-part.sh @@ -2,7 +2,7 @@ : ' This script tests whether uploaded parts can be skipped in a - CompleteMultipartUpoad + CompleteMultipartUpload On Minio: yes, parts can be skipped @@ -52,7 +52,7 @@ Conclusions: - - Skipping a part in a CompleteMultipartUpoad call is OK + - Skipping a part in a CompleteMultipartUpload call is OK - The part is simply not included in the stored object - Sequential part renumbering counts only non-skipped parts ' diff --git a/script/test-smoke.sh b/script/test-smoke.sh index acf56a90..eee206ba 100755 --- a/script/test-smoke.sh +++ b/script/test-smoke.sh @@ -112,6 +112,23 @@ if [ -z "$SKIP_S3CMD" ]; then done fi +# BOTO3 +if [ -z "$SKIP_BOTO3" ]; then + echo "🛠️ Testing with boto3 for STREAMING-UNSIGNED-PAYLOAD-TRAILER" + source ${SCRIPT_FOLDER}/dev-env-aws.sh + AWS_ENDPOINT_URL=https://localhost:4443 python <> /tmp/garage.log 2>&1 & sleep 3 echo "🛠️ Retrieving data from old cluster" -rclone copy garage:eprouvette/test_dotgit /tmp/test_dotgit --stats=1s --stats-log-level=NOTICE --stats-one-line --fast-list +rclone copy garage:eprouvette/test_dotgit /tmp/test_dotgit \ + --stats=1s --stats-log-level=NOTICE --stats-one-line --fast-list if ! diff <(find "${SCRIPT_FOLDER}/../.git" -type f | xargs md5sum | cut -d ' ' -f 1 | sort) <(find /tmp/test_dotgit -type f | xargs md5sum | cut -d ' ' -f 1 | sort); then echo "TEST FAILURE: directories are different" @@ -68,6 +93,23 @@ if ! diff <(find "${SCRIPT_FOLDER}/../.git" -type f | xargs md5sum | cut -d ' ' fi rm -r /tmp/test_dotgit +if [ "$DO_SSEC_TEST" = "1" ]; then + rclone copy garage:eprouvette/test-ssec /tmp/test_ssec_out \ + --s3-sse-customer-algorithm AES256 \ + --s3-sse-customer-key-base64 "$SSEC_KEY" \ + --stats=1s --stats-log-level=NOTICE --stats-one-line + if ! diff "/tmp/test_ssec_out/test-upgrade.sh" "${SCRIPT_FOLDER}/test-upgrade.sh"; then + echo "SSEC-FAILURE (small file)" + exit 1 + fi + if ! diff "/tmp/test_ssec_out/randfile-for-upgrade" "/tmp/randfile-for-upgrade"; then + echo "SSEC-FAILURE (big file)" + exit 1 + fi + rm -r /tmp/test_ssec_out + rm /tmp/randfile-for-upgrade +fi + echo "🏁 Teardown" rm -rf /tmp/garage-{data,meta}-* rm -rf /tmp/config.*.toml diff --git a/shell.nix b/shell.nix index c3dedca8..4bbfedc7 100644 --- a/shell.nix +++ b/shell.nix @@ -26,17 +26,21 @@ in s3cmd minio-client rclone + (python313.withPackages (ps: [ ps.boto3 ])) + socat psmisc which openssl curl jq + typos ]; shellHook = '' export AWS_REQUEST_CHECKSUM_CALCULATION='when_required' function to_s3 { + AWS_REQUEST_CHECKSUM_CALCULATION=WHEN_REQUIRED AWS_RESPONSE_CHECKSUM_VALIDATION=WHEN_REQUIRED \ aws \ --endpoint-url https://garage.deuxfleurs.fr \ --region garage \ @@ -48,7 +52,7 @@ in function to_docker { executor \ --force \ - --customPlatform="$(echo "''${DOCKER_PLATFORM}" | sed 's/i386/386/')" \ + --custom-platform="$(echo "''${DOCKER_PLATFORM}" | sed 's/i386/386/')" \ --destination "$(echo "''${CONTAINER_NAME}" | sed 's/i386/386/'):''${CONTAINER_TAG}" \ --context dir://`pwd` \ --verbosity=debug @@ -93,6 +97,7 @@ in nix-build nix/build_index.nix + AWS_REQUEST_CHECKSUM_CALCULATION=WHEN_REQUIRED AWS_RESPONSE_CHECKSUM_VALIDATION=WHEN_REQUIRED \ aws \ --endpoint-url https://garage.deuxfleurs.fr \ --region garage \ @@ -100,6 +105,7 @@ in result/share/_releases.json \ s3://garagehq.deuxfleurs.fr/ + AWS_REQUEST_CHECKSUM_CALCULATION=WHEN_REQUIRED AWS_RESPONSE_CHECKSUM_VALIDATION=WHEN_REQUIRED \ aws \ --endpoint-url https://garage.deuxfleurs.fr \ --region garage \ diff --git a/src/api/admin/Cargo.toml b/src/api/admin/Cargo.toml index 656c6825..c8bc3423 100644 --- a/src/api/admin/Cargo.toml +++ b/src/api/admin/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "garage_api_admin" -version = "1.3.1" +version = "2.2.0" authors = ["Alex Auvolat "] edition = "2018" license = "AGPL-3.0" @@ -14,7 +14,9 @@ path = "lib.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +format_table.workspace = true garage_model.workspace = true +garage_block.workspace = true garage_table.workspace = true garage_util.workspace = true garage_rpc.workspace = true @@ -22,8 +24,11 @@ garage_api_common.workspace = true argon2.workspace = true async-trait.workspace = true +bytesize.workspace = true +chrono.workspace = true thiserror.workspace = true hex.workspace = true +paste.workspace = true tracing.workspace = true futures.workspace = true @@ -34,10 +39,12 @@ url.workspace = true serde.workspace = true serde_json.workspace = true +utoipa.workspace = true opentelemetry.workspace = true opentelemetry-prometheus = { workspace = true, optional = true } prometheus = { workspace = true, optional = true } [features] -metrics = [ "opentelemetry-prometheus", "prometheus" ] +metrics = ["opentelemetry-prometheus", "prometheus"] +k2v = ["garage_model/k2v"] diff --git a/src/api/admin/admin_token.rs b/src/api/admin/admin_token.rs new file mode 100644 index 00000000..242c9958 --- /dev/null +++ b/src/api/admin/admin_token.rs @@ -0,0 +1,292 @@ +use std::sync::Arc; + +use chrono::{DateTime, Utc}; + +use garage_table::*; +use garage_util::time::now_msec; + +use garage_model::admin_token_table::*; +use garage_model::garage::Garage; + +use crate::api::*; +use crate::error::*; +use crate::{Admin, RequestHandler}; + +impl RequestHandler for ListAdminTokensRequest { + type Response = ListAdminTokensResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let now = now_msec(); + + let mut res = garage + .admin_token_table + .get_range( + &EmptyKey, + None, + Some(KeyFilter::Deleted(DeletedFilter::NotDeleted)), + 10000, + EnumerationOrder::Forward, + ) + .await? + .iter() + .map(|t| admin_token_info_results(t, now)) + .collect::>(); + + if garage.config.admin.metrics_token.is_some() { + res.insert( + 0, + GetAdminTokenInfoResponse { + id: None, + created: None, + name: "metrics_token (from daemon configuration)".into(), + expiration: None, + expired: false, + scope: vec!["Metrics".into()], + }, + ); + } + + if garage.config.admin.admin_token.is_some() { + res.insert( + 0, + GetAdminTokenInfoResponse { + id: None, + created: None, + name: "admin_token (from daemon configuration)".into(), + expiration: None, + expired: false, + scope: vec!["*".into()], + }, + ); + } + + Ok(ListAdminTokensResponse(res)) + } +} + +impl RequestHandler for GetAdminTokenInfoRequest { + type Response = GetAdminTokenInfoResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let token = match (self.id, self.search) { + (Some(id), None) => get_existing_admin_token(garage, &id).await?, + (None, Some(search)) => { + let candidates = garage + .admin_token_table + .get_range( + &EmptyKey, + None, + Some(KeyFilter::MatchesAndNotDeleted(search.to_string())), + 10, + EnumerationOrder::Forward, + ) + .await? + .into_iter() + .collect::>(); + if candidates.len() != 1 { + return Err(Error::bad_request(format!( + "{} matching admin tokens", + candidates.len() + ))); + } + candidates.into_iter().next().unwrap() + } + _ => { + return Err(Error::bad_request( + "Either id or search must be provided (but not both)", + )); + } + }; + + Ok(admin_token_info_results(&token, now_msec())) + } +} + +impl RequestHandler for CreateAdminTokenRequest { + type Response = CreateAdminTokenResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let (mut token, secret) = if self.0.name.is_some() { + AdminApiToken::new("") + } else { + AdminApiToken::new(&format!("token_{}", Utc::now().format("%Y%m%d_%H%M"))) + }; + + apply_token_updates(&mut token, self.0)?; + + garage.admin_token_table.insert(&token).await?; + + Ok(CreateAdminTokenResponse { + secret_token: secret, + info: admin_token_info_results(&token, now_msec()), + }) + } +} + +impl RequestHandler for UpdateAdminTokenRequest { + type Response = UpdateAdminTokenResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let mut token = get_existing_admin_token(garage, &self.id).await?; + + apply_token_updates(&mut token, self.body)?; + + garage.admin_token_table.insert(&token).await?; + + Ok(UpdateAdminTokenResponse(admin_token_info_results( + &token, + now_msec(), + ))) + } +} + +impl RequestHandler for DeleteAdminTokenRequest { + type Response = DeleteAdminTokenResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let token = get_existing_admin_token(garage, &self.id).await?; + + garage + .admin_token_table + .insert(&AdminApiToken::delete(token.prefix)) + .await?; + + Ok(DeleteAdminTokenResponse) + } +} + +impl RequestHandler for GetCurrentAdminTokenInfoRequest { + type Response = GetCurrentAdminTokenInfoResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let now = now_msec(); + + if garage + .config + .admin + .metrics_token + .as_ref() + .is_some_and(|s| s == &self.admin_token) + { + return Ok(GetCurrentAdminTokenInfoResponse( + GetAdminTokenInfoResponse { + id: None, + created: None, + name: "metrics_token (from daemon configuration)".into(), + expiration: None, + expired: false, + scope: vec!["Metrics".into()], + }, + )); + } + + if garage + .config + .admin + .admin_token + .as_ref() + .is_some_and(|s| s == &self.admin_token) + { + return Ok(GetCurrentAdminTokenInfoResponse( + GetAdminTokenInfoResponse { + id: None, + created: None, + name: "admin_token (from daemon configuration)".into(), + expiration: None, + expired: false, + scope: vec!["*".into()], + }, + )); + } + + let (prefix, _) = self.admin_token.split_once('.').unwrap(); + let token = get_existing_admin_token(garage, &prefix.to_string()).await?; + + Ok(GetCurrentAdminTokenInfoResponse(admin_token_info_results( + &token, now, + ))) + } +} + +// ---- helpers ---- + +fn admin_token_info_results(token: &AdminApiToken, now: u64) -> GetAdminTokenInfoResponse { + let params = token.params().unwrap(); + + GetAdminTokenInfoResponse { + id: Some(token.prefix.clone()), + created: Some( + DateTime::from_timestamp_millis(params.created as i64) + .expect("invalid timestamp stored in db"), + ), + name: params.name.get().to_string(), + expiration: params.expiration.get().map(|x| { + DateTime::from_timestamp_millis(x as i64).expect("invalid timestamp stored in db") + }), + expired: params.is_expired(now), + scope: params.scope.get().0.clone(), + } +} + +async fn get_existing_admin_token(garage: &Garage, id: &String) -> Result { + garage + .admin_token_table + .get(&EmptyKey, id) + .await? + .filter(|k| !k.state.is_deleted()) + .ok_or_else(|| Error::NoSuchAdminToken(id.to_string())) +} + +fn apply_token_updates( + token: &mut AdminApiToken, + updates: UpdateAdminTokenRequestBody, +) -> Result<(), Error> { + if updates.never_expires && updates.expiration.is_some() { + return Err(Error::bad_request( + "cannot specify `expiration` and `never_expires`", + )); + } + + let params = token.params_mut().unwrap(); + + if let Some(name) = updates.name { + params.name.update(name); + } + if let Some(expiration) = updates.expiration { + params + .expiration + .update(Some(expiration.timestamp_millis() as u64)); + } + if updates.never_expires { + params.expiration.update(None); + } + if let Some(scope) = updates.scope { + params.scope.update(AdminApiTokenScope(scope)); + } + + Ok(()) +} diff --git a/src/api/admin/api.rs b/src/api/admin/api.rs new file mode 100644 index 00000000..83d456a4 --- /dev/null +++ b/src/api/admin/api.rs @@ -0,0 +1,1352 @@ +use std::collections::HashMap; +use std::convert::TryFrom; +use std::net::SocketAddr; +use std::sync::Arc; + +use chrono::{DateTime, Utc}; +use paste::paste; +use serde::{Deserialize, Serialize}; +use utoipa::{IntoParams, ToSchema}; + +use garage_rpc::*; + +use garage_model::garage::Garage; + +use garage_api_common::{common_error::CommonError, helpers::is_default}; + +use crate::api_server::{find_matching_nodes, AdminRpc, AdminRpcResponse}; +use crate::error::Error; +use crate::macros::*; +use crate::{Admin, RequestHandler}; + +// This generates the following: +// +// - An enum AdminApiRequest that contains a variant for all endpoints +// +// - An enum AdminApiResponse that contains a variant for all non-special endpoints. +// This enum is serialized in api_server.rs, without the enum tag, +// which gives directly the JSON response corresponding to the API call. +// This enum does not implement Deserialize as its meaning can be ambiguous. +// +// - An enum TaggedAdminApiResponse that contains the same variants, but +// serializes as a tagged enum. This allows it to be transmitted through +// Garage RPC and deserialized correctly upon receival. +// Conversion from untagged to tagged can be done using the `.tagged()` method. +// +// - AdminApiRequest::name() that returns the name of the endpoint +// +// - impl EndpointHandler for AdminApiHandler, that uses the impl EndpointHandler +// of each request type below for non-special endpoints +admin_endpoints![ + // Special endpoints of the Admin API + @special Options, + @special CheckDomain, + @special Health, + @special Metrics, + + // Cluster operations + GetClusterStatus, + GetClusterHealth, + GetClusterStatistics, + ConnectClusterNodes, + + // Admin tokens operations + ListAdminTokens, + GetAdminTokenInfo, + CreateAdminToken, + UpdateAdminToken, + DeleteAdminToken, + GetCurrentAdminTokenInfo, + + // Layout operations + GetClusterLayout, + GetClusterLayoutHistory, + UpdateClusterLayout, + PreviewClusterLayoutChanges, + ApplyClusterLayout, + RevertClusterLayout, + ClusterLayoutSkipDeadNodes, + + // Access key operations + ListKeys, + GetKeyInfo, + CreateKey, + ImportKey, + UpdateKey, + DeleteKey, + + // Bucket operations + ListBuckets, + GetBucketInfo, + CreateBucket, + UpdateBucket, + DeleteBucket, + CleanupIncompleteUploads, + InspectObject, + + // Operations on permissions for keys on buckets + AllowBucketKey, + DenyBucketKey, + + // Operations on bucket aliases + AddBucketAlias, + RemoveBucketAlias, + + // Node operations + GetNodeInfo, + GetNodeStatistics, + CreateMetadataSnapshot, + LaunchRepairOperation, + + // Worker operations + ListWorkers, + GetWorkerInfo, + GetWorkerVariable, + SetWorkerVariable, + + // Block operations + ListBlockErrors, + GetBlockInfo, + RetryBlockResync, + PurgeBlocks, +]; + +local_admin_endpoints![ + // Node operations + GetNodeInfo, + GetNodeStatistics, + CreateMetadataSnapshot, + LaunchRepairOperation, + // Background workers + ListWorkers, + GetWorkerInfo, + GetWorkerVariable, + SetWorkerVariable, + // Block operations + ListBlockErrors, + GetBlockInfo, + RetryBlockResync, + PurgeBlocks, +]; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MultiRequest { + pub node: String, + pub body: RB, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct MultiResponse { + /// Map of node id to response returned by this node, for nodes that were able to + /// successfully complete the API call + pub success: HashMap, + /// Map of node id to error message, for nodes that were unable to complete the API + /// call + pub error: HashMap, +} + +#[derive(Debug, Clone, Serialize, Deserialize, IntoParams)] +#[into_params(parameter_in = Query)] +pub struct MultiRequestQueryParams { + /// Node ID to query, or `*` for all nodes, or `self` for the node responding to the request + pub node: String, +} + +// ********************************************** +// Special endpoints +// +// These endpoints don't have associated *Response structs +// because they directly produce an http::Response +// ********************************************** + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct OptionsRequest; + +#[derive(Debug, Clone, Serialize, Deserialize, IntoParams)] +#[into_params(parameter_in = Query)] +pub struct CheckDomainRequest { + /// The domain name to check for + pub domain: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct HealthRequest; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MetricsRequest; + +// ********************************************** +// Cluster operations +// ********************************************** + +// ---- GetClusterStatus ---- + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GetClusterStatusRequest; + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct GetClusterStatusResponse { + /// Current version number of the cluster layout + pub layout_version: u64, + /// List of nodes that are either currently connected, part of the + /// current cluster layout, or part of an older cluster layout that + /// is still active in the cluster (being drained). + pub nodes: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct NodeResp { + /// Full-length node identifier + pub id: String, + /// Garage version + pub garage_version: Option, + /// Socket address used by other nodes to connect to this node for RPC + #[schema(value_type = Option)] + pub addr: Option, + /// Hostname of the node + pub hostname: Option, + /// Whether this node is connected in the cluster + pub is_up: bool, + /// For disconnected nodes, the number of seconds since last contact, + /// or `null` if no contact was established since Garage restarted. + pub last_seen_secs_ago: Option, + /// Role assigned to this node in the current cluster layout + pub role: Option, + /// Whether this node is part of an older layout version and is draining data. + pub draining: bool, + /// Total and available space on the disk partition(s) containing the data + /// directory(ies) + #[serde(default, skip_serializing_if = "Option::is_none")] + pub data_partition: Option, + /// Total and available space on the disk partition containing the + /// metadata directory + #[serde(default, skip_serializing_if = "Option::is_none")] + pub metadata_partition: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct NodeAssignedRole { + /// Zone name assigned by the cluster administrator + pub zone: String, + /// List of tags assigned by the cluster administrator + pub tags: Vec, + /// Capacity (in bytes) assigned by the cluster administrator, + /// absent for gateway nodes + pub capacity: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct FreeSpaceResp { + /// Number of bytes available + pub available: u64, + /// Total number of bytes + pub total: u64, +} + +// ---- GetClusterHealth ---- + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GetClusterHealthRequest; + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct GetClusterHealthResponse { + /// One of `healthy`, `degraded` or `unavailable`: + /// - `healthy`: Garage node is connected to all storage nodes + /// - `degraded`: Garage node is not connected to all storage nodes, but a quorum of write nodes is available for all partitions + /// - `unavailable`: a quorum of write nodes is not available for some partitions + pub status: String, + /// the number of nodes this Garage node has had a TCP connection to since the daemon started + pub known_nodes: usize, + /// the number of nodes this Garage node currently has an open connection to + pub connected_nodes: usize, + /// the number of storage nodes currently registered in the cluster layout + pub storage_nodes: usize, + /// the number of storage nodes to which a connection is currently open + pub storage_nodes_up: usize, + /// the total number of partitions of the data (currently always 256) + pub partitions: usize, + /// the number of partitions for which a quorum of write nodes is available + pub partitions_quorum: usize, + /// the number of partitions for which we are connected to all storage nodes responsible of storing it + pub partitions_all_ok: usize, +} + +// ---- GetClusterStatistics ---- + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct GetClusterStatisticsRequest; + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct GetClusterStatisticsResponse { + pub freeform: String, +} + +// ---- ConnectClusterNodes ---- + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct ConnectClusterNodesRequest(pub Vec); + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct ConnectClusterNodesResponse(pub Vec); + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ConnectNodeResponse { + /// `true` if Garage managed to connect to this node + pub success: bool, + /// An error message if Garage did not manage to connect to this node + pub error: Option, +} + +// ********************************************** +// Admin token operations +// ********************************************** + +// ---- ListAdminTokens ---- + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ListAdminTokensRequest; + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct ListAdminTokensResponse(pub Vec); + +// ---- GetAdminTokenInfo ---- + +#[derive(Debug, Clone, Serialize, Deserialize, IntoParams)] +#[into_params(parameter_in = Query)] +#[serde(rename_all = "camelCase")] +pub struct GetAdminTokenInfoRequest { + /// Admin API token ID + pub id: Option, + /// Partial token ID or name to search for + pub search: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct GetAdminTokenInfoResponse { + /// Identifier of the admin token (which is also a prefix of the full bearer token) + pub id: Option, + /// Creation date + pub created: Option>, + /// Name of the admin API token + pub name: String, + /// Expiration time and date, formatted according to RFC 3339 + pub expiration: Option>, + /// Whether this admin token is expired already + pub expired: bool, + /// Scope of the admin API token, a list of admin endpoint names (such as + /// `GetClusterStatus`, etc), or the special value `*` to allow all + /// admin endpoints + pub scope: Vec, +} + +// ---- CreateAdminToken ---- + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CreateAdminTokenRequest(pub UpdateAdminTokenRequestBody); + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct CreateAdminTokenResponse { + /// The secret bearer token. **CAUTION:** This token will be shown only + /// ONCE, so this value MUST be remembered somewhere, or the token + /// will be unusable. + pub secret_token: String, + #[serde(flatten)] + pub info: GetAdminTokenInfoResponse, +} + +// ---- UpdateAdminToken ---- + +#[derive(Debug, Clone, Serialize, Deserialize, IntoParams)] +#[into_params(parameter_in = Query)] +pub struct UpdateAdminTokenRequest { + /// Admin API token ID + pub id: String, + #[param(ignore = true)] + pub body: UpdateAdminTokenRequestBody, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct UpdateAdminTokenRequestBody { + /// Name of the admin API token + pub name: Option, + /// Expiration time and date, formatted according to RFC 3339 + pub expiration: Option>, + /// Set the admin token to never expire + #[serde(default)] + pub never_expires: bool, + /// Scope of the admin API token, a list of admin endpoint names (such as + /// `GetClusterStatus`, etc), or the special value `*` to allow all + /// admin endpoints. **WARNING:** Granting a scope of `CreateAdminToken` or + /// `UpdateAdminToken` trivially allows for privilege escalation, and is thus + /// functionally equivalent to granting a scope of `*`. + pub scope: Option>, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct UpdateAdminTokenResponse(pub GetAdminTokenInfoResponse); + +// ---- DeleteAdminToken ---- + +#[derive(Debug, Clone, Serialize, Deserialize, IntoParams)] +#[into_params(parameter_in = Query)] +pub struct DeleteAdminTokenRequest { + /// Admin API token ID + pub id: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DeleteAdminTokenResponse; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GetCurrentAdminTokenInfoRequest { + pub admin_token: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct GetCurrentAdminTokenInfoResponse(pub GetAdminTokenInfoResponse); + +// ********************************************** +// Layout operations +// ********************************************** + +// ---- GetClusterLayout ---- + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GetClusterLayoutRequest; + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct GetClusterLayoutResponse { + /// The current version number of the cluster layout + pub version: u64, + /// List of nodes that currently have a role in the cluster layout + pub roles: Vec, + /// Layout parameters used when the current layout was computed + pub parameters: LayoutParameters, + /// The size, in bytes, of one Garage partition (= a shard) + pub partition_size: u64, + /// List of nodes that will have a new role or whose role will be + /// removed in the next version of the cluster layout + pub staged_role_changes: Vec, + /// Layout parameters to use when computing the next version of + /// the cluster layout + pub staged_parameters: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct LayoutNodeRole { + /// Identifier of the node + pub id: String, + /// Zone name assigned by the cluster administrator + pub zone: String, + /// List of tags assigned by the cluster administrator + pub tags: Vec, + /// Capacity (in bytes) assigned by the cluster administrator, + /// absent for gateway nodes + pub capacity: Option, + /// Number of partitions stored on this node + /// (a result of the layout computation) + pub stored_partitions: Option, + /// Capacity (in bytes) that is actually usable on this node in the current + /// layout, which is equal to `stored_partitions` × `partition_size` + pub usable_capacity: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct NodeRoleChange { + /// ID of the node for which this change applies + pub id: String, + #[serde(flatten)] + pub action: NodeRoleChangeEnum, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(untagged)] +pub enum NodeRoleChangeEnum { + #[serde(rename_all = "camelCase")] + Remove { + /// Set `remove` to `true` to remove the node from the layout + remove: bool, + }, + #[serde(rename_all = "camelCase")] + Update(NodeAssignedRole), +} + +#[derive(Copy, Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct LayoutParameters { + /// Minimum number of zones in which a data partition must be replicated + pub zone_redundancy: ZoneRedundancy, +} + +#[derive(Copy, Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub enum ZoneRedundancy { + /// Partitions must be replicated in at least this number of + /// distinct zones. + AtLeast(usize), + /// Partitions must be replicated in as many zones as possible: + /// as many zones as there are replicas, if there are enough distinct + /// zones, or at least one in each zone otherwise. + Maximum, +} + +// ---- GetClusterLayoutHistory ---- + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GetClusterLayoutHistoryRequest; + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct GetClusterLayoutHistoryResponse { + /// The current version number of the cluster layout + pub current_version: u64, + /// All nodes in the cluster are aware of layout versions up to + /// this version number (at least) + pub min_ack: u64, + /// Layout version history + pub versions: Vec, + /// Detailed update trackers for nodes (see + /// `https://garagehq.deuxfleurs.fr/blog/2023-12-preserving-read-after-write-consistency/`) + pub update_trackers: Option>, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ClusterLayoutVersion { + /// Version number of this layout version + pub version: u64, + /// Status of this layout version + pub status: ClusterLayoutVersionStatus, + /// Number of nodes with an assigned storage capacity in this layout version + pub storage_nodes: u64, + /// Number of nodes with a gateway role in this layout version + pub gateway_nodes: u64, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub enum ClusterLayoutVersionStatus { + /// This is the most up-to-date layout version + Current, + /// This version is still active in the cluster because metadata + /// is being rebalanced or migrated from old nodes + Draining, + /// This version is no longer active in the cluster for metadata + /// reads and writes. Note that there is still the possibility + /// that data blocks are being migrated away from nodes in this + /// layout version. + Historical, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct NodeUpdateTrackers { + pub ack: u64, + pub sync: u64, + pub sync_ack: u64, +} + +// ---- UpdateClusterLayout ---- + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct UpdateClusterLayoutRequest { + /// New node roles to assign or remove in the cluster layout + #[serde(default)] + pub roles: Vec, + /// New layout computation parameters to use + #[serde(default)] + pub parameters: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct UpdateClusterLayoutResponse(pub GetClusterLayoutResponse); + +// ---- PreviewClusterLayoutChanges ---- + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PreviewClusterLayoutChangesRequest; + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(untagged)] +pub enum PreviewClusterLayoutChangesResponse { + #[serde(rename_all = "camelCase")] + Error { + /// Error message indicating that the layout could not be computed + /// with the provided configuration + error: String, + }, + #[serde(rename_all = "camelCase")] + Success { + /// Plain-text information about the layout computation + /// (do not try to parse this) + message: Vec, + /// Details about the new cluster layout + new_layout: GetClusterLayoutResponse, + }, +} + +// ---- ApplyClusterLayout ---- + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ApplyClusterLayoutRequest { + /// As a safety measure, the new version number of the layout must + /// be specified here + pub version: u64, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ApplyClusterLayoutResponse { + /// Plain-text information about the layout computation + /// (do not try to parse this) + pub message: Vec, + /// Details about the new cluster layout + pub layout: GetClusterLayoutResponse, +} + +// ---- RevertClusterLayout ---- + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RevertClusterLayoutRequest; + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct RevertClusterLayoutResponse(pub GetClusterLayoutResponse); + +// ---- ClusterLayoutSkipDeadNodes ---- + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ClusterLayoutSkipDeadNodesRequest { + /// Version number of the layout to assume is currently up-to-date. + /// This will generally be the current layout version. + pub version: u64, + /// Allow the skip even if a quorum of nodes could not be found for + /// the data among the remaining nodes + pub allow_missing_data: bool, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ClusterLayoutSkipDeadNodesResponse { + /// Nodes for which the ACK update tracker has been updated to `version` + pub ack_updated: Vec, + /// If `allow_missing_data` is set, + /// nodes for which the SYNC update tracker has been updated to `version` + pub sync_updated: Vec, +} + +// ********************************************** +// Access key operations +// ********************************************** + +// ---- ListKeys ---- + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ListKeysRequest; + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct ListKeysResponse(pub Vec); + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ListKeysResponseItem { + pub id: String, + pub name: String, + pub created: Option>, + pub expiration: Option>, + pub expired: bool, +} + +// ---- GetKeyInfo ---- + +#[derive(Debug, Clone, Serialize, Deserialize, IntoParams)] +#[into_params(parameter_in = Query)] +#[serde(rename_all = "camelCase")] +pub struct GetKeyInfoRequest { + /// Access key ID + pub id: Option, + /// Partial key ID or name to search for + pub search: Option, + /// Whether to return the secret access key + #[serde(default)] + pub show_secret_key: bool, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct GetKeyInfoResponse { + pub access_key_id: String, + pub created: Option>, + pub name: String, + pub expiration: Option>, + pub expired: bool, + #[serde(default, skip_serializing_if = "is_default")] + pub secret_access_key: Option, + pub permissions: KeyPerm, + pub buckets: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct KeyPerm { + #[serde(default)] + pub create_bucket: bool, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct KeyInfoBucketResponse { + pub id: String, + pub global_aliases: Vec, + pub local_aliases: Vec, + pub permissions: ApiBucketKeyPerm, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ApiBucketKeyPerm { + #[serde(default)] + pub read: bool, + #[serde(default)] + pub write: bool, + #[serde(default)] + pub owner: bool, +} + +// ---- CreateKey ---- + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct CreateKeyRequest(pub UpdateKeyRequestBody); + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct CreateKeyResponse(pub GetKeyInfoResponse); + +// ---- ImportKey ---- + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ImportKeyRequest { + pub access_key_id: String, + pub secret_access_key: String, + pub name: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct ImportKeyResponse(pub GetKeyInfoResponse); + +// ---- UpdateKey ---- + +#[derive(Debug, Clone, Serialize, Deserialize, IntoParams)] +#[into_params(parameter_in = Query)] +pub struct UpdateKeyRequest { + /// Access key ID + pub id: String, + #[param(ignore = true)] + pub body: UpdateKeyRequestBody, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct UpdateKeyResponse(pub GetKeyInfoResponse); + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct UpdateKeyRequestBody { + /// Name of the API key + pub name: Option, + /// Expiration time and date, formatted according to RFC 3339 + pub expiration: Option>, + /// Set the access key to never expire + #[serde(default)] + pub never_expires: bool, + /// Permissions to allow for the key + pub allow: Option, + /// Permissions to deny for the key + pub deny: Option, +} + +// ---- DeleteKey ---- + +#[derive(Debug, Clone, Serialize, Deserialize, IntoParams)] +#[into_params(parameter_in = Query)] +pub struct DeleteKeyRequest { + /// Access key ID + pub id: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DeleteKeyResponse; + +// ********************************************** +// Bucket operations +// ********************************************** + +// ---- ListBuckets ---- + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ListBucketsRequest; + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct ListBucketsResponse(pub Vec); + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ListBucketsResponseItem { + pub id: String, + pub created: DateTime, + pub global_aliases: Vec, + pub local_aliases: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BucketLocalAlias { + pub access_key_id: String, + pub alias: String, +} + +// ---- GetBucketInfo ---- + +#[derive(Debug, Clone, Serialize, Deserialize, IntoParams)] +#[into_params(parameter_in = Query)] +#[serde(rename_all = "camelCase")] +pub struct GetBucketInfoRequest { + /// Exact bucket ID to look up + pub id: Option, + /// Global alias of bucket to look up + pub global_alias: Option, + /// Partial ID or alias to search for + pub search: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct GetBucketInfoResponse { + /// Identifier of the bucket + pub id: String, + /// Bucket creation date + pub created: DateTime, + /// List of global aliases for this bucket + pub global_aliases: Vec, + /// Whether website access is enabled for this bucket + pub website_access: bool, + #[serde(default)] + /// Website configuration for this bucket + pub website_config: Option, + /// List of access keys that have permissions granted on this bucket + pub keys: Vec, + /// Number of objects in this bucket + pub objects: i64, + /// Total number of bytes used by objects in this bucket + pub bytes: i64, + /// Number of unfinished uploads in this bucket + pub unfinished_uploads: i64, + /// Number of unfinished multipart uploads in this bucket + pub unfinished_multipart_uploads: i64, + /// Number of parts in unfinished multipart uploads in this bucket + pub unfinished_multipart_upload_parts: i64, + /// Total number of bytes used by unfinished multipart uploads in this bucket + pub unfinished_multipart_upload_bytes: i64, + /// Quotas that apply to this bucket + pub quotas: ApiBucketQuotas, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct GetBucketInfoWebsiteResponse { + pub index_document: String, + pub error_document: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct GetBucketInfoKey { + pub access_key_id: String, + pub name: String, + pub permissions: ApiBucketKeyPerm, + pub bucket_local_aliases: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ApiBucketQuotas { + pub max_size: Option, + pub max_objects: Option, +} + +// ---- CreateBucket ---- + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct CreateBucketRequest { + pub global_alias: Option, + pub local_alias: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct CreateBucketResponse(pub GetBucketInfoResponse); + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct CreateBucketLocalAlias { + pub access_key_id: String, + pub alias: String, + #[serde(default)] + pub allow: ApiBucketKeyPerm, +} + +// ---- UpdateBucket ---- + +#[derive(Debug, Clone, Serialize, Deserialize, IntoParams)] +#[into_params(parameter_in = Query)] +pub struct UpdateBucketRequest { + /// ID of the bucket to update + pub id: String, + #[param(ignore = true)] + pub body: UpdateBucketRequestBody, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct UpdateBucketResponse(pub GetBucketInfoResponse); + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct UpdateBucketRequestBody { + pub website_access: Option, + pub quotas: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct UpdateBucketWebsiteAccess { + pub enabled: bool, + pub index_document: Option, + pub error_document: Option, +} + +// ---- DeleteBucket ---- + +#[derive(Debug, Clone, Serialize, Deserialize, IntoParams)] +#[into_params(parameter_in = Query)] +pub struct DeleteBucketRequest { + /// ID of the bucket to delete + pub id: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DeleteBucketResponse; + +// ---- CleanupIncompleteUploads ---- + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct CleanupIncompleteUploadsRequest { + pub bucket_id: String, + pub older_than_secs: u64, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct CleanupIncompleteUploadsResponse { + pub uploads_deleted: u64, +} + +#[derive(Debug, Clone, Serialize, Deserialize, IntoParams)] +#[into_params(parameter_in = Query)] +#[serde(rename_all = "camelCase")] +pub struct InspectObjectRequest { + pub bucket_id: String, + pub key: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct InspectObjectResponse { + /// ID of the bucket containing the inspected object + pub bucket_id: String, + /// Key of the inspected object + pub key: String, + /// List of versions currently stored for this object + pub versions: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema, Default)] +#[serde(rename_all = "camelCase")] +pub struct InspectObjectVersion { + /// Version ID + pub uuid: String, + /// Creation timestamp of this object version + pub timestamp: DateTime, + /// Whether this object version was created with SSE-C encryption + pub encrypted: bool, + /// Whether this object version is still uploading + pub uploading: bool, + /// Whether this is an aborted upload + pub aborted: bool, + /// Whether this version is a delete marker (a tombstone indicating that a previous version of + /// the object has been deleted) + pub delete_marker: bool, + /// Whether the object's data is stored inline (for small objects) + pub inline: bool, + /// Size of the object, in bytes + pub size: Option, + /// Etag of this object version + pub etag: Option, + /// Metadata (HTTP headers) associated with this object version + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub headers: Vec<(String, String)>, + /// List of data blocks for this object version + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub blocks: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct InspectObjectBlock { + /// Part number of the part containing this block, for multipart uploads + pub part_number: u64, + /// Offset of this block within the part + pub offset: u64, + /// Hash (blake2 sum) of the block's data + pub hash: String, + /// Length of the blocks's data + pub size: u64, +} + +// ********************************************** +// Operations on permissions for keys on buckets +// ********************************************** + +// ---- AllowBucketKey ---- + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct AllowBucketKeyRequest(pub BucketKeyPermChangeRequest); + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct AllowBucketKeyResponse(pub GetBucketInfoResponse); + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BucketKeyPermChangeRequest { + pub bucket_id: String, + pub access_key_id: String, + pub permissions: ApiBucketKeyPerm, +} + +// ---- DenyBucketKey ---- + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct DenyBucketKeyRequest(pub BucketKeyPermChangeRequest); + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct DenyBucketKeyResponse(pub GetBucketInfoResponse); + +// ********************************************** +// Operations on bucket aliases +// ********************************************** + +// ---- AddBucketAlias ---- + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct AddBucketAliasRequest { + pub bucket_id: String, + #[serde(flatten)] + pub alias: BucketAliasEnum, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct AddBucketAliasResponse(pub GetBucketInfoResponse); + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(untagged)] +pub enum BucketAliasEnum { + #[serde(rename_all = "camelCase")] + Global { global_alias: String }, + #[serde(rename_all = "camelCase")] + Local { + local_alias: String, + access_key_id: String, + }, +} + +// ---- RemoveBucketAlias ---- + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct RemoveBucketAliasRequest { + pub bucket_id: String, + #[serde(flatten)] + pub alias: BucketAliasEnum, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct RemoveBucketAliasResponse(pub GetBucketInfoResponse); + +// ********************************************** +// Node operations +// ********************************************** + +// ---- GetNodeInfo ---- + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct LocalGetNodeInfoRequest; + +#[derive(Debug, Clone, Serialize, Deserialize, Default, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct LocalGetNodeInfoResponse { + pub node_id: String, + pub garage_version: String, + pub garage_features: Option>, + pub rust_version: String, + pub db_engine: String, +} + +// ---- GetNodeStatistics ---- + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct LocalGetNodeStatisticsRequest; + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct LocalGetNodeStatisticsResponse { + pub freeform: String, +} + +// ---- CreateMetadataSnapshot ---- + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct LocalCreateMetadataSnapshotRequest; + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct LocalCreateMetadataSnapshotResponse; + +// ---- LaunchRepairOperation ---- + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct LocalLaunchRepairOperationRequest { + pub repair_type: RepairType, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub enum RepairType { + Tables, + Blocks, + Versions, + MultipartUploads, + BlockRefs, + BlockRc, + Rebalance, + Scrub(ScrubCommand), + Aliases, + ClearResyncQueue, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub enum ScrubCommand { + Start, + Pause, + Resume, + Cancel, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct LocalLaunchRepairOperationResponse; + +// ********************************************** +// Worker operations +// ********************************************** + +// ---- ListWorkers ---- + +#[derive(Debug, Clone, Serialize, Deserialize, Default, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct LocalListWorkersRequest { + #[serde(default)] + pub busy_only: bool, + #[serde(default)] + pub error_only: bool, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct LocalListWorkersResponse(pub Vec); + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct WorkerInfoResp { + pub id: u64, + pub name: String, + pub state: WorkerStateResp, + pub errors: u64, + pub consecutive_errors: u64, + pub last_error: Option, + pub tranquility: Option, + pub progress: Option, + pub queue_length: Option, + pub persistent_errors: Option, + pub freeform: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub enum WorkerStateResp { + Busy, + #[serde(rename_all = "camelCase")] + Throttled { + duration_secs: f32, + }, + Idle, + Done, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct WorkerLastError { + pub message: String, + pub secs_ago: u64, +} + +// ---- GetWorkerInfo ---- + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct LocalGetWorkerInfoRequest { + pub id: u64, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct LocalGetWorkerInfoResponse(pub WorkerInfoResp); + +// ---- GetWorkerVariable ---- + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct LocalGetWorkerVariableRequest { + pub variable: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct LocalGetWorkerVariableResponse(pub HashMap); + +// ---- SetWorkerVariable ---- + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct LocalSetWorkerVariableRequest { + pub variable: String, + pub value: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct LocalSetWorkerVariableResponse { + pub variable: String, + pub value: String, +} + +// ********************************************** +// Block operations +// ********************************************** + +// ---- ListBlockErrors ---- + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct LocalListBlockErrorsRequest; + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct LocalListBlockErrorsResponse(pub Vec); + +#[derive(Serialize, Deserialize, Clone, Debug, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BlockError { + pub block_hash: String, + pub refcount: u64, + pub error_count: u64, + pub last_try_secs_ago: u64, + pub next_try_in_secs: u64, +} + +// ---- GetBlockInfo ---- + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct LocalGetBlockInfoRequest { + pub block_hash: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct LocalGetBlockInfoResponse { + pub block_hash: String, + pub refcount: u64, + pub versions: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BlockVersion { + pub version_id: String, + pub ref_deleted: bool, + pub version_deleted: bool, + pub garbage_collected: bool, + pub backlink: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub enum BlockVersionBacklink { + #[serde(rename_all = "camelCase")] + Object { bucket_id: String, key: String }, + #[serde(rename_all = "camelCase")] + Upload { + upload_id: String, + upload_deleted: bool, + upload_garbage_collected: bool, + bucket_id: Option, + key: Option, + }, +} + +// ---- RetryBlockResync ---- + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(untagged)] +pub enum LocalRetryBlockResyncRequest { + #[serde(rename_all = "camelCase")] + All { all: bool }, + #[serde(rename_all = "camelCase")] + Blocks { block_hashes: Vec }, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct LocalRetryBlockResyncResponse { + pub count: u64, +} + +// ---- PurgeBlocks ---- + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct LocalPurgeBlocksRequest(pub Vec); + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct LocalPurgeBlocksResponse { + pub blocks_purged: u64, + pub objects_deleted: u64, + pub uploads_deleted: u64, + pub versions_deleted: u64, + pub block_refs_purged: u64, +} diff --git a/src/api/admin/api_server.rs b/src/api/admin/api_server.rs index 6f0c474f..aa8d8e96 100644 --- a/src/api/admin/api_server.rs +++ b/src/api/admin/api_server.rs @@ -1,333 +1,237 @@ -use std::collections::HashMap; +use std::borrow::Cow; use std::sync::Arc; -use argon2::password_hash::PasswordHash; - -use http::header::{ACCESS_CONTROL_ALLOW_METHODS, ACCESS_CONTROL_ALLOW_ORIGIN, ALLOW}; -use hyper::{body::Incoming as IncomingBody, Request, Response, StatusCode}; +use http::header::{HeaderValue, ACCESS_CONTROL_ALLOW_ORIGIN, AUTHORIZATION}; +use hyper::{body::Incoming as IncomingBody, Request, Response}; +use serde::{Deserialize, Serialize}; use tokio::sync::watch; use opentelemetry::trace::SpanRef; #[cfg(feature = "metrics")] use opentelemetry_prometheus::PrometheusExporter; -#[cfg(feature = "metrics")] -use prometheus::{Encoder, TextEncoder}; use garage_model::garage::Garage; -use garage_rpc::system::ClusterHealthStatus; +use garage_rpc::{Endpoint as RpcEndpoint, *}; +use garage_table::EmptyKey; +use garage_util::background::BackgroundRunner; +use garage_util::data::Uuid; use garage_util::error::Error as GarageError; use garage_util::socket_address::UnixOrTCPSocketAddress; +use garage_util::time::now_msec; use garage_api_common::generic_server::*; use garage_api_common::helpers::*; -use crate::bucket::*; -use crate::cluster::*; +use crate::api::*; use crate::error::*; -use crate::key::*; use crate::router_v0; -use crate::router_v1::{Authorization, Endpoint}; +use crate::router_v1; +use crate::Authorization; +use crate::RequestHandler; + +// ---- FOR RPC ---- + +pub const ADMIN_RPC_PATH: &str = "garage_api/admin/rpc.rs/Rpc"; + +#[derive(Debug, Serialize, Deserialize)] +pub enum AdminRpc { + Proxy(AdminApiRequest), + Internal(LocalAdminApiRequest), +} + +#[derive(Debug, Serialize, Deserialize)] +pub enum AdminRpcResponse { + ProxyApiOkResponse(TaggedAdminApiResponse), + InternalApiOkResponse(LocalAdminApiResponse), + ApiErrorResponse { + http_code: u16, + error_code: String, + message: String, + }, +} + +impl Rpc for AdminRpc { + type Response = Result; +} + +impl EndpointHandler for AdminApiServer { + async fn handle( + self: &Arc, + message: &AdminRpc, + _from: NodeID, + ) -> Result { + match message { + AdminRpc::Proxy(req) => { + info!("Proxied admin API request: {}", req.name()); + let res = req.clone().handle(&self.garage, self).await; + match res { + Ok(res) => Ok(AdminRpcResponse::ProxyApiOkResponse(res.tagged())), + Err(e) => Ok(AdminRpcResponse::ApiErrorResponse { + http_code: e.http_status_code().as_u16(), + error_code: e.code().to_string(), + message: e.to_string(), + }), + } + } + AdminRpc::Internal(req) => { + info!("Internal admin API request: {}", req.name()); + let res = req.clone().handle(&self.garage, self).await; + match res { + Ok(res) => Ok(AdminRpcResponse::InternalApiOkResponse(res)), + Err(e) => Ok(AdminRpcResponse::ApiErrorResponse { + http_code: e.http_status_code().as_u16(), + error_code: e.code().to_string(), + message: e.to_string(), + }), + } + } + } + } +} + +// ---- FOR HTTP ---- pub type ResBody = BoxBody; pub struct AdminApiServer { garage: Arc, #[cfg(feature = "metrics")] - exporter: PrometheusExporter, + pub(crate) exporter: PrometheusExporter, metrics_token: Option, + metrics_require_token: bool, admin_token: Option, + pub(crate) background: Arc, + pub(crate) endpoint: Arc>, +} + +pub enum HttpEndpoint { + Old(router_v1::Endpoint), + New(String), } impl AdminApiServer { pub fn new( garage: Arc, + background: Arc, #[cfg(feature = "metrics")] exporter: PrometheusExporter, - ) -> Self { + ) -> Arc { let cfg = &garage.config.admin; let metrics_token = cfg.metrics_token.as_deref().map(hash_bearer_token); let admin_token = cfg.admin_token.as_deref().map(hash_bearer_token); - Self { + let metrics_require_token = cfg.metrics_require_token; + + let endpoint = garage.system.netapp.endpoint(ADMIN_RPC_PATH.into()); + let admin = Arc::new(Self { garage, #[cfg(feature = "metrics")] exporter, metrics_token, + metrics_require_token, admin_token, - } + background, + endpoint, + }); + admin.endpoint.set_handler(admin.clone()); + admin } pub async fn run( - self, + self: Arc, bind_addr: UnixOrTCPSocketAddress, must_exit: watch::Receiver, ) -> Result<(), GarageError> { let region = self.garage.config.s3_api.s3_region.clone(); - ApiServer::new(region, self) + ApiServer::new(region, ArcAdminApiServer(self)) .run_server(bind_addr, Some(0o220), must_exit) .await } - fn handle_options(&self, _req: &Request) -> Result, Error> { - Ok(Response::builder() - .status(StatusCode::NO_CONTENT) - .header(ALLOW, "OPTIONS, GET, POST") - .header(ACCESS_CONTROL_ALLOW_METHODS, "OPTIONS, GET, POST") - .header(ACCESS_CONTROL_ALLOW_ORIGIN, "*") - .body(empty_body())?) - } - - async fn handle_check_domain( + async fn handle_http_api( &self, req: Request, + endpoint: HttpEndpoint, ) -> Result, Error> { - let query_params: HashMap = req - .uri() - .query() - .map(|v| { - url::form_urlencoded::parse(v.as_bytes()) - .into_owned() - .collect() - }) - .unwrap_or_else(HashMap::new); + let auth_header = req.headers().get(AUTHORIZATION).cloned(); - let has_domain_key = query_params.contains_key("domain"); - - if !has_domain_key { - return Err(Error::bad_request("No domain query string found")); - } - - let domain = query_params - .get("domain") - .ok_or_internal_error("Could not parse domain query string")?; - - if self.check_domain(domain).await? { - Ok(Response::builder() - .status(StatusCode::OK) - .body(string_body(format!( - "Domain '{domain}' is managed by Garage" - )))?) - } else { - Err(Error::bad_request(format!( - "Domain '{domain}' is not managed by Garage" - ))) - } - } - - async fn check_domain(&self, domain: &str) -> Result { - // Resolve bucket from domain name, inferring if the website must be activated for the - // domain to be valid. - let (bucket_name, must_check_website) = if let Some(bname) = self - .garage - .config - .s3_api - .root_domain - .as_ref() - .and_then(|rd| host_to_bucket(domain, rd)) - { - (bname.to_string(), false) - } else if let Some(bname) = self - .garage - .config - .s3_web - .as_ref() - .and_then(|sw| host_to_bucket(domain, sw.root_domain.as_str())) - { - (bname.to_string(), true) - } else { - (domain.to_string(), true) + let request = match endpoint { + HttpEndpoint::Old(endpoint_v1) => AdminApiRequest::from_v1(endpoint_v1, req).await?, + HttpEndpoint::New(_) => AdminApiRequest::from_request(req).await?, }; - let bucket_id = match self - .garage - .bucket_helper() - .resolve_global_bucket_name(&bucket_name) - .await? - { - Some(bucket_id) => bucket_id, - None => return Ok(false), - }; - - if !must_check_website { - return Ok(true); - } - - let bucket = self - .garage - .bucket_helper() - .get_existing_bucket(bucket_id) - .await?; - - let bucket_state = bucket.state.as_option().unwrap(); - let bucket_website_config = bucket_state.website_config.get(); - - match bucket_website_config { - Some(_v) => Ok(true), - None => Ok(false), - } - } - - fn handle_health(&self) -> Result, Error> { - let health = self.garage.system.health(); - - let (status, status_str) = match health.status { - ClusterHealthStatus::Healthy => (StatusCode::OK, "Garage is fully operational"), - ClusterHealthStatus::Degraded => ( - StatusCode::OK, - "Garage is operational but some storage nodes are unavailable", - ), - ClusterHealthStatus::Unavailable => ( - StatusCode::SERVICE_UNAVAILABLE, - "Quorum is not available for some/all partitions, reads and writes will fail", + let (global_token_hash, token_required) = match request.authorization_type() { + Authorization::None => (None, false), + Authorization::MetricsToken => ( + self.metrics_token.as_deref(), + self.metrics_token.is_some() || self.metrics_require_token, ), + Authorization::AdminToken => (self.admin_token.as_deref(), true), }; - let status_str = format!( - "{}\nConsult the full health check API endpoint at /v1/health for more details\n", - status_str - ); - Ok(Response::builder() - .status(status) - .header(http::header::CONTENT_TYPE, "text/plain") - .body(string_body(status_str))?) - } - - fn handle_metrics(&self) -> Result, Error> { - #[cfg(feature = "metrics")] - { - use opentelemetry::trace::Tracer; - - let mut buffer = vec![]; - let encoder = TextEncoder::new(); - - let tracer = opentelemetry::global::tracer("garage"); - let metric_families = tracer.in_span("admin/gather_metrics", |_| { - self.exporter.registry().gather() - }); - - encoder - .encode(&metric_families, &mut buffer) - .ok_or_internal_error("Could not serialize metrics")?; - - Ok(Response::builder() - .status(StatusCode::OK) - .header(http::header::CONTENT_TYPE, encoder.format_type()) - .body(bytes_body(buffer.into()))?) + if token_required { + verify_authorization(&self.garage, global_token_hash, auth_header, request.name())?; + } + + match request { + AdminApiRequest::Options(req) => req.handle(&self.garage, self).await, + AdminApiRequest::CheckDomain(req) => req.handle(&self.garage, self).await, + AdminApiRequest::Health(req) => req.handle(&self.garage, self).await, + AdminApiRequest::Metrics(req) => req.handle(&self.garage, self).await, + req => { + let res = req.handle(&self.garage, self).await?; + let mut res = json_ok_response(&res)?; + res.headers_mut() + .insert(ACCESS_CONTROL_ALLOW_ORIGIN, HeaderValue::from_static("*")); + Ok(res) + } } - #[cfg(not(feature = "metrics"))] - Err(Error::bad_request( - "Garage was built without the metrics feature".to_string(), - )) } } -impl ApiHandler for AdminApiServer { +struct ArcAdminApiServer(Arc); + +impl ApiHandler for ArcAdminApiServer { const API_NAME: &'static str = "admin"; const API_NAME_DISPLAY: &'static str = "Admin"; - type Endpoint = Endpoint; + type Endpoint = HttpEndpoint; type Error = Error; - fn parse_endpoint(&self, req: &Request) -> Result { + fn parse_endpoint(&self, req: &Request) -> Result { if req.uri().path().starts_with("/v0/") { let endpoint_v0 = router_v0::Endpoint::from_request(req)?; - Endpoint::from_v0(endpoint_v0) + let endpoint_v1 = router_v1::Endpoint::from_v0(endpoint_v0)?; + Ok(HttpEndpoint::Old(endpoint_v1)) + } else if req.uri().path().starts_with("/v1/") { + let endpoint_v1 = router_v1::Endpoint::from_request(req)?; + Ok(HttpEndpoint::Old(endpoint_v1)) } else { - Endpoint::from_request(req) + Ok(HttpEndpoint::New(req.uri().path().to_string())) } } async fn handle( &self, req: Request, - endpoint: Endpoint, + endpoint: HttpEndpoint, ) -> Result, Error> { - let required_auth_hash = - match endpoint.authorization_type() { - Authorization::None => None, - Authorization::MetricsToken => self.metrics_token.as_deref(), - Authorization::AdminToken => match self.admin_token.as_deref() { - None => return Err(Error::forbidden( - "Admin token isn't configured, admin API access is disabled for security.", - )), - Some(t) => Some(t), - }, - }; + self.0.handle_http_api(req, endpoint).await + } - if let Some(password_hash) = required_auth_hash { - match req.headers().get("Authorization") { - None => return Err(Error::forbidden("Authorization token must be provided")), - Some(authorization) => { - verify_bearer_token(&authorization, password_hash)?; - } - } - } - - match endpoint { - Endpoint::Options => self.handle_options(&req), - Endpoint::CheckDomain => self.handle_check_domain(req).await, - Endpoint::Health => self.handle_health(), - Endpoint::Metrics => self.handle_metrics(), - Endpoint::GetClusterStatus => handle_get_cluster_status(&self.garage).await, - Endpoint::GetClusterHealth => handle_get_cluster_health(&self.garage).await, - Endpoint::ConnectClusterNodes => handle_connect_cluster_nodes(&self.garage, req).await, - // Layout - Endpoint::GetClusterLayout => handle_get_cluster_layout(&self.garage).await, - Endpoint::UpdateClusterLayout => handle_update_cluster_layout(&self.garage, req).await, - Endpoint::ApplyClusterLayout => handle_apply_cluster_layout(&self.garage, req).await, - Endpoint::RevertClusterLayout => handle_revert_cluster_layout(&self.garage).await, - // Keys - Endpoint::ListKeys => handle_list_keys(&self.garage).await, - Endpoint::GetKeyInfo { - id, - search, - show_secret_key, - } => { - let show_secret_key = show_secret_key.map(|x| x == "true").unwrap_or(false); - handle_get_key_info(&self.garage, id, search, show_secret_key).await - } - Endpoint::CreateKey => handle_create_key(&self.garage, req).await, - Endpoint::ImportKey => handle_import_key(&self.garage, req).await, - Endpoint::UpdateKey { id } => handle_update_key(&self.garage, id, req).await, - Endpoint::DeleteKey { id } => handle_delete_key(&self.garage, id).await, - // Buckets - Endpoint::ListBuckets => handle_list_buckets(&self.garage).await, - Endpoint::GetBucketInfo { id, global_alias } => { - handle_get_bucket_info(&self.garage, id, global_alias).await - } - Endpoint::CreateBucket => handle_create_bucket(&self.garage, req).await, - Endpoint::DeleteBucket { id } => handle_delete_bucket(&self.garage, id).await, - Endpoint::UpdateBucket { id } => handle_update_bucket(&self.garage, id, req).await, - // Bucket-key permissions - Endpoint::BucketAllowKey => { - handle_bucket_change_key_perm(&self.garage, req, true).await - } - Endpoint::BucketDenyKey => { - handle_bucket_change_key_perm(&self.garage, req, false).await - } - // Bucket aliasing - Endpoint::GlobalAliasBucket { id, alias } => { - handle_global_alias_bucket(&self.garage, id, alias).await - } - Endpoint::GlobalUnaliasBucket { id, alias } => { - handle_global_unalias_bucket(&self.garage, id, alias).await - } - Endpoint::LocalAliasBucket { - id, - access_key_id, - alias, - } => handle_local_alias_bucket(&self.garage, id, access_key_id, alias).await, - Endpoint::LocalUnaliasBucket { - id, - access_key_id, - alias, - } => handle_local_unalias_bucket(&self.garage, id, access_key_id, alias).await, - } + fn key_id_from_request(&self, req: &Request) -> Option { + let auth_header = req.headers().get(AUTHORIZATION)?; + let token = parse_authorization(auth_header).ok()?; + let key_id = token.split_once('.')?.0; + Some(key_id.to_string()) } } -impl ApiEndpoint for Endpoint { - fn name(&self) -> &'static str { - Endpoint::name(self) +impl ApiEndpoint for HttpEndpoint { + fn name(&self) -> Cow<'static, str> { + match self { + Self::Old(endpoint_v1) => Cow::Borrowed(endpoint_v1.name()), + Self::New(path) => Cow::Owned(path.clone()), + } } fn add_span_attributes(&self, _span: SpanRef<'_>) {} @@ -347,20 +251,91 @@ fn hash_bearer_token(token: &str) -> String { .to_string() } -fn verify_bearer_token(token: &hyper::http::HeaderValue, password_hash: &str) -> Result<(), Error> { - use argon2::{password_hash::PasswordVerifier, Argon2}; - - let parsed_hash = PasswordHash::new(&password_hash).unwrap(); - - token +fn parse_authorization(auth_header: &hyper::http::HeaderValue) -> Result<&str, Error> { + let token = auth_header .to_str()? .strip_prefix("Bearer ") - .and_then(|token| { - Argon2::default() - .verify_password(token.trim().as_bytes(), &parsed_hash) - .ok() - }) - .ok_or_else(|| Error::forbidden("Invalid authorization token"))?; + .ok_or_else(|| Error::forbidden("Invalid Authorization header"))? + .trim(); + Ok(token) +} + +fn verify_authorization( + garage: &Garage, + global_token_hash: Option<&str>, + auth_header: Option, + endpoint_name: &str, +) -> Result<(), Error> { + use argon2::{password_hash::PasswordHash, password_hash::PasswordVerifier, Argon2}; + + let invalid_msg = "Invalid bearer token"; + + let token = match &auth_header { + None => { + return Err(Error::forbidden( + "Bearer token must be provided in Authorization header", + )) + } + Some(authorization) => parse_authorization(authorization)?, + }; + + let token_hash_string = if let Some((prefix, _)) = token.split_once('.') { + garage + .admin_token_table + .get_local(&EmptyKey, &prefix.to_string())? + .and_then(|k| k.state.into_option()) + .filter(|p| !p.is_expired(now_msec())) + // GetCurrentAdminTokenInfo endpoint must be accessible even if it is not in the token scopes + .filter(|p| p.has_scope(endpoint_name) || endpoint_name == "GetCurrentAdminTokenInfo") + .ok_or_else(|| Error::forbidden(invalid_msg))? + .token_hash + } else { + global_token_hash + .ok_or_else(|| Error::forbidden(invalid_msg))? + .to_string() + }; + + let token_hash = + PasswordHash::new(&token_hash_string).ok_or_internal_error("Could not parse token hash")?; + + Argon2::default() + .verify_password(token.as_bytes(), &token_hash) + .map_err(|_| Error::forbidden(invalid_msg))?; Ok(()) } + +pub(crate) fn find_matching_nodes(garage: &Garage, spec: &str) -> Result, Error> { + if spec == "self" { + Ok(vec![garage.system.id]) + } else { + // Collect all nodes currently up and/or in cluster layout + let mut res = vec![]; + if let Ok(all_nodes) = garage.system.cluster_layout().all_nodes() { + res = all_nodes.to_vec(); + } + for node in garage.system.get_known_nodes() { + if node.is_up && !res.contains(&node.id) { + res.push(node.id); + } + } + + if spec == "*" { + // match all nodes + Ok(res) + } else { + // filter nodes that match spec + res.retain(|node| hex::encode(node).starts_with(spec)); + if res.is_empty() { + Err(Error::bad_request(format!("No nodes matching {}", spec))) + } else if res.len() > 1 { + Err(Error::bad_request(format!( + "Multiple nodes matching {}: {:?}", + spec, res + ))) + } else { + Ok(res) + } + } + } +} diff --git a/src/api/admin/block.rs b/src/api/admin/block.rs new file mode 100644 index 00000000..30729866 --- /dev/null +++ b/src/api/admin/block.rs @@ -0,0 +1,284 @@ +use std::sync::Arc; + +use garage_util::data::*; +use garage_util::error::Error as GarageError; +use garage_util::time::now_msec; + +use garage_table::EmptyKey; + +use garage_model::garage::Garage; +use garage_model::s3::object_table::*; +use garage_model::s3::version_table::*; + +use garage_api_common::common_error::CommonErrorDerivative; + +use crate::api::*; +use crate::error::*; +use crate::{Admin, RequestHandler}; + +impl RequestHandler for LocalListBlockErrorsRequest { + type Response = LocalListBlockErrorsResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let errors = garage.block_manager.list_resync_errors()?; + let now = now_msec(); + let errors = errors + .into_iter() + .map(|e| BlockError { + block_hash: hex::encode(e.hash), + refcount: e.refcount, + error_count: e.error_count, + last_try_secs_ago: now.saturating_sub(e.last_try) / 1000, + next_try_in_secs: e.next_try.saturating_sub(now) / 1000, + }) + .collect(); + Ok(LocalListBlockErrorsResponse(errors)) + } +} + +impl RequestHandler for LocalGetBlockInfoRequest { + type Response = LocalGetBlockInfoResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let hash = find_block_hash_by_prefix(garage, &self.block_hash)?; + let refcount = garage.block_manager.get_block_rc(&hash)?; + let block_refs = garage + .block_ref_table + .get_range(&hash, None, None, 10000, Default::default()) + .await?; + let mut versions = vec![]; + for br in block_refs { + if let Some(v) = garage.version_table.get(&br.version, &EmptyKey).await? { + let bl = match &v.backlink { + VersionBacklink::MultipartUpload { upload_id } => { + if let Some(u) = garage.mpu_table.get(upload_id, &EmptyKey).await? { + BlockVersionBacklink::Upload { + upload_id: hex::encode(upload_id), + upload_deleted: u.deleted.get(), + upload_garbage_collected: false, + bucket_id: Some(hex::encode(u.bucket_id)), + key: Some(u.key.to_string()), + } + } else { + BlockVersionBacklink::Upload { + upload_id: hex::encode(upload_id), + upload_deleted: true, + upload_garbage_collected: true, + bucket_id: None, + key: None, + } + } + } + VersionBacklink::Object { bucket_id, key } => BlockVersionBacklink::Object { + bucket_id: hex::encode(bucket_id), + key: key.to_string(), + }, + }; + versions.push(BlockVersion { + version_id: hex::encode(br.version), + ref_deleted: br.deleted.get(), + version_deleted: v.deleted.get(), + garbage_collected: false, + backlink: Some(bl), + }); + } else { + versions.push(BlockVersion { + version_id: hex::encode(br.version), + ref_deleted: br.deleted.get(), + version_deleted: true, + garbage_collected: true, + backlink: None, + }); + } + } + Ok(LocalGetBlockInfoResponse { + block_hash: hex::encode(hash), + refcount, + versions, + }) + } +} + +impl RequestHandler for LocalRetryBlockResyncRequest { + type Response = LocalRetryBlockResyncResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + match self { + Self::All { all: true } => { + let blocks = garage.block_manager.list_resync_errors()?; + for b in blocks.iter() { + garage.block_manager.resync.clear_backoff(&b.hash)?; + } + Ok(LocalRetryBlockResyncResponse { + count: blocks.len() as u64, + }) + } + Self::All { all: false } => Err(Error::bad_request("nonsense")), + Self::Blocks { block_hashes } => { + for hash in block_hashes.iter() { + let hash = hex::decode(hash).ok_or_bad_request("invalid hash")?; + let hash = Hash::try_from(&hash).ok_or_bad_request("invalid hash")?; + garage.block_manager.resync.clear_backoff(&hash)?; + } + Ok(LocalRetryBlockResyncResponse { + count: block_hashes.len() as u64, + }) + } + } + } +} + +impl RequestHandler for LocalPurgeBlocksRequest { + type Response = LocalPurgeBlocksResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let mut obj_dels = 0; + let mut mpu_dels = 0; + let mut ver_dels = 0; + let mut br_dels = 0; + + for hash in self.0.iter() { + let hash = hex::decode(hash).ok_or_bad_request("invalid hash")?; + let hash = Hash::try_from(&hash).ok_or_bad_request("invalid hash")?; + let block_refs = garage + .block_ref_table + .get_range(&hash, None, None, 10000, Default::default()) + .await?; + + for br in block_refs { + if let Some(version) = garage.version_table.get(&br.version, &EmptyKey).await? { + handle_block_purge_version_backlink( + garage, + &version, + &mut obj_dels, + &mut mpu_dels, + ) + .await?; + + if !version.deleted.get() { + let deleted_version = Version::new(version.uuid, version.backlink, true); + garage.version_table.insert(&deleted_version).await?; + ver_dels += 1; + } + } + if !br.deleted.get() { + let mut br = br; + br.deleted.set(); + garage.block_ref_table.insert(&br).await?; + br_dels += 1; + } + } + } + + Ok(LocalPurgeBlocksResponse { + blocks_purged: self.0.len() as u64, + block_refs_purged: br_dels, + versions_deleted: ver_dels, + objects_deleted: obj_dels, + uploads_deleted: mpu_dels, + }) + } +} + +fn find_block_hash_by_prefix(garage: &Arc, prefix: &str) -> Result { + if prefix.len() < 4 { + return Err(Error::bad_request( + "Please specify at least 4 characters of the block hash", + )); + } + + let prefix_bin = hex::decode(&prefix[..prefix.len() & !1]).ok_or_bad_request("invalid hash")?; + + let iter = garage + .block_ref_table + .data + .store + .range(&prefix_bin[..]..) + .map_err(GarageError::from)?; + let mut found = None; + for item in iter { + let (k, _v) = item.map_err(GarageError::from)?; + let hash = Hash::try_from(&k[..32]).unwrap(); + if hash.as_slice()[..prefix_bin.len()] != prefix_bin { + break; + } + if hex::encode(hash.as_slice()).starts_with(prefix) { + match &found { + Some(x) if *x == hash => (), + Some(_) => { + return Err(Error::bad_request(format!( + "Several blocks match prefix `{}`", + prefix + ))); + } + None => { + found = Some(hash); + } + } + } + } + + found.ok_or_else(|| Error::NoSuchBlock(prefix.to_string())) +} + +async fn handle_block_purge_version_backlink( + garage: &Arc, + version: &Version, + obj_dels: &mut u64, + mpu_dels: &mut u64, +) -> Result<(), Error> { + let (bucket_id, key, ov_id) = match &version.backlink { + VersionBacklink::Object { bucket_id, key } => (*bucket_id, key.clone(), version.uuid), + VersionBacklink::MultipartUpload { upload_id } => { + if let Some(mut mpu) = garage.mpu_table.get(upload_id, &EmptyKey).await? { + if !mpu.deleted.get() { + mpu.parts.clear(); + mpu.deleted.set(); + garage.mpu_table.insert(&mpu).await?; + *mpu_dels += 1; + } + (mpu.bucket_id, mpu.key.clone(), *upload_id) + } else { + return Ok(()); + } + } + }; + + if let Some(object) = garage.object_table.get(&bucket_id, &key).await? { + let ov = object.versions().iter().rev().find(|v| v.is_complete()); + if let Some(ov) = ov { + if ov.uuid == ov_id { + let del_uuid = gen_uuid(); + let deleted_object = Object::new( + bucket_id, + key, + vec![ObjectVersion { + uuid: del_uuid, + timestamp: ov.timestamp + 1, + state: ObjectVersionState::Complete(ObjectVersionData::DeleteMarker), + }], + ); + garage.object_table.insert(&deleted_object).await?; + *obj_dels += 1; + } + } + } + + Ok(()) +} diff --git a/src/api/admin/bucket.rs b/src/api/admin/bucket.rs index 207693b6..046ed44c 100644 --- a/src/api/admin/bucket.rs +++ b/src/api/admin/bucket.rs @@ -1,8 +1,8 @@ use std::collections::HashMap; use std::sync::Arc; +use std::time::Duration; -use hyper::{body::Incoming as IncomingBody, Request, Response, StatusCode}; -use serde::{Deserialize, Serialize}; +use chrono::DateTime; use garage_util::crdt::*; use garage_util::data::*; @@ -18,102 +18,603 @@ use garage_model::s3::mpu_table; use garage_model::s3::object_table::*; use garage_api_common::common_error::CommonError; -use garage_api_common::helpers::*; -use crate::api_server::ResBody; +use crate::api::*; use crate::error::*; -use crate::key::ApiBucketKeyPerm; +use crate::{Admin, RequestHandler}; -pub async fn handle_list_buckets(garage: &Arc) -> Result, Error> { - let buckets = garage - .bucket_table - .get_range( - &EmptyKey, - None, - Some(DeletedFilter::NotDeleted), - 10000, - EnumerationOrder::Forward, - ) +impl RequestHandler for ListBucketsRequest { + type Response = ListBucketsResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let buckets = garage + .bucket_table + .get_range( + &EmptyKey, + None, + Some(DeletedFilter::NotDeleted), + 10000, + EnumerationOrder::Forward, + ) + .await?; + + let res = buckets + .into_iter() + .map(|b| { + let state = b.state.as_option().unwrap(); + ListBucketsResponseItem { + id: hex::encode(b.id), + created: DateTime::from_timestamp_millis(state.creation_date as i64) + .expect("invalid timestamp stored in db"), + global_aliases: state + .aliases + .items() + .iter() + .filter(|(_, _, a)| *a) + .map(|(n, _, _)| n.to_string()) + .collect::>(), + local_aliases: state + .local_aliases + .items() + .iter() + .filter(|(_, _, a)| *a) + .map(|((k, n), _, _)| BucketLocalAlias { + access_key_id: k.to_string(), + alias: n.to_string(), + }) + .collect::>(), + } + }) + .collect::>(); + + Ok(ListBucketsResponse(res)) + } +} + +impl RequestHandler for GetBucketInfoRequest { + type Response = GetBucketInfoResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let bucket_id = match (self.id, self.global_alias, self.search) { + (Some(id), None, None) => parse_bucket_id(&id)?, + (None, Some(ga), None) => garage + .bucket_alias_table + .get(&EmptyKey, &ga) + .await? + .and_then(|x| *x.state.get()) + .ok_or_else(|| HelperError::NoSuchBucket(ga.to_string()))?, + (None, None, Some(search)) => { + let helper = garage.bucket_helper(); + if let Some(bucket) = helper.resolve_global_bucket(&search).await? { + bucket.id + } else { + let hexdec = if search.len() >= 2 { + search + .get(..search.len() & !1) + .and_then(|x| hex::decode(x).ok()) + } else { + None + }; + let hex = hexdec + .ok_or_else(|| Error::Common(CommonError::NoSuchBucket(search.clone())))?; + + let mut start = [0u8; 32]; + start + .as_mut_slice() + .get_mut(..hex.len()) + .ok_or_bad_request("invalid length")? + .copy_from_slice(&hex); + let mut candidates = garage + .bucket_table + .get_range( + &EmptyKey, + Some(start.into()), + Some(DeletedFilter::NotDeleted), + 10, + EnumerationOrder::Forward, + ) + .await? + .into_iter() + .collect::>(); + candidates.retain(|x| hex::encode(x.id).starts_with(&search)); + if candidates.is_empty() { + return Err(Error::Common(CommonError::NoSuchBucket(search.clone()))); + } else if candidates.len() == 1 { + candidates.into_iter().next().unwrap().id + } else { + return Err(Error::bad_request(format!( + "Several matching buckets: {}", + search + ))); + } + } + } + _ => { + return Err(Error::bad_request( + "Either id, globalAlias or search must be provided (but not several of them)", + )); + } + }; + + bucket_info_results(garage, bucket_id).await + } +} + +impl RequestHandler for CreateBucketRequest { + type Response = CreateBucketResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let helper = garage.locked_helper().await; + + if let Some(ga) = &self.global_alias { + if !is_valid_bucket_name(ga, garage.config.allow_punycode) { + return Err(Error::bad_request(format!( + "{}: {}", + ga, INVALID_BUCKET_NAME_MESSAGE + ))); + } + + if let Some(alias) = garage.bucket_alias_table.get(&EmptyKey, ga).await? { + if alias.state.get().is_some() { + return Err(CommonError::BucketAlreadyExists.into()); + } + } + } + + if let Some(la) = &self.local_alias { + if !is_valid_bucket_name(&la.alias, garage.config.allow_punycode) { + return Err(Error::bad_request(format!( + "{}: {}", + la.alias, INVALID_BUCKET_NAME_MESSAGE + ))); + } + + let key = helper.key().get_existing_key(&la.access_key_id).await?; + let state = key.state.as_option().unwrap(); + if state.local_aliases.get(&la.alias).is_some() { + return Err(Error::bad_request("Local alias already exists")); + } + } + + let bucket = Bucket::new(); + garage.bucket_table.insert(&bucket).await?; + + if let Some(ga) = &self.global_alias { + helper.set_global_bucket_alias(bucket.id, ga).await?; + } + + if let Some(la) = &self.local_alias { + helper + .set_local_bucket_alias(bucket.id, &la.access_key_id, &la.alias) + .await?; + + if la.allow.read || la.allow.write || la.allow.owner { + helper + .set_bucket_key_permissions( + bucket.id, + &la.access_key_id, + BucketKeyPerm { + timestamp: now_msec(), + allow_read: la.allow.read, + allow_write: la.allow.write, + allow_owner: la.allow.owner, + }, + ) + .await?; + } + } + + Ok(CreateBucketResponse( + bucket_info_results(garage, bucket.id).await?, + )) + } +} + +impl RequestHandler for DeleteBucketRequest { + type Response = DeleteBucketResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let helper = garage.locked_helper().await; + + let bucket_id = parse_bucket_id(&self.id)?; + + let mut bucket = helper.bucket().get_existing_bucket(bucket_id).await?; + let state = bucket.state.as_option().unwrap(); + + // Check bucket is empty + if !helper.bucket().is_bucket_empty(bucket_id).await? { + return Err(CommonError::BucketNotEmpty.into()); + } + + // --- done checking, now commit --- + // 1. delete authorization from keys that had access + for (key_id, perm) in bucket.authorized_keys() { + if perm.is_any() { + helper + .set_bucket_key_permissions(bucket.id, key_id, BucketKeyPerm::NO_PERMISSIONS) + .await?; + } + } + // 2. delete all local aliases + for ((key_id, alias), _, active) in state.local_aliases.items().iter() { + if *active { + helper + .purge_local_bucket_alias(bucket.id, key_id, alias) + .await?; + } + } + // 3. delete all global aliases + for (alias, _, active) in state.aliases.items().iter() { + if *active { + helper.purge_global_bucket_alias(bucket.id, alias).await?; + } + } + + // 4. delete bucket + bucket.state = Deletable::delete(); + garage.bucket_table.insert(&bucket).await?; + + Ok(DeleteBucketResponse) + } +} + +impl RequestHandler for UpdateBucketRequest { + type Response = UpdateBucketResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let bucket_id = parse_bucket_id(&self.id)?; + + let mut bucket = garage + .bucket_helper() + .get_existing_bucket(bucket_id) + .await?; + + let state = bucket.state.as_option_mut().unwrap(); + + if let Some(wa) = self.body.website_access { + if wa.enabled { + let (redirect_all, routing_rules) = match state.website_config.get() { + Some(wc) => (wc.redirect_all.clone(), wc.routing_rules.clone()), + None => (None, Vec::new()), + }; + state.website_config.update(Some(WebsiteConfig { + index_document: wa.index_document.ok_or_bad_request( + "Please specify indexDocument when enabling website access.", + )?, + error_document: wa.error_document, + redirect_all, + routing_rules, + })); + } else { + if wa.index_document.is_some() || wa.error_document.is_some() { + return Err(Error::bad_request( + "Cannot specify indexDocument or errorDocument when disabling website access.", + )); + } + state.website_config.update(None); + } + } + + if let Some(q) = self.body.quotas { + state.quotas.update(BucketQuotas { + max_size: q.max_size, + max_objects: q.max_objects, + }); + } + + garage.bucket_table.insert(&bucket).await?; + + Ok(UpdateBucketResponse( + bucket_info_results(garage, bucket.id).await?, + )) + } +} + +impl RequestHandler for CleanupIncompleteUploadsRequest { + type Response = CleanupIncompleteUploadsResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let duration = Duration::from_secs(self.older_than_secs); + + let bucket_id = parse_bucket_id(&self.bucket_id)?; + + let count = garage + .bucket_helper() + .cleanup_incomplete_uploads(&bucket_id, duration) + .await?; + + Ok(CleanupIncompleteUploadsResponse { + uploads_deleted: count as u64, + }) + } +} + +impl RequestHandler for InspectObjectRequest { + type Response = InspectObjectResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let bucket_id = parse_bucket_id(&self.bucket_id)?; + + let object = garage + .object_table + .get(&bucket_id, &self.key) + .await? + .ok_or_else(|| Error::NoSuchKey)?; + + let mut versions = vec![]; + for obj_ver in object.versions().iter() { + let ver = garage.version_table.get(&obj_ver.uuid, &EmptyKey).await?; + let blocks = ver + .map(|v| { + v.blocks + .items() + .iter() + .map(|(vk, vb)| InspectObjectBlock { + part_number: vk.part_number, + offset: vk.offset, + hash: hex::encode(vb.hash), + size: vb.size, + }) + .collect::>() + }) + .unwrap_or_default(); + let uuid = hex::encode(obj_ver.uuid); + let timestamp = DateTime::from_timestamp_millis(obj_ver.timestamp as i64) + .expect("invalid timestamp in db"); + match &obj_ver.state { + ObjectVersionState::Uploading { encryption, .. } => { + versions.push(InspectObjectVersion { + uuid, + timestamp, + encrypted: !matches!(encryption, ObjectVersionEncryption::Plaintext { .. }), + uploading: true, + headers: match encryption { + ObjectVersionEncryption::Plaintext { inner } => inner.headers.clone(), + _ => vec![], + }, + blocks, + ..Default::default() + }); + } + ObjectVersionState::Complete(data) => match data { + ObjectVersionData::DeleteMarker => { + versions.push(InspectObjectVersion { + uuid, + timestamp, + delete_marker: true, + ..Default::default() + }); + } + ObjectVersionData::Inline(meta, _) => { + versions.push(InspectObjectVersion { + uuid, + timestamp, + inline: true, + size: Some(meta.size), + etag: Some(meta.etag.clone()), + encrypted: !matches!( + meta.encryption, + ObjectVersionEncryption::Plaintext { .. } + ), + headers: match &meta.encryption { + ObjectVersionEncryption::Plaintext { inner } => { + inner.headers.clone() + } + _ => vec![], + }, + ..Default::default() + }); + } + ObjectVersionData::FirstBlock(meta, _) => { + versions.push(InspectObjectVersion { + uuid, + timestamp, + size: Some(meta.size), + etag: Some(meta.etag.clone()), + encrypted: !matches!( + meta.encryption, + ObjectVersionEncryption::Plaintext { .. } + ), + headers: match &meta.encryption { + ObjectVersionEncryption::Plaintext { inner } => { + inner.headers.clone() + } + _ => vec![], + }, + blocks, + ..Default::default() + }); + } + }, + ObjectVersionState::Aborted => { + versions.push(InspectObjectVersion { + uuid, + timestamp, + aborted: true, + blocks, + ..Default::default() + }); + } + } + } + + Ok(InspectObjectResponse { + bucket_id: hex::encode(object.bucket_id), + key: object.key, + versions, + }) + } +} + +// ---- BUCKET/KEY PERMISSIONS ---- + +impl RequestHandler for AllowBucketKeyRequest { + type Response = AllowBucketKeyResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let res = handle_bucket_change_key_perm(garage, self.0, true).await?; + Ok(AllowBucketKeyResponse(res)) + } +} + +impl RequestHandler for DenyBucketKeyRequest { + type Response = DenyBucketKeyResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let res = handle_bucket_change_key_perm(garage, self.0, false).await?; + Ok(DenyBucketKeyResponse(res)) + } +} + +pub async fn handle_bucket_change_key_perm( + garage: &Arc, + req: BucketKeyPermChangeRequest, + new_perm_flag: bool, +) -> Result { + let helper = garage.locked_helper().await; + + let bucket_id = parse_bucket_id(&req.bucket_id)?; + + let bucket = helper.bucket().get_existing_bucket(bucket_id).await?; + let state = bucket.state.as_option().unwrap(); + + let key = helper.key().get_existing_key(&req.access_key_id).await?; + + let mut perm = state + .authorized_keys + .get(&key.key_id) + .cloned() + .unwrap_or(BucketKeyPerm::NO_PERMISSIONS); + + if req.permissions.read { + perm.allow_read = new_perm_flag; + } + if req.permissions.write { + perm.allow_write = new_perm_flag; + } + if req.permissions.owner { + perm.allow_owner = new_perm_flag; + } + + helper + .set_bucket_key_permissions(bucket.id, &key.key_id, perm) .await?; - let res = buckets - .into_iter() - .map(|b| { - let state = b.state.as_option().unwrap(); - ListBucketResultItem { - id: hex::encode(b.id), - global_aliases: state - .aliases - .items() - .iter() - .filter(|(_, _, a)| *a) - .map(|(n, _, _)| n.to_string()) - .collect::>(), - local_aliases: state - .local_aliases - .items() - .iter() - .filter(|(_, _, a)| *a) - .map(|((k, n), _, _)| BucketLocalAlias { - access_key_id: k.to_string(), - alias: n.to_string(), - }) - .collect::>(), + bucket_info_results(garage, bucket.id).await +} + +// ---- BUCKET ALIASES ---- + +impl RequestHandler for AddBucketAliasRequest { + type Response = AddBucketAliasResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let bucket_id = parse_bucket_id(&self.bucket_id)?; + + let helper = garage.locked_helper().await; + + match self.alias { + BucketAliasEnum::Global { global_alias } => { + helper + .set_global_bucket_alias(bucket_id, &global_alias) + .await? + } + BucketAliasEnum::Local { + local_alias, + access_key_id, + } => { + helper + .set_local_bucket_alias(bucket_id, &access_key_id, &local_alias) + .await? } - }) - .collect::>(); - - Ok(json_ok_response(&res)?) -} - -#[derive(Serialize)] -#[serde(rename_all = "camelCase")] -struct ListBucketResultItem { - id: String, - global_aliases: Vec, - local_aliases: Vec, -} - -#[derive(Serialize)] -#[serde(rename_all = "camelCase")] -struct BucketLocalAlias { - access_key_id: String, - alias: String, -} - -#[derive(Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -struct ApiBucketQuotas { - max_size: Option, - max_objects: Option, -} - -pub async fn handle_get_bucket_info( - garage: &Arc, - id: Option, - global_alias: Option, -) -> Result, Error> { - let bucket_id = match (id, global_alias) { - (Some(id), None) => parse_bucket_id(&id)?, - (None, Some(ga)) => garage - .bucket_helper() - .resolve_global_bucket_name(&ga) - .await? - .ok_or_else(|| HelperError::NoSuchBucket(ga.to_string()))?, - _ => { - return Err(Error::bad_request( - "Either id or globalAlias must be provided (but not both)", - )); } - }; - bucket_info_results(garage, bucket_id).await + Ok(AddBucketAliasResponse( + bucket_info_results(garage, bucket_id).await?, + )) + } } +impl RequestHandler for RemoveBucketAliasRequest { + type Response = RemoveBucketAliasResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let bucket_id = parse_bucket_id(&self.bucket_id)?; + + let helper = garage.locked_helper().await; + + match self.alias { + BucketAliasEnum::Global { global_alias } => { + helper + .unset_global_bucket_alias(bucket_id, &global_alias) + .await? + } + BucketAliasEnum::Local { + local_alias, + access_key_id, + } => { + helper + .unset_local_bucket_alias(bucket_id, &access_key_id, &local_alias) + .await? + } + } + + Ok(RemoveBucketAliasResponse( + bucket_info_results(garage, bucket_id).await?, + )) + } +} + +// ---- HELPER ---- + async fn bucket_info_results( garage: &Arc, bucket_id: Uuid, -) -> Result, Error> { +) -> Result { let bucket = garage .bucket_helper() .get_existing_bucket(bucket_id) @@ -122,7 +623,7 @@ async fn bucket_info_results( let counters = garage .object_counter_table .table - .get(&bucket_id, &EmptyKey) + .get(&bucket.id, &EmptyKey) .await? .map(|x| x.filtered_values(&garage.system.cluster_layout())) .unwrap_or_default(); @@ -130,7 +631,7 @@ async fn bucket_info_results( let mpu_counters = garage .mpu_counter_table .table - .get(&bucket_id, &EmptyKey) + .get(&bucket.id, &EmptyKey) .await? .map(|x| x.filtered_values(&garage.system.cluster_layout())) .unwrap_or_default(); @@ -176,407 +677,66 @@ async fn bucket_info_results( let state = bucket.state.as_option().unwrap(); let quotas = state.quotas.get(); - let res = - GetBucketInfoResult { - id: hex::encode(bucket.id), - global_aliases: state - .aliases - .items() - .iter() - .filter(|(_, _, a)| *a) - .map(|(n, _, _)| n.to_string()) - .collect::>(), - website_access: state.website_config.get().is_some(), - website_config: state.website_config.get().clone().map(|wsc| { - GetBucketInfoWebsiteResult { - index_document: wsc.index_document, - error_document: wsc.error_document, - } - }), - keys: relevant_keys - .into_values() - .map(|key| { - let p = key.state.as_option().unwrap(); - GetBucketInfoKey { - access_key_id: key.key_id, - name: p.name.get().to_string(), - permissions: p - .authorized_buckets - .get(&bucket.id) - .map(|p| ApiBucketKeyPerm { - read: p.allow_read, - write: p.allow_write, - owner: p.allow_owner, - }) - .unwrap_or_default(), - bucket_local_aliases: p - .local_aliases - .items() - .iter() - .filter(|(_, _, b)| *b == Some(bucket.id)) - .map(|(n, _, _)| n.to_string()) - .collect::>(), - } + let res = GetBucketInfoResponse { + id: hex::encode(bucket.id), + created: DateTime::from_timestamp_millis(state.creation_date as i64) + .expect("invalid timestamp stored in db"), + global_aliases: state + .aliases + .items() + .iter() + .filter(|(_, _, a)| *a) + .map(|(n, _, _)| n.to_string()) + .collect::>(), + website_access: state.website_config.get().is_some(), + website_config: state.website_config.get().clone().map(|wsc| { + GetBucketInfoWebsiteResponse { + index_document: wsc.index_document, + error_document: wsc.error_document, + } + }), + keys: relevant_keys + .into_values() + .filter_map(|key| { + let p = key.state.as_option().unwrap(); + let permissions = p + .authorized_buckets + .get(&bucket.id) + .filter(|p| p.is_any()) + .map(|p| ApiBucketKeyPerm { + read: p.allow_read, + write: p.allow_write, + owner: p.allow_owner, + })?; + Some(GetBucketInfoKey { + access_key_id: key.key_id, + name: p.name.get().to_string(), + permissions, + bucket_local_aliases: p + .local_aliases + .items() + .iter() + .filter(|(_, _, b)| *b == Some(bucket.id)) + .map(|(n, _, _)| n.to_string()) + .collect::>(), }) - .collect::>(), - objects: *counters.get(OBJECTS).unwrap_or(&0), - bytes: *counters.get(BYTES).unwrap_or(&0), - unfinished_uploads: *counters.get(UNFINISHED_UPLOADS).unwrap_or(&0), - unfinished_multipart_uploads: *mpu_counters.get(mpu_table::UPLOADS).unwrap_or(&0), - unfinished_multipart_upload_parts: *mpu_counters.get(mpu_table::PARTS).unwrap_or(&0), - unfinished_multipart_upload_bytes: *mpu_counters.get(mpu_table::BYTES).unwrap_or(&0), - quotas: ApiBucketQuotas { - max_size: quotas.max_size, - max_objects: quotas.max_objects, - }, - }; + }) + .collect::>(), + objects: *counters.get(OBJECTS).unwrap_or(&0), + bytes: *counters.get(BYTES).unwrap_or(&0), + unfinished_uploads: *counters.get(UNFINISHED_UPLOADS).unwrap_or(&0), + unfinished_multipart_uploads: *mpu_counters.get(mpu_table::UPLOADS).unwrap_or(&0), + unfinished_multipart_upload_parts: *mpu_counters.get(mpu_table::PARTS).unwrap_or(&0), + unfinished_multipart_upload_bytes: *mpu_counters.get(mpu_table::BYTES).unwrap_or(&0), + quotas: ApiBucketQuotas { + max_size: quotas.max_size, + max_objects: quotas.max_objects, + }, + }; - Ok(json_ok_response(&res)?) + Ok(res) } -#[derive(Serialize)] -#[serde(rename_all = "camelCase")] -struct GetBucketInfoResult { - id: String, - global_aliases: Vec, - website_access: bool, - #[serde(default)] - website_config: Option, - keys: Vec, - objects: i64, - bytes: i64, - unfinished_uploads: i64, - unfinished_multipart_uploads: i64, - unfinished_multipart_upload_parts: i64, - unfinished_multipart_upload_bytes: i64, - quotas: ApiBucketQuotas, -} - -#[derive(Serialize)] -#[serde(rename_all = "camelCase")] -struct GetBucketInfoWebsiteResult { - index_document: String, - error_document: Option, -} - -#[derive(Serialize)] -#[serde(rename_all = "camelCase")] -struct GetBucketInfoKey { - access_key_id: String, - name: String, - permissions: ApiBucketKeyPerm, - bucket_local_aliases: Vec, -} - -pub async fn handle_create_bucket( - garage: &Arc, - req: Request, -) -> Result, Error> { - let req = parse_json_body::(req).await?; - - let helper = garage.locked_helper().await; - - if let Some(ga) = &req.global_alias { - if !is_valid_bucket_name(ga, garage.config.allow_punycode) { - return Err(Error::bad_request(format!( - "{}: {}", - ga, INVALID_BUCKET_NAME_MESSAGE - ))); - } - - if let Some(alias) = garage.bucket_alias_table.get(&EmptyKey, ga).await? { - if alias.state.get().is_some() { - return Err(CommonError::BucketAlreadyExists.into()); - } - } - } - - if let Some(la) = &req.local_alias { - if !is_valid_bucket_name(&la.alias, garage.config.allow_punycode) { - return Err(Error::bad_request(format!( - "{}: {}", - la.alias, INVALID_BUCKET_NAME_MESSAGE - ))); - } - - let key = helper.key().get_existing_key(&la.access_key_id).await?; - let state = key.state.as_option().unwrap(); - if matches!(state.local_aliases.get(&la.alias), Some(_)) { - return Err(Error::bad_request("Local alias already exists")); - } - } - - let bucket = Bucket::new(); - garage.bucket_table.insert(&bucket).await?; - - if let Some(ga) = &req.global_alias { - helper.set_global_bucket_alias(bucket.id, ga).await?; - } - - if let Some(la) = &req.local_alias { - helper - .set_local_bucket_alias(bucket.id, &la.access_key_id, &la.alias) - .await?; - - if la.allow.read || la.allow.write || la.allow.owner { - helper - .set_bucket_key_permissions( - bucket.id, - &la.access_key_id, - BucketKeyPerm { - timestamp: now_msec(), - allow_read: la.allow.read, - allow_write: la.allow.write, - allow_owner: la.allow.owner, - }, - ) - .await?; - } - } - - bucket_info_results(garage, bucket.id).await -} - -#[derive(Deserialize)] -#[serde(rename_all = "camelCase")] -struct CreateBucketRequest { - global_alias: Option, - local_alias: Option, -} - -#[derive(Deserialize)] -#[serde(rename_all = "camelCase")] -struct CreateBucketLocalAlias { - access_key_id: String, - alias: String, - #[serde(default)] - allow: ApiBucketKeyPerm, -} - -pub async fn handle_delete_bucket( - garage: &Arc, - id: String, -) -> Result, Error> { - let helper = garage.locked_helper().await; - - let bucket_id = parse_bucket_id(&id)?; - - let mut bucket = helper.bucket().get_existing_bucket(bucket_id).await?; - let state = bucket.state.as_option().unwrap(); - - // Check bucket is empty - if !helper.bucket().is_bucket_empty(bucket_id).await? { - return Err(CommonError::BucketNotEmpty.into()); - } - - // --- done checking, now commit --- - // 1. delete authorization from keys that had access - for (key_id, perm) in bucket.authorized_keys() { - if perm.is_any() { - helper - .set_bucket_key_permissions(bucket.id, key_id, BucketKeyPerm::NO_PERMISSIONS) - .await?; - } - } - // 2. delete all local aliases - for ((key_id, alias), _, active) in state.local_aliases.items().iter() { - if *active { - helper - .purge_local_bucket_alias(bucket.id, key_id, alias) - .await?; - } - } - // 3. delete all global aliases - for (alias, _, active) in state.aliases.items().iter() { - if *active { - helper.purge_global_bucket_alias(bucket.id, alias).await?; - } - } - - // 4. delete bucket - bucket.state = Deletable::delete(); - garage.bucket_table.insert(&bucket).await?; - - Ok(Response::builder() - .status(StatusCode::NO_CONTENT) - .body(empty_body())?) -} - -pub async fn handle_update_bucket( - garage: &Arc, - id: String, - req: Request, -) -> Result, Error> { - let req = parse_json_body::(req).await?; - let bucket_id = parse_bucket_id(&id)?; - - let mut bucket = garage - .bucket_helper() - .get_existing_bucket(bucket_id) - .await?; - - let state = bucket.state.as_option_mut().unwrap(); - - if let Some(wa) = req.website_access { - if wa.enabled { - state.website_config.update(Some(WebsiteConfig { - index_document: wa.index_document.ok_or_bad_request( - "Please specify indexDocument when enabling website access.", - )?, - error_document: wa.error_document, - })); - } else { - if wa.index_document.is_some() || wa.error_document.is_some() { - return Err(Error::bad_request( - "Cannot specify indexDocument or errorDocument when disabling website access.", - )); - } - state.website_config.update(None); - } - } - - if let Some(q) = req.quotas { - state.quotas.update(BucketQuotas { - max_size: q.max_size, - max_objects: q.max_objects, - }); - } - - garage.bucket_table.insert(&bucket).await?; - - bucket_info_results(garage, bucket_id).await -} - -#[derive(Deserialize)] -#[serde(rename_all = "camelCase")] -struct UpdateBucketRequest { - website_access: Option, - quotas: Option, -} - -#[derive(Deserialize)] -#[serde(rename_all = "camelCase")] -struct UpdateBucketWebsiteAccess { - enabled: bool, - index_document: Option, - error_document: Option, -} - -// ---- BUCKET/KEY PERMISSIONS ---- - -pub async fn handle_bucket_change_key_perm( - garage: &Arc, - req: Request, - new_perm_flag: bool, -) -> Result, Error> { - let req = parse_json_body::(req).await?; - - let helper = garage.locked_helper().await; - - let bucket_id = parse_bucket_id(&req.bucket_id)?; - - let bucket = helper.bucket().get_existing_bucket(bucket_id).await?; - let state = bucket.state.as_option().unwrap(); - - let key = helper.key().get_existing_key(&req.access_key_id).await?; - - let mut perm = state - .authorized_keys - .get(&key.key_id) - .cloned() - .unwrap_or(BucketKeyPerm::NO_PERMISSIONS); - - if req.permissions.read { - perm.allow_read = new_perm_flag; - } - if req.permissions.write { - perm.allow_write = new_perm_flag; - } - if req.permissions.owner { - perm.allow_owner = new_perm_flag; - } - - helper - .set_bucket_key_permissions(bucket.id, &key.key_id, perm) - .await?; - - bucket_info_results(garage, bucket.id).await -} - -#[derive(Deserialize)] -#[serde(rename_all = "camelCase")] -struct BucketKeyPermChangeRequest { - bucket_id: String, - access_key_id: String, - permissions: ApiBucketKeyPerm, -} - -// ---- BUCKET ALIASES ---- - -pub async fn handle_global_alias_bucket( - garage: &Arc, - bucket_id: String, - alias: String, -) -> Result, Error> { - let bucket_id = parse_bucket_id(&bucket_id)?; - - let helper = garage.locked_helper().await; - - helper.set_global_bucket_alias(bucket_id, &alias).await?; - - bucket_info_results(garage, bucket_id).await -} - -pub async fn handle_global_unalias_bucket( - garage: &Arc, - bucket_id: String, - alias: String, -) -> Result, Error> { - let bucket_id = parse_bucket_id(&bucket_id)?; - - let helper = garage.locked_helper().await; - - helper.unset_global_bucket_alias(bucket_id, &alias).await?; - - bucket_info_results(garage, bucket_id).await -} - -pub async fn handle_local_alias_bucket( - garage: &Arc, - bucket_id: String, - access_key_id: String, - alias: String, -) -> Result, Error> { - let bucket_id = parse_bucket_id(&bucket_id)?; - - let helper = garage.locked_helper().await; - - helper - .set_local_bucket_alias(bucket_id, &access_key_id, &alias) - .await?; - - bucket_info_results(garage, bucket_id).await -} - -pub async fn handle_local_unalias_bucket( - garage: &Arc, - bucket_id: String, - access_key_id: String, - alias: String, -) -> Result, Error> { - let bucket_id = parse_bucket_id(&bucket_id)?; - - let helper = garage.locked_helper().await; - - helper - .unset_local_bucket_alias(bucket_id, &access_key_id, &alias) - .await?; - - bucket_info_results(garage, bucket_id).await -} - -// ---- HELPER ---- - fn parse_bucket_id(id: &str) -> Result { let id_hex = hex::decode(id).ok_or_bad_request("Invalid bucket id")?; Ok(Uuid::try_from(&id_hex).ok_or_bad_request("Invalid bucket id")?) diff --git a/src/api/admin/cluster.rs b/src/api/admin/cluster.rs index ffa0fa71..6a97c471 100644 --- a/src/api/admin/cluster.rs +++ b/src/api/admin/cluster.rs @@ -1,411 +1,288 @@ use std::collections::HashMap; -use std::net::SocketAddr; +use std::fmt::Write; use std::sync::Arc; -use hyper::{body::Incoming as IncomingBody, Request, Response}; -use serde::{Deserialize, Serialize}; +use format_table::format_table_to_string; -use garage_util::crdt::*; use garage_util::data::*; use garage_rpc::layout; +use garage_rpc::layout::PARTITION_BITS; use garage_model::garage::Garage; -use garage_api_common::helpers::{json_ok_response, parse_json_body}; - -use crate::api_server::ResBody; +use crate::api::*; use crate::error::*; +use crate::{Admin, RequestHandler}; -pub async fn handle_get_cluster_status(garage: &Arc) -> Result, Error> { - let layout = garage.system.cluster_layout(); - let mut nodes = garage - .system - .get_known_nodes() - .into_iter() - .map(|i| { - ( - i.id, - NodeResp { - id: hex::encode(i.id), - addr: i.addr, - hostname: i.status.hostname, - is_up: i.is_up, - last_seen_secs_ago: i.last_seen_secs_ago, - data_partition: i - .status - .data_disk_avail - .map(|(avail, total)| FreeSpaceResp { - available: avail, - total, +impl RequestHandler for GetClusterStatusRequest { + type Response = GetClusterStatusResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let layout = garage.system.cluster_layout(); + let mut nodes = garage + .system + .get_known_nodes() + .into_iter() + .map(|i| { + ( + i.id, + NodeResp { + id: hex::encode(i.id), + garage_version: i.status.garage_version, + addr: i.addr, + hostname: i.status.hostname, + is_up: i.is_up, + last_seen_secs_ago: i.last_seen_secs_ago, + data_partition: i.status.data_disk_avail.map(|(avail, total)| { + FreeSpaceResp { + available: avail, + total, + } }), - metadata_partition: i.status.meta_disk_avail.map(|(avail, total)| { - FreeSpaceResp { - available: avail, - total, - } - }), - ..Default::default() - }, - ) - }) - .collect::>(); + metadata_partition: i.status.meta_disk_avail.map(|(avail, total)| { + FreeSpaceResp { + available: avail, + total, + } + }), + ..Default::default() + }, + ) + }) + .collect::>(); - for (id, _, role) in layout.current().roles.items().iter() { - if let layout::NodeRoleV(Some(r)) = role { - let role = NodeRoleResp { - id: hex::encode(id), - zone: r.zone.to_string(), - capacity: r.capacity, - tags: r.tags.clone(), - }; - match nodes.get_mut(id) { - None => { - nodes.insert( - *id, - NodeResp { - id: hex::encode(id), - role: Some(role), - ..Default::default() - }, - ); - } - Some(n) => { - n.role = Some(role); - } - } - } - } - - for ver in layout.versions().iter().rev().skip(1) { - for (id, _, role) in ver.roles.items().iter() { - if let layout::NodeRoleV(Some(r)) = role { - if r.capacity.is_some() { - if let Some(n) = nodes.get_mut(id) { - if n.role.is_none() { - n.draining = true; + if let Ok(current_layout) = layout.current() { + for (id, _, role) in current_layout.roles.items().iter() { + if let layout::NodeRoleV(Some(r)) = role { + let role = NodeAssignedRole { + zone: r.zone.to_string(), + capacity: r.capacity, + tags: r.tags.clone(), + }; + match nodes.get_mut(id) { + None => { + nodes.insert( + *id, + NodeResp { + id: hex::encode(id), + role: Some(role), + ..Default::default() + }, + ); + } + Some(n) => { + n.role = Some(role); } - } else { - nodes.insert( - *id, - NodeResp { - id: hex::encode(id), - draining: true, - ..Default::default() - }, - ); } } } } - } - let mut nodes = nodes.into_values().collect::>(); - nodes.sort_by(|x, y| x.id.cmp(&y.id)); + if let Ok(layout_versions) = layout.versions() { + for ver in layout_versions.iter().rev().skip(1) { + for (id, _, role) in ver.roles.items().iter() { + if let layout::NodeRoleV(Some(r)) = role { + if r.capacity.is_some() { + if let Some(n) = nodes.get_mut(id) { + if n.role.is_none() { + n.draining = true; + } + } else { + nodes.insert( + *id, + NodeResp { + id: hex::encode(id), + draining: true, + ..Default::default() + }, + ); + } + } + } + } + } + } - let res = GetClusterStatusResponse { - node: hex::encode(garage.system.id), - garage_version: garage_util::version::garage_version(), - garage_features: garage_util::version::garage_features(), - rust_version: garage_util::version::rust_version(), - db_engine: garage.db.engine(), - layout_version: layout.current().version, - nodes, - }; + let mut nodes = nodes.into_values().collect::>(); + nodes.sort_by(|x, y| x.id.cmp(&y.id)); - Ok(json_ok_response(&res)?) -} - -pub async fn handle_get_cluster_health(garage: &Arc) -> Result, Error> { - use garage_rpc::system::ClusterHealthStatus; - let health = garage.system.health(); - let health = ClusterHealth { - status: match health.status { - ClusterHealthStatus::Healthy => "healthy", - ClusterHealthStatus::Degraded => "degraded", - ClusterHealthStatus::Unavailable => "unavailable", - }, - known_nodes: health.known_nodes, - connected_nodes: health.connected_nodes, - storage_nodes: health.storage_nodes, - storage_nodes_ok: health.storage_nodes_ok, - partitions: health.partitions, - partitions_quorum: health.partitions_quorum, - partitions_all_ok: health.partitions_all_ok, - }; - Ok(json_ok_response(&health)?) -} - -pub async fn handle_connect_cluster_nodes( - garage: &Arc, - req: Request, -) -> Result, Error> { - let req = parse_json_body::, _, Error>(req).await?; - - let res = futures::future::join_all(req.iter().map(|node| garage.system.connect(node))) - .await - .into_iter() - .map(|r| match r { - Ok(()) => ConnectClusterNodesResponse { - success: true, - error: None, - }, - Err(e) => ConnectClusterNodesResponse { - success: false, - error: Some(format!("{}", e)), - }, + Ok(GetClusterStatusResponse { + layout_version: layout.inner().current().version, + nodes, }) - .collect::>(); - - Ok(json_ok_response(&res)?) -} - -pub async fn handle_get_cluster_layout(garage: &Arc) -> Result, Error> { - let res = format_cluster_layout(garage.system.cluster_layout().inner()); - - Ok(json_ok_response(&res)?) -} - -fn format_cluster_layout(layout: &layout::LayoutHistory) -> GetClusterLayoutResponse { - let roles = layout - .current() - .roles - .items() - .iter() - .filter_map(|(k, _, v)| v.0.clone().map(|x| (k, x))) - .map(|(k, v)| NodeRoleResp { - id: hex::encode(k), - zone: v.zone.clone(), - capacity: v.capacity, - tags: v.tags.clone(), - }) - .collect::>(); - - let staged_role_changes = layout - .staging - .get() - .roles - .items() - .iter() - .filter(|(k, _, v)| layout.current().roles.get(k) != Some(v)) - .map(|(k, _, v)| match &v.0 { - None => NodeRoleChange { - id: hex::encode(k), - action: NodeRoleChangeEnum::Remove { remove: true }, - }, - Some(r) => NodeRoleChange { - id: hex::encode(k), - action: NodeRoleChangeEnum::Update { - zone: r.zone.clone(), - capacity: r.capacity, - tags: r.tags.clone(), - }, - }, - }) - .collect::>(); - - GetClusterLayoutResponse { - version: layout.current().version, - roles, - staged_role_changes, } } -// ---- +impl RequestHandler for GetClusterHealthRequest { + type Response = GetClusterHealthResponse; -#[derive(Debug, Clone, Copy, Serialize)] -#[serde(rename_all = "camelCase")] -pub struct ClusterHealth { - status: &'static str, - known_nodes: usize, - connected_nodes: usize, - storage_nodes: usize, - storage_nodes_ok: usize, - partitions: usize, - partitions_quorum: usize, - partitions_all_ok: usize, -} - -#[derive(Serialize)] -#[serde(rename_all = "camelCase")] -struct GetClusterStatusResponse { - node: String, - garage_version: &'static str, - garage_features: Option<&'static [&'static str]>, - rust_version: &'static str, - db_engine: String, - layout_version: u64, - nodes: Vec, -} - -#[derive(Serialize)] -#[serde(rename_all = "camelCase")] -struct ApplyClusterLayoutResponse { - message: Vec, - layout: GetClusterLayoutResponse, -} - -#[derive(Serialize)] -#[serde(rename_all = "camelCase")] -struct ConnectClusterNodesResponse { - success: bool, - error: Option, -} - -#[derive(Serialize)] -#[serde(rename_all = "camelCase")] -struct GetClusterLayoutResponse { - version: u64, - roles: Vec, - staged_role_changes: Vec, -} - -#[derive(Serialize)] -#[serde(rename_all = "camelCase")] -struct NodeRoleResp { - id: String, - zone: String, - capacity: Option, - tags: Vec, -} - -#[derive(Serialize, Default)] -#[serde(rename_all = "camelCase")] -struct FreeSpaceResp { - available: u64, - total: u64, -} - -#[derive(Serialize, Default)] -#[serde(rename_all = "camelCase")] -struct NodeResp { - id: String, - role: Option, - addr: Option, - hostname: Option, - is_up: bool, - last_seen_secs_ago: Option, - draining: bool, - #[serde(skip_serializing_if = "Option::is_none")] - data_partition: Option, - #[serde(skip_serializing_if = "Option::is_none")] - metadata_partition: Option, -} - -// ---- update functions ---- - -pub async fn handle_update_cluster_layout( - garage: &Arc, - req: Request, -) -> Result, Error> { - let updates = parse_json_body::(req).await?; - - let mut layout = garage.system.cluster_layout().inner().clone(); - - let mut roles = layout.current().roles.clone(); - roles.merge(&layout.staging.get().roles); - - for change in updates { - let node = hex::decode(&change.id).ok_or_bad_request("Invalid node identifier")?; - let node = Uuid::try_from(&node).ok_or_bad_request("Invalid node identifier")?; - - let new_role = match change.action { - NodeRoleChangeEnum::Remove { remove: true } => None, - NodeRoleChangeEnum::Update { - zone, - capacity, - tags, - } => Some(layout::NodeRole { - zone, - capacity, - tags, - }), - _ => return Err(Error::bad_request("Invalid layout change")), + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + use garage_rpc::system::ClusterHealthStatus; + let health = garage.system.health(); + let health = GetClusterHealthResponse { + status: match health.status { + ClusterHealthStatus::Healthy => "healthy", + ClusterHealthStatus::Degraded => "degraded", + ClusterHealthStatus::Unavailable => "unavailable", + } + .to_string(), + known_nodes: health.known_nodes, + connected_nodes: health.connected_nodes, + storage_nodes: health.storage_nodes, + // Translating storage_nodes_up (admin API context) to storage_nodes_ok (metrics context) + // TODO: when releasing major release, consider renaming all the fields in the metrics to storage_nodes_up + storage_nodes_up: health.storage_nodes_ok, + partitions: health.partitions, + partitions_quorum: health.partitions_quorum, + partitions_all_ok: health.partitions_all_ok, }; - - layout - .staging - .get_mut() - .roles - .merge(&roles.update_mutator(node, layout::NodeRoleV(new_role))); + Ok(health) } - - garage - .system - .layout_manager - .update_cluster_layout(&layout) - .await?; - - let res = format_cluster_layout(&layout); - Ok(json_ok_response(&res)?) } -pub async fn handle_apply_cluster_layout( - garage: &Arc, - req: Request, -) -> Result, Error> { - let param = parse_json_body::(req).await?; +impl RequestHandler for GetClusterStatisticsRequest { + type Response = GetClusterStatisticsResponse; - let layout = garage.system.cluster_layout().inner().clone(); - let (layout, msg) = layout.apply_staged_changes(Some(param.version))?; + // FIXME: return this as a JSON struct instead of text + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let mut ret = String::new(); - garage - .system - .layout_manager - .update_cluster_layout(&layout) - .await?; + // Gather storage node and free space statistics for current nodes + let layout = &garage.system.cluster_layout(); + let mut node_partition_count = HashMap::::new(); + if let Ok(current_layout) = layout.current() { + for short_id in current_layout.ring_assignment_data.iter() { + let id = current_layout.node_id_vec[*short_id as usize]; + *node_partition_count.entry(id).or_default() += 1; + } + } + let node_info = garage + .system + .get_known_nodes() + .into_iter() + .map(|n| (n.id, n)) + .collect::>(); - let res = ApplyClusterLayoutResponse { - message: msg, - layout: format_cluster_layout(&layout), - }; - Ok(json_ok_response(&res)?) + let mut table = vec![" ID\tHostname\tZone\tCapacity\tPart.\tDataAvail\tMetaAvail".into()]; + for (id, parts) in node_partition_count.iter() { + let info = node_info.get(id); + let status = info.map(|x| &x.status); + let role = layout + .current() + .ok() + .and_then(|l| l.roles.get(id)) + .and_then(|x| x.0.as_ref()); + let hostname = status.and_then(|x| x.hostname.as_deref()).unwrap_or("?"); + let zone = role.map(|x| x.zone.as_str()).unwrap_or("?"); + let capacity = role + .map(|x| x.capacity_string()) + .unwrap_or_else(|| "?".into()); + let avail_str = |x| match x { + Some((avail, total)) => { + let pct = (avail as f64) / (total as f64) * 100.; + let avail = bytesize::ByteSize::b(avail); + let total = bytesize::ByteSize::b(total); + format!("{}/{} ({:.1}%)", avail, total, pct) + } + None => "?".into(), + }; + let data_avail = avail_str(status.and_then(|x| x.data_disk_avail)); + let meta_avail = avail_str(status.and_then(|x| x.meta_disk_avail)); + table.push(format!( + " {:?}\t{}\t{}\t{}\t{}\t{}\t{}", + id, hostname, zone, capacity, parts, data_avail, meta_avail + )); + } + write!( + &mut ret, + "Storage nodes:\n{}", + format_table_to_string(table) + ) + .unwrap(); + + let meta_part_avail = node_partition_count + .iter() + .filter_map(|(id, parts)| { + node_info + .get(id) + .and_then(|x| x.status.meta_disk_avail) + .map(|c| c.0 / *parts) + }) + .collect::>(); + let data_part_avail = node_partition_count + .iter() + .filter_map(|(id, parts)| { + node_info + .get(id) + .and_then(|x| x.status.data_disk_avail) + .map(|c| c.0 / *parts) + }) + .collect::>(); + if !meta_part_avail.is_empty() && !data_part_avail.is_empty() { + let meta_avail = + bytesize::ByteSize(meta_part_avail.iter().min().unwrap() * (1 << PARTITION_BITS)); + let data_avail = + bytesize::ByteSize(data_part_avail.iter().min().unwrap() * (1 << PARTITION_BITS)); + writeln!( + &mut ret, + "\nEstimated available storage space cluster-wide (might be lower in practice):" + ) + .unwrap(); + if meta_part_avail.len() < node_partition_count.len() + || data_part_avail.len() < node_partition_count.len() + { + ret += &format_table_to_string(vec![ + format!(" data: < {}", data_avail), + format!(" metadata: < {}", meta_avail), + ]); + writeln!(&mut ret, "A precise estimate could not be given as information is missing for some storage nodes.").unwrap(); + } else { + ret += &format_table_to_string(vec![ + format!(" data: {}", data_avail), + format!(" metadata: {}", meta_avail), + ]); + } + } + + Ok(GetClusterStatisticsResponse { freeform: ret }) + } } -pub async fn handle_revert_cluster_layout( - garage: &Arc, -) -> Result, Error> { - let layout = garage.system.cluster_layout().inner().clone(); - let layout = layout.revert_staged_changes()?; - garage - .system - .layout_manager - .update_cluster_layout(&layout) - .await?; +impl RequestHandler for ConnectClusterNodesRequest { + type Response = ConnectClusterNodesResponse; - let res = format_cluster_layout(&layout); - Ok(json_ok_response(&res)?) -} - -// ---- - -type UpdateClusterLayoutRequest = Vec; - -#[derive(Deserialize)] -#[serde(rename_all = "camelCase")] -struct ApplyLayoutRequest { - version: u64, -} - -// ---- - -#[derive(Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -struct NodeRoleChange { - id: String, - #[serde(flatten)] - action: NodeRoleChangeEnum, -} - -#[derive(Serialize, Deserialize)] -#[serde(untagged)] -enum NodeRoleChangeEnum { - #[serde(rename_all = "camelCase")] - Remove { remove: bool }, - #[serde(rename_all = "camelCase")] - Update { - zone: String, - capacity: Option, - tags: Vec, - }, + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let res = futures::future::join_all(self.0.iter().map(|node| garage.system.connect(node))) + .await + .into_iter() + .map(|r| match r { + Ok(()) => ConnectNodeResponse { + success: true, + error: None, + }, + Err(e) => ConnectNodeResponse { + success: false, + error: Some(format!("{}", e)), + }, + }) + .collect::>(); + Ok(ConnectClusterNodesResponse(res)) + } } diff --git a/src/api/admin/error.rs b/src/api/admin/error.rs index 17d4c200..b8be278e 100644 --- a/src/api/admin/error.rs +++ b/src/api/admin/error.rs @@ -21,10 +21,26 @@ pub enum Error { Common(#[from] CommonError), // Category: cannot process + /// The admin API token does not exist + #[error("Admin token not found: {0}")] + NoSuchAdminToken(String), + /// The API access key does not exist #[error("Access key not found: {0}")] NoSuchAccessKey(String), + /// The requested block does not exist + #[error("Block not found: {0}")] + NoSuchBlock(String), + + /// The requested worker does not exist + #[error("Worker not found: {0}")] + NoSuchWorker(u64), + + /// The object requested don't exists + #[error("Key not found")] + NoSuchKey, + /// In Import key, the key already exists #[error("Key {0} already exists in data store. Even if it is deleted, we can't let you create a new key with the same ID. Sorry.")] KeyAlreadyExists(String), @@ -46,11 +62,15 @@ impl From for Error { } impl Error { - fn code(&self) -> &'static str { + pub fn code(&self) -> &'static str { match self { Error::Common(c) => c.aws_code(), + Error::NoSuchAdminToken(_) => "NoSuchAdminToken", Error::NoSuchAccessKey(_) => "NoSuchAccessKey", + Error::NoSuchWorker(_) => "NoSuchWorker", + Error::NoSuchBlock(_) => "NoSuchBlock", Error::KeyAlreadyExists(_) => "KeyAlreadyExists", + Error::NoSuchKey => "NoSuchKey", } } } @@ -60,7 +80,11 @@ impl ApiError for Error { fn http_status_code(&self) -> StatusCode { match self { Error::Common(c) => c.http_status_code(), - Error::NoSuchAccessKey(_) => StatusCode::NOT_FOUND, + Error::NoSuchAdminToken(_) + | Error::NoSuchAccessKey(_) + | Error::NoSuchWorker(_) + | Error::NoSuchBlock(_) + | Error::NoSuchKey => StatusCode::NOT_FOUND, Error::KeyAlreadyExists(_) => StatusCode::CONFLICT, } } @@ -68,6 +92,7 @@ impl ApiError for Error { fn add_http_headers(&self, header_map: &mut HeaderMap) { use hyper::header; header_map.append(header::CONTENT_TYPE, "application/json".parse().unwrap()); + header_map.append(header::ACCESS_CONTROL_ALLOW_ORIGIN, "*".parse().unwrap()); } fn http_body(&self, garage_region: &str, path: &str) -> ErrorBody { diff --git a/src/api/admin/key.rs b/src/api/admin/key.rs index bebf3063..857adca1 100644 --- a/src/api/admin/key.rs +++ b/src/api/admin/key.rs @@ -1,173 +1,190 @@ use std::collections::HashMap; use std::sync::Arc; -use hyper::{body::Incoming as IncomingBody, Request, Response, StatusCode}; -use serde::{Deserialize, Serialize}; +use chrono::DateTime; use garage_table::*; +use garage_util::time::now_msec; use garage_model::garage::Garage; use garage_model::key_table::*; -use garage_api_common::helpers::*; - -use crate::api_server::ResBody; +use crate::api::*; use crate::error::*; +use crate::{Admin, RequestHandler}; -pub async fn handle_list_keys(garage: &Arc) -> Result, Error> { - let res = garage - .key_table - .get_range( - &EmptyKey, - None, - Some(KeyFilter::Deleted(DeletedFilter::NotDeleted)), - 10000, - EnumerationOrder::Forward, - ) - .await? - .iter() - .map(|k| ListKeyResultItem { - id: k.key_id.to_string(), - name: k.params().unwrap().name.get().clone(), - }) - .collect::>(); +impl RequestHandler for ListKeysRequest { + type Response = ListKeysResponse; - Ok(json_ok_response(&res)?) -} + async fn handle(self, garage: &Arc, _admin: &Admin) -> Result { + let now = now_msec(); -#[derive(Serialize)] -#[serde(rename_all = "camelCase")] -struct ListKeyResultItem { - id: String, - name: String, -} - -pub async fn handle_get_key_info( - garage: &Arc, - id: Option, - search: Option, - show_secret_key: bool, -) -> Result, Error> { - let key = if let Some(id) = id { - garage.key_helper().get_existing_key(&id).await? - } else if let Some(search) = search { - garage - .key_helper() - .get_existing_matching_key(&search) + let res = garage + .key_table + .get_range( + &EmptyKey, + None, + Some(KeyFilter::Deleted(DeletedFilter::NotDeleted)), + 10000, + EnumerationOrder::Forward, + ) .await? - } else { - unreachable!(); - }; + .iter() + .map(|k| { + let p = k.params().unwrap(); - key_info_results(garage, key, show_secret_key).await -} + ListKeysResponseItem { + id: k.key_id.to_string(), + name: p.name.get().clone(), + created: p.created.map(|x| { + DateTime::from_timestamp_millis(x as i64) + .expect("invalid timestamp stored in db") + }), + expiration: p.expiration.get().map(|x| { + DateTime::from_timestamp_millis(x as i64) + .expect("invalid timestamp stored in db") + }), + expired: p.is_expired(now), + } + }) + .collect::>(); -pub async fn handle_create_key( - garage: &Arc, - req: Request, -) -> Result, Error> { - let req = parse_json_body::(req).await?; - - let key = Key::new(req.name.as_deref().unwrap_or("Unnamed key")); - garage.key_table.insert(&key).await?; - - key_info_results(garage, key, true).await -} - -#[derive(Deserialize)] -#[serde(rename_all = "camelCase")] -struct CreateKeyRequest { - name: Option, -} - -pub async fn handle_import_key( - garage: &Arc, - req: Request, -) -> Result, Error> { - let req = parse_json_body::(req).await?; - - let prev_key = garage.key_table.get(&EmptyKey, &req.access_key_id).await?; - if prev_key.is_some() { - return Err(Error::KeyAlreadyExists(req.access_key_id.to_string())); + Ok(ListKeysResponse(res)) } - - let imported_key = Key::import( - &req.access_key_id, - &req.secret_access_key, - req.name.as_deref().unwrap_or("Imported key"), - ) - .ok_or_bad_request("Invalid key format")?; - garage.key_table.insert(&imported_key).await?; - - key_info_results(garage, imported_key, false).await } -#[derive(Deserialize)] -#[serde(rename_all = "camelCase")] -struct ImportKeyRequest { - access_key_id: String, - secret_access_key: String, - name: Option, -} +impl RequestHandler for GetKeyInfoRequest { + type Response = GetKeyInfoResponse; -pub async fn handle_update_key( - garage: &Arc, - id: String, - req: Request, -) -> Result, Error> { - let req = parse_json_body::(req).await?; + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let key = match (self.id, self.search) { + (Some(id), None) => garage.key_helper().get_existing_key(&id).await?, + (None, Some(search)) => { + let candidates = garage + .key_table + .get_range( + &EmptyKey, + None, + Some(KeyFilter::MatchesAndNotDeleted(search.to_string())), + 10, + EnumerationOrder::Forward, + ) + .await? + .into_iter() + .collect::>(); + if candidates.len() != 1 { + return Err(Error::bad_request(format!( + "{} matching keys", + candidates.len() + ))); + } + candidates.into_iter().next().unwrap() + } + _ => { + return Err(Error::bad_request( + "Either id or search must be provided (but not both)", + )); + } + }; - let mut key = garage.key_helper().get_existing_key(&id).await?; - - let key_state = key.state.as_option_mut().unwrap(); - - if let Some(new_name) = req.name { - key_state.name.update(new_name); + key_info_results(garage, key, self.show_secret_key).await } - if let Some(allow) = req.allow { - if allow.create_bucket { - key_state.allow_create_bucket.update(true); +} + +impl RequestHandler for CreateKeyRequest { + type Response = CreateKeyResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let mut key = Key::new("Unnamed key"); + + apply_key_updates(&mut key, self.0)?; + + garage.key_table.insert(&key).await?; + + Ok(CreateKeyResponse( + key_info_results(garage, key, true).await?, + )) + } +} + +impl RequestHandler for ImportKeyRequest { + type Response = ImportKeyResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let prev_key = garage.key_table.get(&EmptyKey, &self.access_key_id).await?; + if prev_key.is_some() { + return Err(Error::KeyAlreadyExists(self.access_key_id.to_string())); } - } - if let Some(deny) = req.deny { - if deny.create_bucket { - key_state.allow_create_bucket.update(false); - } - } - garage.key_table.insert(&key).await?; + let imported_key = Key::import( + &self.access_key_id, + &self.secret_access_key, + self.name.as_deref().unwrap_or("Imported key"), + ) + .ok_or_bad_request("Invalid key format")?; + garage.key_table.insert(&imported_key).await?; - key_info_results(garage, key, false).await + Ok(ImportKeyResponse( + key_info_results(garage, imported_key, false).await?, + )) + } } -#[derive(Deserialize)] -#[serde(rename_all = "camelCase")] -struct UpdateKeyRequest { - name: Option, - allow: Option, - deny: Option, +impl RequestHandler for UpdateKeyRequest { + type Response = UpdateKeyResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let mut key = garage.key_helper().get_existing_key(&self.id).await?; + + apply_key_updates(&mut key, self.body)?; + + garage.key_table.insert(&key).await?; + + Ok(UpdateKeyResponse( + key_info_results(garage, key, false).await?, + )) + } } -pub async fn handle_delete_key( - garage: &Arc, - id: String, -) -> Result, Error> { - let helper = garage.locked_helper().await; +impl RequestHandler for DeleteKeyRequest { + type Response = DeleteKeyResponse; - let mut key = helper.key().get_existing_key(&id).await?; + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let helper = garage.locked_helper().await; - helper.delete_key(&mut key).await?; + let mut key = helper.key().get_existing_key(&self.id).await?; - Ok(Response::builder() - .status(StatusCode::NO_CONTENT) - .body(empty_body())?) + helper.delete_key(&mut key).await?; + + Ok(DeleteKeyResponse) + } } async fn key_info_results( garage: &Arc, key: Key, show_secret: bool, -) -> Result, Error> { +) -> Result { let mut relevant_buckets = HashMap::new(); let key_state = key.state.as_option().unwrap(); @@ -193,8 +210,15 @@ async fn key_info_results( } } - let res = GetKeyInfoResult { + let res = GetKeyInfoResponse { name: key_state.name.get().clone(), + created: key_state.created.map(|x| { + DateTime::from_timestamp_millis(x as i64).expect("invalid timestamp stored in db") + }), + expiration: key_state.expiration.get().map(|x| { + DateTime::from_timestamp_millis(x as i64).expect("invalid timestamp stored in db") + }), + expired: key_state.is_expired(now_msec()), access_key_id: key.key_id.clone(), secret_access_key: if show_secret { Some(key_state.secret_key.clone()) @@ -206,9 +230,18 @@ async fn key_info_results( }, buckets: relevant_buckets .into_values() - .map(|bucket| { + .filter_map(|bucket| { let state = bucket.state.as_option().unwrap(); - KeyInfoBucketResult { + let permissions = key_state + .authorized_buckets + .get(&bucket.id) + .filter(|p| p.is_any()) + .map(|p| ApiBucketKeyPerm { + read: p.allow_read, + write: p.allow_write, + owner: p.allow_owner, + })?; + Some(KeyInfoBucketResponse { id: hex::encode(bucket.id), global_aliases: state .aliases @@ -224,57 +257,45 @@ async fn key_info_results( .filter(|((k, _), _, a)| *a && *k == key.key_id) .map(|((_, n), _, _)| n.to_string()) .collect::>(), - permissions: key_state - .authorized_buckets - .get(&bucket.id) - .map(|p| ApiBucketKeyPerm { - read: p.allow_read, - write: p.allow_write, - owner: p.allow_owner, - }) - .unwrap_or_default(), - } + permissions, + }) }) .collect::>(), }; - Ok(json_ok_response(&res)?) + Ok(res) } -#[derive(Serialize)] -#[serde(rename_all = "camelCase")] -struct GetKeyInfoResult { - name: String, - access_key_id: String, - #[serde(skip_serializing_if = "is_default")] - secret_access_key: Option, - permissions: KeyPerm, - buckets: Vec, -} +fn apply_key_updates(key: &mut Key, updates: UpdateKeyRequestBody) -> Result<(), Error> { + if updates.never_expires && updates.expiration.is_some() { + return Err(Error::bad_request( + "cannot specify `expiration` and `never_expires`", + )); + } -#[derive(Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -struct KeyPerm { - #[serde(default)] - create_bucket: bool, -} + let key_state = key.state.as_option_mut().unwrap(); -#[derive(Serialize)] -#[serde(rename_all = "camelCase")] -struct KeyInfoBucketResult { - id: String, - global_aliases: Vec, - local_aliases: Vec, - permissions: ApiBucketKeyPerm, -} + if let Some(new_name) = updates.name { + key_state.name.update(new_name); + } + if let Some(expiration) = updates.expiration { + key_state + .expiration + .update(Some(expiration.timestamp_millis() as u64)); + } + if updates.never_expires { + key_state.expiration.update(None); + } + if let Some(allow) = updates.allow { + if allow.create_bucket { + key_state.allow_create_bucket.update(true); + } + } + if let Some(deny) = updates.deny { + if deny.create_bucket { + key_state.allow_create_bucket.update(false); + } + } -#[derive(Serialize, Deserialize, Default)] -#[serde(rename_all = "camelCase")] -pub(crate) struct ApiBucketKeyPerm { - #[serde(default)] - pub(crate) read: bool, - #[serde(default)] - pub(crate) write: bool, - #[serde(default)] - pub(crate) owner: bool, + Ok(()) } diff --git a/src/api/admin/layout.rs b/src/api/admin/layout.rs new file mode 100644 index 00000000..1979c11a --- /dev/null +++ b/src/api/admin/layout.rs @@ -0,0 +1,408 @@ +use std::sync::Arc; + +use garage_util::crdt::*; +use garage_util::data::*; +use garage_util::error::Error as GarageError; + +use garage_rpc::layout; + +use garage_model::garage::Garage; + +use crate::api::*; +use crate::error::*; +use crate::{Admin, RequestHandler}; + +impl RequestHandler for GetClusterLayoutRequest { + type Response = GetClusterLayoutResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + Ok(format_cluster_layout( + garage.system.cluster_layout().inner(), + )) + } +} + +fn format_cluster_layout(layout: &layout::LayoutHistory) -> GetClusterLayoutResponse { + let current = layout.current(); + + let roles = current + .roles + .items() + .iter() + .filter_map(|(k, _, v)| v.0.clone().map(|x| (k, x))) + .map(|(k, v)| { + let stored_partitions = current.get_node_usage(k).ok().map(|x| x as u64); + LayoutNodeRole { + id: hex::encode(k), + zone: v.zone.clone(), + capacity: v.capacity, + stored_partitions, + usable_capacity: stored_partitions.map(|x| x * current.partition_size), + tags: v.tags.clone(), + } + }) + .collect::>(); + + let staged_role_changes = layout + .staging + .get() + .roles + .items() + .iter() + .filter(|(k, _, v)| current.roles.get(k) != Some(v)) + .map(|(k, _, v)| match &v.0 { + None => NodeRoleChange { + id: hex::encode(k), + action: NodeRoleChangeEnum::Remove { remove: true }, + }, + Some(r) => NodeRoleChange { + id: hex::encode(k), + action: NodeRoleChangeEnum::Update(NodeAssignedRole { + zone: r.zone.clone(), + capacity: r.capacity, + tags: r.tags.clone(), + }), + }, + }) + .collect::>(); + + let staged_parameters = if *layout.staging.get().parameters.get() != current.parameters { + Some((*layout.staging.get().parameters.get()).into()) + } else { + None + }; + + GetClusterLayoutResponse { + version: current.version, + roles, + partition_size: current.partition_size, + parameters: current.parameters.into(), + staged_role_changes, + staged_parameters, + } +} + +impl RequestHandler for GetClusterLayoutHistoryRequest { + type Response = GetClusterLayoutHistoryResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let layout_helper = garage.system.cluster_layout(); + let layout = layout_helper.inner(); + let min_stored = layout.min_stored(); + + let versions = layout + .versions + .iter() + .rev() + .chain(layout.old_versions.iter().rev()) + .map(|ver| { + let status = if ver.version == layout.current().version { + ClusterLayoutVersionStatus::Current + } else if ver.version >= min_stored { + ClusterLayoutVersionStatus::Draining + } else { + ClusterLayoutVersionStatus::Historical + }; + ClusterLayoutVersion { + version: ver.version, + status, + storage_nodes: ver + .roles + .items() + .iter() + .filter( + |(_, _, x)| matches!(x, layout::NodeRoleV(Some(c)) if c.capacity.is_some()), + ) + .count() as u64, + gateway_nodes: ver + .roles + .items() + .iter() + .filter( + |(_, _, x)| matches!(x, layout::NodeRoleV(Some(c)) if c.capacity.is_none()), + ) + .count() as u64, + } + }) + .collect::>(); + + let all_nodes = layout.get_all_nodes(); + let min_ack = layout_helper.ack_map_min(); + + let update_trackers = if layout.versions.len() > 1 { + Some( + all_nodes + .iter() + .map(|node| { + ( + hex::encode(node), + NodeUpdateTrackers { + ack: layout.update_trackers.ack_map.get(node, min_stored), + sync: layout.update_trackers.sync_map.get(node, min_stored), + sync_ack: layout.update_trackers.sync_ack_map.get(node, min_stored), + }, + ) + }) + .collect(), + ) + } else { + None + }; + + Ok(GetClusterLayoutHistoryResponse { + current_version: layout.current().version, + min_ack, + versions, + update_trackers, + }) + } +} + +// ---- + +// ---- update functions ---- + +impl RequestHandler for UpdateClusterLayoutRequest { + type Response = UpdateClusterLayoutResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let mut layout = garage.system.cluster_layout().inner().clone(); + + let mut roles = layout.current().roles.clone(); + roles.merge(&layout.staging.get().roles); + + for change in self.roles { + let node = hex::decode(&change.id).ok_or_bad_request("Invalid node identifier")?; + let node = Uuid::try_from(&node).ok_or_bad_request("Invalid node identifier")?; + + let new_role = match change.action { + NodeRoleChangeEnum::Remove { remove: true } => None, + NodeRoleChangeEnum::Update(NodeAssignedRole { + zone, + capacity, + tags, + }) => { + if matches!(capacity, Some(cap) if cap < 1024) { + return Err(Error::bad_request("Capacity should be at least 1K (1024)")); + } + Some(layout::NodeRole { + zone, + capacity, + tags, + }) + } + _ => return Err(Error::bad_request("Invalid layout change")), + }; + + layout + .staging + .get_mut() + .roles + .merge(&roles.update_mutator(node, layout::NodeRoleV(new_role))); + } + + if let Some(param) = self.parameters { + if let ZoneRedundancy::AtLeast(r_int) = param.zone_redundancy { + if r_int > layout.current().replication_factor { + return Err(Error::bad_request(format!( + "The zone redundancy must be smaller or equal to the replication factor ({}).", + layout.current().replication_factor + ))); + } else if r_int < 1 { + return Err(Error::bad_request( + "The zone redundancy must be at least 1.", + )); + } + } + layout.staging.get_mut().parameters.update(param.into()); + } + + garage + .system + .layout_manager + .update_cluster_layout(&layout) + .await?; + + let res = format_cluster_layout(&layout); + Ok(UpdateClusterLayoutResponse(res)) + } +} + +impl RequestHandler for PreviewClusterLayoutChangesRequest { + type Response = PreviewClusterLayoutChangesResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let layout = garage.system.cluster_layout().inner().clone(); + let new_ver = layout.current().version + 1; + match layout.apply_staged_changes(new_ver) { + Err(GarageError::Message(error)) => { + Ok(PreviewClusterLayoutChangesResponse::Error { error }) + } + Err(e) => Err(e.into()), + Ok((new_layout, msg)) => Ok(PreviewClusterLayoutChangesResponse::Success { + message: msg, + new_layout: format_cluster_layout(&new_layout), + }), + } + } +} + +impl RequestHandler for ApplyClusterLayoutRequest { + type Response = ApplyClusterLayoutResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let layout = garage.system.cluster_layout().inner().clone(); + let (layout, msg) = layout.apply_staged_changes(self.version)?; + + garage + .system + .layout_manager + .update_cluster_layout(&layout) + .await?; + + Ok(ApplyClusterLayoutResponse { + message: msg, + layout: format_cluster_layout(&layout), + }) + } +} + +impl RequestHandler for RevertClusterLayoutRequest { + type Response = RevertClusterLayoutResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let layout = garage.system.cluster_layout().inner().clone(); + let layout = layout.revert_staged_changes()?; + garage + .system + .layout_manager + .update_cluster_layout(&layout) + .await?; + + let res = format_cluster_layout(&layout); + Ok(RevertClusterLayoutResponse(res)) + } +} + +impl RequestHandler for ClusterLayoutSkipDeadNodesRequest { + type Response = ClusterLayoutSkipDeadNodesResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let status = garage.system.get_known_nodes(); + + let mut layout = garage.system.cluster_layout().inner().clone(); + let mut ack_updated = vec![]; + let mut sync_updated = vec![]; + + if layout.versions.len() == 1 { + return Err(Error::bad_request( + "This command cannot be called when there is only one live cluster layout version", + )); + } + + let min_v = layout.min_stored(); + if self.version <= min_v || self.version > layout.current().version { + return Err(Error::bad_request(format!( + "Invalid version, you may use the following version numbers: {}", + (min_v + 1..=layout.current().version) + .map(|x| x.to_string()) + .collect::>() + .join(" ") + ))); + } + + let all_nodes = layout.get_all_nodes(); + for node in all_nodes.iter() { + // Update ACK tracker for dead nodes or for all nodes if --allow-missing-data + if self.allow_missing_data || !status.iter().any(|x| x.id == *node && x.is_up) { + let ack_changed = layout.update_trackers.ack_map.set_max(*node, self.version); + if ack_changed { + ack_updated.push(hex::encode(node)); + } + } + + // If --allow-missing-data, update SYNC tracker for all nodes. + if self.allow_missing_data { + let sync_changed = layout.update_trackers.sync_map.set_max(*node, self.version); + if sync_changed { + sync_updated.push(hex::encode(node)); + } + } + } + + garage + .system + .layout_manager + .update_cluster_layout(&layout) + .await?; + + Ok(ClusterLayoutSkipDeadNodesResponse { + ack_updated, + sync_updated, + }) + } +} + +// ---- + +impl From for ZoneRedundancy { + fn from(x: layout::ZoneRedundancy) -> Self { + match x { + layout::ZoneRedundancy::Maximum => ZoneRedundancy::Maximum, + layout::ZoneRedundancy::AtLeast(x) => ZoneRedundancy::AtLeast(x), + } + } +} + +impl From for layout::ZoneRedundancy { + fn from(val: ZoneRedundancy) -> Self { + match val { + ZoneRedundancy::Maximum => layout::ZoneRedundancy::Maximum, + ZoneRedundancy::AtLeast(x) => layout::ZoneRedundancy::AtLeast(x), + } + } +} + +impl From for LayoutParameters { + fn from(x: layout::LayoutParameters) -> Self { + LayoutParameters { + zone_redundancy: x.zone_redundancy.into(), + } + } +} + +impl From for layout::LayoutParameters { + fn from(val: LayoutParameters) -> Self { + layout::LayoutParameters { + zone_redundancy: val.zone_redundancy.into(), + } + } +} diff --git a/src/api/admin/lib.rs b/src/api/admin/lib.rs index 599e9b44..dd164497 100644 --- a/src/api/admin/lib.rs +++ b/src/api/admin/lib.rs @@ -3,9 +3,44 @@ extern crate tracing; pub mod api_server; mod error; +mod macros; + +pub mod api; +pub mod openapi; mod router_v0; mod router_v1; +mod router_v2; +mod admin_token; mod bucket; mod cluster; mod key; +mod layout; +mod special; + +mod block; +mod node; +mod repair; +mod worker; + +use std::sync::Arc; + +use garage_model::garage::Garage; + +pub use api_server::AdminApiServer as Admin; + +pub enum Authorization { + None, + MetricsToken, + AdminToken, +} + +pub trait RequestHandler { + type Response; + + fn handle( + self, + garage: &Arc, + admin: &Admin, + ) -> impl std::future::Future> + Send; +} diff --git a/src/api/admin/macros.rs b/src/api/admin/macros.rs new file mode 100644 index 00000000..f11a2a25 --- /dev/null +++ b/src/api/admin/macros.rs @@ -0,0 +1,208 @@ +macro_rules! admin_endpoints { + [ + $(@special $special_endpoint:ident,)* + $($endpoint:ident,)* + ] => { + paste! { + #[derive(Debug, Clone, Serialize, Deserialize)] + pub enum AdminApiRequest { + $( + $special_endpoint( [<$special_endpoint Request>] ), + )* + $( + $endpoint( [<$endpoint Request>] ), + )* + } + + #[derive(Debug, Clone, Serialize)] + #[serde(untagged)] + pub enum AdminApiResponse { + $( + $endpoint( [<$endpoint Response>] ), + )* + } + + #[derive(Debug, Clone, Serialize, Deserialize)] + pub enum TaggedAdminApiResponse { + $( + $endpoint( [<$endpoint Response>] ), + )* + } + + impl AdminApiRequest { + pub fn name(&self) -> &'static str { + match self { + $( + Self::$special_endpoint(_) => stringify!($special_endpoint), + )* + $( + Self::$endpoint(_) => stringify!($endpoint), + )* + } + } + } + + impl AdminApiResponse { + pub fn tagged(self) -> TaggedAdminApiResponse { + match self { + $( + Self::$endpoint(res) => TaggedAdminApiResponse::$endpoint(res), + )* + } + } + } + + $( + impl From< [< $endpoint Request >] > for AdminApiRequest { + fn from(req: [< $endpoint Request >]) -> AdminApiRequest { + AdminApiRequest::$endpoint(req) + } + } + + impl TryFrom for [< $endpoint Response >] { + type Error = TaggedAdminApiResponse; + fn try_from(resp: TaggedAdminApiResponse) -> Result< [< $endpoint Response >], TaggedAdminApiResponse> { + match resp { + TaggedAdminApiResponse::$endpoint(v) => Ok(v), + x => Err(x), + } + } + } + )* + + impl RequestHandler for AdminApiRequest { + type Response = AdminApiResponse; + + async fn handle(self, garage: &Arc, admin: &Admin) -> Result { + match self { + $( + AdminApiRequest::$special_endpoint(_) => Err( + Error::Common(CommonError::BadRequest( + concat!(stringify!($special_endpoint), " cannot be used outside of the HTTP Admin API").into() + )) + ), + )* + $( + AdminApiRequest::$endpoint(req) => Ok(AdminApiResponse::$endpoint(req.handle(garage, admin).await?)), + )* + } + } + } + } + }; +} + +macro_rules! local_admin_endpoints { + [ + $($endpoint:ident,)* + ] => { + paste! { + #[derive(Debug, Clone, Serialize, Deserialize)] + pub enum LocalAdminApiRequest { + $( + $endpoint( [] ), + )* + } + + #[derive(Debug, Clone, Serialize, Deserialize)] + pub enum LocalAdminApiResponse { + $( + $endpoint( [] ), + )* + } + + $( + pub type [< $endpoint Request >] = MultiRequest< [< Local $endpoint Request >] >; + + pub type [< $endpoint RequestBody >] = [< Local $endpoint Request >]; + + pub type [< $endpoint Response >] = MultiResponse< [< Local $endpoint Response >] >; + + impl From< [< Local $endpoint Request >] > for LocalAdminApiRequest { + fn from(req: [< Local $endpoint Request >]) -> LocalAdminApiRequest { + LocalAdminApiRequest::$endpoint(req) + } + } + + impl TryFrom for [< Local $endpoint Response >] { + type Error = LocalAdminApiResponse; + fn try_from(resp: LocalAdminApiResponse) -> Result< [< Local $endpoint Response >], LocalAdminApiResponse> { + match resp { + LocalAdminApiResponse::$endpoint(v) => Ok(v), + x => Err(x), + } + } + } + + impl RequestHandler for [< $endpoint Request >] { + type Response = [< $endpoint Response >]; + + async fn handle(self, garage: &Arc, admin: &Admin) -> Result { + let to = find_matching_nodes(garage, self.node.as_str())?; + + let resps = garage.system.rpc_helper().call_many(&admin.endpoint, + &to, + AdminRpc::Internal(self.body.into()), + RequestStrategy::with_priority(PRIO_NORMAL), + ).await?; + + let mut ret = [< $endpoint Response >] { + success: HashMap::new(), + error: HashMap::new(), + }; + for (node, resp) in resps { + match resp { + Ok(AdminRpcResponse::InternalApiOkResponse(r)) => { + match [< Local $endpoint Response >]::try_from(r) { + Ok(r) => { + ret.success.insert(hex::encode(node), r); + } + Err(_) => { + ret.error.insert(hex::encode(node), "returned invalid value".to_string()); + } + } + } + Ok(AdminRpcResponse::ApiErrorResponse{error_code, http_code, message}) => { + ret.error.insert(hex::encode(node), format!("{} ({}): {}", error_code, http_code, message)); + } + Ok(_) => { + ret.error.insert(hex::encode(node), "returned invalid value".to_string()); + } + Err(e) => { + ret.error.insert(hex::encode(node), e.to_string()); + } + } + } + + Ok(ret) + } + } + )* + + impl LocalAdminApiRequest { + pub fn name(&self) -> &'static str { + match self { + $( + Self::$endpoint(_) => stringify!($endpoint), + )* + } + } + } + + impl RequestHandler for LocalAdminApiRequest { + type Response = LocalAdminApiResponse; + + async fn handle(self, garage: &Arc, admin: &Admin) -> Result { + Ok(match self { + $( + LocalAdminApiRequest::$endpoint(req) => LocalAdminApiResponse::$endpoint(req.handle(garage, admin).await?), + )* + }) + } + } + } + }; +} + +pub(crate) use admin_endpoints; +pub(crate) use local_admin_endpoints; diff --git a/src/api/admin/node.rs b/src/api/admin/node.rs new file mode 100644 index 00000000..12963aab --- /dev/null +++ b/src/api/admin/node.rs @@ -0,0 +1,149 @@ +use std::fmt::Write; +use std::sync::Arc; + +use format_table::format_table_to_string; + +use garage_util::error::Error as GarageError; + +use garage_table::replication::*; +use garage_table::*; + +use garage_model::garage::Garage; + +use crate::api::*; +use crate::error::Error; +use crate::{Admin, RequestHandler}; + +impl RequestHandler for LocalGetNodeInfoRequest { + type Response = LocalGetNodeInfoResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + Ok(LocalGetNodeInfoResponse { + node_id: hex::encode(garage.system.id), + garage_version: garage_util::version::garage_version().to_string(), + garage_features: garage_util::version::garage_features() + .map(|features| features.iter().map(ToString::to_string).collect()), + rust_version: garage_util::version::rust_version().to_string(), + db_engine: garage.db.engine(), + }) + } +} + +impl RequestHandler for LocalCreateMetadataSnapshotRequest { + type Response = LocalCreateMetadataSnapshotResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + garage_model::snapshot::async_snapshot_metadata(garage).await?; + Ok(LocalCreateMetadataSnapshotResponse) + } +} + +impl RequestHandler for LocalGetNodeStatisticsRequest { + type Response = LocalGetNodeStatisticsResponse; + + // FIXME: return this as a JSON struct instead of text + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let sys_status = garage.system.local_status(); + + let mut ret = format_table_to_string(vec![ + format!("Node ID:\t{:?}", garage.system.id), + format!("Hostname:\t{}", sys_status.hostname.unwrap_or_default(),), + format!( + "Garage version:\t{}", + garage_util::version::garage_version(), + ), + format!( + "Garage features:\t{}", + garage_util::version::garage_features() + .map(|list| list.join(", ")) + .unwrap_or_else(|| "(unknown)".into()), + ), + format!( + "Rust compiler version:\t{}", + garage_util::version::rust_version(), + ), + format!("Database engine:\t{}", garage.db.engine()), + ]); + + // Gather table statistics + let mut table = vec![" Table\tItems\tMklItems\tMklTodo\tInsQueue\tGcTodo".into()]; + table.push(gather_table_stats(&garage.admin_token_table)?); + table.push(gather_table_stats(&garage.bucket_table)?); + table.push(gather_table_stats(&garage.bucket_alias_table)?); + table.push(gather_table_stats(&garage.key_table)?); + + table.push(gather_table_stats(&garage.object_table)?); + table.push(gather_table_stats(&garage.object_counter_table.table)?); + table.push(gather_table_stats(&garage.mpu_table)?); + table.push(gather_table_stats(&garage.mpu_counter_table.table)?); + table.push(gather_table_stats(&garage.version_table)?); + table.push(gather_table_stats(&garage.block_ref_table)?); + + #[cfg(feature = "k2v")] + { + table.push(gather_table_stats(&garage.k2v.item_table)?); + table.push(gather_table_stats(&garage.k2v.counter_table.table)?); + } + + write!( + &mut ret, + "\nTable stats:\n{}", + format_table_to_string(table) + ) + .unwrap(); + + // Gather block manager statistics + writeln!(&mut ret, "\nBlock manager stats:").unwrap(); + let rc_len = garage.block_manager.rc_approximate_len()?.to_string(); + + ret += &format_table_to_string(vec![ + format!(" number of RC entries:\t{} (~= number of blocks)", rc_len), + format!( + " resync queue length:\t{}", + garage.block_manager.resync.queue_approximate_len()? + ), + format!( + " blocks with resync errors:\t{}", + garage.block_manager.resync.errors_approximate_len()? + ), + ]); + + Ok(LocalGetNodeStatisticsResponse { freeform: ret }) + } +} + +fn gather_table_stats(t: &Arc>) -> Result +where + F: TableSchema + 'static, + R: TableReplication + 'static, +{ + let data_len = t + .data + .store + .approximate_len() + .map_err(GarageError::from)? + .to_string(); + let mkl_len = t.merkle_updater.merkle_tree_approximate_len()?.to_string(); + + Ok(format!( + " {}\t{}\t{}\t{}\t{}\t{}", + F::TABLE_NAME, + data_len, + mkl_len, + t.merkle_updater.todo_approximate_len()?, + t.data.insert_queue_approximate_len()?, + t.data.gc_todo_approximate_len()? + )) +} diff --git a/src/api/admin/openapi.rs b/src/api/admin/openapi.rs new file mode 100644 index 00000000..c5936ade --- /dev/null +++ b/src/api/admin/openapi.rs @@ -0,0 +1,958 @@ +#![allow(dead_code)] +#![allow(non_snake_case)] + +use serde::{Deserialize, Serialize}; +use utoipa::{Modify, OpenApi, ToSchema}; + +use crate::api::*; + +// ********************************************** +// Special endpoints +// ********************************************** + +#[utoipa::path(get, + path = "/metrics", + tag = "Special endpoints", + description = "Prometheus metrics endpoint", + security((), ("bearerAuth" = [])), + responses( + (status = 200, description = "Garage daemon metrics exported in Prometheus format"), + ), +)] +fn Metrics() {} + +#[utoipa::path(get, + path = "/health", + tag = "Special endpoints", + description = " +Check cluster health. The status code returned by this function indicates +whether this Garage daemon can answer API requests. +Garage will return `200 OK` even if some storage nodes are disconnected, +as long as it is able to have a quorum of nodes for read and write operations. + ", + security(()), + responses( + (status = 200, description = "Garage is able to answer requests"), + (status = 503, description = "This Garage daemon is not able to handle requests") + ), +)] +fn Health() {} + +#[utoipa::path(get, + path = "/check", + tag = "Special endpoints", + description = " +Static website domain name check. Checks whether a bucket is configured to serve +a static website for the requested domain. This is used by reverse proxies such +as Caddy or Tricot, to avoid requesting TLS certificates for domain names that +do not correspond to an actual website. + ", + params(CheckDomainRequest), + security(()), + responses( + (status = 200, description = "The domain name redirects to a static website bucket"), + (status = 400, description = "No static website bucket exists for this domain") + ), +)] +fn CheckDomain() {} + +// ********************************************** +// Cluster operations +// ********************************************** + +#[utoipa::path(get, + path = "/v2/GetClusterStatus", + tag = "Cluster", + description = " +Returns the cluster's current status, including: + +- ID of the node being queried and its version of the Garage daemon +- Live nodes +- Currently configured cluster layout +- Staged changes to the cluster layout + +*Capacity is given in bytes* + ", + responses( + (status = 200, description = "Cluster status report", body = GetClusterStatusResponse), + (status = 500, description = "Internal server error") + ), +)] +fn GetClusterStatus() {} + +#[utoipa::path(get, + path = "/v2/GetClusterHealth", + tag = "Cluster", + description = "Returns the global status of the cluster, the number of connected nodes (over the number of known ones), the number of healthy storage nodes (over the declared ones), and the number of healthy partitions (over the total).", + responses( + (status = 200, description = "Cluster health report", body = GetClusterHealthResponse), + ), +)] +fn GetClusterHealth() {} + +#[utoipa::path(get, + path = "/v2/GetClusterStatistics", + tag = "Cluster", + description = " +Fetch global cluster statistics. + +*Note: do not try to parse the `freeform` field of the response, it is given as a string specifically because its format is not stable.* + ", + responses( + (status = 200, description = "Global cluster statistics", body = GetClusterStatisticsResponse), + (status = 500, description = "Internal server error") + ), +)] +fn GetClusterStatistics() {} + +#[utoipa::path(post, + path = "/v2/ConnectClusterNodes", + tag = "Cluster", + description = "Instructs this Garage node to connect to other Garage nodes at specified `@`. `node_id` is generated automatically on node start.", + request_body=ConnectClusterNodesRequest, + responses( + (status = 200, description = "The request has been handled correctly but it does not mean that all connection requests succeeded; some might have fail, you need to check the body!", body = ConnectClusterNodesResponse), + (status = 500, description = "Internal server error") + ), +)] +fn ConnectClusterNodes() {} + +// ********************************************** +// Admin API token operations +// ********************************************** + +#[utoipa::path(get, + path = "/v2/ListAdminTokens", + tag = "Admin API token", + description = "Returns all admin API tokens in the cluster.", + responses( + (status = 200, description = "Returns info about all admin API tokens", body = ListAdminTokensResponse), + (status = 500, description = "Internal server error") + ), +)] +fn ListAdminTokens() {} + +#[utoipa::path(get, + path = "/v2/GetAdminTokenInfo", + tag = "Admin API token", + description = " +Return information about a specific admin API token. +You can search by specifying the exact token identifier (`id`) or by specifying a pattern (`search`). + ", + params(GetAdminTokenInfoRequest), + responses( + (status = 200, description = "Information about the admin token", body = GetAdminTokenInfoResponse), + (status = 500, description = "Internal server error") + ), +)] +fn GetAdminTokenInfo() {} + +#[utoipa::path(post, + path = "/v2/CreateAdminToken", + tag = "Admin API token", + description = "Creates a new admin API token", + request_body = UpdateAdminTokenRequestBody, + responses( + (status = 200, description = "Admin token has been created", body = CreateAdminTokenResponse), + (status = 500, description = "Internal server error") + ), +)] +fn CreateAdminToken() {} + +#[utoipa::path(post, + path = "/v2/UpdateAdminToken", + tag = "Admin API token", + description = " +Updates information about the specified admin API token. + ", + request_body = UpdateAdminTokenRequestBody, + params(UpdateAdminTokenRequest), + responses( + (status = 200, description = "Admin token has been updated", body = UpdateAdminTokenResponse), + (status = 500, description = "Internal server error") + ), +)] +fn UpdateAdminToken() {} + +#[utoipa::path(post, + path = "/v2/DeleteAdminToken", + tag = "Admin API token", + description = "Delete an admin API token from the cluster, revoking all its permissions.", + params(DeleteAdminTokenRequest), + responses( + (status = 200, description = "Admin token has been deleted"), + (status = 500, description = "Internal server error") + ), +)] +fn DeleteAdminToken() {} + +#[utoipa::path(get, + path = "/v2/GetCurrentAdminTokenInfo", + tag = "Admin API token", + description = " +Return information about the calling admin API token. + ", + responses( + (status = 200, description = "Information about the admin token", body = GetCurrentAdminTokenInfoResponse), + (status = 500, description = "Internal server error") + ), +)] +fn GetCurrentAdminTokenInfo() {} + +// ********************************************** +// Layout operations +// ********************************************** + +#[utoipa::path(get, + path = "/v2/GetClusterLayout", + tag = "Cluster layout", + description = " +Returns the cluster's current layout, including: + +- Currently configured cluster layout +- Staged changes to the cluster layout + +*Capacity is given in bytes* + ", + responses( + (status = 200, description = "Current cluster layout", body = GetClusterLayoutResponse), + (status = 500, description = "Internal server error") + ), +)] +fn GetClusterLayout() {} + +#[utoipa::path(get, + path = "/v2/GetClusterLayoutHistory", + tag = "Cluster layout", + description = " +Returns the history of layouts in the cluster + ", + responses( + (status = 200, description = "Cluster layout history", body = GetClusterLayoutHistoryResponse), + (status = 500, description = "Internal server error") + ), +)] +fn GetClusterLayoutHistory() {} + +#[utoipa::path(post, + path = "/v2/UpdateClusterLayout", + tag = "Cluster layout", + description = " +Send modifications to the cluster layout. These modifications will be included in the staged role changes, visible in subsequent calls of `GET /GetClusterHealth`. Once the set of staged changes is satisfactory, the user may call `POST /ApplyClusterLayout` to apply the changed changes, or `POST /RevertClusterLayout` to clear all of the staged changes in the layout. + +Setting the capacity to `null` will configure the node as a gateway. +Otherwise, capacity must be now set in bytes (before Garage 0.9 it was arbitrary weights). +For example to declare 100GB, you must set `capacity: 100000000000`. + +Garage uses internally the International System of Units (SI), it assumes that 1kB = 1000 bytes, and displays storage as kB, MB, GB (and not KiB, MiB, GiB that assume 1KiB = 1024 bytes). + ", + request_body( + content=UpdateClusterLayoutRequestOpenapi, + description=" +To add a new node to the layout or to change the configuration of an existing node, simply set the values you want (`zone`, `capacity`, and `tags`). +To remove a node, simply pass the `remove: true` field. +This logic is represented in OpenAPI with a 'One Of' object. + +Contrary to the CLI that may update only a subset of the fields capacity, zone and tags, when calling this API all of these values must be specified. + " + ), + responses( + (status = 200, description = "Proposed changes have been added to the list of pending changes", body = UpdateClusterLayoutResponse), + (status = 500, description = "Internal server error") + ), +)] +fn UpdateClusterLayout() {} + +// Hack: we cannot use the UpdateClusterLayoutRequest from api.rs, +// as it contains (via NodeRoleChange) an untagged enum flattenned into +// a struct, which breaks the openapi generator. +// See issue #1249. +// Instead, we use a rewritten version of the NodeRoleChange struct where +// the struct fields are distributed into the enum variants (this is an equivalent +// representation, but this way we avoid having to rewrite all uses of the original +// struct in the Garage codebase). +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[schema(as = UpdateClusterLayoutRequest)] +pub struct UpdateClusterLayoutRequestOpenapi { + /// New node roles to assign or remove in the cluster layout + #[serde(default)] + pub roles: Vec, + /// New layout computation parameters to use + #[serde(default)] + pub parameters: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[schema(as = NodeRoleChangeRequest)] +#[serde(untagged)] +pub enum NodeRoleChangeOpenapi { + #[serde(rename_all = "camelCase")] + Remove { + /// ID of the node for which this change applies + id: String, + /// Set `remove` to `true` to remove the node from the layout + remove: bool, + }, + #[serde(rename_all = "camelCase")] + Update { + /// ID of the node for which this change applies + id: String, + #[serde(flatten)] + role: NodeAssignedRole, + }, +} + +#[utoipa::path(post, + path = "/v2/PreviewClusterLayoutChanges", + tag = "Cluster layout", + description = " +Computes a new layout taking into account the staged parameters, and returns it with detailed statistics. The new layout is not applied in the cluster. + +*Note: do not try to parse the `message` field of the response, it is given as an array of string specifically because its format is not stable.* + ", + responses( + (status = 200, description = "Information about the new layout", body = PreviewClusterLayoutChangesResponse), + (status = 500, description = "Internal server error") + ), +)] +fn PreviewClusterLayoutChanges() {} + +#[utoipa::path(post, + path = "/v2/ApplyClusterLayout", + tag = "Cluster layout", + description = " +Applies to the cluster the layout changes currently registered as staged layout changes. + +*Note: do not try to parse the `message` field of the response, it is given as an array of string specifically because its format is not stable.* + ", + request_body=ApplyClusterLayoutRequest, + responses( + (status = 200, description = "The updated cluster layout has been applied in the cluster", body = ApplyClusterLayoutResponse), + (status = 500, description = "Internal server error") + ), +)] +fn ApplyClusterLayout() {} + +#[utoipa::path(post, + path = "/v2/RevertClusterLayout", + tag = "Cluster layout", + description = "Clear staged layout changes", + responses( + (status = 200, description = "All pending changes to the cluster layout have been erased", body = RevertClusterLayoutResponse), + (status = 500, description = "Internal server error") + ), +)] +fn RevertClusterLayout() {} + +#[utoipa::path(post, + path = "/v2/ClusterLayoutSkipDeadNodes", + tag = "Cluster layout", + description = "Force progress in layout update trackers", + request_body = ClusterLayoutSkipDeadNodesRequest, + responses( + (status = 200, description = "Request has been taken into account", body = ClusterLayoutSkipDeadNodesResponse), + (status = 500, description = "Internal server error") + ), +)] +fn ClusterLayoutSkipDeadNodes() {} + +// ********************************************** +// Access key operations +// ********************************************** + +#[utoipa::path(get, + path = "/v2/ListKeys", + tag = "Access key", + description = "Returns all API access keys in the cluster.", + responses( + (status = 200, description = "Returns the key identifier (aka `AWS_ACCESS_KEY_ID`) and its associated, human friendly, name if any (otherwise return an empty string)", body = ListKeysResponse), + (status = 500, description = "Internal server error") + ), +)] +fn ListKeys() {} + +#[utoipa::path(get, + path = "/v2/GetKeyInfo", + tag = "Access key", + description = " +Return information about a specific key like its identifiers, its permissions and buckets on which it has permissions. +You can search by specifying the exact key identifier (`id`) or by specifying a pattern (`search`). + +For confidentiality reasons, the secret key is not returned by default: you must pass the `showSecretKey` query parameter to get it. + ", + params(GetKeyInfoRequest), + responses( + (status = 200, description = "Information about the access key", body = GetKeyInfoResponse), + (status = 500, description = "Internal server error") + ), +)] +fn GetKeyInfo() {} + +#[utoipa::path(post, + path = "/v2/CreateKey", + tag = "Access key", + description = "Creates a new API access key.", + request_body = CreateKeyRequest, + responses( + (status = 200, description = "Access key has been created", body = CreateKeyResponse), + (status = 500, description = "Internal server error") + ), +)] +fn CreateKey() {} + +#[utoipa::path(post, + path = "/v2/ImportKey", + tag = "Access key", + description = " +Imports an existing API key. This feature must only be used for migrations and backup restore. + +**Do not use it to generate custom key identifiers or you will break your Garage cluster.** + ", + request_body = ImportKeyRequest, + responses( + (status = 200, description = "Access key has been imported", body = ImportKeyResponse), + (status = 500, description = "Internal server error") + ), +)] +fn ImportKey() {} + +#[utoipa::path(post, + path = "/v2/UpdateKey", + tag = "Access key", + description = " +Updates information about the specified API access key. + +*Note: the secret key is not returned in the response, `null` is sent instead.* + ", + request_body = UpdateKeyRequestBody, + params(UpdateKeyRequest), + responses( + (status = 200, description = "Access key has been updated", body = UpdateKeyResponse), + (status = 500, description = "Internal server error") + ), +)] +fn UpdateKey() {} + +#[utoipa::path(post, + path = "/v2/DeleteKey", + tag = "Access key", + description = "Delete a key from the cluster. Its access will be removed from all the buckets. Buckets are not automatically deleted and can be dangling. You should manually delete them before. ", + params(DeleteKeyRequest), + responses( + (status = 200, description = "Access key has been deleted"), + (status = 500, description = "Internal server error") + ), +)] +fn DeleteKey() {} + +// ********************************************** +// Bucket operations +// ********************************************** + +#[utoipa::path(get, + path = "/v2/ListBuckets", + tag = "Bucket", + description = "List all the buckets on the cluster with their UUID and their global and local aliases.", + responses( + (status = 200, description = "Returns the UUID of all the buckets and all their aliases", body = ListBucketsResponse), + (status = 500, description = "Internal server error") + ), +)] +fn ListBuckets() {} + +#[utoipa::path(get, + path = "/v2/GetBucketInfo", + tag = "Bucket", + description = " +Given a bucket identifier (`id`) or a global alias (`alias`), get its information. +It includes its aliases, its web configuration, keys that have some permissions +on it, some statistics (number of objects, size), number of dangling multipart uploads, +and its quotas (if any). + ", + params(GetBucketInfoRequest), + responses( + (status = 200, description = "Returns exhaustive information about the bucket", body = GetBucketInfoResponse), + (status = 500, description = "Internal server error") + ), +)] +fn GetBucketInfo() {} + +#[utoipa::path(post, + path = "/v2/CreateBucket", + tag = "Bucket", + description = " +Creates a new bucket, either with a global alias, a local one, or no alias at all. +Technically, you can also specify both `globalAlias` and `localAlias` and that would create two aliases. + ", + request_body = CreateBucketRequest, + responses( + (status = 200, description = "Returns exhaustive information about the bucket", body = CreateBucketResponse), + (status = 500, description = "Internal server error") + ), +)] +fn CreateBucket() {} + +#[utoipa::path(post, + path = "/v2/UpdateBucket", + tag = "Bucket", + description = " +All fields (`websiteAccess` and `quotas`) are optional. +If they are present, the corresponding modifications are applied to the bucket, otherwise nothing is changed. + +In `websiteAccess`: if `enabled` is `true`, `indexDocument` must be specified. +The field `errorDocument` is optional, if no error document is set a generic +error message is displayed when errors happen. Conversely, if `enabled` is +`false`, neither `indexDocument` nor `errorDocument` must be specified. + +In `quotas`: new values of `maxSize` and `maxObjects` must both be specified, or set to `null` +to remove the quotas. An absent value will be considered the same as a `null`. It is not possible +to change only one of the two quotas. + ", + params(UpdateBucketRequest), + request_body = UpdateBucketRequestBody, + responses( + (status = 200, description = "Bucket has been updated", body = UpdateBucketResponse), + (status = 404, description = "Bucket not found"), + (status = 500, description = "Internal server error") + ), +)] +fn UpdateBucket() {} + +#[utoipa::path(post, + path = "/v2/DeleteBucket", + tag = "Bucket", + description = " +Deletes a storage bucket. A bucket cannot be deleted if it is not empty. + +**Warning:** this will delete all aliases associated with the bucket! + ", + params(DeleteBucketRequest), + responses( + (status = 200, description = "Bucket has been deleted"), + (status = 400, description = "Bucket is not empty"), + (status = 404, description = "Bucket not found"), + (status = 500, description = "Internal server error") + ), +)] +fn DeleteBucket() {} + +#[utoipa::path(post, + path = "/v2/CleanupIncompleteUploads", + tag = "Bucket", + description = "Removes all incomplete multipart uploads that are older than the specified number of seconds.", + request_body = CleanupIncompleteUploadsRequest, + responses( + (status = 200, description = "The bucket was cleaned up successfully", body = CleanupIncompleteUploadsResponse), + (status = 500, description = "Internal server error") + ), +)] +fn CleanupIncompleteUploads() {} + +#[utoipa::path(get, + path = "/v2/InspectObject", + tag = "Bucket", + description = " +Returns detailed information about an object in a bucket, including its internal state in Garage. + +This API call can be used to list the data blocks referenced by an object, +as well as to view metadata associated to the object. + +This call may return a list of more than one version for the object, for instance in the +case where there is a currently stored version of the object, and a newer version whose +upload is in progress and not yet finished. + ", + params(InspectObjectRequest), + responses( + (status = 200, description = "Returns exhaustive information about the object", body = InspectObjectResponse), + (status = 404, description = "Object not found"), + (status = 500, description = "Internal server error") + ), +)] +fn InspectObject() {} + +// ********************************************** +// Operations on permissions for keys on buckets +// ********************************************** + +#[utoipa::path(post, + path = "/v2/AllowBucketKey", + tag = "Permission", + description = " +⚠️ **DISCLAIMER**: Garage's developers are aware that this endpoint has an unconventional semantic. Be extra careful when implementing it, its behavior is not obvious. + +Allows a key to do read/write/owner operations on a bucket. + +Flags in permissions which have the value true will be activated. Other flags will remain unchanged (ie. they will keep their internal value). + +For example, if you set read to true, the key will be allowed to read the bucket. +If you set it to false, the key will keeps its previous read permission. +If you want to disallow read for the key, check the DenyBucketKey operation. + ", + request_body = AllowBucketKeyRequest, + responses( + (status = 200, description = "Returns exhaustive information about the bucket", body = AllowBucketKeyResponse), + (status = 500, description = "Internal server error") + ), +)] +fn AllowBucketKey() {} + +#[utoipa::path(post, + path = "/v2/DenyBucketKey", + tag = "Permission", + description = " +⚠️ **DISCLAIMER**: Garage's developers are aware that this endpoint has an unconventional semantic. Be extra careful when implementing it, its behavior is not obvious. + +Denies a key from doing read/write/owner operations on a bucket. + +Flags in permissions which have the value true will be deactivated. Other flags will remain unchanged. + +For example, if you set read to true, the key will be denied from reading. +If you set read to false, the key will keep its previous permissions. +If you want the key to have the reading permission, check the AllowBucketKey operation. + ", + request_body = DenyBucketKeyRequest, + responses( + (status = 200, description = "Returns exhaustive information about the bucket", body = DenyBucketKeyResponse), + (status = 500, description = "Internal server error") + ), +)] +fn DenyBucketKey() {} + +// ********************************************** +// Operations on bucket aliases +// ********************************************** + +#[utoipa::path(post, + path = "/v2/AddBucketAlias", + tag = "Bucket alias", + description = "Add an alias for the target bucket. This can be either a global or a local alias, depending on which fields are specified.", + request_body = BucketAliasEnumOpenapi, + responses( + (status = 200, description = "Returns exhaustive information about the bucket", body = AddBucketAliasResponse), + (status = 500, description = "Internal server error") + ), +)] +fn AddBucketAlias() {} + +#[utoipa::path(post, + path = "/v2/RemoveBucketAlias", + tag = "Bucket alias", + description = "Remove an alias for the target bucket. This can be either a global or a local alias, depending on which fields are specified.", + request_body = BucketAliasEnumOpenapi, + responses( + (status = 200, description = "Returns exhaustive information about the bucket", body = RemoveBucketAliasResponse), + (status = 500, description = "Internal server error") + ), +)] +fn RemoveBucketAlias() {} + +// Hack for issue #1249 (see UpdateClusterLayout) +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(untagged)] +#[schema(as = BucketAliasEnum)] +pub enum BucketAliasEnumOpenapi { + #[serde(rename_all = "camelCase")] + Global { + bucket_id: String, + global_alias: String, + }, + #[serde(rename_all = "camelCase")] + Local { + bucket_id: String, + local_alias: String, + access_key_id: String, + }, +} + +// ********************************************** +// Node operations +// ********************************************** + +#[utoipa::path(get, + path = "/v2/GetNodeInfo", + tag = "Node", + description = " +Return information about the Garage daemon running on one or several nodes. + ", + params(MultiRequestQueryParams), + responses( + (status = 200, description = "Responses from individual cluster nodes", body = MultiResponse), + (status = 500, description = "Internal server error") + ), +)] +fn GetNodeInfo() {} + +#[utoipa::path(get, + path = "/v2/GetNodeStatistics", + tag = "Node", + description = " +Fetch statistics for one or several Garage nodes. + +*Note: do not try to parse the `freeform` field of the response, it is given as a string specifically because its format is not stable.* + ", + params(MultiRequestQueryParams), + responses( + (status = 200, description = "Responses from individual cluster nodes", body = MultiResponse), + (status = 500, description = "Internal server error") + ), +)] +fn GetNodeStatistics() {} + +#[utoipa::path(post, + path = "/v2/CreateMetadataSnapshot", + tag = "Node", + description = " +Instruct one or several nodes to take a snapshot of their metadata databases. + ", + params(MultiRequestQueryParams), + responses( + (status = 200, description = "Responses from individual cluster nodes", body = MultiResponse), + (status = 500, description = "Internal server error") + ), +)] +fn CreateMetadataSnapshot() {} + +#[utoipa::path(post, + path = "/v2/LaunchRepairOperation", + tag = "Node", + description = " +Launch a repair operation on one or several cluster nodes. + ", + params(MultiRequestQueryParams), + request_body = LocalLaunchRepairOperationRequest, + responses( + (status = 200, description = "Responses from individual cluster nodes", body = MultiResponse), + (status = 500, description = "Internal server error") + ), +)] +fn LaunchRepairOperation() {} + +// ********************************************** +// Worker operations +// ********************************************** + +#[utoipa::path(post, + path = "/v2/ListWorkers", + tag = "Worker", + description = " +List background workers currently running on one or several cluster nodes. + ", + params(MultiRequestQueryParams), + request_body = LocalListWorkersRequest, + responses( + (status = 200, description = "Responses from individual cluster nodes", body = MultiResponse), + (status = 500, description = "Internal server error") + ), +)] +fn ListWorkers() {} + +#[utoipa::path(post, + path = "/v2/GetWorkerInfo", + tag = "Worker", + description = " +Get information about the specified background worker on one or several cluster nodes. + ", + params(MultiRequestQueryParams), + request_body = LocalGetWorkerInfoRequest, + responses( + (status = 200, description = "Responses from individual cluster nodes", body = MultiResponse), + (status = 500, description = "Internal server error") + ), +)] +fn GetWorkerInfo() {} + +#[utoipa::path(post, + path = "/v2/GetWorkerVariable", + tag = "Worker", + description = " +Fetch values of one or several worker variables, from one or several cluster nodes. + ", + params(MultiRequestQueryParams), + request_body = LocalGetWorkerVariableRequest, + responses( + (status = 200, description = "Responses from individual cluster nodes", body = MultiResponse), + (status = 500, description = "Internal server error") + ), +)] +fn GetWorkerVariable() {} + +#[utoipa::path(post, + path = "/v2/SetWorkerVariable", + tag = "Worker", + description = " +Set the value for a worker variable, on one or several cluster nodes. + ", + params(MultiRequestQueryParams), + request_body = LocalSetWorkerVariableRequest, + responses( + (status = 200, description = "Responses from individual cluster nodes", body = MultiResponse), + (status = 500, description = "Internal server error") + ), +)] +fn SetWorkerVariable() {} + +// ********************************************** +// Block operations +// ********************************************** + +#[utoipa::path(get, + path = "/v2/ListBlockErrors", + tag = "Block", + description = " +List data blocks that are currently in an errored state on one or several Garage nodes. + ", + params(MultiRequestQueryParams), + responses( + (status = 200, description = "Responses from individual cluster nodes", body = MultiResponse), + (status = 500, description = "Internal server error") + ), +)] +fn ListBlockErrors() {} + +#[utoipa::path(post, + path = "/v2/GetBlockInfo", + tag = "Block", + description = " +Get detailed information about a data block stored on a Garage node, including all object versions and in-progress multipart uploads that contain a reference to this block. + ", + params(MultiRequestQueryParams), + request_body = LocalGetBlockInfoRequest, + responses( + (status = 200, description = "Detailed block information", body = MultiResponse), + (status = 500, description = "Internal server error") + ), +)] +fn GetBlockInfo() {} + +#[utoipa::path(post, + path = "/v2/RetryBlockResync", + tag = "Block", + description = " +Instruct Garage node(s) to retry the resynchronization of one or several missing data block(s). + ", + params(MultiRequestQueryParams), + request_body = LocalRetryBlockResyncRequest, + responses( + (status = 200, description = "Responses from individual cluster nodes", body = MultiResponse), + (status = 500, description = "Internal server error") + ), +)] +fn RetryBlockResync() {} + +#[utoipa::path(post, + path = "/v2/PurgeBlocks", + tag = "Block", + description = " +Purge references to one or several missing data blocks. + +This will remove all objects and in-progress multipart uploads that contain the specified data block(s). The objects will be permanently deleted from the buckets in which they appear. Use with caution. + ", + params(MultiRequestQueryParams), + request_body = LocalPurgeBlocksRequest, + responses( + (status = 200, description = "Responses from individual cluster nodes", body = MultiResponse), + (status = 500, description = "Internal server error") + ), +)] +fn PurgeBlocks() {} + +// ********************************************** +// ********************************************** +// ********************************************** + +struct SecurityAddon; + +impl Modify for SecurityAddon { + fn modify(&self, openapi: &mut utoipa::openapi::OpenApi) { + use utoipa::openapi::security::*; + let components = openapi.components.as_mut().unwrap(); // we can unwrap safely since there already is components registered. + components.add_security_scheme( + "bearerAuth", + SecurityScheme::Http(Http::builder().scheme(HttpAuthScheme::Bearer).build()), + ) + } +} + +#[derive(OpenApi)] +#[openapi( + info( + version = "v2.2.0", + title = "Garage administration API", + description = "Administrate your Garage cluster programmatically, including status, layout, keys, buckets, and maintenance tasks. + +*Disclaimer: This API may change in future Garage versions. Read the changelog and upgrade your scripts before upgrading. Additionally, this specification is early stage and can contain bugs, so be careful and please report any issues on our issue tracker.*", + contact( + name = "The Garage team", + email = "garagehq@deuxfleurs.fr", + url = "https://garagehq.deuxfleurs.fr/", + ), + ), + modifiers(&SecurityAddon), + security(("bearerAuth" = [])), + paths( + // Special ops + Metrics, + Health, + CheckDomain, + // Cluster operations + GetClusterHealth, + GetClusterStatus, + GetClusterStatistics, + ConnectClusterNodes, + // Admin token operations + ListAdminTokens, + GetAdminTokenInfo, + CreateAdminToken, + UpdateAdminToken, + DeleteAdminToken, + GetCurrentAdminTokenInfo, + // Layout operations + GetClusterLayout, + GetClusterLayoutHistory, + UpdateClusterLayout, + PreviewClusterLayoutChanges, + ApplyClusterLayout, + RevertClusterLayout, + ClusterLayoutSkipDeadNodes, + // Key operations + ListKeys, + GetKeyInfo, + CreateKey, + ImportKey, + UpdateKey, + DeleteKey, + // Bucket operations + ListBuckets, + GetBucketInfo, + CreateBucket, + UpdateBucket, + DeleteBucket, + CleanupIncompleteUploads, + InspectObject, + // Operations on permissions + AllowBucketKey, + DenyBucketKey, + // Operations on aliases + AddBucketAlias, + RemoveBucketAlias, + // Node operations + GetNodeInfo, + GetNodeStatistics, + CreateMetadataSnapshot, + LaunchRepairOperation, + // Worker operations + ListWorkers, + GetWorkerInfo, + GetWorkerVariable, + SetWorkerVariable, + // Block operations + ListBlockErrors, + GetBlockInfo, + RetryBlockResync, + PurgeBlocks, + ), + servers( + (url = "http://localhost:3903/", description = "A local server") + ), +)] +pub struct ApiDoc; diff --git a/src/garage/repair/online.rs b/src/api/admin/repair.rs similarity index 65% rename from src/garage/repair/online.rs rename to src/api/admin/repair.rs index 6a7dafcf..1d5665d1 100644 --- a/src/garage/repair/online.rs +++ b/src/api/admin/repair.rs @@ -5,6 +5,14 @@ use std::time::Duration; use async_trait::async_trait; use tokio::sync::watch; +use garage_util::background::*; +use garage_util::data::*; +use garage_util::error::{Error as GarageError, OkOrMessage}; +use garage_util::migrate::Migrate; + +use garage_table::replication::*; +use garage_table::*; + use garage_block::manager::BlockManager; use garage_block::repair::ScrubWorkerCommand; @@ -14,91 +22,89 @@ use garage_model::s3::mpu_table::*; use garage_model::s3::object_table::*; use garage_model::s3::version_table::*; -use garage_table::replication::*; -use garage_table::*; - -use garage_util::background::*; -use garage_util::data::*; -use garage_util::error::Error; -use garage_util::migrate::Migrate; - -use crate::*; +use crate::api::*; +use crate::error::Error; +use crate::{Admin, RequestHandler}; const RC_REPAIR_ITER_COUNT: usize = 64; -pub async fn launch_online_repair( - garage: &Arc, - bg: &BackgroundRunner, - opt: RepairOpt, -) -> Result<(), Error> { - match opt.what { - RepairWhat::Tables => { - info!("Launching a full sync of tables"); - garage.bucket_table.syncer.add_full_sync()?; - garage.object_table.syncer.add_full_sync()?; - garage.version_table.syncer.add_full_sync()?; - garage.block_ref_table.syncer.add_full_sync()?; - garage.key_table.syncer.add_full_sync()?; - } - RepairWhat::Versions => { - info!("Repairing the versions table"); - bg.spawn_worker(TableRepairWorker::new(garage.clone(), RepairVersions)); - } - RepairWhat::MultipartUploads => { - info!("Repairing the multipart uploads table"); - bg.spawn_worker(TableRepairWorker::new(garage.clone(), RepairMpu)); - } - RepairWhat::BlockRefs => { - info!("Repairing the block refs table"); - bg.spawn_worker(TableRepairWorker::new(garage.clone(), RepairBlockRefs)); - } - RepairWhat::BlockRc => { - info!("Repairing the block reference counters"); - bg.spawn_worker(BlockRcRepair::new( - garage.block_manager.clone(), - garage.block_ref_table.clone(), - )); - } - RepairWhat::Blocks => { - info!("Repairing the stored blocks"); - bg.spawn_worker(garage_block::repair::RepairWorker::new( - garage.block_manager.clone(), - )); - } - RepairWhat::Scrub { cmd } => { - let cmd = match cmd { - ScrubCmd::Start => ScrubWorkerCommand::Start, - ScrubCmd::Pause => ScrubWorkerCommand::Pause(Duration::from_secs(3600 * 24)), - ScrubCmd::Resume => ScrubWorkerCommand::Resume, - ScrubCmd::Cancel => ScrubWorkerCommand::Cancel, - ScrubCmd::SetTranquility { tranquility } => { - garage - .block_manager - .scrub_persister - .set_with(|x| x.tranquility = tranquility)?; - return Ok(()); - } - }; - info!("Sending command to scrub worker: {:?}", cmd); - garage.block_manager.send_scrub_command(cmd).await?; - } - RepairWhat::Rebalance => { - info!("Rebalancing the stored blocks among storage locations"); - bg.spawn_worker(garage_block::repair::RebalanceWorker::new( - garage.block_manager.clone(), - )); - } - RepairWhat::Aliases => { - info!("Repairing bucket aliases (foreground)"); - garage.locked_helper().await.repair_aliases().await?; - } - RepairWhat::ClearResyncQueue => { - let garage = garage.clone(); - tokio::task::spawn_blocking(move || garage.block_manager.resync.clear_resync_queue()) - .await?? +impl RequestHandler for LocalLaunchRepairOperationRequest { + type Response = LocalLaunchRepairOperationResponse; + + async fn handle( + self, + garage: &Arc, + admin: &Admin, + ) -> Result { + let bg = &admin.background; + match self.repair_type { + RepairType::Tables => { + info!("Launching a full sync of tables"); + garage.bucket_table.syncer.add_full_sync()?; + garage.object_table.syncer.add_full_sync()?; + garage.version_table.syncer.add_full_sync()?; + garage.block_ref_table.syncer.add_full_sync()?; + garage.key_table.syncer.add_full_sync()?; + } + RepairType::Versions => { + info!("Repairing the versions table"); + bg.spawn_worker(TableRepairWorker::new(garage.clone(), RepairVersions)); + } + RepairType::MultipartUploads => { + info!("Repairing the multipart uploads table"); + bg.spawn_worker(TableRepairWorker::new(garage.clone(), RepairMpu)); + } + RepairType::BlockRefs => { + info!("Repairing the block refs table"); + bg.spawn_worker(TableRepairWorker::new(garage.clone(), RepairBlockRefs)); + } + RepairType::BlockRc => { + info!("Repairing the block reference counters"); + bg.spawn_worker(BlockRcRepair::new( + garage.block_manager.clone(), + garage.block_ref_table.clone(), + )); + } + RepairType::Blocks => { + info!("Repairing the stored blocks"); + bg.spawn_worker(garage_block::repair::RepairWorker::new( + garage.block_manager.clone(), + )); + } + RepairType::Scrub(cmd) => { + let cmd = match cmd { + ScrubCommand::Start => ScrubWorkerCommand::Start, + ScrubCommand::Pause => { + ScrubWorkerCommand::Pause(Duration::from_secs(3600 * 24)) + } + ScrubCommand::Resume => ScrubWorkerCommand::Resume, + ScrubCommand::Cancel => ScrubWorkerCommand::Cancel, + }; + info!("Sending command to scrub worker: {:?}", cmd); + garage.block_manager.send_scrub_command(cmd).await?; + } + RepairType::Rebalance => { + info!("Rebalancing the stored blocks among storage locations"); + bg.spawn_worker(garage_block::repair::RebalanceWorker::new( + garage.block_manager.clone(), + )); + } + RepairType::Aliases => { + info!("Repairing bucket aliases (foreground)"); + garage.locked_helper().await.repair_aliases().await?; + } + RepairType::ClearResyncQueue => { + info!("Clearing resync queue (foreground)"); + let garage = garage.clone(); + tokio::task::spawn_blocking(move || { + garage.block_manager.resync.clear_resync_queue() + }) + .await + .map_err(garage_util::error::Error::from)??; + } } + Ok(LocalLaunchRepairOperationResponse) } - Ok(()) } // ---- @@ -112,7 +118,7 @@ trait TableRepair: Send + Sync + 'static { &mut self, garage: &Garage, entry: <::T as TableSchema>::E, - ) -> impl Future> + Send; + ) -> impl Future> + Send; } struct TableRepairWorker { @@ -148,7 +154,10 @@ impl Worker for TableRepairWorker { } } - async fn work(&mut self, _must_exit: &mut watch::Receiver) -> Result { + async fn work( + &mut self, + _must_exit: &mut watch::Receiver, + ) -> Result { let (item_bytes, next_pos) = match R::table(&self.garage).data.store.get_gt(&self.pos)? { Some((k, v)) => (v, k), None => { @@ -190,7 +199,7 @@ impl TableRepair for RepairVersions { &garage.version_table } - async fn process(&mut self, garage: &Garage, version: Version) -> Result { + async fn process(&mut self, garage: &Garage, version: Version) -> Result { if !version.deleted.get() { let ref_exists = match &version.backlink { VersionBacklink::Object { bucket_id, key } => garage @@ -236,7 +245,11 @@ impl TableRepair for RepairBlockRefs { &garage.block_ref_table } - async fn process(&mut self, garage: &Garage, mut block_ref: BlockRef) -> Result { + async fn process( + &mut self, + garage: &Garage, + mut block_ref: BlockRef, + ) -> Result { if !block_ref.deleted.get() { let ref_exists = garage .version_table @@ -271,7 +284,11 @@ impl TableRepair for RepairMpu { &garage.mpu_table } - async fn process(&mut self, garage: &Garage, mut mpu: MultipartUpload) -> Result { + async fn process( + &mut self, + garage: &Garage, + mut mpu: MultipartUpload, + ) -> Result { if !mpu.deleted.get() { let ref_exists = garage .object_table @@ -328,7 +345,7 @@ impl BlockRcRepair { #[async_trait] impl Worker for BlockRcRepair { fn name(&self) -> String { - format!("Block refcount repair worker") + "Block refcount repair worker".into() } fn status(&self) -> WorkerStatus { @@ -338,7 +355,10 @@ impl Worker for BlockRcRepair { } } - async fn work(&mut self, _must_exit: &mut watch::Receiver) -> Result { + async fn work( + &mut self, + _must_exit: &mut watch::Receiver, + ) -> Result { for _i in 0..RC_REPAIR_ITER_COUNT { let next1 = self .block_manager diff --git a/src/api/admin/router_v1.rs b/src/api/admin/router_v1.rs index 0b4901ea..138a801d 100644 --- a/src/api/admin/router_v1.rs +++ b/src/api/admin/router_v1.rs @@ -7,12 +7,6 @@ use garage_api_common::router_macros::*; use crate::error::*; use crate::router_v0; -pub enum Authorization { - None, - MetricsToken, - AdminToken, -} - router_match! {@func /// List of all Admin API endpoints. @@ -211,15 +205,6 @@ impl Endpoint { ))), } } - /// Get the kind of authorization which is required to perform the operation. - pub fn authorization_type(&self) -> Authorization { - match self { - Self::Health => Authorization::None, - Self::CheckDomain => Authorization::None, - Self::Metrics => Authorization::MetricsToken, - _ => Authorization::AdminToken, - } - } } generateQueryParameters! { diff --git a/src/api/admin/router_v2.rs b/src/api/admin/router_v2.rs new file mode 100644 index 00000000..3009d128 --- /dev/null +++ b/src/api/admin/router_v2.rs @@ -0,0 +1,276 @@ +use std::borrow::Cow; + +use hyper::body::Incoming as IncomingBody; +use hyper::{Method, Request}; +use paste::paste; + +use garage_api_common::helpers::*; +use garage_api_common::router_macros::*; + +use crate::api::*; +use crate::error::*; +use crate::router_v1; +use crate::Authorization; + +impl AdminApiRequest { + /// Determine which S3 endpoint a request is for using the request, and a bucket which was + /// possibly extracted from the Host header. + /// Returns Self plus bucket name, if endpoint is not Endpoint::ListBuckets + pub async fn from_request(req: Request) -> Result { + let uri = req.uri().clone(); + let path = uri.path(); + let query = uri.query(); + + let method = req.method().clone(); + + let mut query = QueryParameters::from_query(query.unwrap_or_default())?; + + let res = router_match!(@gen_path_parser_v2 (&method, path, "/v2/", query, req) [ + @special OPTIONS _ => Options (), + @special GET "/check" => CheckDomain (query::domain), + @special GET "/health" => Health (), + @special GET "/metrics" => Metrics (), + // Cluster endpoints + GET GetClusterStatus (), + GET GetClusterHealth (), + POST ConnectClusterNodes (body), + // Admin token endpoints + GET ListAdminTokens (), + GET GetAdminTokenInfo (query_opt::id, query_opt::search), + POST CreateAdminToken (body), + POST UpdateAdminToken (body_field, query::id), + POST DeleteAdminToken (query::id), + GET GetCurrentAdminTokenInfo (admin_token), + // Layout endpoints + GET GetClusterLayout (), + GET GetClusterLayoutHistory (), + POST UpdateClusterLayout (body), + POST PreviewClusterLayoutChanges (), + POST ApplyClusterLayout (body), + POST RevertClusterLayout (), + POST ClusterLayoutSkipDeadNodes (body), + // API key endpoints + GET GetKeyInfo (query_opt::id, query_opt::search, parse_default(false)::show_secret_key), + POST UpdateKey (body_field, query::id), + POST CreateKey (body), + POST ImportKey (body), + POST DeleteKey (query::id), + GET ListKeys (), + // Bucket endpoints + GET GetBucketInfo (query_opt::id, query_opt::global_alias, query_opt::search), + GET ListBuckets (), + POST CreateBucket (body), + POST DeleteBucket (query::id), + POST UpdateBucket (body_field, query::id), + POST CleanupIncompleteUploads (body), + GET InspectObject (query::bucket_id, query::key), + // Bucket-key permissions + POST AllowBucketKey (body), + POST DenyBucketKey (body), + // Bucket aliases + POST AddBucketAlias (body), + POST RemoveBucketAlias (body), + // Node APIs + GET GetNodeInfo (default::body, query::node), + POST CreateMetadataSnapshot (default::body, query::node), + GET GetNodeStatistics (default::body, query::node), + GET GetClusterStatistics (), + POST LaunchRepairOperation (body_field, query::node), + // Worker APIs + POST ListWorkers (body_field, query::node), + POST GetWorkerInfo (body_field, query::node), + POST GetWorkerVariable (body_field, query::node), + POST SetWorkerVariable (body_field, query::node), + // Block APIs + GET ListBlockErrors (default::body, query::node), + POST GetBlockInfo (body_field, query::node), + POST RetryBlockResync (body_field, query::node), + POST PurgeBlocks (body_field, query::node), + ]); + + if let Some(message) = query.nonempty_message() { + debug!("Unused query parameter: {}", message) + } + + Ok(res) + } + + /// Some endpoints work exactly the same in their v2/ version as they did in their v1/ version. + /// For these endpoints, we can convert a v1/ call to its equivalent as if it was made using + /// its v2/ URL. + pub async fn from_v1( + v1_endpoint: router_v1::Endpoint, + req: Request, + ) -> Result { + use router_v1::Endpoint; + + match v1_endpoint { + // GetClusterStatus semantics changed: + // info about local node is no longer returned + Endpoint::GetClusterHealth => { + Ok(AdminApiRequest::GetClusterHealth(GetClusterHealthRequest)) + } + Endpoint::ConnectClusterNodes => { + let req = parse_json_body::(req).await?; + Ok(AdminApiRequest::ConnectClusterNodes(req)) + } + + // Layout + Endpoint::GetClusterLayout => { + Ok(AdminApiRequest::GetClusterLayout(GetClusterLayoutRequest)) + } + // UpdateClusterLayout semantics changed + Endpoint::ApplyClusterLayout => { + let param = parse_json_body::(req).await?; + Ok(AdminApiRequest::ApplyClusterLayout(param)) + } + Endpoint::RevertClusterLayout => Ok(AdminApiRequest::RevertClusterLayout( + RevertClusterLayoutRequest, + )), + + // Keys + Endpoint::ListKeys => Ok(AdminApiRequest::ListKeys(ListKeysRequest)), + Endpoint::GetKeyInfo { + id, + search, + show_secret_key, + } => { + let show_secret_key = show_secret_key.map(|x| x == "true").unwrap_or(false); + Ok(AdminApiRequest::GetKeyInfo(GetKeyInfoRequest { + id, + search, + show_secret_key, + })) + } + Endpoint::CreateKey => { + let req = parse_json_body::(req).await?; + Ok(AdminApiRequest::CreateKey(req)) + } + Endpoint::ImportKey => { + let req = parse_json_body::(req).await?; + Ok(AdminApiRequest::ImportKey(req)) + } + Endpoint::UpdateKey { id } => { + let body = parse_json_body::(req).await?; + Ok(AdminApiRequest::UpdateKey(UpdateKeyRequest { id, body })) + } + + // DeleteKey semantics changed: + // - in v1/ : HTTP DELETE => HTTP 204 No Content + // - in v2/ : HTTP POST => HTTP 200 Ok + // Endpoint::DeleteKey { id } => Ok(AdminApiRequest::DeleteKey(DeleteKeyRequest { id })), + + // Buckets + Endpoint::ListBuckets => Ok(AdminApiRequest::ListBuckets(ListBucketsRequest)), + Endpoint::GetBucketInfo { id, global_alias } => { + Ok(AdminApiRequest::GetBucketInfo(GetBucketInfoRequest { + id, + global_alias, + search: None, + })) + } + Endpoint::CreateBucket => { + let req = parse_json_body::(req).await?; + Ok(AdminApiRequest::CreateBucket(req)) + } + + // DeleteBucket semantics changed:: + // - in v1/ : HTTP DELETE => HTTP 204 No Content + // - in v2/ : HTTP POST => HTTP 200 Ok + // Endpoint::DeleteBucket { id } => { + // Ok(AdminApiRequest::DeleteBucket(DeleteBucketRequest { id })) + // } + Endpoint::UpdateBucket { id } => { + let body = parse_json_body::(req).await?; + Ok(AdminApiRequest::UpdateBucket(UpdateBucketRequest { + id, + body, + })) + } + + // Bucket-key permissions + Endpoint::BucketAllowKey => { + let req = parse_json_body::(req).await?; + Ok(AdminApiRequest::AllowBucketKey(AllowBucketKeyRequest(req))) + } + Endpoint::BucketDenyKey => { + let req = parse_json_body::(req).await?; + Ok(AdminApiRequest::DenyBucketKey(DenyBucketKeyRequest(req))) + } + // Bucket aliasing + Endpoint::GlobalAliasBucket { id, alias } => { + Ok(AdminApiRequest::AddBucketAlias(AddBucketAliasRequest { + bucket_id: id, + alias: BucketAliasEnum::Global { + global_alias: alias, + }, + })) + } + Endpoint::GlobalUnaliasBucket { id, alias } => Ok(AdminApiRequest::RemoveBucketAlias( + RemoveBucketAliasRequest { + bucket_id: id, + alias: BucketAliasEnum::Global { + global_alias: alias, + }, + }, + )), + Endpoint::LocalAliasBucket { + id, + access_key_id, + alias, + } => Ok(AdminApiRequest::AddBucketAlias(AddBucketAliasRequest { + bucket_id: id, + alias: BucketAliasEnum::Local { + local_alias: alias, + access_key_id, + }, + })), + Endpoint::LocalUnaliasBucket { + id, + access_key_id, + alias, + } => Ok(AdminApiRequest::RemoveBucketAlias( + RemoveBucketAliasRequest { + bucket_id: id, + alias: BucketAliasEnum::Local { + local_alias: alias, + access_key_id, + }, + }, + )), + + // For endpoints that have different body content syntax, issue + // deprecation warning + _ => Err(Error::bad_request(format!( + "v1/ endpoint is no longer supported: {}", + v1_endpoint.name() + ))), + } + } + + /// Get the kind of authorization which is required to perform the operation. + pub fn authorization_type(&self) -> Authorization { + match self { + Self::Options(_) | Self::Health(_) | Self::CheckDomain(_) => Authorization::None, + Self::Metrics(_) => Authorization::MetricsToken, + _ => Authorization::AdminToken, + } + } +} + +generateQueryParameters! { + keywords: [], + fields: [ + "node" => node, + "domain" => domain, + "format" => format, + "id" => id, + "search" => search, + "globalAlias" => global_alias, + "alias" => alias, + "accessKeyId" => access_key_id, + "showSecretKey" => show_secret_key, + "bucketId" => bucket_id, + "key" => key + ] +} diff --git a/src/api/admin/special.rs b/src/api/admin/special.rs new file mode 100644 index 00000000..0a4e6705 --- /dev/null +++ b/src/api/admin/special.rs @@ -0,0 +1,173 @@ +use std::sync::Arc; + +use http::header::{ + ACCESS_CONTROL_ALLOW_HEADERS, ACCESS_CONTROL_ALLOW_METHODS, ACCESS_CONTROL_ALLOW_ORIGIN, ALLOW, +}; +use hyper::{Response, StatusCode}; + +#[cfg(feature = "metrics")] +use prometheus::{Encoder, TextEncoder}; + +use garage_model::garage::Garage; +use garage_rpc::system::ClusterHealthStatus; + +use garage_api_common::helpers::*; + +use crate::api::{CheckDomainRequest, HealthRequest, MetricsRequest, OptionsRequest}; +use crate::api_server::ResBody; +use crate::error::*; +use crate::{Admin, RequestHandler}; + +impl RequestHandler for OptionsRequest { + type Response = Response; + + async fn handle( + self, + _garage: &Arc, + _admin: &Admin, + ) -> Result, Error> { + Ok(Response::builder() + .status(StatusCode::OK) + .header(ALLOW, "OPTIONS,GET,POST") + .header(ACCESS_CONTROL_ALLOW_METHODS, "OPTIONS,GET,POST") + .header(ACCESS_CONTROL_ALLOW_HEADERS, "authorization,content-type") + .header(ACCESS_CONTROL_ALLOW_ORIGIN, "*") + .body(empty_body())?) + } +} + +impl RequestHandler for MetricsRequest { + type Response = Response; + + async fn handle( + self, + _garage: &Arc, + admin: &Admin, + ) -> Result, Error> { + #[cfg(feature = "metrics")] + { + use opentelemetry::trace::Tracer; + + let mut buffer = vec![]; + let encoder = TextEncoder::new(); + + let tracer = opentelemetry::global::tracer("garage"); + let metric_families = tracer.in_span("admin/gather_metrics", |_| { + admin.exporter.registry().gather() + }); + + encoder + .encode(&metric_families, &mut buffer) + .ok_or_internal_error("Could not serialize metrics")?; + + Ok(Response::builder() + .status(StatusCode::OK) + .header(http::header::CONTENT_TYPE, encoder.format_type()) + .body(bytes_body(buffer.into()))?) + } + #[cfg(not(feature = "metrics"))] + Err(Error::bad_request( + "Garage was built without the metrics feature".to_string(), + )) + } +} + +impl RequestHandler for HealthRequest { + type Response = Response; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result, Error> { + let health = garage.system.health(); + + let (status, status_str) = match health.status { + ClusterHealthStatus::Healthy => (StatusCode::OK, "Garage is fully operational"), + ClusterHealthStatus::Degraded => ( + StatusCode::OK, + "Garage is operational but some storage nodes are unavailable", + ), + ClusterHealthStatus::Unavailable => ( + StatusCode::SERVICE_UNAVAILABLE, + "Quorum is not available for some/all partitions, reads and writes will fail", + ), + }; + let status_str = format!( + "{}\nConsult the full health check API endpoint at /v2/GetClusterHealth for more details\n", + status_str + ); + + Ok(Response::builder() + .status(status) + .header(http::header::CONTENT_TYPE, "text/plain") + .body(string_body(status_str))?) + } +} + +impl RequestHandler for CheckDomainRequest { + type Response = Response; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result, Error> { + if check_domain(garage, &self.domain).await? { + Ok(Response::builder() + .status(StatusCode::OK) + .body(string_body(format!( + "Domain '{}' is managed by Garage", + self.domain + )))?) + } else { + Err(Error::bad_request(format!( + "Domain '{}' is not managed by Garage", + self.domain + ))) + } + } +} + +async fn check_domain(garage: &Arc, domain: &str) -> Result { + // Resolve bucket from domain name, inferring if the website must be activated for the + // domain to be valid. + let (bucket_name, must_check_website) = if let Some(bname) = garage + .config + .s3_api + .root_domain + .as_ref() + .and_then(|rd| host_to_bucket(domain, rd)) + { + (bname.to_string(), false) + } else if let Some(bname) = garage + .config + .s3_web + .as_ref() + .and_then(|sw| host_to_bucket(domain, sw.root_domain.as_str())) + { + (bname.to_string(), true) + } else { + (domain.to_string(), true) + }; + + let bucket = match garage + .bucket_helper() + .resolve_global_bucket_fast(&bucket_name)? + { + Some(b) => b, + None => return Ok(false), + }; + + if !must_check_website { + return Ok(true); + } + + let bucket_state = bucket.state.as_option().unwrap(); + let bucket_website_config = bucket_state.website_config.get(); + + match bucket_website_config { + Some(_v) => Ok(true), + None => Ok(false), + } +} diff --git a/src/api/admin/worker.rs b/src/api/admin/worker.rs new file mode 100644 index 00000000..b3f4537b --- /dev/null +++ b/src/api/admin/worker.rs @@ -0,0 +1,118 @@ +use std::collections::HashMap; +use std::sync::Arc; + +use garage_util::background::*; +use garage_util::time::now_msec; + +use garage_model::garage::Garage; + +use crate::api::*; +use crate::error::Error; +use crate::{Admin, RequestHandler}; + +impl RequestHandler for LocalListWorkersRequest { + type Response = LocalListWorkersResponse; + + async fn handle( + self, + _garage: &Arc, + admin: &Admin, + ) -> Result { + let workers = admin.background.get_worker_info(); + let info = workers + .into_iter() + .filter(|(_, w)| { + (!self.busy_only + || matches!(w.state, WorkerState::Busy | WorkerState::Throttled(_))) + && (!self.error_only || w.errors > 0) + }) + .map(|(id, w)| worker_info_to_api(id as u64, w)) + .collect::>(); + Ok(LocalListWorkersResponse(info)) + } +} + +impl RequestHandler for LocalGetWorkerInfoRequest { + type Response = LocalGetWorkerInfoResponse; + + async fn handle( + self, + _garage: &Arc, + admin: &Admin, + ) -> Result { + let info = admin + .background + .get_worker_info() + .get(&(self.id as usize)) + .ok_or(Error::NoSuchWorker(self.id))? + .clone(); + Ok(LocalGetWorkerInfoResponse(worker_info_to_api( + self.id, info, + ))) + } +} + +impl RequestHandler for LocalGetWorkerVariableRequest { + type Response = LocalGetWorkerVariableResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + let mut res = HashMap::new(); + if let Some(k) = self.variable { + res.insert(k.clone(), garage.bg_vars.get(&k)?); + } else { + let vars = garage.bg_vars.get_all(); + for (k, v) in vars.iter() { + res.insert(k.to_string(), v.to_string()); + } + } + Ok(LocalGetWorkerVariableResponse(res)) + } +} + +impl RequestHandler for LocalSetWorkerVariableRequest { + type Response = LocalSetWorkerVariableResponse; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result { + garage.bg_vars.set(&self.variable, &self.value)?; + + Ok(LocalSetWorkerVariableResponse { + variable: self.variable, + value: self.value, + }) + } +} + +// ---- helper functions ---- + +fn worker_info_to_api(id: u64, info: WorkerInfo) -> WorkerInfoResp { + WorkerInfoResp { + id, + name: info.name, + state: match info.state { + WorkerState::Busy => WorkerStateResp::Busy, + WorkerState::Throttled(t) => WorkerStateResp::Throttled { duration_secs: t }, + WorkerState::Idle => WorkerStateResp::Idle, + WorkerState::Done => WorkerStateResp::Done, + }, + errors: info.errors as u64, + consecutive_errors: info.consecutive_errors as u64, + last_error: info.last_error.map(|(message, t)| WorkerLastError { + message, + secs_ago: now_msec().saturating_sub(t) / 1000, + }), + + tranquility: info.status.tranquility, + progress: info.status.progress, + queue_length: info.status.queue_length, + persistent_errors: info.status.persistent_errors, + freeform: info.status.freeform, + } +} diff --git a/src/api/common/Cargo.toml b/src/api/common/Cargo.toml index df01d59a..ba38c63b 100644 --- a/src/api/common/Cargo.toml +++ b/src/api/common/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "garage_api_common" -version = "1.3.1" +version = "2.2.0" authors = ["Alex Auvolat "] edition = "2018" license = "AGPL-3.0" @@ -21,8 +21,7 @@ garage_util.workspace = true base64.workspace = true bytes.workspace = true chrono.workspace = true -crc32fast.workspace = true -crc32c.workspace = true +crc-fast.workspace = true crypto-common.workspace = true thiserror.workspace = true hex.workspace = true diff --git a/src/api/common/common_error.rs b/src/api/common/common_error.rs index e596a6e9..e110b1d6 100644 --- a/src/api/common/common_error.rs +++ b/src/api/common/common_error.rs @@ -150,7 +150,7 @@ impl TryFrom for CommonError { pub fn pass_helper_error(err: HelperError) -> CommonError { match CommonError::try_from(err) { Ok(e) => e, - Err(e) => panic!("Helper error `{}` should hot have happenned here", e), + Err(e) => panic!("Helper error `{}` should hot have happened here", e), } } diff --git a/src/api/common/cors.rs b/src/api/common/cors.rs index 09b55c13..6f524bf4 100644 --- a/src/api/common/cors.rs +++ b/src/api/common/cors.rs @@ -9,9 +9,7 @@ use hyper::{body::Body, body::Incoming as IncomingBody, Request, Response, Statu use garage_model::bucket_table::{BucketParams, CorsRule as GarageCorsRule}; use garage_model::garage::Garage; -use crate::common_error::{ - helper_error_as_internal, CommonError, OkOrBadRequest, OkOrInternalError, -}; +use crate::common_error::{CommonError, OkOrBadRequest, OkOrInternalError}; use crate::helpers::*; pub fn find_matching_cors_rule<'a, B>( @@ -76,7 +74,7 @@ pub fn add_cors_headers( Ok(()) } -pub async fn handle_options_api( +pub fn handle_options_api( garage: Arc, req: &Request, bucket_name: Option, @@ -90,19 +88,11 @@ pub async fn handle_options_api( // the same name, its CORS rules won't be applied // and will be shadowed by the rules of the globally // existing bucket (but this is inevitable because - // OPTIONS calls are not auhtenticated). + // OPTIONS calls are not authenticated). if let Some(bn) = bucket_name { let helper = garage.bucket_helper(); - let bucket_id = helper - .resolve_global_bucket_name(&bn) - .await - .map_err(helper_error_as_internal)?; - if let Some(id) = bucket_id { - let bucket = garage - .bucket_helper() - .get_existing_bucket(id) - .await - .map_err(helper_error_as_internal)?; + let bucket_opt = helper.resolve_global_bucket_fast(&bn)?; + if let Some(bucket) = bucket_opt { let bucket_params = bucket.state.into_option().unwrap(); handle_options_for_bucket(req, &bucket_params) } else { diff --git a/src/api/common/generic_server.rs b/src/api/common/generic_server.rs index 3f14c07d..181b6231 100644 --- a/src/api/common/generic_server.rs +++ b/src/api/common/generic_server.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::convert::Infallible; use std::fs::{self, Permissions}; use std::os::unix::fs::PermissionsExt; @@ -35,7 +36,7 @@ use garage_util::socket_address::UnixOrTCPSocketAddress; use crate::helpers::{BoxBody, ErrorBody}; pub trait ApiEndpoint: Send + Sync + 'static { - fn name(&self) -> &'static str; + fn name(&self) -> Cow<'static, str>; fn add_span_attributes(&self, span: SpanRef<'_>); } @@ -153,7 +154,7 @@ impl ApiServer { { format!("{forwarded_for_ip_addr} (via {addr})") } else { - format!("{addr}") + addr }; // we only do this to log the access key, so we can discard any error let key = self diff --git a/src/api/common/router_macros.rs b/src/api/common/router_macros.rs index d9fe86db..79749955 100644 --- a/src/api/common/router_macros.rs +++ b/src/api/common/router_macros.rs @@ -45,6 +45,83 @@ macro_rules! router_match { } } }}; + (@gen_path_parser_v2 ($method:expr, $reqpath:expr, $pathprefix:literal, $query:expr, $req:expr) + [ + $(@special $spec_meth:ident $spec_path:pat => $spec_api:ident $spec_params:tt,)* + $($meth:ident $api:ident $params:tt,)* + ]) => {{ + { + #[allow(unused_parens)] + match ($method, $reqpath) { + $( + (&Method::$spec_meth, $spec_path) => AdminApiRequest::$spec_api ( + router_match!(@@gen_parse_request $spec_api, $spec_params, $query, $req) + ), + )* + $( + (&Method::$meth, concat!($pathprefix, stringify!($api))) + => AdminApiRequest::$api ( + router_match!(@@gen_parse_request $api, $params, $query, $req) + ), + )* + (m, p) => { + return Err(Error::bad_request(format!( + "Unknown API endpoint: {} {}", + m, p + ))) + } + } + } + }}; + (@@gen_parse_request $api:ident, (), $query: expr, $req:expr) => {{ + paste!( + [< $api Request >] + ) + }}; + (@@gen_parse_request $api:ident, (body), $query: expr, $req:expr) => {{ + paste!({ + parse_json_body::< [<$api Request>], _, Error>($req).await? + }) + }}; + (@@gen_parse_request $api:ident, (admin_token), $query: expr, $req:expr) => {{ + paste!({ + let auth_header = $req.headers() + .get(hyper::header::AUTHORIZATION) + .ok_or_else(|| Error::bad_request("Missing Authorization header"))? + .to_str() + .map_err(|_| Error::bad_request("Invalid Authorization header"))?; + + let admin_token = auth_header.strip_prefix("Bearer ") + .ok_or_else(|| Error::bad_request("Authorization header must be Bearer token"))? + .to_string(); + + [< $api Request >] { admin_token } + }) + }}; + (@@gen_parse_request $api:ident, (body_field, $($conv:ident $(($conv_arg:expr))? :: $param:ident),*), $query: expr, $req:expr) + => + {{ + paste!({ + let body = parse_json_body::< [<$api RequestBody>], _, Error>($req).await?; + [< $api Request >] { + body, + $( + $param: router_match!(@@parse_param $query, $conv $(($conv_arg))?, $param), + )+ + } + }) + }}; + (@@gen_parse_request $api:ident, ($($conv:ident $(($conv_arg:expr))? :: $param:ident),*), $query: expr, $req:expr) + => + {{ + paste!({ + [< $api Request >] { + $( + $param: router_match!(@@parse_param $query, $conv $(($conv_arg))?, $param), + )+ + } + }) + }}; (@gen_parser ($keyword:expr, $key:ident, $query:expr, $header:expr), key: [$($kw_k:ident $(if $required_k:ident)? $(header $header_k:expr)? => $api_k:ident $(($($conv_k:ident :: $param_k:ident),*))?,)*], no_key: [$($kw_nk:ident $(if $required_nk:ident)? $(if_header $header_nk:expr)? => $api_nk:ident $(($($conv_nk:ident :: $param_nk:ident),*))?,)*]) => {{ @@ -79,13 +156,19 @@ macro_rules! router_match { } }}; + (@@parse_param $query:expr, default, $param:ident) => {{ + Default::default() + }}; (@@parse_param $query:expr, query_opt, $param:ident) => {{ // extract optional query parameter $query.$param.take().map(|param| param.into_owned()) }}; (@@parse_param $query:expr, query, $param:ident) => {{ // extract mendatory query parameter - $query.$param.take().ok_or_bad_request("Missing argument for endpoint")?.into_owned() + $query.$param.take() + .ok_or_bad_request( + format!("Missing argument `{}` for endpoint", stringify!($param)) + )?.into_owned() }}; (@@parse_param $query:expr, opt_parse, $param:ident) => {{ // extract and parse optional query parameter @@ -99,10 +182,22 @@ macro_rules! router_match { (@@parse_param $query:expr, parse, $param:ident) => {{ // extract and parse mandatory query parameter // both missing and un-parseable parameters are reported as errors - $query.$param.take().ok_or_bad_request("Missing argument for endpoint")? + $query.$param.take() + .ok_or_bad_request( + format!("Missing argument `{}` for endpoint", stringify!($param)) + )? .parse() .map_err(|_| Error::bad_request("Failed to parse query parameter"))? }}; + (@@parse_param $query:expr, parse_default($default:expr), $param:ident) => {{ + // extract and parse optional query parameter + // using provided value as default if parameter is missing + $query.$param.take().map(|x| x + .parse() + .map_err(|_| Error::bad_request("Failed to parse query parameter"))) + .transpose()? + .unwrap_or($default) + }}; (@func $(#[$doc:meta])* pub enum Endpoint { @@ -187,6 +282,7 @@ macro_rules! generateQueryParameters { }, )* $( + // FIXME: remove if !v.is_empty() ? $f_param => if !v.is_empty() { if res.$f_name.replace(v).is_some() { return Err(Error::bad_request(format!( diff --git a/src/api/common/signature/checksum.rs b/src/api/common/signature/checksum.rs index 3c5e7c53..88d72347 100644 --- a/src/api/common/signature/checksum.rs +++ b/src/api/common/signature/checksum.rs @@ -1,9 +1,7 @@ -use std::convert::{TryFrom, TryInto}; -use std::hash::Hasher; +use std::convert::TryInto; use base64::prelude::*; -use crc32c::Crc32cHasher as Crc32c; -use crc32fast::Hasher as Crc32; +use crc_fast::{CrcAlgorithm, Digest as CrcDigest}; use md5::{Digest, Md5}; use sha1::Sha1; use sha2::Sha256; @@ -21,17 +19,40 @@ pub const CONTENT_MD5: HeaderName = HeaderName::from_static("content-md5"); pub const X_AMZ_CHECKSUM_ALGORITHM: HeaderName = HeaderName::from_static("x-amz-checksum-algorithm"); pub const X_AMZ_CHECKSUM_MODE: HeaderName = HeaderName::from_static("x-amz-checksum-mode"); +pub const X_AMZ_CHECKSUM_TYPE: HeaderName = HeaderName::from_static("x-amz-checksum-type"); pub const X_AMZ_CHECKSUM_CRC32: HeaderName = HeaderName::from_static("x-amz-checksum-crc32"); pub const X_AMZ_CHECKSUM_CRC32C: HeaderName = HeaderName::from_static("x-amz-checksum-crc32c"); +pub const X_AMZ_CHECKSUM_CRC64NVME: HeaderName = + HeaderName::from_static("x-amz-checksum-crc64nvme"); pub const X_AMZ_CHECKSUM_SHA1: HeaderName = HeaderName::from_static("x-amz-checksum-sha1"); pub const X_AMZ_CHECKSUM_SHA256: HeaderName = HeaderName::from_static("x-amz-checksum-sha256"); +// Values for x-amz-checksum-type +pub const COMPOSITE: &str = "COMPOSITE"; +pub const FULL_OBJECT: &str = "FULL_OBJECT"; + pub type Crc32Checksum = [u8; 4]; pub type Crc32cChecksum = [u8; 4]; +pub type Crc64NvmeChecksum = [u8; 8]; pub type Md5Checksum = [u8; 16]; pub type Sha1Checksum = [u8; 20]; pub type Sha256Checksum = [u8; 32]; +// -- MAP OF CRC ALGORITHMS : +// CRC32 -> CrcAlgorithm::Crc32IsoHdlc +// CRC32C -> CrcAlgorithm::Crc32Iscsi +// CRC64NVME -> CrcAlgorithm::Crc64Nvme + +pub fn new_crc32() -> CrcDigest { + CrcDigest::new(CrcAlgorithm::Crc32IsoHdlc) +} +pub fn new_crc32c() -> CrcDigest { + CrcDigest::new(CrcAlgorithm::Crc32Iscsi) +} +pub fn new_crc64nvme() -> CrcDigest { + CrcDigest::new(CrcAlgorithm::Crc64Nvme) +} + #[derive(Debug, Default, Clone)] pub struct ExpectedChecksums { // base64-encoded md5 (content-md5 header) @@ -42,9 +63,11 @@ pub struct ExpectedChecksums { pub extra: Option, } +#[derive(Default)] pub struct Checksummer { - pub crc32: Option, - pub crc32c: Option, + pub crc32: Option, + pub crc32c: Option, + pub crc64nvme: Option, pub md5: Option, pub sha1: Option, pub sha256: Option, @@ -54,6 +77,7 @@ pub struct Checksummer { pub struct Checksums { pub crc32: Option, pub crc32c: Option, + pub crc64nvme: Option, pub md5: Option, pub sha1: Option, pub sha256: Option, @@ -61,13 +85,7 @@ pub struct Checksums { impl Checksummer { pub fn new() -> Self { - Self { - crc32: None, - crc32c: None, - md5: None, - sha1: None, - sha256: None, - } + Default::default() } pub fn init(expected: &ExpectedChecksums, add_md5: bool) -> Self { @@ -91,23 +109,29 @@ impl Checksummer { self.sha256 = Some(Sha256::new()); } if matches!(&expected.extra, Some(ChecksumValue::Crc32(_))) { - self.crc32 = Some(Crc32::new()); + self.crc32 = Some(new_crc32()); } if matches!(&expected.extra, Some(ChecksumValue::Crc32c(_))) { - self.crc32c = Some(Crc32c::default()); + self.crc32c = Some(new_crc32c()); + } + if matches!(&expected.extra, Some(ChecksumValue::Crc64Nvme(_))) { + self.crc64nvme = Some(new_crc64nvme()); } if matches!(&expected.extra, Some(ChecksumValue::Sha1(_))) { self.sha1 = Some(Sha1::new()); } } - pub fn add(mut self, algo: Option) -> Self { + pub fn add_algorithm(mut self, algo: Option) -> Self { match algo { Some(ChecksumAlgorithm::Crc32) => { - self.crc32 = Some(Crc32::new()); + self.crc32 = Some(new_crc32()); } Some(ChecksumAlgorithm::Crc32c) => { - self.crc32c = Some(Crc32c::default()); + self.crc32c = Some(new_crc32c()); + } + Some(ChecksumAlgorithm::Crc64Nvme) => { + self.crc64nvme = Some(new_crc64nvme()); } Some(ChecksumAlgorithm::Sha1) => { self.sha1 = Some(Sha1::new()); @@ -125,7 +149,10 @@ impl Checksummer { crc32.update(bytes); } if let Some(crc32c) = &mut self.crc32c { - crc32c.write(bytes); + crc32c.update(bytes); + } + if let Some(crc64nvme) = &mut self.crc64nvme { + crc64nvme.update(bytes); } if let Some(md5) = &mut self.md5 { md5.update(bytes); @@ -140,10 +167,9 @@ impl Checksummer { pub fn finalize(self) -> Checksums { Checksums { - crc32: self.crc32.map(|x| u32::to_be_bytes(x.finalize())), - crc32c: self - .crc32c - .map(|x| u32::to_be_bytes(u32::try_from(x.finish()).unwrap())), + crc32: self.crc32.map(|x| u32::to_be_bytes(x.finalize() as u32)), + crc32c: self.crc32c.map(|x| u32::to_be_bytes(x.finalize() as u32)), + crc64nvme: self.crc64nvme.map(|x| u64::to_be_bytes(x.finalize())), md5: self.md5.map(|x| x.finalize()[..].try_into().unwrap()), sha1: self.sha1.map(|x| x.finalize()[..].try_into().unwrap()), sha256: self.sha256.map(|x| x.finalize()[..].try_into().unwrap()), @@ -155,7 +181,7 @@ impl Checksums { pub fn verify(&self, expected: &ExpectedChecksums) -> Result<(), Error> { if let Some(expected_md5) = &expected.md5 { match self.md5 { - Some(md5) if BASE64_STANDARD.encode(&md5) == expected_md5.trim_matches('"') => (), + Some(md5) if BASE64_STANDARD.encode(md5) == expected_md5.trim_matches('"') => (), _ => { return Err(Error::InvalidDigest( "MD5 checksum verification failed (from content-md5)".into(), @@ -175,10 +201,11 @@ impl Checksums { } if let Some(extra) = expected.extra { let algo = extra.algorithm(); - if self.extract(Some(algo)) != Some(extra) { + let calculated = self.extract(Some(algo)); + if calculated != Some(extra) { return Err(Error::InvalidDigest(format!( - "Failed to validate checksum for algorithm {:?}", - algo + "Failed to validate checksum for algorithm {:?}: calculated {:?}, expected {:?}", + algo, calculated, extra ))); } } @@ -190,6 +217,9 @@ impl Checksums { None => None, Some(ChecksumAlgorithm::Crc32) => Some(ChecksumValue::Crc32(self.crc32.unwrap())), Some(ChecksumAlgorithm::Crc32c) => Some(ChecksumValue::Crc32c(self.crc32c.unwrap())), + Some(ChecksumAlgorithm::Crc64Nvme) => { + Some(ChecksumValue::Crc64Nvme(self.crc64nvme.unwrap())) + } Some(ChecksumAlgorithm::Sha1) => Some(ChecksumValue::Sha1(self.sha1.unwrap())), Some(ChecksumAlgorithm::Sha256) => Some(ChecksumValue::Sha256(self.sha256.unwrap())), } @@ -202,6 +232,7 @@ pub fn parse_checksum_algorithm(algo: &str) -> Result match algo { "CRC32" => Ok(ChecksumAlgorithm::Crc32), "CRC32C" => Ok(ChecksumAlgorithm::Crc32c), + "CRC64NVME" => Ok(ChecksumAlgorithm::Crc64Nvme), "SHA1" => Ok(ChecksumAlgorithm::Sha1), "SHA256" => Ok(ChecksumAlgorithm::Sha256), _ => Err(Error::bad_request("invalid checksum algorithm")), @@ -225,6 +256,7 @@ pub fn request_trailer_checksum_algorithm( None => Ok(None), Some(x) if x == X_AMZ_CHECKSUM_CRC32 => Ok(Some(ChecksumAlgorithm::Crc32)), Some(x) if x == X_AMZ_CHECKSUM_CRC32C => Ok(Some(ChecksumAlgorithm::Crc32c)), + Some(x) if x == X_AMZ_CHECKSUM_CRC64NVME => Ok(Some(ChecksumAlgorithm::Crc64Nvme)), Some(x) if x == X_AMZ_CHECKSUM_SHA1 => Ok(Some(ChecksumAlgorithm::Sha1)), Some(x) if x == X_AMZ_CHECKSUM_SHA256 => Ok(Some(ChecksumAlgorithm::Sha256)), _ => Err(Error::bad_request("invalid checksum algorithm")), @@ -243,6 +275,12 @@ pub fn request_checksum_value( if headers.contains_key(X_AMZ_CHECKSUM_CRC32C) { ret.push(extract_checksum_value(headers, ChecksumAlgorithm::Crc32c)?); } + if headers.contains_key(X_AMZ_CHECKSUM_CRC64NVME) { + ret.push(extract_checksum_value( + headers, + ChecksumAlgorithm::Crc64Nvme, + )?); + } if headers.contains_key(X_AMZ_CHECKSUM_SHA1) { ret.push(extract_checksum_value(headers, ChecksumAlgorithm::Sha1)?); } @@ -268,7 +306,7 @@ pub fn extract_checksum_value( ChecksumAlgorithm::Crc32 => { let crc32 = headers .get(X_AMZ_CHECKSUM_CRC32) - .and_then(|x| BASE64_STANDARD.decode(&x).ok()) + .and_then(|x| BASE64_STANDARD.decode(x).ok()) .and_then(|x| x.try_into().ok()) .ok_or_bad_request("invalid x-amz-checksum-crc32 header")?; Ok(ChecksumValue::Crc32(crc32)) @@ -276,15 +314,23 @@ pub fn extract_checksum_value( ChecksumAlgorithm::Crc32c => { let crc32c = headers .get(X_AMZ_CHECKSUM_CRC32C) - .and_then(|x| BASE64_STANDARD.decode(&x).ok()) + .and_then(|x| BASE64_STANDARD.decode(x).ok()) .and_then(|x| x.try_into().ok()) .ok_or_bad_request("invalid x-amz-checksum-crc32c header")?; Ok(ChecksumValue::Crc32c(crc32c)) } + ChecksumAlgorithm::Crc64Nvme => { + let crc64nvme = headers + .get(X_AMZ_CHECKSUM_CRC64NVME) + .and_then(|x| BASE64_STANDARD.decode(x).ok()) + .and_then(|x| x.try_into().ok()) + .ok_or_bad_request("invalid x-amz-checksum-crc64nvme header")?; + Ok(ChecksumValue::Crc64Nvme(crc64nvme)) + } ChecksumAlgorithm::Sha1 => { let sha1 = headers .get(X_AMZ_CHECKSUM_SHA1) - .and_then(|x| BASE64_STANDARD.decode(&x).ok()) + .and_then(|x| BASE64_STANDARD.decode(x).ok()) .and_then(|x| x.try_into().ok()) .ok_or_bad_request("invalid x-amz-checksum-sha1 header")?; Ok(ChecksumValue::Sha1(sha1)) @@ -292,7 +338,7 @@ pub fn extract_checksum_value( ChecksumAlgorithm::Sha256 => { let sha256 = headers .get(X_AMZ_CHECKSUM_SHA256) - .and_then(|x| BASE64_STANDARD.decode(&x).ok()) + .and_then(|x| BASE64_STANDARD.decode(x).ok()) .and_then(|x| x.try_into().ok()) .ok_or_bad_request("invalid x-amz-checksum-sha256 header")?; Ok(ChecksumValue::Sha256(sha256)) @@ -306,16 +352,19 @@ pub fn add_checksum_response_headers( ) -> http::response::Builder { match checksum { Some(ChecksumValue::Crc32(crc32)) => { - resp = resp.header(X_AMZ_CHECKSUM_CRC32, BASE64_STANDARD.encode(&crc32)); + resp = resp.header(X_AMZ_CHECKSUM_CRC32, BASE64_STANDARD.encode(crc32)); } Some(ChecksumValue::Crc32c(crc32c)) => { - resp = resp.header(X_AMZ_CHECKSUM_CRC32C, BASE64_STANDARD.encode(&crc32c)); + resp = resp.header(X_AMZ_CHECKSUM_CRC32C, BASE64_STANDARD.encode(crc32c)); + } + Some(ChecksumValue::Crc64Nvme(crc64nvme)) => { + resp = resp.header(X_AMZ_CHECKSUM_CRC64NVME, BASE64_STANDARD.encode(crc64nvme)); } Some(ChecksumValue::Sha1(sha1)) => { - resp = resp.header(X_AMZ_CHECKSUM_SHA1, BASE64_STANDARD.encode(&sha1)); + resp = resp.header(X_AMZ_CHECKSUM_SHA1, BASE64_STANDARD.encode(sha1)); } Some(ChecksumValue::Sha256(sha256)) => { - resp = resp.header(X_AMZ_CHECKSUM_SHA256, BASE64_STANDARD.encode(&sha256)); + resp = resp.header(X_AMZ_CHECKSUM_SHA256, BASE64_STANDARD.encode(sha256)); } None => (), } diff --git a/src/api/common/signature/mod.rs b/src/api/common/signature/mod.rs index 50fbd304..bae63d1b 100644 --- a/src/api/common/signature/mod.rs +++ b/src/api/common/signature/mod.rs @@ -64,12 +64,12 @@ pub struct VerifiedRequest { pub content_sha256_header: ContentSha256Header, } -pub async fn verify_request( +pub fn verify_request( garage: &Garage, mut req: Request, service: &'static str, ) -> Result { - let checked_signature = payload::check_payload_signature(&garage, &mut req, service).await?; + let checked_signature = payload::check_payload_signature(garage, &mut req, service)?; let request = streaming::parse_streaming_body( req, diff --git a/src/api/common/signature/payload.rs b/src/api/common/signature/payload.rs index 3939da19..67e58811 100644 --- a/src/api/common/signature/payload.rs +++ b/src/api/common/signature/payload.rs @@ -9,6 +9,7 @@ use sha2::{Digest, Sha256}; use garage_table::*; use garage_util::data::Hash; +use garage_util::time::now_msec; use garage_model::garage::Garage; use garage_model::key_table::*; @@ -32,7 +33,7 @@ pub struct CheckedSignature { pub signature_header: Option, } -pub async fn check_payload_signature( +pub fn check_payload_signature( garage: &Garage, request: &mut Request, service: &'static str, @@ -43,9 +44,9 @@ pub async fn check_payload_signature( // We check for presigned-URL-style authentication first, because // the browser or something else could inject an Authorization header // that is totally unrelated to AWS signatures. - check_presigned_signature(garage, service, request, query).await + check_presigned_signature(garage, service, request, query) } else if request.headers().contains_key(AUTHORIZATION) { - check_standard_signature(garage, service, request, query).await + check_standard_signature(garage, service, request, query) } else { // Unsigned (anonymous) request let content_sha256 = request @@ -93,7 +94,7 @@ fn parse_x_amz_content_sha256(header: Option<&str>) -> Result, @@ -128,7 +129,7 @@ async fn check_standard_signature( trace!("canonical request:\n{}", canonical_request); trace!("string to sign:\n{}", string_to_sign); - let key = verify_v4(garage, service, &authorization, string_to_sign.as_bytes()).await?; + let key = verify_v4(garage, service, &authorization, string_to_sign.as_bytes())?; let content_sha256_header = parse_x_amz_content_sha256(Some(&authorization.content_sha256))?; @@ -139,7 +140,7 @@ async fn check_standard_signature( }) } -async fn check_presigned_signature( +fn check_presigned_signature( garage: &Garage, service: &'static str, request: &mut Request, @@ -178,7 +179,7 @@ async fn check_presigned_signature( trace!("canonical request (presigned url):\n{}", canonical_request); trace!("string to sign (presigned url):\n{}", string_to_sign); - let key = verify_v4(garage, service, &authorization, string_to_sign.as_bytes()).await?; + let key = verify_v4(garage, service, &authorization, string_to_sign.as_bytes())?; // In the page on presigned URLs, AWS specifies that if a signed query // parameter and a signed header of the same name have different values, @@ -186,7 +187,7 @@ async fn check_presigned_signature( let headers_mut = request.headers_mut(); for (name, value) in query.iter() { if let Some(existing) = headers_mut.get(name) { - if signed_headers.contains(&name) && existing.as_bytes() != value.value.as_bytes() { + if signed_headers.contains(name) && existing.as_bytes() != value.value.as_bytes() { return Err(Error::bad_request(format!( "Conflicting values for `{}` in query parameters and request headers", name @@ -268,20 +269,24 @@ fn verify_signed_headers(headers: &HeaderMap, signed_headers: &[HeaderName]) -> return Err(Error::bad_request("Header `Host` should be signed")); } for (name, _) in headers.iter() { - // Enforce signature of all x-amz-* headers, except x-amz-content-sh256 - // because it is included in the canonical request in all cases - if name.as_str().starts_with("x-amz-") && name != X_AMZ_CONTENT_SHA256 { - if !signed_headers.contains(name) { - return Err(Error::bad_request(format!( - "Header `{}` should be signed", - name - ))); - } + // Enforce signature of some headers + if header_should_be_signed(name) && !signed_headers.contains(name) { + return Err(Error::bad_request(format!( + "Header `{}` should be signed", + name + ))); } } Ok(()) } +// Indicates whether a header is required to be signed +fn header_should_be_signed(name: &HeaderName) -> bool { + // Enforce signature of all x-amz-* headers, except x-amz-content-sh256 + // because it is included in the canonical request in all cases + name.as_str().starts_with("x-amz-") && name != X_AMZ_CONTENT_SHA256 +} + pub fn string_to_sign(datetime: &DateTime, scope_string: &str, canonical_req: &str) -> String { let mut hasher = Sha256::default(); hasher.update(canonical_req.as_bytes()); @@ -342,7 +347,7 @@ pub fn canonical_request( let canonical_query_string = { let mut items = Vec::with_capacity(query.len()); for (_, QueryValue { key, value }) in query.iter() { - items.push(uri_encode(&key, true) + "=" + &uri_encode(&value, true)); + items.push(uri_encode(key, true) + "=" + &uri_encode(value, true)); } items.sort(); items.join("&") @@ -380,7 +385,7 @@ pub fn parse_date(date: &str) -> Result, Error> { Ok(Utc.from_utc_datetime(&date)) } -pub async fn verify_v4( +pub fn verify_v4( garage: &Garage, service: &str, auth: &Authorization, @@ -393,12 +398,18 @@ pub async fn verify_v4( let key = garage .key_table - .get(&EmptyKey, &auth.key_id) - .await? + .get_local(&EmptyKey, &auth.key_id)? .filter(|k| !k.state.is_deleted()) .ok_or_else(|| Error::forbidden(format!("No such key: {}", &auth.key_id)))?; let key_p = key.params().unwrap(); + if key_p.is_expired(now_msec()) { + return Err(Error::forbidden(format!( + "Access key {} has expired", + key.key_id + ))); + } + let mut hmac = signing_hmac( &auth.date, &key_p.secret_key, diff --git a/src/api/common/signature/streaming.rs b/src/api/common/signature/streaming.rs index 64362727..06b551eb 100644 --- a/src/api/common/signature/streaming.rs +++ b/src/api/common/signature/streaming.rs @@ -60,7 +60,7 @@ pub fn parse_streaming_body( request_trailer_checksum_algorithm(req.headers())? .ok_or_bad_request("Missing x-amz-trailer header")?, ); - checksummer = checksummer.add(algo); + checksummer = checksummer.add_algorithm(algo); algo } else { None diff --git a/src/api/k2v/Cargo.toml b/src/api/k2v/Cargo.toml index 28f74ea3..d99c3411 100644 --- a/src/api/k2v/Cargo.toml +++ b/src/api/k2v/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "garage_api_k2v" -version = "1.3.1" +version = "2.2.0" authors = ["Alex Auvolat "] edition = "2018" license = "AGPL-3.0" @@ -14,9 +14,9 @@ path = "lib.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -garage_model = { workspace = true, features = [ "k2v" ] } +garage_model = { workspace = true, features = ["k2v"] } garage_table.workspace = true -garage_util = { workspace = true, features = [ "k2v" ] } +garage_util = { workspace = true, features = ["k2v"] } garage_api_common.workspace = true base64.workspace = true diff --git a/src/api/k2v/api_server.rs b/src/api/k2v/api_server.rs index 8e10d9a6..8c89c35d 100644 --- a/src/api/k2v/api_server.rs +++ b/src/api/k2v/api_server.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::sync::Arc; use hyper::{body::Incoming as IncomingBody, Method, Request, Response}; @@ -76,25 +77,19 @@ impl ApiHandler for K2VApiServer { // The OPTIONS method is processed early, before we even check for an API key if let Endpoint::Options = endpoint { let options_res = handle_options_api(garage, &req, Some(bucket_name)) - .await .ok_or_bad_request("Error handling OPTIONS")?; return Ok(options_res.map(|_empty_body: EmptyBody| empty_body())); } - let verified_request = verify_request(&garage, req, "k2v").await?; + let verified_request = verify_request(&garage, req, "k2v")?; let req = verified_request.request; let api_key = verified_request.access_key; - let bucket_id = garage - .bucket_helper() - .resolve_bucket(&bucket_name, &api_key) - .await - .map_err(pass_helper_error)?; let bucket = garage .bucket_helper() - .get_existing_bucket(bucket_id) - .await - .map_err(helper_error_as_internal)?; + .resolve_bucket_fast(&bucket_name, &api_key) + .map_err(pass_helper_error)?; + let bucket_id = bucket.id; let bucket_params = bucket.state.into_option().unwrap(); let allowed = match endpoint.authorization_type() { @@ -185,8 +180,8 @@ impl ApiHandler for K2VApiServer { } impl ApiEndpoint for K2VApiEndpoint { - fn name(&self) -> &'static str { - self.endpoint.name() + fn name(&self) -> Cow<'static, str> { + Cow::Borrowed(self.endpoint.name()) } fn add_span_attributes(&self, span: SpanRef<'_>) { diff --git a/src/api/k2v/batch.rs b/src/api/k2v/batch.rs index 7a03d836..5f38cce1 100644 --- a/src/api/k2v/batch.rs +++ b/src/api/k2v/batch.rs @@ -61,7 +61,7 @@ pub async fn handle_read_batch( resps.push(resp?); } - Ok(json_ok_response(&resps)?) + json_ok_response(&resps) } async fn handle_read_batch_query( @@ -155,7 +155,7 @@ pub async fn handle_delete_batch( resps.push(resp?); } - Ok(json_ok_response(&resps)?) + json_ok_response(&resps) } async fn handle_delete_batch_query( diff --git a/src/api/k2v/error.rs b/src/api/k2v/error.rs index f1937fe5..7ce3b073 100644 --- a/src/api/k2v/error.rs +++ b/src/api/k2v/error.rs @@ -2,8 +2,8 @@ use hyper::header::HeaderValue; use hyper::{HeaderMap, StatusCode}; use thiserror::Error; +pub(crate) use garage_api_common::common_error::pass_helper_error; use garage_api_common::common_error::{commonErrorDerivative, CommonError}; -pub(crate) use garage_api_common::common_error::{helper_error_as_internal, pass_helper_error}; pub use garage_api_common::common_error::{ CommonErrorDerivative, OkOrBadRequest, OkOrInternalError, }; @@ -99,6 +99,7 @@ impl ApiError for Error { fn add_http_headers(&self, header_map: &mut HeaderMap) { use hyper::header; header_map.append(header::CONTENT_TYPE, "application/json".parse().unwrap()); + header_map.append(header::ACCESS_CONTROL_ALLOW_ORIGIN, "*".parse().unwrap()); } fn http_body(&self, garage_region: &str, path: &str) -> ErrorBody { diff --git a/src/api/k2v/index.rs b/src/api/k2v/index.rs index fbfaad98..5188c32f 100644 --- a/src/api/k2v/index.rs +++ b/src/api/k2v/index.rs @@ -28,12 +28,12 @@ pub async fn handle_read_index( let node_id_vec = garage .system .cluster_layout() - .all_nongateway_nodes() + .all_nongateway_nodes()? .to_vec(); let (partition_keys, more, next_start) = read_range( &garage.k2v.counter_table.table, - &bucket_id, + bucket_id, &prefix, &start, &end, @@ -66,7 +66,7 @@ pub async fn handle_read_index( bytes: *vals.get(&s_bytes).unwrap_or(&0), } }) - .collect::>(), + .collect(), more, next_start, }; diff --git a/src/api/s3/Cargo.toml b/src/api/s3/Cargo.toml index 88630866..e12df677 100644 --- a/src/api/s3/Cargo.toml +++ b/src/api/s3/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "garage_api_s3" -version = "1.3.1" +version = "2.2.0" authors = ["Alex Auvolat "] edition = "2018" license = "AGPL-3.0" @@ -27,10 +27,10 @@ async-compression.workspace = true base64.workspace = true bytes.workspace = true chrono.workspace = true -crc32fast.workspace = true -crc32c.workspace = true +crc-fast.workspace = true thiserror.workspace = true hex.workspace = true +hmac.workspace = true tracing.workspace = true md-5.workspace = true pin-project.workspace = true diff --git a/src/api/s3/api_server.rs b/src/api/s3/api_server.rs index acb0cf56..fba32ec9 100644 --- a/src/api/s3/api_server.rs +++ b/src/api/s3/api_server.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::sync::Arc; use hyper::header; @@ -117,11 +118,11 @@ impl ApiHandler for S3ApiServer { return handle_post_object(garage, req, bucket_name.unwrap()).await; } if let Endpoint::Options = endpoint { - let options_res = handle_options_api(garage, &req, bucket_name).await?; + let options_res = handle_options_api(garage, &req, bucket_name)?; return Ok(options_res.map(|_empty_body: EmptyBody| empty_body())); } - let verified_request = verify_request(&garage, req, "s3").await?; + let verified_request = verify_request(&garage, req, "s3")?; let req = verified_request.request; let api_key = verified_request.access_key; @@ -139,15 +140,11 @@ impl ApiHandler for S3ApiServer { return handle_create_bucket(&garage, req, &api_key.key_id, bucket_name).await; } - let bucket_id = garage - .bucket_helper() - .resolve_bucket(&bucket_name, &api_key) - .await - .map_err(pass_helper_error)?; let bucket = garage .bucket_helper() - .get_existing_bucket(bucket_id) - .await?; + .resolve_bucket_fast(&bucket_name, &api_key) + .map_err(pass_helper_error)?; + let bucket_id = bucket.id; let bucket_params = bucket.state.into_option().unwrap(); let allowed = match endpoint.authorization_type() { @@ -352,8 +349,8 @@ impl ApiHandler for S3ApiServer { } impl ApiEndpoint for S3ApiEndpoint { - fn name(&self) -> &'static str { - self.endpoint.name() + fn name(&self) -> Cow<'static, str> { + Cow::Borrowed(self.endpoint.name()) } fn add_span_attributes(&self, span: SpanRef<'_>) { diff --git a/src/api/s3/bucket.rs b/src/api/s3/bucket.rs index 55caa6c8..b84a06bb 100644 --- a/src/api/s3/bucket.rs +++ b/src/api/s3/bucket.rs @@ -57,23 +57,23 @@ pub fn handle_get_bucket_acl(ctx: ReqCtx) -> Result, Error> { if kp.allow_owner { grants.push(s3_xml::Grant { - grantee: create_grantee(&key_p, &api_key), + grantee: create_grantee(key_p, &api_key), permission: s3_xml::Value("FULL_CONTROL".to_string()), }); } else { if kp.allow_read { grants.push(s3_xml::Grant { - grantee: create_grantee(&key_p, &api_key), + grantee: create_grantee(key_p, &api_key), permission: s3_xml::Value("READ".to_string()), }); grants.push(s3_xml::Grant { - grantee: create_grantee(&key_p, &api_key), + grantee: create_grantee(key_p, &api_key), permission: s3_xml::Value("READ_ACP".to_string()), }); } if kp.allow_write { grants.push(s3_xml::Grant { - grantee: create_grantee(&key_p, &api_key), + grantee: create_grantee(key_p, &api_key), permission: s3_xml::Value("WRITE".to_string()), }); } @@ -192,21 +192,16 @@ pub async fn handle_create_bucket( let api_key = helper.key().get_existing_key(api_key_id).await?; let key_params = api_key.params().unwrap(); - let existing_bucket = if let Some(Some(bucket_id)) = key_params.local_aliases.get(&bucket_name) - { - Some(*bucket_id) - } else { - helper - .bucket() - .resolve_global_bucket_name(&bucket_name) - .await? - }; + let existing_bucket = helper + .bucket() + .resolve_bucket(&bucket_name, &api_key.key_id) + .await?; - if let Some(bucket_id) = existing_bucket { + if let Some(bucket) = existing_bucket { // Check we have write or owner permission on the bucket, // in that case it's fine, return 200 OK, bucket exists; // otherwise return a forbidden error. - let kp = api_key.bucket_permissions(&bucket_id); + let kp = api_key.bucket_permissions(&bucket.id); if !(kp.allow_write || kp.allow_owner) { return Err(CommonError::BucketAlreadyExists.into()); } diff --git a/src/api/s3/copy.rs b/src/api/s3/copy.rs index 47a63c82..7ee1847d 100644 --- a/src/api/s3/copy.rs +++ b/src/api/s3/copy.rs @@ -24,7 +24,7 @@ use garage_api_common::helpers::*; use garage_api_common::signature::checksum::*; use crate::api_server::{ReqBody, ResBody}; -use crate::encryption::EncryptionParams; +use crate::encryption::{EncryptionParams, OekDerivationInfo}; use crate::error::*; use crate::get::{check_version_not_deleted, full_object_byte_stream, PreconditionHeaders}; use crate::multipart; @@ -66,11 +66,37 @@ pub async fn handle_copy( &ctx.garage, req.headers(), &source_version_meta.encryption, + OekDerivationInfo::for_object(&source_object, source_version), )?; - let dest_encryption = EncryptionParams::new_from_headers(&ctx.garage, req.headers())?; + let dest_uuid = gen_uuid(); + let dest_encryption = EncryptionParams::new_from_headers( + &ctx.garage, + req.headers(), + OekDerivationInfo { + bucket_id: ctx.bucket_id, + version_id: dest_uuid, + object_key: dest_key, + }, + )?; + + let was_multipart = source_version_meta.etag.contains('-') // HACK + || source_object_meta_inner.checksum_type == Some(ChecksumType::Composite); // Extract source checksum info before source_object_meta_inner is consumed let source_checksum = source_object_meta_inner.checksum; + let source_checksum_type = match (source_object_meta_inner.checksum_type, source_checksum) { + (Some(ct), _) => Some(ct), + (None, Some(_)) => { + // Migrated object from garage v1.x or older + // determine checksum type depending if this is a multipart upload or not + if was_multipart { + Some(ChecksumType::Composite) + } else { + Some(ChecksumType::FullObject) + } + } + (None, None) => None, + }; let source_checksum_algorithm = source_checksum.map(|x| x.algorithm()); // If source object has a checksum, the destination object must as well. @@ -79,7 +105,6 @@ pub async fn handle_copy( let checksum_algorithm = checksum_algorithm.or(source_checksum_algorithm); // Determine metadata of destination object - let was_multipart = source_version_meta.etag.contains('-'); let dest_object_meta = ObjectVersionMetaInner { headers: match req.headers().get("x-amz-metadata-directive") { Some(v) if v == hyper::header::HeaderValue::from_static("REPLACE") => { @@ -99,6 +124,7 @@ pub async fn handle_copy( } }, checksum: source_checksum, + checksum_type: source_checksum_type, }; // Do actual object copying @@ -118,40 +144,53 @@ pub async fn handle_copy( // See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html let must_recopy = !EncryptionParams::is_same(&source_encryption, &dest_encryption) - || source_checksum_algorithm != checksum_algorithm - || (was_multipart && checksum_algorithm.is_some()); + || (checksum_algorithm.is_some() + && (was_multipart || checksum_algorithm != source_checksum_algorithm)); let res = if !must_recopy { + let dest_info = DestInfo { + key: dest_key, + uuid: dest_uuid, + object_meta: dest_object_meta, + encryption: dest_encryption, + }; + // In most cases, we can just copy the metadata and link blocks of the // old object from the new object. handle_copy_metaonly( ctx, - dest_key, - dest_object_meta, - dest_encryption, + dest_info, source_version, source_version_data, source_version_meta, ) .await? } else { - let expected_checksum = ExpectedChecksums { - md5: None, - sha256: None, - extra: source_checksum, - }; let checksum_mode = if was_multipart || source_checksum_algorithm != checksum_algorithm { ChecksumMode::Calculate(checksum_algorithm) } else { - ChecksumMode::Verify(&expected_checksum) + ChecksumMode::Verify(ExpectedChecksums { + md5: None, + sha256: None, + extra: source_checksum, + }) + }; + // For multipart uploads that had a composite checksum, set checksum type + // to full object as it will be recalculated. + let dest_object_meta = ObjectVersionMetaInner { + checksum_type: checksum_algorithm.map(|_| ChecksumType::FullObject), + ..dest_object_meta + }; + + let dest_info = DestInfo { + key: dest_key, + uuid: dest_uuid, + object_meta: dest_object_meta, + encryption: dest_encryption, }; - // If source and dest encryption use different keys, - // we must decrypt content and re-encrypt, so rewrite all data blocks. handle_copy_reencrypt( ctx, - dest_key, - dest_object_meta, - dest_encryption, + dest_info, source_version, source_version_data, source_encryption, @@ -178,11 +217,16 @@ pub async fn handle_copy( Ok(resp.body(string_body(xml))?) } +struct DestInfo<'a> { + key: &'a str, + uuid: Uuid, + object_meta: ObjectVersionMetaInner, + encryption: EncryptionParams, +} + async fn handle_copy_metaonly( ctx: ReqCtx, - dest_key: &str, - dest_object_meta: ObjectVersionMetaInner, - dest_encryption: EncryptionParams, + dest_info: DestInfo<'_>, source_version: &ObjectVersion, source_version_data: &ObjectVersionData, source_version_meta: &ObjectVersionMeta, @@ -194,17 +238,16 @@ async fn handle_copy_metaonly( } = ctx; // Generate parameters for copied object - let new_uuid = gen_uuid(); let new_timestamp = now_msec(); let new_meta = ObjectVersionMeta { - encryption: dest_encryption.encrypt_meta(dest_object_meta)?, + encryption: dest_info.encryption.encrypt_meta(dest_info.object_meta)?, size: source_version_meta.size, etag: source_version_meta.etag.clone(), }; let res = SaveStreamResult { - version_uuid: new_uuid, + version_uuid: dest_info.uuid, version_timestamp: new_timestamp, etag: new_meta.etag.clone(), }; @@ -216,7 +259,7 @@ async fn handle_copy_metaonly( // bytes is either plaintext before&after or encrypted with the // same keys, so it's ok to just copy it as is let dest_object_version = ObjectVersion { - uuid: new_uuid, + uuid: dest_info.uuid, timestamp: new_timestamp, state: ObjectVersionState::Complete(ObjectVersionData::Inline( new_meta, @@ -225,7 +268,7 @@ async fn handle_copy_metaonly( }; let dest_object = Object::new( dest_bucket_id, - dest_key.to_string(), + dest_info.key.to_string(), vec![dest_object_version], ); garage.object_table.insert(&dest_object).await?; @@ -243,7 +286,7 @@ async fn handle_copy_metaonly( // This holds a reference to the object in the Version table // so that it won't be deleted, e.g. by repair_versions. let tmp_dest_object_version = ObjectVersion { - uuid: new_uuid, + uuid: dest_info.uuid, timestamp: new_timestamp, state: ObjectVersionState::Uploading { encryption: new_meta.encryption.clone(), @@ -253,20 +296,22 @@ async fn handle_copy_metaonly( }; let tmp_dest_object = Object::new( dest_bucket_id, - dest_key.to_string(), + dest_info.key.to_string(), vec![tmp_dest_object_version], ); garage.object_table.insert(&tmp_dest_object).await?; + let dest_uuid = dest_info.uuid; + // Write version in the version table. Even with empty block list, // this means that the BlockRef entries linked to this version cannot be // marked as deleted (they are marked as deleted only if the Version // doesn't exist or is marked as deleted). let mut dest_version = Version::new( - new_uuid, + dest_uuid, VersionBacklink::Object { bucket_id: dest_bucket_id, - key: dest_key.to_string(), + key: dest_info.key.to_string(), }, false, ); @@ -282,7 +327,7 @@ async fn handle_copy_metaonly( .iter() .map(|b| BlockRef { block: b.1.hash, - version: new_uuid, + version: dest_uuid, deleted: false.into(), }) .collect::>(); @@ -298,7 +343,7 @@ async fn handle_copy_metaonly( // with the stuff before, the block's reference counts could be decremented before // they are incremented again for the new version, leading to data being deleted. let dest_object_version = ObjectVersion { - uuid: new_uuid, + uuid: dest_info.uuid, timestamp: new_timestamp, state: ObjectVersionState::Complete(ObjectVersionData::FirstBlock( new_meta, @@ -307,7 +352,7 @@ async fn handle_copy_metaonly( }; let dest_object = Object::new( dest_bucket_id, - dest_key.to_string(), + dest_info.key.to_string(), vec![dest_object_version], ); garage.object_table.insert(&dest_object).await?; @@ -319,13 +364,11 @@ async fn handle_copy_metaonly( async fn handle_copy_reencrypt( ctx: ReqCtx, - dest_key: &str, - dest_object_meta: ObjectVersionMetaInner, - dest_encryption: EncryptionParams, + dest_info: DestInfo<'_>, source_version: &ObjectVersion, source_version_data: &ObjectVersionData, source_encryption: EncryptionParams, - checksum_mode: ChecksumMode<'_>, + checksum_mode: ChecksumMode, ) -> Result { // basically we will read the source data (decrypt if necessary) // and save that in a new object (encrypt if necessary), @@ -339,10 +382,11 @@ async fn handle_copy_reencrypt( save_stream( &ctx, - dest_object_meta, - dest_encryption, + dest_info.uuid, + dest_info.object_meta, + dest_info.encryption, source_stream.map_err(|e| Error::from(GarageError::from(e))), - &dest_key.to_string(), + &dest_info.key.to_string(), checksum_mode, ) .await @@ -362,7 +406,7 @@ pub async fn handle_upload_part_copy( let dest_upload_id = multipart::decode_upload_id(upload_id)?; let dest_key = dest_key.to_string(); - let (source_object, (_, dest_version, mut dest_mpu)) = futures::try_join!( + let (source_object, (dest_object, dest_version, mut dest_mpu)) = futures::try_join!( get_copy_source(&ctx, req), multipart::get_upload(&ctx, &dest_key, &dest_upload_id) )?; @@ -380,7 +424,10 @@ pub async fn handle_upload_part_copy( &garage, req.headers(), &source_version_meta.encryption, + OekDerivationInfo::for_object(&source_object, source_object_version), )?; + + let dest_oek_params = OekDerivationInfo::for_object(&dest_object, &dest_version); let (dest_object_encryption, dest_object_checksum_algorithm) = match dest_version.state { ObjectVersionState::Uploading { encryption, @@ -389,8 +436,12 @@ pub async fn handle_upload_part_copy( } => (encryption, checksum_algorithm), _ => unreachable!(), }; - let (dest_encryption, _) = - EncryptionParams::check_decrypt(&garage, req.headers(), &dest_object_encryption)?; + let (dest_encryption, _) = EncryptionParams::check_decrypt( + &garage, + req.headers(), + &dest_object_encryption, + dest_oek_params, + )?; let same_encryption = EncryptionParams::is_same(&source_encryption, &dest_encryption); // Check source range is valid @@ -505,7 +556,7 @@ pub async fn handle_upload_part_copy( // Now, actually copy the blocks let mut checksummer = Checksummer::init(&Default::default(), !dest_encryption.is_encrypted()) - .add(dest_object_checksum_algorithm); + .add_algorithm(dest_object_checksum_algorithm.map(|(algo, _)| algo)); // First, create a stream that is able to read the source blocks // and extract the subrange if necessary. @@ -655,7 +706,7 @@ pub async fn handle_upload_part_copy( let checksums = checksummer.finalize(); let etag = dest_encryption.etag_from_md5(&checksums.md5); - let checksum = checksums.extract(dest_object_checksum_algorithm); + let checksum = checksums.extract(dest_object_checksum_algorithm.map(|(algo, _)| algo)); // Put the part's ETag in the Versiontable dest_mpu.parts.put( @@ -695,16 +746,15 @@ async fn get_copy_source(ctx: &ReqCtx, req: &Request) -> Result) -> Result Result, Error> { .body(string_body(xml))?) } else { Ok(Response::builder() - .status(StatusCode::NO_CONTENT) + .status(StatusCode::NOT_FOUND) .body(empty_body())?) } } @@ -88,9 +88,7 @@ pub async fn handle_put_cors( pub struct CorsConfiguration { #[serde(serialize_with = "xmlns_tag", skip_deserializing)] pub xmlns: (), - // "default" is required to be able to parse an empty list of rules, - // cf https://docs.rs/quick-xml/latest/quick_xml/de/#sequences-xsall-and-xssequence-xml-schema-types - #[serde(rename = "CORSRule", default)] + #[serde(rename = "CORSRule")] pub cors_rules: Vec, } @@ -272,26 +270,4 @@ mod tests { Ok(()) } - - #[test] - fn test_deserialize_norules() -> Result<(), Error> { - let message = r#" -"#; - let conf: CorsConfiguration = from_str(message).unwrap(); - let ref_value = CorsConfiguration { - xmlns: (), - cors_rules: vec![], - }; - assert_eq! { - ref_value, - conf - }; - - let message2 = to_xml_with_header(&ref_value)?; - - let cleanup = |c: &str| c.replace(char::is_whitespace, ""); - assert_eq!(cleanup(message), cleanup(&message2)); - - Ok(()) - } } diff --git a/src/api/s3/delete.rs b/src/api/s3/delete.rs index d785b9d8..0ef1bf33 100644 --- a/src/api/s3/delete.rs +++ b/src/api/s3/delete.rs @@ -29,7 +29,7 @@ async fn handle_delete_internal(ctx: &ReqCtx, key: &str) -> Result<(Uuid, Uuid), .iter() .rev() .find(|v| !matches!(&v.state, ObjectVersionState::Aborted)) - .or_else(|| object.versions().iter().rev().next()); + .or_else(|| object.versions().iter().next_back()); let deleted_version = match deleted_version { Some(dv) => dv.uuid, None => { @@ -139,11 +139,7 @@ fn parse_delete_objects_xml(xml: &roxmltree::Document) -> Option key: key_str.to_string(), }); } else if item.has_tag_name("Quiet") { - if item.text()? == "true" { - ret.quiet = true; - } else { - ret.quiet = false; - } + ret.quiet = item.text()? == "true"; } else { return None; } diff --git a/src/api/s3/encryption.rs b/src/api/s3/encryption.rs index fa7285ca..0312754e 100644 --- a/src/api/s3/encryption.rs +++ b/src/api/s3/encryption.rs @@ -11,6 +11,7 @@ use aes_gcm::{ }; use base64::prelude::*; use bytes::Bytes; +use sha2::Sha256; use futures::stream::Stream; use futures::task; @@ -21,12 +22,12 @@ use http::header::{HeaderMap, HeaderName, HeaderValue}; use garage_net::bytes_buf::BytesBuf; use garage_net::stream::{stream_asyncread, ByteStream}; use garage_rpc::rpc_helper::OrderTag; -use garage_util::data::Hash; +use garage_util::data::{Hash, Uuid}; use garage_util::error::Error as GarageError; use garage_util::migrate::Migrate; use garage_model::garage::Garage; -use garage_model::s3::object_table::{ObjectVersionEncryption, ObjectVersionMetaInner}; +use garage_model::s3::object_table::*; use garage_api_common::common_error::*; use garage_api_common::signature::checksum::Md5Checksum; @@ -64,32 +65,42 @@ const STREAM_ENC_CYPER_CHUNK_SIZE: usize = STREAM_ENC_PLAIN_CHUNK_SIZE + 16; pub enum EncryptionParams { Plaintext, SseC { + /// the value of x-amz-server-side-encryption-customer-key client_key: Key, + /// the value of x-amz-server-side-encryption-customer-key-md5 client_key_md5: Md5Output, + /// the object encryption key, for uploads created in garage v2+ + object_key: Option>, + /// the compression level used for compressing data blocks compression_level: Option, }, } +#[derive(Clone, Copy)] +pub struct OekDerivationInfo<'a> { + pub bucket_id: Uuid, + pub version_id: Uuid, + pub object_key: &'a str, +} + impl EncryptionParams { pub fn is_encrypted(&self) -> bool { !matches!(self, Self::Plaintext) } pub fn is_same(a: &Self, b: &Self) -> bool { - let relevant_info = |x: &Self| match x { - Self::Plaintext => None, - Self::SseC { - client_key, - compression_level, - .. - } => Some((*client_key, compression_level.is_some())), - }; - relevant_info(a) == relevant_info(b) + // This function is used in CopyObject and UploadPartCopy to determine + // whether the object must be re-encrypted. If this returns true, + // data blocks are reused as-is. Since Garage v2, we are using + // object-specific encryption keys, so we know that if both source + // and destination are encrypted, it can't be with the same key. + matches!((a, b), (Self::Plaintext, Self::Plaintext)) } pub fn new_from_headers( garage: &Garage, headers: &HeaderMap, + oek_info: OekDerivationInfo<'_>, ) -> Result { let key = parse_request_headers( headers, @@ -101,6 +112,7 @@ impl EncryptionParams { Some((client_key, client_key_md5)) => Ok(EncryptionParams::SseC { client_key, client_key_md5, + object_key: Some(oek_info.derive_oek(&client_key)), compression_level: garage.config.compression_level, }), None => Ok(EncryptionParams::Plaintext), @@ -109,7 +121,7 @@ impl EncryptionParams { pub fn add_response_headers(&self, resp: &mut http::response::Builder) { if let Self::SseC { client_key_md5, .. } = self { - let md5 = BASE64_STANDARD.encode(&client_key_md5); + let md5 = BASE64_STANDARD.encode(client_key_md5); resp.headers_mut().unwrap().insert( X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM, @@ -126,6 +138,7 @@ impl EncryptionParams { garage: &Garage, headers: &HeaderMap, obj_enc: &'a ObjectVersionEncryption, + oek_info: OekDerivationInfo<'_>, ) -> Result<(Self, Cow<'a, ObjectVersionMetaInner>), Error> { let key = parse_request_headers( headers, @@ -133,13 +146,14 @@ impl EncryptionParams { &X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY, &X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5, )?; - Self::check_decrypt_common(garage, key, obj_enc) + Self::check_decrypt_common(garage, key, obj_enc, oek_info) } pub fn check_decrypt_for_copy_source<'a>( garage: &Garage, headers: &HeaderMap, obj_enc: &'a ObjectVersionEncryption, + oek_info: OekDerivationInfo<'_>, ) -> Result<(Self, Cow<'a, ObjectVersionMetaInner>), Error> { let key = parse_request_headers( headers, @@ -147,29 +161,39 @@ impl EncryptionParams { &X_AMZ_COPY_SOURCE_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY, &X_AMZ_COPY_SOURCE_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5, )?; - Self::check_decrypt_common(garage, key, obj_enc) + Self::check_decrypt_common(garage, key, obj_enc, oek_info) } fn check_decrypt_common<'a>( garage: &Garage, key: Option<(Key, Md5Output)>, obj_enc: &'a ObjectVersionEncryption, + oek_info: OekDerivationInfo<'_>, ) -> Result<(Self, Cow<'a, ObjectVersionMetaInner>), Error> { match (key, &obj_enc) { ( Some((client_key, client_key_md5)), - ObjectVersionEncryption::SseC { inner, compressed }, + ObjectVersionEncryption::SseC { + inner, + compressed, + use_oek, + }, ) => { let enc = Self::SseC { client_key, client_key_md5, + object_key: if *use_oek { + Some(oek_info.derive_oek(&client_key)) + } else { + None + }, compression_level: if *compressed { Some(garage.config.compression_level.unwrap_or(1)) } else { None }, }; - let plaintext = enc.decrypt_blob(&inner)?; + let plaintext = enc.decrypt_blob(inner)?; let inner = ObjectVersionMetaInner::decode(&plaintext) .ok_or_internal_error("Could not decode encrypted metadata")?; Ok((enc, Cow::Owned(inner))) @@ -193,13 +217,16 @@ impl EncryptionParams { ) -> Result { match self { Self::SseC { - compression_level, .. + compression_level, + object_key, + .. } => { let plaintext = meta.encode().map_err(GarageError::from)?; let ciphertext = self.encrypt_blob(&plaintext)?; Ok(ObjectVersionEncryption::SseC { inner: ciphertext.into_owned(), compressed: compression_level.is_some(), + use_oek: object_key.is_some(), }) } Self::Plaintext => Ok(ObjectVersionEncryption::Plaintext { inner: meta }), @@ -218,7 +245,7 @@ impl EncryptionParams { // So we just put some random bytes. let mut random = [0u8; 16]; OsRng.fill_bytes(&mut random); - hex::encode(&random) + hex::encode(random) } } } @@ -228,24 +255,37 @@ impl EncryptionParams { // This is used for encrypting object metadata and inlined data for small objects. // This does not compress anything. - pub fn encrypt_blob<'a>(&self, blob: &'a [u8]) -> Result, Error> { + fn cipher(&self) -> Option { match self { - Self::SseC { client_key, .. } => { - let cipher = Aes256Gcm::new(&client_key); + Self::SseC { + object_key: Some(oek), + .. + } => Some(Aes256Gcm::new(oek)), + Self::SseC { + client_key, + object_key: None, + .. + } => Some(Aes256Gcm::new(client_key)), + Self::Plaintext => None, + } + } + + pub fn encrypt_blob<'a>(&self, blob: &'a [u8]) -> Result, Error> { + match self.cipher() { + Some(cipher) => { let nonce = Aes256Gcm::generate_nonce(&mut OsRng); let ciphertext = cipher .encrypt(&nonce, blob) .ok_or_internal_error("Encryption failed")?; Ok(Cow::Owned([nonce.to_vec(), ciphertext].concat())) } - Self::Plaintext => Ok(Cow::Borrowed(blob)), + None => Ok(Cow::Borrowed(blob)), } } pub fn decrypt_blob<'a>(&self, blob: &'a [u8]) -> Result, Error> { - match self { - Self::SseC { client_key, .. } => { - let cipher = Aes256Gcm::new(&client_key); + match self.cipher() { + Some(cipher) => { let nonce_size = ::NonceSize::to_usize(); let nonce = Nonce::from_slice( blob.get(..nonce_size) @@ -258,7 +298,7 @@ impl EncryptionParams { )?; Ok(Cow::Owned(plaintext)) } - Self::Plaintext => Ok(Cow::Borrowed(blob)), + None => Ok(Cow::Borrowed(blob)), } } @@ -284,10 +324,12 @@ impl EncryptionParams { Self::Plaintext => stream, Self::SseC { client_key, + object_key, compression_level, .. } => { - let plaintext = DecryptStream::new(stream, *client_key); + let key = object_key.as_ref().unwrap_or(client_key); + let plaintext = DecryptStream::new(stream, *key); if compression_level.is_some() { let reader = stream_asyncread(Box::pin(plaintext)); let reader = BufReader::new(reader); @@ -307,9 +349,12 @@ impl EncryptionParams { Self::Plaintext => Ok(block), Self::SseC { client_key, + object_key, compression_level, .. } => { + let key = object_key.as_ref().unwrap_or(client_key); + let block = if let Some(level) = compression_level { Cow::Owned( garage_block::zstd_encode(block.as_ref(), *level) @@ -325,7 +370,7 @@ impl EncryptionParams { OsRng.fill_bytes(&mut nonce); ret.extend_from_slice(nonce.as_slice()); - let mut cipher = EncryptorLE31::::new(&client_key, &nonce); + let mut cipher = EncryptorLE31::::new(key, &nonce); let mut iter = block.chunks(STREAM_ENC_PLAIN_CHUNK_SIZE).peekable(); if iter.peek().is_none() { @@ -361,6 +406,13 @@ impl EncryptionParams { } } +pub fn has_encryption_header(headers: &HeaderMap) -> bool { + match headers.get(X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM) { + Some(h) => h.as_bytes() == CUSTOMER_ALGORITHM_AES256, + None => false, + } +} + fn parse_request_headers( headers: &HeaderMap, alg_header: &HeaderName, @@ -378,7 +430,7 @@ fn parse_request_headers( let key_b64 = key.ok_or_bad_request("Missing server-side-encryption-customer-key header")?; let key_bytes: [u8; 32] = BASE64_STANDARD - .decode(&key_b64) + .decode(key_b64) .ok_or_bad_request( "Invalid server-side-encryption-customer-key header: invalid base64", )? @@ -390,7 +442,7 @@ fn parse_request_headers( let md5_b64 = md5.ok_or_bad_request("Missing server-side-encryption-customer-key-md5 header")?; - let md5_bytes = BASE64_STANDARD.decode(&md5_b64).ok_or_bad_request( + let md5_bytes = BASE64_STANDARD.decode(md5_b64).ok_or_bad_request( "Invalid server-side-encryption-customer-key-md5 header: invalid bass64", )?; @@ -420,6 +472,30 @@ fn parse_request_headers( } } +impl<'a> OekDerivationInfo<'a> { + pub fn for_object<'b>(object: &'a Object, version: &'b ObjectVersion) -> Self { + Self { + bucket_id: object.bucket_id, + version_id: version.uuid, + object_key: &object.key, + } + } + + fn derive_oek(&self, client_key: &Key) -> Key { + use hmac::{Hmac, Mac}; + + // info = bucket_id + object_name + version_uuid + "garage-object-encryption-key" + // oek = hmac_sha256(ssec_key, info) + let mut hmac = as Mac>::new_from_slice(client_key.as_slice()) + .expect("create hmac-sha256"); + hmac.update(b"garage-object-encryption-key"); + hmac.update(self.bucket_id.as_slice()); + hmac.update(self.version_id.as_slice()); + hmac.update(self.object_key.as_bytes()); + hmac.finalize().into_bytes() + } +} + // ---- encrypt & decrypt streams ---- #[pin_project::pin_project] @@ -432,6 +508,7 @@ struct DecryptStream { state: DecryptStreamState, } +#[expect(clippy::large_enum_variant)] enum DecryptStreamState { Starting, Running(DecryptorLE31), @@ -468,7 +545,7 @@ impl Stream for DecryptStream { let nonce_size = StreamNonceSize::to_usize(); if let Some(nonce) = this.buf.take_exact(nonce_size) { let nonce = Nonce::from_slice(nonce.as_ref()); - *this.state = DecryptStreamState::Running(DecryptorLE31::new(&this.key, nonce)); + *this.state = DecryptStreamState::Running(DecryptorLE31::new(this.key, nonce)); break; } @@ -508,8 +585,7 @@ impl Stream for DecryptStream { if matches!(this.state, DecryptStreamState::Done) { if !this.buf.is_empty() { - return Poll::Ready(Some(Err(std::io::Error::new( - std::io::ErrorKind::Other, + return Poll::Ready(Some(Err(std::io::Error::other( "Decrypt: unexpected bytes after last encrypted chunk", )))); } @@ -543,10 +619,7 @@ impl Stream for DecryptStream { match res { Ok(bytes) if bytes.is_empty() => Poll::Ready(None), Ok(bytes) => Poll::Ready(Some(Ok(bytes.into()))), - Err(_) => Poll::Ready(Some(Err(std::io::Error::new( - std::io::ErrorKind::Other, - "Decryption failed", - )))), + Err(_) => Poll::Ready(Some(Err(std::io::Error::other("Decryption failed")))), } } } @@ -569,6 +642,7 @@ mod tests { let enc = EncryptionParams::SseC { client_key: Aes256Gcm::generate_key(&mut OsRng), client_key_md5: Default::default(), // not needed + object_key: Some(Aes256Gcm::generate_key(&mut OsRng)), compression_level, }; diff --git a/src/api/s3/error.rs b/src/api/s3/error.rs index 64112084..f36078f3 100644 --- a/src/api/s3/error.rs +++ b/src/api/s3/error.rs @@ -182,6 +182,7 @@ impl ApiError for Error { use hyper::header; header_map.append(header::CONTENT_TYPE, "application/xml".parse().unwrap()); + header_map.append(header::ACCESS_CONTROL_ALLOW_ORIGIN, "*".parse().unwrap()); #[allow(clippy::single_match)] match self { diff --git a/src/api/s3/get.rs b/src/api/s3/get.rs index a1e4ce10..5854bc80 100644 --- a/src/api/s3/get.rs +++ b/src/api/s3/get.rs @@ -31,7 +31,7 @@ use garage_api_common::signature::checksum::{add_checksum_response_headers, X_AM use crate::api_server::ResBody; use crate::copy::*; -use crate::encryption::EncryptionParams; +use crate::encryption::{EncryptionParams, OekDerivationInfo}; use crate::error::*; const X_AMZ_MP_PARTS_COUNT: HeaderName = HeaderName::from_static("x-amz-mp-parts-count"); @@ -93,7 +93,7 @@ fn object_headers( /// Override headers according to specific query parameters, see /// section "Overriding response header values through the request" in -/// https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObject.html +/// fn getobject_override_headers( overrides: GetObjectOverrides, resp: &mut http::response::Builder, @@ -124,7 +124,7 @@ fn handle_http_precondition( ) -> Result>, Error> { let precondition_headers = PreconditionHeaders::parse(req)?; - if let Some(status_code) = precondition_headers.check(&version, &version_meta.etag)? { + if let Some(status_code) = precondition_headers.check(version, &version_meta.etag)? { Ok(Some( Response::builder() .status(status_code) @@ -182,15 +182,19 @@ pub async fn handle_head_without_ctx( return Ok(res); } - let (encryption, headers) = - EncryptionParams::check_decrypt(&garage, req.headers(), &version_meta.encryption)?; + let (encryption, headers) = EncryptionParams::check_decrypt( + &garage, + req.headers(), + &version_meta.encryption, + OekDerivationInfo::for_object(&object, object_version), + )?; - let checksum_mode = checksum_mode(&req); + let checksum_mode = checksum_mode(req); - if let Some(pn) = part_number { + if let Some(part_number) = part_number { match version_data { ObjectVersionData::Inline(_, _) => { - if pn != 1 { + if part_number != 1 { return Err(Error::InvalidPart); } let bytes_len = version_meta.size; @@ -219,7 +223,7 @@ pub async fn handle_head_without_ctx( check_version_not_deleted(&version)?; let (part_offset, part_end) = - calculate_part_bounds(&version, pn).ok_or(Error::InvalidPart)?; + calculate_part_bounds(&version, part_number).ok_or(Error::InvalidPart)?; Ok(object_headers( object_version, @@ -305,10 +309,23 @@ pub async fn handle_get_without_ctx( return Ok(res); } - let (enc, headers) = - EncryptionParams::check_decrypt(&garage, req.headers(), &last_v_meta.encryption)?; + let (enc, headers) = EncryptionParams::check_decrypt( + &garage, + req.headers(), + &last_v_meta.encryption, + OekDerivationInfo::for_object(&object, last_v), + )?; - let checksum_mode = checksum_mode(&req); + let checksum_mode = checksum_mode(req); + + let handle_get_info = HandleGetInfo { + garage, + version: last_v, + version_data: last_v_data, + version_meta: last_v_meta, + encryption: enc, + meta_inner: &headers, + }; match (part_number, parse_range_header(req, last_v_meta.size)?) { (Some(_), Some(_)) => Err(Error::bad_request( @@ -316,12 +333,7 @@ pub async fn handle_get_without_ctx( )), (Some(pn), None) => { handle_get_part( - garage, - last_v, - last_v_data, - last_v_meta, - enc, - &headers, + handle_get_info, pn, ChecksumMode { // TODO: for multipart uploads, checksums of each part should be stored @@ -334,12 +346,7 @@ pub async fn handle_get_without_ctx( } (None, Some(range)) => { handle_get_range( - garage, - last_v, - last_v_data, - last_v_meta, - enc, - &headers, + handle_get_info, range.start, range.start + range.length, ChecksumMode { @@ -351,26 +358,14 @@ pub async fn handle_get_without_ctx( ) .await } - (None, None) => { - handle_get_full( - garage, - last_v, - last_v_data, - last_v_meta, - enc, - &headers, - overrides, - checksum_mode, - ) - .await - } + (None, None) => handle_get_full(handle_get_info, overrides, checksum_mode).await, } } pub(crate) fn check_version_not_deleted(version: &Version) -> Result<(), Error> { if version.deleted.get() { // the version was deleted between when the object_table was consulted - // and now, this could mean the object was deleted, or overriden. + // and now, this could mean the object was deleted, or overridden. // Rather than say the key doesn't exist, return a transient error // to signal the client to try again. return Err(CommonError::InternalError(UtilError::Message( @@ -382,28 +377,37 @@ pub(crate) fn check_version_not_deleted(version: &Version) -> Result<(), Error> Ok(()) } -async fn handle_get_full( +struct HandleGetInfo<'a> { garage: Arc, - version: &ObjectVersion, - version_data: &ObjectVersionData, - version_meta: &ObjectVersionMeta, + version: &'a ObjectVersion, + version_data: &'a ObjectVersionData, + version_meta: &'a ObjectVersionMeta, encryption: EncryptionParams, - meta_inner: &ObjectVersionMetaInner, + meta_inner: &'a ObjectVersionMetaInner, +} + +async fn handle_get_full( + info: HandleGetInfo<'_>, overrides: GetObjectOverrides, checksum_mode: ChecksumMode, ) -> Result, Error> { let mut resp_builder = object_headers( - version, - version_meta, - &meta_inner, - encryption, + info.version, + info.version_meta, + info.meta_inner, + info.encryption, checksum_mode, ) - .header(CONTENT_LENGTH, format!("{}", version_meta.size)) + .header(CONTENT_LENGTH, format!("{}", info.version_meta.size)) .status(StatusCode::OK); getobject_override_headers(overrides, &mut resp_builder)?; - let stream = full_object_byte_stream(garage, version, version_data, encryption); + let stream = full_object_byte_stream( + info.garage, + info.version, + info.version_data, + info.encryption, + ); Ok(resp_builder.body(response_body_from_stream(stream))?) } @@ -483,12 +487,7 @@ pub fn full_object_byte_stream( } async fn handle_get_range( - garage: Arc, - version: &ObjectVersion, - version_data: &ObjectVersionData, - version_meta: &ObjectVersionMeta, - encryption: EncryptionParams, - meta_inner: &ObjectVersionMetaInner, + info: HandleGetInfo<'_>, begin: u64, end: u64, checksum_mode: ChecksumMode, @@ -496,18 +495,24 @@ async fn handle_get_range( // Here we do not use getobject_override_headers because we don't // want to add any overridden headers (those should not be added // when returning PARTIAL_CONTENT) - let resp_builder = object_headers(version, version_meta, meta_inner, encryption, checksum_mode) - .header(CONTENT_LENGTH, format!("{}", end - begin)) - .header( - CONTENT_RANGE, - format!("bytes {}-{}/{}", begin, end - 1, version_meta.size), - ) - .status(StatusCode::PARTIAL_CONTENT); + let resp_builder = object_headers( + info.version, + info.version_meta, + info.meta_inner, + info.encryption, + checksum_mode, + ) + .header(CONTENT_LENGTH, format!("{}", end - begin)) + .header( + CONTENT_RANGE, + format!("bytes {}-{}/{}", begin, end - 1, info.version_meta.size), + ) + .status(StatusCode::PARTIAL_CONTENT); - match &version_data { + match &info.version_data { ObjectVersionData::DeleteMarker => unreachable!(), ObjectVersionData::Inline(_meta, bytes) => { - let bytes = encryption.decrypt_blob(&bytes)?; + let bytes = info.encryption.decrypt_blob(bytes)?; if end as usize <= bytes.len() { let body = bytes_body(bytes[begin as usize..end as usize].to_vec().into()); Ok(resp_builder.body(body)?) @@ -518,46 +523,47 @@ async fn handle_get_range( } } ObjectVersionData::FirstBlock(_meta, _first_block_hash) => { - let version = garage + let version = info + .garage .version_table - .get(&version.uuid, &EmptyKey) + .get(&info.version.uuid, &EmptyKey) .await? .ok_or(Error::NoSuchKey)?; check_version_not_deleted(&version)?; - let body = - body_from_blocks_range(garage, encryption, version.blocks.items(), begin, end); + let body = body_from_blocks_range( + info.garage, + info.encryption, + version.blocks.items(), + begin, + end, + ); Ok(resp_builder.body(body)?) } } } async fn handle_get_part( - garage: Arc, - object_version: &ObjectVersion, - version_data: &ObjectVersionData, - version_meta: &ObjectVersionMeta, - encryption: EncryptionParams, - meta_inner: &ObjectVersionMetaInner, + info: HandleGetInfo<'_>, part_number: u64, checksum_mode: ChecksumMode, ) -> Result, Error> { // Same as for get_range, no getobject_override_headers let resp_builder = object_headers( - object_version, - version_meta, - meta_inner, - encryption, + info.version, + info.version_meta, + info.meta_inner, + info.encryption, checksum_mode, ) .status(StatusCode::PARTIAL_CONTENT); - match version_data { + match info.version_data { ObjectVersionData::Inline(_, bytes) => { if part_number != 1 { return Err(Error::InvalidPart); } - let bytes = encryption.decrypt_blob(&bytes)?; - assert_eq!(bytes.len() as u64, version_meta.size); + let bytes = info.encryption.decrypt_blob(bytes)?; + assert_eq!(bytes.len() as u64, info.version_meta.size); Ok(resp_builder .header(CONTENT_LENGTH, format!("{}", bytes.len())) .header( @@ -568,9 +574,10 @@ async fn handle_get_part( .body(bytes_body(bytes.into_owned().into()))?) } ObjectVersionData::FirstBlock(_, _) => { - let version = garage + let version = info + .garage .version_table - .get(&object_version.uuid, &EmptyKey) + .get(&info.version.uuid, &EmptyKey) .await? .ok_or(Error::NoSuchKey)?; @@ -579,14 +586,19 @@ async fn handle_get_part( let (begin, end) = calculate_part_bounds(&version, part_number).ok_or(Error::InvalidPart)?; - let body = - body_from_blocks_range(garage, encryption, version.blocks.items(), begin, end); + let body = body_from_blocks_range( + info.garage, + info.encryption, + version.blocks.items(), + begin, + end, + ); Ok(resp_builder .header(CONTENT_LENGTH, format!("{}", end - begin)) .header( CONTENT_RANGE, - format!("bytes {}-{}/{}", begin, end - 1, version_meta.size), + format!("bytes {}-{}/{}", begin, end - 1, info.version_meta.size), ) .header(X_AMZ_MP_PARTS_COUNT, format!("{}", version.n_parts()?)) .body(body)?) @@ -700,11 +712,7 @@ fn body_from_blocks_range( Some(None) } else { // The chunk has an intersection with the requested range - let start_in_chunk = if *chunk_offset > begin { - 0 - } else { - begin - *chunk_offset - }; + let start_in_chunk = begin.saturating_sub(*chunk_offset); let end_in_chunk = if *chunk_offset + chunk_len < end { chunk_len } else { @@ -765,10 +773,7 @@ fn error_stream_item(e: E) -> ByteStream { } fn std_error_from_read_error(e: E) -> std::io::Error { - std::io::Error::new( - std::io::ErrorKind::Other, - format!("Error while reading object data: {}", e), - ) + std::io::Error::other(format!("Error while reading object data: {}", e)) } // ---- diff --git a/src/api/s3/list.rs b/src/api/s3/list.rs index 94c2c895..58b66388 100644 --- a/src/api/s3/list.rs +++ b/src/api/s3/list.rs @@ -17,7 +17,7 @@ use garage_api_common::encoding::*; use garage_api_common::helpers::*; use crate::api_server::{ReqBody, ResBody}; -use crate::encryption::EncryptionParams; +use crate::encryption::{EncryptionParams, OekDerivationInfo}; use crate::error::*; use crate::multipart as s3_multipart; use crate::xml as s3_xml; @@ -285,8 +285,16 @@ pub async fn handle_list_parts( ObjectVersionState::Uploading { encryption, .. } => encryption, _ => unreachable!(), }; - let encryption_res = - EncryptionParams::check_decrypt(&ctx.garage, req.headers(), &object_encryption); + let encryption_res = EncryptionParams::check_decrypt( + &ctx.garage, + req.headers(), + &object_encryption, + OekDerivationInfo { + bucket_id: ctx.bucket_id, + version_id: upload_id, + object_key: &query.key, + }, + ); let (info, next) = fetch_part_info(query, &mpu)?; @@ -316,25 +324,31 @@ pub async fn handle_list_parts( size: s3_xml::IntValue(part.size as i64), checksum_crc32: match &checksum { Some(ChecksumValue::Crc32(x)) => { - Some(s3_xml::Value(BASE64_STANDARD.encode(&x))) + Some(s3_xml::Value(BASE64_STANDARD.encode(x))) } _ => None, }, checksum_crc32c: match &checksum { Some(ChecksumValue::Crc32c(x)) => { - Some(s3_xml::Value(BASE64_STANDARD.encode(&x))) + Some(s3_xml::Value(BASE64_STANDARD.encode(x))) + } + _ => None, + }, + checksum_crc64nvme: match &checksum { + Some(ChecksumValue::Crc64Nvme(x)) => { + Some(s3_xml::Value(BASE64_STANDARD.encode(x))) } _ => None, }, checksum_sha1: match &checksum { Some(ChecksumValue::Sha1(x)) => { - Some(s3_xml::Value(BASE64_STANDARD.encode(&x))) + Some(s3_xml::Value(BASE64_STANDARD.encode(x))) } _ => None, }, checksum_sha256: match &checksum { Some(ChecksumValue::Sha256(x)) => { - Some(s3_xml::Value(BASE64_STANDARD.encode(&x))) + Some(s3_xml::Value(BASE64_STANDARD.encode(x))) } _ => None, }, @@ -584,7 +598,7 @@ impl ListObjectsQuery { Some("[") => Ok(RangeBegin::IncludingKey { key: String::from_utf8( BASE64_STANDARD - .decode(token[1..].as_bytes()) + .decode(&token.as_bytes()[1..]) .ok_or_bad_request("Invalid continuation token")?, )?, fallback_key: None, @@ -592,7 +606,7 @@ impl ListObjectsQuery { Some("]") => Ok(RangeBegin::AfterKey { key: String::from_utf8( BASE64_STANDARD - .decode(token[1..].as_bytes()) + .decode(&token.as_bytes()[1..]) .ok_or_bad_request("Invalid continuation token")?, )?, }), @@ -711,10 +725,7 @@ impl Accumulator { let object = objects.peek().expect("This iterator can not be empty as it is checked earlier in the code. This is a logic bug, please report it."); // Check if this is a common prefix (requires a passed delimiter and its value in the key) - let pfx = match common_prefix(object, query) { - Some(p) => p, - None => return None, - }; + let pfx = common_prefix(object, query)?; assert!(pfx.starts_with(&query.prefix)); // Try to register this prefix @@ -988,6 +999,7 @@ mod tests { inner: ObjectVersionMetaInner { headers: vec![], checksum: None, + checksum_type: None, }, }, checksum_algorithm: None, @@ -1002,12 +1014,12 @@ mod tests { query.common.prefix = "a/".to_string(); assert_eq!( - common_prefix(objs.get(0).unwrap(), &query.common), + common_prefix(objs.first().unwrap(), &query.common), Some("a/b/") ); query.common.prefix = "a/b/".to_string(); - assert_eq!(common_prefix(objs.get(0).unwrap(), &query.common), None); + assert_eq!(common_prefix(objs.first().unwrap(), &query.common), None); } #[test] @@ -1028,7 +1040,7 @@ mod tests { #[test] fn test_extract_upload() { - let objs = vec![ + let objs = [ Object::new( bucket(), "b".to_string(), diff --git a/src/api/s3/multipart.rs b/src/api/s3/multipart.rs index d6eb26cb..fb246041 100644 --- a/src/api/s3/multipart.rs +++ b/src/api/s3/multipart.rs @@ -1,13 +1,12 @@ use std::collections::HashMap; -use std::convert::{TryFrom, TryInto}; -use std::hash::Hasher; +use std::convert::TryInto; use std::sync::Arc; use base64::prelude::*; -use crc32c::Crc32cHasher as Crc32c; -use crc32fast::Hasher as Crc32; +use crc_fast::{CrcAlgorithm, Digest as CrcDigest}; use futures::prelude::*; -use hyper::{Request, Response}; +use http::StatusCode; +use hyper::{header::HeaderValue, HeaderMap, Request, Response}; use md5::{Digest, Md5}; use sha1::Sha1; use sha2::Sha256; @@ -26,7 +25,7 @@ use garage_api_common::helpers::*; use garage_api_common::signature::checksum::*; use crate::api_server::{ReqBody, ResBody}; -use crate::encryption::EncryptionParams; +use crate::encryption::{has_encryption_header, EncryptionParams, OekDerivationInfo}; use crate::error::*; use crate::put::*; use crate::xml as s3_xml; @@ -44,7 +43,7 @@ pub async fn handle_create_multipart_upload( bucket_name, .. } = &ctx; - let existing_object = garage.object_table.get(&bucket_id, &key).await?; + let existing_object = garage.object_table.get(bucket_id, key).await?; let upload_id = gen_uuid(); let timestamp = next_timestamp(existing_object.as_ref()); @@ -53,13 +52,25 @@ pub async fn handle_create_multipart_upload( let meta = ObjectVersionMetaInner { headers, checksum: None, + checksum_type: None, }; // Determine whether object should be encrypted, and if so the key - let encryption = EncryptionParams::new_from_headers(&garage, req.headers())?; + let encryption = EncryptionParams::new_from_headers( + garage, + req.headers(), + OekDerivationInfo { + bucket_id: *bucket_id, + version_id: upload_id, + object_key: key, + }, + )?; let object_encryption = encryption.encrypt_meta(meta)?; - let checksum_algorithm = request_checksum_algorithm(req.headers())?; + let checksum_algorithm = request_checksum_algorithm_and_type( + req.headers(), + request_checksum_algorithm(req.headers())?, + )?; // Create object in object table let object_version = ObjectVersion { @@ -120,8 +131,7 @@ pub async fn handle_put_part( // Before we stream the body, configure the needed checksums. req_body.add_expected_checksums(expected_checksums.clone()); - // TODO: avoid parsing encryption headers twice... - if !EncryptionParams::new_from_headers(&garage, &req_head.headers)?.is_encrypted() { + if !has_encryption_header(&req_head.headers) { // For non-encrypted objects, we need to compute the md5sum in all cases // (even if content-md5 is not set), because it is used as an etag of the // part, which is in turn used in the etag computation of the whole object @@ -134,10 +144,11 @@ pub async fn handle_put_part( let mut chunker = StreamChunker::new(stream, garage.config.block_size); // Read first chuck, and at the same time try to get object to see if it exists - let ((_, object_version, mut mpu), first_block) = + let ((object, object_version, mut mpu), first_block) = futures::try_join!(get_upload(&ctx, &key, &upload_id), chunker.next(),)?; // Check encryption params + let oek_params = OekDerivationInfo::for_object(&object, &object_version); let (object_encryption, checksum_algorithm) = match object_version.state { ObjectVersionState::Uploading { encryption, @@ -147,7 +158,7 @@ pub async fn handle_put_part( _ => unreachable!(), }; let (encryption, _) = - EncryptionParams::check_decrypt(&garage, &req_head.headers, &object_encryption)?; + EncryptionParams::check_decrypt(garage, &req_head.headers, &object_encryption, oek_params)?; // Check object is valid and part can be accepted let first_block = first_block.ok_or_bad_request("Empty body")?; @@ -214,7 +225,7 @@ pub async fn handle_put_part( MpuPart { version: version_uuid, etag: Some(etag.clone()), - checksum: checksums.extract(checksum_algorithm), + checksum: checksums.extract(checksum_algorithm.map(|(algo, _)| algo)), size: Some(total_size), }, ); @@ -276,12 +287,23 @@ pub async fn handle_complete_multipart_upload( let (req_head, req_body) = req.into_parts(); let expected_checksum = request_checksum_value(&req_head.headers)?; + let req_checksum_algorithm = request_checksum_algorithm_and_type( + &req_head.headers, + expected_checksum.map(|x| x.algorithm()), + )?; + debug!( + "CompleteMultipartUpload expected checksum: {:?}, request checksum type: {:?}", + expected_checksum, req_checksum_algorithm + ); let body = req_body.collect().await?; let body_xml = roxmltree::Document::parse(std::str::from_utf8(&body)?)?; - let body_list_of_parts = parse_complete_multipart_upload_body(&body_xml) - .ok_or_bad_request("Invalid CompleteMultipartUpload XML")?; + let body_list_of_parts = + parse_complete_multipart_upload_body(&body_xml).ok_or_bad_request(format!( + "Invalid CompleteMultipartUpload XML:\n{}", + String::from_utf8_lossy(&body) + ))?; debug!( "CompleteMultipartUpload list of parts: {:?}", body_list_of_parts @@ -297,6 +319,7 @@ pub async fn handle_complete_multipart_upload( return Err(Error::bad_request("No data was uploaded")); } + let oek_params = OekDerivationInfo::for_object(&object, &object_version); let (object_encryption, checksum_algorithm) = match object_version.state { ObjectVersionState::Uploading { encryption, @@ -305,6 +328,17 @@ pub async fn handle_complete_multipart_upload( } => (encryption, checksum_algorithm), _ => unreachable!(), }; + debug!( + "CompleteMultipartUpload object checksum_algorithm: {:?}", + checksum_algorithm + ); + if req_checksum_algorithm.is_some() && req_checksum_algorithm != checksum_algorithm { + return Err(Error::InvalidDigest(format!( + "checksum algorithm {:?} does not correspond to algorithm specified in CreateMultipartUpload {:?}", + req_checksum_algorithm, + checksum_algorithm + ))); + } // Check that part numbers are an increasing sequence. // (it doesn't need to start at 1 nor to be a continuous sequence, @@ -330,8 +364,7 @@ pub async fn handle_complete_multipart_upload( for req_part in body_list_of_parts.iter() { match have_parts.get(&req_part.part_number) { Some(part) if part.etag.as_ref() == Some(&req_part.etag) && part.size.is_some() => { - // alternative version: if req_part.checksum.is_some() && part.checksum != req_part.checksum { - if part.checksum != req_part.checksum { + if req_part.checksum.is_some() && part.checksum != req_part.checksum { return Err(Error::InvalidDigest(format!( "Invalid checksum for part {}: in request = {:?}, uploaded part = {:?}", req_part.part_number, req_part.checksum, part.checksum @@ -390,7 +423,11 @@ pub async fn handle_complete_multipart_upload( // https://teppen.io/2018/06/23/aws_s3_etags/ let mut checksummer = MultipartChecksummer::init(checksum_algorithm); for part in parts.iter() { - checksummer.update(part.etag.as_ref().unwrap(), part.checksum)?; + checksummer.update( + part.etag.as_ref().unwrap(), + part.checksum, + part.size.unwrap(), + )?; } let (checksum_md5, checksum_extra) = checksummer.finalize(); @@ -417,11 +454,16 @@ pub async fn handle_complete_multipart_upload( let object_encryption = match checksum_algorithm { None => object_encryption, Some(_) => { - let (encryption, meta) = - EncryptionParams::check_decrypt(&garage, &req_head.headers, &object_encryption)?; + let (encryption, meta) = EncryptionParams::check_decrypt( + garage, + &req_head.headers, + &object_encryption, + oek_params, + )?; let new_meta = ObjectVersionMetaInner { headers: meta.into_owned().headers, checksum: checksum_extra, + checksum_type: checksum_algorithm.map(|(_, ty)| ty), }; encryption.encrypt_meta(new_meta)? } @@ -457,21 +499,30 @@ pub async fn handle_complete_multipart_upload( key: s3_xml::Value(key), etag: s3_xml::Value(format!("\"{}\"", etag)), checksum_crc32: match &checksum_extra { - Some(ChecksumValue::Crc32(x)) => Some(s3_xml::Value(BASE64_STANDARD.encode(&x))), + Some(ChecksumValue::Crc32(x)) => Some(s3_xml::Value(BASE64_STANDARD.encode(x))), _ => None, }, checksum_crc32c: match &checksum_extra { - Some(ChecksumValue::Crc32c(x)) => Some(s3_xml::Value(BASE64_STANDARD.encode(&x))), + Some(ChecksumValue::Crc32c(x)) => Some(s3_xml::Value(BASE64_STANDARD.encode(x))), + _ => None, + }, + checksum_crc64nvme: match &checksum_extra { + Some(ChecksumValue::Crc64Nvme(x)) => Some(s3_xml::Value(BASE64_STANDARD.encode(x))), _ => None, }, checksum_sha1: match &checksum_extra { - Some(ChecksumValue::Sha1(x)) => Some(s3_xml::Value(BASE64_STANDARD.encode(&x))), + Some(ChecksumValue::Sha1(x)) => Some(s3_xml::Value(BASE64_STANDARD.encode(x))), _ => None, }, checksum_sha256: match &checksum_extra { - Some(ChecksumValue::Sha256(x)) => Some(s3_xml::Value(BASE64_STANDARD.encode(&x))), + Some(ChecksumValue::Sha256(x)) => Some(s3_xml::Value(BASE64_STANDARD.encode(x))), _ => None, }, + checksum_type: match checksum_algorithm { + Some((_, ChecksumType::Composite)) => Some(s3_xml::Value(COMPOSITE.into())), + Some((_, ChecksumType::FullObject)) => Some(s3_xml::Value(FULL_OBJECT.into())), + None => None, + }, }; let xml = s3_xml::to_xml_with_header(&result)?; @@ -497,7 +548,9 @@ pub async fn handle_abort_multipart_upload( let final_object = Object::new(*bucket_id, key.to_string(), vec![object_version]); garage.object_table.insert(&final_object).await?; - Ok(Response::new(empty_body())) + Ok(Response::builder() + .status(StatusCode::NO_CONTENT) + .body(empty_body())?) } // ======== helpers ============ @@ -549,6 +602,32 @@ struct CompleteMultipartUploadPart { checksum: Option, } +macro_rules! extract_checksum_from { + ($node:ident { $($name:expr => $variant:ident),* $(,)? }) => { + if false { None } + $( + else if let Some(node) = $node.children().find(|e| e.has_tag_name($name)) { + match node.last_child().map(|x| x.text()) { + // Child is text but empty post-trim, ignore it. + Some(Some(text)) if text.trim().is_empty() => None, + + // Child is non-empty text, parse it. + Some(Some(text)) => Some(ChecksumValue::$variant( + BASE64_STANDARD.decode(text).ok()?[..].try_into().ok()? + )), + + // Child is not text, reject it. + Some(None) => return None, + + // No child, ignore it. + None => None, + } + } + )* + else { None } + } +} + fn parse_complete_multipart_upload_body( xml: &roxmltree::Document, ) -> Option> { @@ -572,37 +651,15 @@ fn parse_complete_multipart_upload_body( .children() .find(|e| e.has_tag_name("PartNumber"))? .text()?; - let checksum = if let Some(crc32) = - item.children().find(|e| e.has_tag_name("ChecksumCRC32")) - { - Some(ChecksumValue::Crc32( - BASE64_STANDARD.decode(crc32.text()?).ok()?[..] - .try_into() - .ok()?, - )) - } else if let Some(crc32c) = item.children().find(|e| e.has_tag_name("ChecksumCRC32C")) - { - Some(ChecksumValue::Crc32c( - BASE64_STANDARD.decode(crc32c.text()?).ok()?[..] - .try_into() - .ok()?, - )) - } else if let Some(sha1) = item.children().find(|e| e.has_tag_name("ChecksumSHA1")) { - Some(ChecksumValue::Sha1( - BASE64_STANDARD.decode(sha1.text()?).ok()?[..] - .try_into() - .ok()?, - )) - } else if let Some(sha256) = item.children().find(|e| e.has_tag_name("ChecksumSHA256")) - { - Some(ChecksumValue::Sha256( - BASE64_STANDARD.decode(sha256.text()?).ok()?[..] - .try_into() - .ok()?, - )) - } else { - None - }; + + let checksum = extract_checksum_from!(item { + "ChecksumCRC32" => Crc32, + "ChecksumCRC32C" => Crc32c, + "ChecksumCRC64NVME" => Crc64Nvme, + "ChecksumSHA1" => Sha1, + "ChecksumSHA256" => Sha256, + }); + parts.push(CompleteMultipartUploadPart { etag: etag.trim_matches('"').to_string(), part_number: part_number.parse().ok()?, @@ -618,36 +675,52 @@ fn parse_complete_multipart_upload_body( // ====== checksummer ==== +pub fn request_checksum_algorithm_and_type( + headers: &HeaderMap, + algo: Option, +) -> Result, Error> { + match (headers.get(X_AMZ_CHECKSUM_TYPE), algo) { + (None, None) => Ok(None), + (None, Some(algo)) => { + let ty = match algo { + ChecksumAlgorithm::Crc64Nvme => ChecksumType::FullObject, + _ => ChecksumType::Composite, + }; + Ok(Some((algo, ty))) + } + (Some(_), None) => Err(Error::bad_request( + "Cannot specify x-amz-checksum-type when no checksum algorithm is in use.", + )), + (Some(x), Some(algo)) => { + let checksum_type = match x.as_bytes() { + x if x == COMPOSITE.as_bytes() => ChecksumType::Composite, + x if x == FULL_OBJECT.as_bytes() => ChecksumType::FullObject, + _ => return Err(Error::bad_request("Invalid x-amz-checksum-type value")), + }; + match (checksum_type, algo) { + (ChecksumType::Composite, ChecksumAlgorithm::Crc64Nvme) + | (ChecksumType::FullObject, ChecksumAlgorithm::Sha1) + | (ChecksumType::FullObject, ChecksumAlgorithm::Sha256) => Err(Error::bad_request(format!( + "checksum type {:?} is not supported for algorithm {:?}", + checksum_type, algo + ))), + (ty, algo) => Ok(Some((algo, ty))), + } + } + } +} + #[derive(Default)] pub(crate) struct MultipartChecksummer { pub md5: Md5, pub extra: Option, } -pub(crate) enum MultipartExtraChecksummer { - Crc32(Crc32), - Crc32c(Crc32c), - Sha1(Sha1), - Sha256(Sha256), -} - impl MultipartChecksummer { - pub(crate) fn init(algo: Option) -> Self { + pub(crate) fn init(algo: Option<(ChecksumAlgorithm, ChecksumType)>) -> Self { Self { md5: Md5::new(), - extra: match algo { - None => None, - Some(ChecksumAlgorithm::Crc32) => { - Some(MultipartExtraChecksummer::Crc32(Crc32::new())) - } - Some(ChecksumAlgorithm::Crc32c) => { - Some(MultipartExtraChecksummer::Crc32c(Crc32c::default())) - } - Some(ChecksumAlgorithm::Sha1) => Some(MultipartExtraChecksummer::Sha1(Sha1::new())), - Some(ChecksumAlgorithm::Sha256) => { - Some(MultipartExtraChecksummer::Sha256(Sha256::new())) - } - }, + extra: algo.map(|(algo, cktype)| MultipartExtraChecksummer::init(algo, cktype)), } } @@ -655,59 +728,130 @@ impl MultipartChecksummer { &mut self, etag: &str, checksum: Option, + part_len: u64, ) -> Result<(), Error> { self.md5 - .update(&hex::decode(&etag).ok_or_message("invalid etag hex")?); - match (&mut self.extra, checksum) { - (None, _) => (), - ( - Some(MultipartExtraChecksummer::Crc32(ref mut crc32)), - Some(ChecksumValue::Crc32(x)), - ) => { - crc32.update(&x); - } - ( - Some(MultipartExtraChecksummer::Crc32c(ref mut crc32c)), - Some(ChecksumValue::Crc32c(x)), - ) => { - crc32c.write(&x); - } - (Some(MultipartExtraChecksummer::Sha1(ref mut sha1)), Some(ChecksumValue::Sha1(x))) => { - sha1.update(&x); - } - ( - Some(MultipartExtraChecksummer::Sha256(ref mut sha256)), - Some(ChecksumValue::Sha256(x)), - ) => { - sha256.update(&x); - } - (Some(_), b) => { - return Err(Error::internal_error(format!( - "part checksum was not computed correctly, got: {:?}", - b - ))) - } + .update(&hex::decode(etag).ok_or_message("invalid etag hex")?); + if let Some(extra) = &mut self.extra { + extra.update(checksum, part_len)?; } Ok(()) } pub(crate) fn finalize(self) -> (Md5Checksum, Option) { let md5 = self.md5.finalize()[..].try_into().unwrap(); - let extra = match self.extra { - None => None, - Some(MultipartExtraChecksummer::Crc32(crc32)) => { - Some(ChecksumValue::Crc32(u32::to_be_bytes(crc32.finalize()))) - } - Some(MultipartExtraChecksummer::Crc32c(crc32c)) => Some(ChecksumValue::Crc32c( - u32::to_be_bytes(u32::try_from(crc32c.finish()).unwrap()), - )), - Some(MultipartExtraChecksummer::Sha1(sha1)) => { - Some(ChecksumValue::Sha1(sha1.finalize()[..].try_into().unwrap())) - } - Some(MultipartExtraChecksummer::Sha256(sha256)) => Some(ChecksumValue::Sha256( - sha256.finalize()[..].try_into().unwrap(), - )), - }; + let extra = self.extra.map(|c| c.finalize()); (md5, extra) } } + +pub(crate) enum MultipartExtraChecksummer { + FullObjectCrc(CrcAlgorithm, Option), + CompositeCrc(ChecksumAlgorithm, CrcDigest), + CompositeSha1(Sha1), + CompositeSha256(Sha256), +} + +impl MultipartExtraChecksummer { + fn init(algo: ChecksumAlgorithm, cktype: ChecksumType) -> Self { + match (algo, cktype) { + (algo, ChecksumType::FullObject) => { + let crc_type = match algo { + ChecksumAlgorithm::Crc32 => CrcAlgorithm::Crc32IsoHdlc, + ChecksumAlgorithm::Crc32c => CrcAlgorithm::Crc32Iscsi, + ChecksumAlgorithm::Crc64Nvme => CrcAlgorithm::Crc64Nvme, + _ => unreachable!(), + }; + Self::FullObjectCrc(crc_type, None) + } + (ChecksumAlgorithm::Crc32, ChecksumType::Composite) => { + Self::CompositeCrc(ChecksumAlgorithm::Crc32, new_crc32()) + } + (ChecksumAlgorithm::Crc32c, ChecksumType::Composite) => { + Self::CompositeCrc(ChecksumAlgorithm::Crc32c, new_crc32c()) + } + (ChecksumAlgorithm::Sha1, ChecksumType::Composite) => Self::CompositeSha1(Sha1::new()), + (ChecksumAlgorithm::Sha256, ChecksumType::Composite) => { + Self::CompositeSha256(Sha256::new()) + } + _ => unreachable!(), + } + } + + fn update(&mut self, checksum: Option, part_len: u64) -> Result<(), Error> { + match (self, checksum) { + (Self::FullObjectCrc(crc_algo, crc_value), Some(ck)) => { + let ck_u64 = match ck { + ChecksumValue::Crc32(x) => u32::from_be_bytes(x) as u64, + ChecksumValue::Crc32c(x) => u32::from_be_bytes(x) as u64, + ChecksumValue::Crc64Nvme(x) => u64::from_be_bytes(x), + _ => { + return Err(Error::internal_error(format!( + "part checksum was not computed correctly, got: {:?}", + ck + ))) + } + }; + *crc_value = match *crc_value { + None => Some(ck_u64), + Some(prev) => Some(crc_fast::checksum_combine( + *crc_algo, prev, ck_u64, part_len, + )), + }; + } + (Self::CompositeCrc(_, digest), Some(ck)) => match ck { + ChecksumValue::Crc32(x) => digest.update(&x), + ChecksumValue::Crc32c(x) => digest.update(&x), + ChecksumValue::Crc64Nvme(x) => digest.update(&x), + _ => { + return Err(Error::internal_error(format!( + "part checksum was not computed correctly, got: {:?}", + ck + ))) + } + }, + (Self::CompositeSha1(sha1), Some(ChecksumValue::Sha1(x))) => { + sha1.update(x); + } + (Self::CompositeSha256(sha256), Some(ChecksumValue::Sha256(x))) => { + sha256.update(x); + } + _ => { + return Err(Error::internal_error(format!( + "part checksum was not computed correctly, got: {:?}", + checksum + ))) + } + } + Ok(()) + } + fn finalize(self) -> ChecksumValue { + match self { + Self::FullObjectCrc(algo, value) => match (algo, value) { + (CrcAlgorithm::Crc32IsoHdlc, Some(v)) => { + ChecksumValue::Crc32(u32::to_be_bytes(v as u32)) + } + (CrcAlgorithm::Crc32Iscsi, Some(v)) => { + ChecksumValue::Crc32c(u32::to_be_bytes(v as u32)) + } + (CrcAlgorithm::Crc64Nvme, Some(v)) => ChecksumValue::Crc64Nvme(u64::to_be_bytes(v)), + _ => unreachable!(), + }, + Self::CompositeCrc(algo, crc) => match algo { + ChecksumAlgorithm::Crc32 => { + ChecksumValue::Crc32(u32::to_be_bytes(crc.finalize() as u32)) + } + ChecksumAlgorithm::Crc32c => { + ChecksumValue::Crc32c(u32::to_be_bytes(crc.finalize() as u32)) + } + _ => unreachable!(), + }, + Self::CompositeSha1(sha1) => { + ChecksumValue::Sha1(sha1.finalize()[..].try_into().unwrap()) + } + Self::CompositeSha256(sha256) => { + ChecksumValue::Sha256(sha256.finalize()[..].try_into().unwrap()) + } + } + } +} diff --git a/src/api/s3/post_object.rs b/src/api/s3/post_object.rs index 09be7e7c..e89e9ea9 100644 --- a/src/api/s3/post_object.rs +++ b/src/api/s3/post_object.rs @@ -15,6 +15,7 @@ use serde::Deserialize; use garage_model::garage::Garage; use garage_model::s3::object_table::*; +use garage_util::data::gen_uuid; use garage_api_common::cors::*; use garage_api_common::helpers::*; @@ -22,7 +23,7 @@ use garage_api_common::signature::checksum::*; use garage_api_common::signature::payload::{verify_v4, Authorization}; use crate::api_server::ResBody; -use crate::encryption::EncryptionParams; +use crate::encryption::{EncryptionParams, OekDerivationInfo}; use crate::error::*; use crate::put::{extract_metadata_headers, save_stream, ChecksumMode}; use crate::xml as s3_xml; @@ -103,22 +104,18 @@ pub async fn handle_post_object( key.to_owned() }; - let api_key = verify_v4(&garage, "s3", &authorization, policy.as_bytes()).await?; + let api_key = verify_v4(&garage, "s3", &authorization, policy.as_bytes())?; - let bucket_id = garage + let bucket = garage .bucket_helper() - .resolve_bucket(&bucket_name, &api_key) - .await + .resolve_bucket_fast(&bucket_name, &api_key) .map_err(pass_helper_error)?; + let bucket_id = bucket.id; if !api_key.allow_write(&bucket_id) { return Err(Error::forbidden("Operation is not allowed for this key.")); } - let bucket = garage - .bucket_helper() - .get_existing_bucket(bucket_id) - .await?; let bucket_params = bucket.state.into_option().unwrap(); let matching_cors_rule = find_matching_cors_rule( &bucket_params, @@ -247,12 +244,23 @@ pub async fn handle_post_object( .transpose()?, }; + let version_uuid = gen_uuid(); + let meta = ObjectVersionMetaInner { headers, checksum: expected_checksums.extra, + checksum_type: expected_checksums.extra.map(|_| ChecksumType::FullObject), }; - let encryption = EncryptionParams::new_from_headers(&garage, ¶ms)?; + let encryption = EncryptionParams::new_from_headers( + &garage, + ¶ms, + OekDerivationInfo { + bucket_id, + version_id: version_uuid, + object_key: &key, + }, + )?; let stream = file_field.map(|r| r.map_err(Into::into)); let ctx = ReqCtx { @@ -265,11 +273,12 @@ pub async fn handle_post_object( let res = save_stream( &ctx, + version_uuid, meta, encryption, StreamLimiter::new(stream, conditions.content_length), &key, - ChecksumMode::Verify(&expected_checksums), + ChecksumMode::Verify(expected_checksums), ) .await?; @@ -496,15 +505,15 @@ mod tests { let mut conditions = policy_2.into_conditions().unwrap(); assert_eq!( - conditions.params.remove(&"acl".to_string()), + conditions.params.remove("acl"), Some(vec![Operation::Equal("public-read".into())]) ); assert_eq!( - conditions.params.remove(&"bucket".to_string()), + conditions.params.remove("bucket"), Some(vec![Operation::Equal("johnsmith".into())]) ); assert_eq!( - conditions.params.remove(&"key".to_string()), + conditions.params.remove("key"), Some(vec![Operation::StartsWith("user/eric/".into())]) ); assert!(conditions.params.is_empty()); @@ -527,7 +536,7 @@ mod tests { let mut conditions = policy_2.into_conditions().unwrap(); assert_eq!( - conditions.params.remove(&"acl".to_string()), + conditions.params.remove("acl"), Some(vec![Operation::Equal("public-read".into())]) ); assert_eq!( @@ -535,9 +544,7 @@ mod tests { vec![Operation::StartsWith("image/".into())] ); assert_eq!( - conditions - .params - .remove(&"success_action_redirect".to_string()), + conditions.params.remove("success_action_redirect"), Some(vec![Operation::StartsWith("".into())]) ); assert!(conditions.params.is_empty()); diff --git a/src/api/s3/put.rs b/src/api/s3/put.rs index b915f2ec..27f85697 100644 --- a/src/api/s3/put.rs +++ b/src/api/s3/put.rs @@ -35,7 +35,7 @@ use garage_api_common::signature::body::StreamingChecksumReceiver; use garage_api_common::signature::checksum::*; use crate::api_server::{ReqBody, ResBody}; -use crate::encryption::EncryptionParams; +use crate::encryption::{EncryptionParams, OekDerivationInfo}; use crate::error::*; use crate::website::X_AMZ_WEBSITE_REDIRECT_LOCATION; @@ -46,8 +46,8 @@ pub(crate) struct SaveStreamResult { pub(crate) etag: String, } -pub(crate) enum ChecksumMode<'a> { - Verify(&'a ExpectedChecksums), +pub(crate) enum ChecksumMode { + Verify(ExpectedChecksums), VerifyFrom { checksummer: StreamingChecksumReceiver, trailer_algo: Option, @@ -60,6 +60,10 @@ pub async fn handle_put( req: Request, key: &String, ) -> Result, Error> { + // Generate version uuid now, because it is necessary to compute SSE-C + // encryption parameters + let version_uuid = gen_uuid(); + // Retrieve interesting headers from request let headers = extract_metadata_headers(req.headers())?; debug!("Object headers: {:?}", headers); @@ -77,10 +81,19 @@ pub async fn handle_put( let meta = ObjectVersionMetaInner { headers, checksum: expected_checksums.extra, + checksum_type: expected_checksums.extra.map(|_| ChecksumType::FullObject), }; // Determine whether object should be encrypted, and if so the key - let encryption = EncryptionParams::new_from_headers(&ctx.garage, req.headers())?; + let encryption = EncryptionParams::new_from_headers( + &ctx.garage, + req.headers(), + OekDerivationInfo { + bucket_id: ctx.bucket_id, + version_id: version_uuid, + object_key: key, + }, + )?; // The request body is a special ReqBody object (see garage_api_common::signature::body) // which supports calculating checksums while streaming the data. @@ -98,6 +111,7 @@ pub async fn handle_put( let res = save_stream( &ctx, + version_uuid, meta, encryption, stream, @@ -119,11 +133,12 @@ pub async fn handle_put( pub(crate) async fn save_stream> + Unpin>( ctx: &ReqCtx, + version_uuid: Uuid, mut meta: ObjectVersionMetaInner, encryption: EncryptionParams, body: S, key: &String, - checksum_mode: ChecksumMode<'_>, + checksum_mode: ChecksumMode, ) -> Result { let ReqCtx { garage, bucket_id, .. @@ -138,13 +153,12 @@ pub(crate) async fn save_stream> + Unpin>( let first_block = first_block_opt.unwrap_or_default(); // Generate identity of new version - let version_uuid = gen_uuid(); let version_timestamp = next_timestamp(existing_object.as_ref()); let mut checksummer = match &checksum_mode { ChecksumMode::Verify(expected) => Checksummer::init(expected, !encryption.is_encrypted()), ChecksumMode::Calculate(algo) => { - Checksummer::init(&Default::default(), !encryption.is_encrypted()).add(*algo) + Checksummer::init(&Default::default(), !encryption.is_encrypted()).add_algorithm(*algo) } ChecksumMode::VerifyFrom { .. } => { // Checksums are calculated by the garage_api_common::signature module @@ -540,6 +554,7 @@ pub(crate) async fn read_and_put_blocks> + Ok((total_size, checksums, first_block_hash)) } +#[expect(clippy::too_many_arguments)] async fn put_block_and_meta( ctx: &ReqCtx, version: &Version, @@ -654,7 +669,7 @@ pub(crate) fn extract_metadata_headers( let mut ret = Vec::new(); // Preserve standard headers - let standard_header = vec![ + let standard_header = [ hyper::header::CONTENT_TYPE, hyper::header::CACHE_CONTROL, hyper::header::CONTENT_DISPOSITION, diff --git a/src/api/s3/router.rs b/src/api/s3/router.rs index e3f58490..30af6895 100644 --- a/src/api/s3/router.rs +++ b/src/api/s3/router.rs @@ -355,7 +355,7 @@ impl Endpoint { if let Some(x_id) = query.x_id.take() { if x_id != res.name() { // I think AWS ignores the x-id parameter. - // Let's make this at least be a warnin to help debugging. + // Let's make this at least be a warning to help debugging. warn!( "x-id ({}) does not match parsed endpoint ({})", x_id, @@ -949,7 +949,7 @@ mod tests { GET "/?uploads&delimiter=/&prefix=photos/2006/" => ListMultipartUploads GET "/?uploads&delimiter=D&encoding-type=EncodingType&key-marker=KeyMarker&max-uploads=1&prefix=Prefix&upload-id-marker=UploadIdMarker" => ListMultipartUploads GET "/" => ListObjects - GET "/?prefix=N&marker=Ned&max-keys=40" => ListObjects + GET "/?prefix=N&marker=Need&max-keys=40" => ListObjects GET "/?delimiter=/" => ListObjects GET "/?prefix=photos/2006/&delimiter=/" => ListObjects @@ -1011,7 +1011,7 @@ mod tests { // no bucket, won't work with the rest of the test suite assert!(matches!( parse("GET", "/", None, None).0, - Endpoint::ListBuckets { .. } + Endpoint::ListBuckets )); assert!(matches!( parse("GET", "/", None, None).0.authorization_type(), diff --git a/src/api/s3/website.rs b/src/api/s3/website.rs index 03cc01d8..c984ce2a 100644 --- a/src/api/s3/website.rs +++ b/src/api/s3/website.rs @@ -3,7 +3,7 @@ use quick_xml::de::from_reader; use hyper::{header::HeaderName, Request, Response, StatusCode}; use serde::{Deserialize, Serialize}; -use garage_model::bucket_table::*; +use garage_model::bucket_table::{self, *}; use garage_api_common::helpers::*; @@ -26,7 +26,28 @@ pub async fn handle_get_website(ctx: ReqCtx) -> Result, Error> suffix: Value(website.index_document.to_string()), }), redirect_all_requests_to: None, - routing_rules: None, + routing_rules: RoutingRules { + rules: website + .routing_rules + .clone() + .into_iter() + .map(|rule| RoutingRule { + condition: rule.condition.map(|cond| Condition { + http_error_code: cond.http_error_code.map(|c| IntValue(c as i64)), + prefix: cond.prefix.map(Value), + }), + redirect: Redirect { + hostname: rule.redirect.hostname.map(Value), + http_redirect_code: Some(IntValue( + rule.redirect.http_redirect_code as i64, + )), + protocol: rule.redirect.protocol.map(Value), + replace_full: rule.redirect.replace_key.map(Value), + replace_prefix: rule.redirect.replace_key_prefix.map(Value), + }, + }) + .collect(), + }, }; let xml = to_xml_with_header(&wc)?; Ok(Response::builder() @@ -97,18 +118,28 @@ pub struct WebsiteConfiguration { pub index_document: Option, #[serde(rename = "RedirectAllRequestsTo")] pub redirect_all_requests_to: Option, - #[serde(rename = "RoutingRules")] - pub routing_rules: Option>, + #[serde( + rename = "RoutingRules", + default, + skip_serializing_if = "RoutingRules::is_empty" + )] + pub routing_rules: RoutingRules, +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Default)] +pub struct RoutingRules { + #[serde(rename = "RoutingRule")] + pub rules: Vec, +} + +impl RoutingRules { + fn is_empty(&self) -> bool { + self.rules.is_empty() + } } #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] pub struct RoutingRule { - #[serde(rename = "RoutingRule")] - pub inner: RoutingRuleInner, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] -pub struct RoutingRuleInner { #[serde(rename = "Condition")] pub condition: Option, #[serde(rename = "Redirect")] @@ -162,7 +193,7 @@ impl WebsiteConfiguration { if self.redirect_all_requests_to.is_some() && (self.error_document.is_some() || self.index_document.is_some() - || self.routing_rules.is_some()) + || !self.routing_rules.is_empty()) { return Err(Error::bad_request( "Bad XML: can't have RedirectAllRequestsTo and other fields", @@ -177,10 +208,15 @@ impl WebsiteConfiguration { if let Some(ref rart) = self.redirect_all_requests_to { rart.validate()?; } - if let Some(ref rrs) = self.routing_rules { - for rr in rrs { - rr.inner.validate()?; - } + for rr in &self.routing_rules.rules { + rr.validate()?; + } + if self.routing_rules.rules.len() > 1000 { + // we will do linear scans, best to avoid overly long configuration. The + // limit was chosen arbitrarily + return Err(Error::bad_request( + "Bad XML: RoutingRules can't have more than 1000 child elements", + )); } Ok(()) @@ -189,11 +225,7 @@ impl WebsiteConfiguration { pub fn into_garage_website_config(self) -> Result { if self.redirect_all_requests_to.is_some() { Err(Error::NotImplemented( - "S3 website redirects are not currently implemented in Garage.".into(), - )) - } else if self.routing_rules.map(|x| !x.is_empty()).unwrap_or(false) { - Err(Error::NotImplemented( - "S3 routing rules are not currently implemented in Garage.".into(), + "RedirectAllRequestsTo is not currently implemented in Garage, however its effect can be emulated using a single unconditional RoutingRule.".into(), )) } else { Ok(WebsiteConfig { @@ -202,6 +234,36 @@ impl WebsiteConfiguration { .map(|x| x.suffix.0) .unwrap_or_else(|| "index.html".to_string()), error_document: self.error_document.map(|x| x.key.0), + redirect_all: None, + routing_rules: self + .routing_rules + .rules + .into_iter() + .map(|rule| { + bucket_table::RoutingRule { + condition: rule.condition.map(|condition| { + bucket_table::RedirectCondition { + http_error_code: condition.http_error_code.map(|c| c.0 as u16), + prefix: condition.prefix.map(|p| p.0), + } + }), + redirect: bucket_table::Redirect { + hostname: rule.redirect.hostname.map(|h| h.0), + protocol: rule.redirect.protocol.map(|p| p.0), + // aws default to 301, which i find punitive in case of + // misconfiguration (can be permanently cached on the + // user agent) + http_redirect_code: rule + .redirect + .http_redirect_code + .map(|c| c.0 as u16) + .unwrap_or(302), + replace_key_prefix: rule.redirect.replace_prefix.map(|k| k.0), + replace_key: rule.redirect.replace_full.map(|k| k.0), + }, + } + }) + .collect(), }) } } @@ -242,37 +304,69 @@ impl Target { } } -impl RoutingRuleInner { +impl RoutingRule { pub fn validate(&self) -> Result<(), Error> { - let has_prefix = self - .condition - .as_ref() - .and_then(|c| c.prefix.as_ref()) - .is_some(); - self.redirect.validate(has_prefix) + if let Some(condition) = &self.condition { + condition.validate()?; + } + self.redirect.validate() + } +} + +impl Condition { + pub fn validate(&self) -> Result { + if let Some(ref error_code) = self.http_error_code { + // TODO do other error codes make sense? Aws only allows 4xx and 5xx + if error_code.0 != 404 { + return Err(Error::bad_request( + "Bad XML: HttpErrorCodeReturnedEquals must be 404 or absent", + )); + } + } + Ok(self.prefix.is_some()) } } impl Redirect { - pub fn validate(&self, has_prefix: bool) -> Result<(), Error> { - if self.replace_prefix.is_some() { - if self.replace_full.is_some() { - return Err(Error::bad_request( - "Bad XML: both ReplaceKeyPrefixWith and ReplaceKeyWith are set", - )); - } - if !has_prefix { - return Err(Error::bad_request( - "Bad XML: ReplaceKeyPrefixWith is set, but KeyPrefixEquals isn't", - )); - } + pub fn validate(&self) -> Result<(), Error> { + if self.replace_prefix.is_some() && self.replace_full.is_some() { + return Err(Error::bad_request( + "Bad XML: both ReplaceKeyPrefixWith and ReplaceKeyWith are set", + )); } if let Some(ref protocol) = self.protocol { if protocol.0 != "http" && protocol.0 != "https" { return Err(Error::bad_request("Bad XML: invalid protocol")); } } - // TODO there are probably more invalid cases, but which ones? + if let Some(ref http_redirect_code) = self.http_redirect_code { + match http_redirect_code.0 { + // aws allows all 3xx except 300, but some are non-sensical (not modified, + // use proxy...) + 301 | 302 | 303 | 307 | 308 => { + if self.hostname.is_none() && self.protocol.is_some() { + return Err(Error::bad_request( + "Bad XML: HostName must be set if Protocol is set", + )); + } + } + // aws doesn't allow these codes, but netlify does, and it seems like a + // cool feature (change the page seen without changing the url shown by the + // user agent) + 200 | 404 => { + if self.hostname.is_some() || self.protocol.is_some() { + // hostname would mean different bucket, protocol doesn't make + // sense + return Err(Error::bad_request( + "Bad XML: an HttpRedirectCode of 200 is not acceptable alongside HostName or Protocol", + )); + } + } + _ => { + return Err(Error::bad_request("Bad XML: invalid HttpRedirectCode")); + } + } + } Ok(()) } } @@ -311,6 +405,15 @@ mod tests { fullkey + + + + + + 404 + missing + + "#; let conf: WebsiteConfiguration = from_str(message).unwrap(); @@ -326,21 +429,36 @@ mod tests { hostname: Value("garage.tld".to_owned()), protocol: Some(Value("https".to_owned())), }), - routing_rules: Some(vec![RoutingRule { - inner: RoutingRuleInner { - condition: Some(Condition { - http_error_code: Some(IntValue(404)), - prefix: Some(Value("prefix1".to_owned())), - }), - redirect: Redirect { - hostname: Some(Value("gara.ge".to_owned())), - protocol: Some(Value("http".to_owned())), - http_redirect_code: Some(IntValue(303)), - replace_prefix: Some(Value("prefix2".to_owned())), - replace_full: Some(Value("fullkey".to_owned())), + routing_rules: RoutingRules { + rules: vec![ + RoutingRule { + condition: Some(Condition { + http_error_code: Some(IntValue(404)), + prefix: Some(Value("prefix1".to_owned())), + }), + redirect: Redirect { + hostname: Some(Value("gara.ge".to_owned())), + protocol: Some(Value("http".to_owned())), + http_redirect_code: Some(IntValue(303)), + replace_prefix: Some(Value("prefix2".to_owned())), + replace_full: Some(Value("fullkey".to_owned())), + }, }, - }, - }]), + RoutingRule { + condition: Some(Condition { + http_error_code: None, + prefix: Some(Value("".to_owned())), + }), + redirect: Redirect { + hostname: None, + protocol: None, + http_redirect_code: Some(IntValue(404)), + replace_prefix: None, + replace_full: Some(Value("missing".to_owned())), + }, + }, + ], + }, }; assert_eq! { ref_value, diff --git a/src/api/s3/xml.rs b/src/api/s3/xml.rs index fdb36318..bfe95fff 100644 --- a/src/api/s3/xml.rs +++ b/src/api/s3/xml.rs @@ -139,10 +139,14 @@ pub struct CompleteMultipartUploadResult { pub checksum_crc32: Option, #[serde(rename = "ChecksumCRC32C")] pub checksum_crc32c: Option, + #[serde(rename = "ChecksumCR64NVME")] + pub checksum_crc64nvme: Option, #[serde(rename = "ChecksumSHA1")] pub checksum_sha1: Option, #[serde(rename = "ChecksumSHA256")] pub checksum_sha256: Option, + #[serde(rename = "ChecksumType")] + pub checksum_type: Option, } #[derive(Debug, Serialize, PartialEq, Eq)] @@ -213,6 +217,8 @@ pub struct PartItem { pub checksum_crc32: Option, #[serde(rename = "ChecksumCRC32C")] pub checksum_crc32c: Option, + #[serde(rename = "ChecksumCRC64NVME")] + pub checksum_crc64nvme: Option, #[serde(rename = "ChecksumSHA1")] pub checksum_sha1: Option, #[serde(rename = "ChecksumSHA256")] @@ -587,6 +593,7 @@ mod tests { #[test] fn complete_multipart_upload_result() -> Result<(), ApiError> { + use garage_api_common::signature::checksum::COMPOSITE; let result = CompleteMultipartUploadResult { xmlns: (), location: Some(Value("https://garage.tld/mybucket/a/plop".to_string())), @@ -595,8 +602,10 @@ mod tests { etag: Value("\"3858f62230ac3c915f300c664312c11f-9\"".to_string()), checksum_crc32: None, checksum_crc32c: None, + checksum_crc64nvme: None, checksum_sha1: Some(Value("ZJAnHyG8PeKz9tI8UTcHrJos39A=".into())), checksum_sha256: None, + checksum_type: Some(Value(COMPOSITE.into())), }; assert_eq!( to_xml_with_header(&result)?, @@ -607,6 +616,7 @@ mod tests { a/plop\ "3858f62230ac3c915f300c664312c11f-9"\ ZJAnHyG8PeKz9tI8UTcHrJos39A=\ + COMPOSITE\ " ); Ok(()) @@ -880,6 +890,7 @@ mod tests { size: IntValue(10485760), checksum_crc32: None, checksum_crc32c: None, + checksum_crc64nvme: None, checksum_sha256: Some(Value( "5RQ3A5uk0w7ojNjvegohch4JRBBGN/cLhsNrPzfv/hA=".into(), )), @@ -893,6 +904,7 @@ mod tests { checksum_sha256: None, checksum_crc32c: None, checksum_crc32: Some(Value("ZJAnHyG8=".into())), + checksum_crc64nvme: None, checksum_sha1: None, }, ], diff --git a/src/block/Cargo.toml b/src/block/Cargo.toml index c4dbba44..b45898a2 100644 --- a/src/block/Cargo.toml +++ b/src/block/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "garage_block" -version = "1.3.1" +version = "2.2.0" authors = ["Alex Auvolat "] edition = "2018" license = "AGPL-3.0" @@ -18,7 +18,6 @@ garage_db.workspace = true garage_net.workspace = true garage_rpc.workspace = true garage_util.workspace = true -garage_table.workspace = true opentelemetry.workspace = true @@ -40,4 +39,4 @@ tokio.workspace = true tokio-util.workspace = true [features] -system-libs = [ "zstd/pkg-config" ] +system-libs = ["zstd/pkg-config"] diff --git a/src/block/block.rs b/src/block/block.rs index bd95680e..867aa546 100644 --- a/src/block/block.rs +++ b/src/block/block.rs @@ -89,7 +89,7 @@ impl DataBlock { return DataBlock::compressed(data_compressed.into()); } } - DataBlock::plain(data.into()) + DataBlock::plain(data) }) .await .unwrap() diff --git a/src/block/layout.rs b/src/block/layout.rs index 00e3debb..7e818d77 100644 --- a/src/block/layout.rs +++ b/src/block/layout.rs @@ -262,7 +262,7 @@ impl DataLayout { pub(crate) fn primary_block_dir(&self, hash: &Hash) -> PathBuf { let ipart = self.partition_from(hash); let idir = self.part_prim[ipart] as usize; - self.block_dir_from(hash, &self.data_dirs[idir].path) + self.block_dir_from(hash, self.data_dirs[idir].path.clone()) } pub(crate) fn secondary_block_dirs<'a>( @@ -272,7 +272,7 @@ impl DataLayout { let ipart = self.partition_from(hash); self.part_sec[ipart] .iter() - .map(move |idir| self.block_dir_from(hash, &self.data_dirs[*idir as usize].path)) + .map(move |idir| self.block_dir_from(hash, self.data_dirs[*idir as usize].path.clone())) } fn partition_from(&self, hash: &Hash) -> usize { @@ -283,8 +283,7 @@ impl DataLayout { % DRIVE_NPART } - fn block_dir_from(&self, hash: &Hash, dir: &PathBuf) -> PathBuf { - let mut path = dir.clone(); + fn block_dir_from(&self, hash: &Hash, mut path: PathBuf) -> PathBuf { path.push(hex::encode(&hash.as_slice()[0..1])); path.push(hex::encode(&hash.as_slice()[1..2])); path @@ -326,7 +325,7 @@ fn make_data_dirs(dirs: &DataDirEnum) -> Result, Error> { let mut ok = false; for dir in dirs.iter() { let state = match &dir.capacity { - Some(cap) if dir.read_only == false => { + Some(cap) if !dir.read_only => { let capacity = cap.parse::() .ok_or_message("invalid capacity value")?.as_u64(); if capacity == 0 { @@ -337,7 +336,7 @@ fn make_data_dirs(dirs: &DataDirEnum) -> Result, Error> { capacity, } } - None if dir.read_only == true => { + None if dir.read_only => { DataDirState::ReadOnly } _ => return Err(Error::Message(format!("data directories in data_dir should have a capacity value or be marked read_only, not the case for {}", dir.path.to_string_lossy()))), @@ -359,7 +358,7 @@ fn make_data_dirs(dirs: &DataDirEnum) -> Result, Error> { } fn dir_not_empty(path: &PathBuf) -> Result { - for entry in std::fs::read_dir(&path)? { + for entry in std::fs::read_dir(path)? { let dir = entry?; let ft = dir.file_type()?; let name = dir.file_name().into_string().ok(); diff --git a/src/block/manager.rs b/src/block/manager.rs index 96ca9c90..d33110cb 100644 --- a/src/block/manager.rs +++ b/src/block/manager.rs @@ -33,8 +33,6 @@ use garage_rpc::rpc_helper::OrderTag; use garage_rpc::system::System; use garage_rpc::*; -use garage_table::replication::{TableReplication, TableShardedReplication}; - use crate::block::*; use crate::layout::*; use crate::metrics::*; @@ -76,8 +74,8 @@ impl Rpc for BlockRpc { /// The block manager, handling block exchange between nodes, and block storage on local node pub struct BlockManager { - /// Replication strategy, allowing to find on which node blocks should be located - pub replication: TableShardedReplication, + /// Quorum of nodes for write operations + pub write_quorum: usize, /// Data layout pub(crate) data_layout: ArcSwap, @@ -125,7 +123,7 @@ impl BlockManager { pub fn new( db: &db::Db, config: &Config, - replication: TableShardedReplication, + write_quorum: usize, system: Arc, ) -> Result, Error> { // Load or compute layout, i.e. assignment of data blocks to the different data directories @@ -169,13 +167,13 @@ impl BlockManager { let scrub_persister = PersisterShared::new(&system.metadata_dir, "scrub_info"); let block_manager = Arc::new(Self { - replication, + write_quorum, data_layout: ArcSwap::new(Arc::new(data_layout)), data_layout_persister, data_fsync: config.data_fsync, disable_scrub: config.disable_scrub, compression_level: config.compression_level, - mutation_lock: vec![(); MUTEX_COUNT] + mutation_lock: [(); MUTEX_COUNT] .iter() .map(|_| Mutex::new(BlockManagerLocked())) .collect::>(), @@ -292,7 +290,7 @@ impl BlockManager { let who = self .system .rpc_helper() - .block_read_nodes_of(hash, self.system.rpc_helper()); + .block_read_nodes_of(hash, self.system.rpc_helper())?; for node in who.iter() { let node_id = NodeID::from(*node); @@ -343,6 +341,16 @@ impl BlockManager { Err(err) } + /// Returns the set of nodes that should store a copy of a given block. + /// These are the nodes assigned to the block's hash in the current + /// layout version only: since blocks are immutable, we don't need to + /// do complex logic when several layout versions are active at once, + /// just move them directly to the new nodes. + pub(crate) fn storage_nodes_of(&self, hash: &Hash) -> Result, Error> { + let cluster_layout = self.system.cluster_layout(); + Ok(cluster_layout.current()?.nodes_of(hash).collect()) + } + // ---- Public interface ---- /// Ask nodes that might have a block for it, return it as a stream @@ -375,7 +383,7 @@ impl BlockManager { prevent_compression: bool, order_tag: Option, ) -> Result<(), Error> { - let who = self.system.cluster_layout().current_storage_nodes_of(&hash); + let who = self.storage_nodes_of(&hash)?; let compression_level = self.compression_level.filter(|_| !prevent_compression); let (header, bytes) = DataBlock::from_buffer(data, compression_level) @@ -405,7 +413,7 @@ impl BlockManager { put_block_rpc, RequestStrategy::with_priority(PRIO_NORMAL | PRIO_SECONDARY) .with_drop_on_completion(permit) - .with_quorum(self.replication.write_quorum()), + .with_quorum(self.write_quorum), ) .await?; @@ -561,12 +569,10 @@ impl BlockManager { async { match self.find_block(hash).await { Some(p) => self.read_block_from(hash, &p).await, - None => { - return Err(Error::Message(format!( - "block {:?} not found on node", - hash - ))); - } + None => Err(Error::Message(format!( + "block {:?} not found on node", + hash + ))), } } .bound_record_duration(&self.metrics.block_read_duration) @@ -783,7 +789,6 @@ impl BlockManagerLocked { let mut f = fs::File::create(&path_tmp).await?; f.write_all(data).await?; - f.flush().await?; mgr.metrics.bytes_written.add(data.len() as u64); if mgr.data_fsync { diff --git a/src/block/rc.rs b/src/block/rc.rs index 4a55ee29..d8b611ed 100644 --- a/src/block/rc.rs +++ b/src/block/rc.rs @@ -89,7 +89,7 @@ impl BlockRc { .transaction(|tx| { let mut cnt = 0; for f in recalc_fns.iter() { - cnt += f(&tx, hash)?; + cnt += f(tx, hash)?; } let old_rc = RcEntry::parse_opt(tx.get(&self.rc_table, hash)?); trace!( diff --git a/src/block/repair.rs b/src/block/repair.rs index ef271094..96ad4bb5 100644 --- a/src/block/repair.rs +++ b/src/block/repair.rs @@ -558,7 +558,7 @@ impl Worker for RebalanceWorker { } fn status(&self) -> WorkerStatus { - let t_cur = self.t_finished.unwrap_or_else(|| now_msec()); + let t_cur = self.t_finished.unwrap_or_else(now_msec); let rate = self.moved_bytes / std::cmp::max(1, (t_cur - self.t_started) / 1000); let mut freeform = vec![ format!("Blocks moved: {}", self.moved), diff --git a/src/block/resync.rs b/src/block/resync.rs index 7056a828..28666a71 100644 --- a/src/block/resync.rs +++ b/src/block/resync.rs @@ -27,8 +27,6 @@ use garage_util::tranquilizer::Tranquilizer; use garage_rpc::system::System; use garage_rpc::*; -use garage_table::replication::TableReplication; - use crate::manager::*; // The delay between the time where a resync operation fails @@ -385,11 +383,8 @@ impl BlockResyncManager { info!("Resync block {:?}: offloading and deleting", hash); let existing_path = existing_path.unwrap(); - let mut who = manager - .system - .cluster_layout() - .current_storage_nodes_of(hash); - if who.len() < manager.replication.write_quorum() { + let mut who = manager.storage_nodes_of(hash)?; + if who.len() < manager.write_quorum { return Err(Error::Message("Not trying to offload block because we don't have a quorum of nodes to write to".to_string())); } who.retain(|id| *id != manager.system.id); @@ -471,10 +466,7 @@ impl BlockResyncManager { // First, check whether we are still supposed to store that // block in the latest cluster layout version. - let storage_nodes = manager - .system - .cluster_layout() - .current_storage_nodes_of(&hash); + let storage_nodes = manager.storage_nodes_of(hash)?; if !storage_nodes.contains(&manager.system.id) { info!( diff --git a/src/db/Cargo.toml b/src/db/Cargo.toml index 9e860e7d..40134169 100644 --- a/src/db/Cargo.toml +++ b/src/db/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "garage_db" -version = "1.3.1" +version = "2.2.0" authors = ["Alex Auvolat "] edition = "2018" license = "AGPL-3.0" @@ -28,8 +28,8 @@ parking_lot = { workspace = true, optional = true } mktemp.workspace = true [features] -default = [ "lmdb", "sqlite" ] -bundled-libs = [ "rusqlite?/bundled" ] -lmdb = [ "heed" ] -fjall = [ "dep:fjall", "dep:parking_lot" ] -sqlite = [ "rusqlite", "r2d2", "r2d2_sqlite" ] +default = ["lmdb", "sqlite"] +bundled-libs = ["rusqlite?/bundled"] +lmdb = ["heed"] +fjall = ["dep:fjall", "dep:parking_lot"] +sqlite = ["rusqlite", "r2d2", "r2d2_sqlite"] diff --git a/src/db/fjall_adapter.rs b/src/db/fjall_adapter.rs index 25913a1f..9e9efe9f 100644 --- a/src/db/fjall_adapter.rs +++ b/src/db/fjall_adapter.rs @@ -1,6 +1,6 @@ use core::ops::Bound; -use std::path::PathBuf; +use std::path::Path; use std::sync::Arc; use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard}; @@ -20,7 +20,7 @@ pub use fjall; // -- -pub(crate) fn open_db(path: &PathBuf, opt: &OpenOpt) -> Result { +pub(crate) fn open_db(path: &Path, opt: &OpenOpt) -> Result { info!("Opening Fjall database at: {}", path.display()); if opt.fsync { return Err(Error( @@ -105,15 +105,14 @@ impl IDb for FjallDb { } fn list_trees(&self) -> Result> { - Ok(self - .keyspace + self.keyspace .list_partitions() .iter() - .map(|n| decode_name(&n)) - .collect::>>()?) + .map(|n| decode_name(n)) + .collect::>>() } - fn snapshot(&self, base_path: &PathBuf) -> Result<()> { + fn snapshot(&self, base_path: &Path) -> Result<()> { std::fs::create_dir_all(base_path)?; let path = Engine::Fjall.db_path(base_path); @@ -272,7 +271,7 @@ impl<'a> FjallTx<'a> { fn get_tree(&self, i: usize) -> TxOpResult<&TransactionalPartitionHandle> { self.trees.get(i).map(|tup| &tup.1).ok_or_else(|| { TxOpError(Error( - "invalid tree id (it might have been openned after the transaction started)".into(), + "invalid tree id (it might have been opened after the transaction started)".into(), )) }) } @@ -288,7 +287,7 @@ impl<'a> ITx for FjallTx<'a> { } fn len(&self, tree_idx: usize) -> TxOpResult { let tree = self.get_tree(tree_idx)?; - Ok(self.tx.len(tree)? as usize) + Ok(self.tx.len(tree)?) } fn insert(&mut self, tree_idx: usize, key: &[u8], value: &[u8]) -> TxOpResult<()> { @@ -325,7 +324,7 @@ impl<'a> ITx for FjallTx<'a> { let high = clone_bound(high); Ok(Box::new( self.tx - .range::, ByteVecRangeBounds>(&tree, (low, high)) + .range::, ByteVecRangeBounds>(tree, (low, high)) .map(iterator_remap_tx), )) } @@ -340,7 +339,7 @@ impl<'a> ITx for FjallTx<'a> { let high = clone_bound(high); Ok(Box::new( self.tx - .range::, ByteVecRangeBounds>(&tree, (low, high)) + .range::, ByteVecRangeBounds>(tree, (low, high)) .rev() .map(iterator_remap_tx), )) diff --git a/src/db/lib.rs b/src/db/lib.rs index 2a467c7c..3e83e0f0 100644 --- a/src/db/lib.rs +++ b/src/db/lib.rs @@ -17,7 +17,7 @@ use core::ops::{Bound, RangeBounds}; use std::borrow::Cow; use std::cell::Cell; -use std::path::PathBuf; +use std::path::Path; use std::sync::Arc; use thiserror::Error; @@ -133,7 +133,7 @@ impl Db { Err(TxError::Db(tx_e)) } (Err(TxError::Db(tx_e)), Some(Ok(_))) => { - // Transaction encounterred a DB error when commiting the transaction, + // Transaction encounterred a DB error when committing the transaction, // after user code was called Err(TxError::Db(tx_e)) } @@ -147,7 +147,7 @@ impl Db { } } - pub fn snapshot(&self, path: &PathBuf) -> Result<()> { + pub fn snapshot(&self, path: &Path) -> Result<()> { self.0.snapshot(path) } @@ -348,7 +348,7 @@ pub(crate) trait IDb: Send + Sync { fn engine(&self) -> String; fn open_tree(&self, name: &str) -> Result; fn list_trees(&self) -> Result>; - fn snapshot(&self, path: &PathBuf) -> Result<()>; + fn snapshot(&self, path: &Path) -> Result<()>; fn get(&self, tree: usize, key: &[u8]) -> Result>; fn approximate_len(&self, tree: usize) -> Result; diff --git a/src/db/lmdb_adapter.rs b/src/db/lmdb_adapter.rs index ac185ae9..dca121fd 100644 --- a/src/db/lmdb_adapter.rs +++ b/src/db/lmdb_adapter.rs @@ -3,7 +3,7 @@ use core::ops::Bound; use std::collections::HashMap; use std::convert::TryInto; use std::marker::PhantomPinned; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::pin::Pin; use std::sync::{Arc, RwLock}; @@ -22,7 +22,7 @@ pub use heed; pub(crate) fn open_db(path: &PathBuf, opt: &OpenOpt) -> Result { info!("Opening LMDB database at: {}", path.display()); - if let Err(e) = std::fs::create_dir_all(&path) { + if let Err(e) = std::fs::create_dir_all(path) { return Err(Error( format!("Unable to create LMDB data directory: {}", e).into(), )); @@ -44,17 +44,15 @@ pub(crate) fn open_db(path: &PathBuf, opt: &OpenOpt) -> Result { env_builder.flag(heed::flags::Flags::MdbNoSync); } } - match env_builder.open(&path) { - Err(heed::Error::Io(e)) if e.kind() == std::io::ErrorKind::OutOfMemory => { - return Err(Error( - "OutOfMemory error while trying to open LMDB database. This can happen \ + match env_builder.open(path) { + Err(heed::Error::Io(e)) if e.kind() == std::io::ErrorKind::OutOfMemory => Err(Error( + "OutOfMemory error while trying to open LMDB database. This can happen \ if your operating system is not allowing you to use sufficient virtual \ memory address space. Please check that no limit is set (ulimit -v). \ You may also try to set a smaller `lmdb_map_size` configuration parameter. \ On 32-bit machines, you should probably switch to another database engine." - .into(), - )) - } + .into(), + )), Err(e) => Err(Error(format!("Cannot open LMDB database: {}", e).into())), Ok(db) => Ok(LmdbDb::init(db)), } @@ -147,7 +145,7 @@ impl IDb for LmdbDb { Ok(ret2) } - fn snapshot(&self, base_path: &PathBuf) -> Result<()> { + fn snapshot(&self, base_path: &Path) -> Result<()> { std::fs::create_dir_all(base_path)?; let path = Engine::Lmdb.db_path(base_path); self.db @@ -397,9 +395,12 @@ where // this reference will only be stored and accessed from the // returned ValueIter which guarantees that it is destroyed // before the tx it is pointing to. - unsafe { &*&raw const *tx } + #[expect(clippy::deref_addrof)] + unsafe { + &*&raw const *tx + } }; - let iter = iterfun(&tx_lifetime_overextended)?; + let iter = iterfun(tx_lifetime_overextended)?; *boxed.as_mut().iter() = Some(iter); diff --git a/src/db/open.rs b/src/db/open.rs index 23391c61..dad0492a 100644 --- a/src/db/open.rs +++ b/src/db/open.rs @@ -1,4 +1,4 @@ -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use crate::{Db, Error, Result}; @@ -25,20 +25,13 @@ impl Engine { } /// Return engine-specific DB path from base path - pub fn db_path(&self, base_path: &PathBuf) -> PathBuf { - let mut ret = base_path.clone(); - match self { - Self::Lmdb => { - ret.push("db.lmdb"); - } - Self::Sqlite => { - ret.push("db.sqlite"); - } - Self::Fjall => { - ret.push("db.fjall"); - } - } - ret + pub fn db_path(&self, base_path: &Path) -> PathBuf { + let suffix = match self { + Self::Lmdb => "db.lmdb", + Self::Sqlite => "db.sqlite", + Self::Fjall => "db.fjall", + }; + base_path.join(suffix) } } @@ -68,22 +61,13 @@ impl std::str::FromStr for Engine { } } +#[derive(Default)] pub struct OpenOpt { pub fsync: bool, pub lmdb_map_size: Option, pub fjall_block_cache_size: Option, } -impl Default for OpenOpt { - fn default() -> Self { - Self { - fsync: false, - lmdb_map_size: None, - fjall_block_cache_size: None, - } - } -} - pub fn open_db(path: &PathBuf, engine: Engine, opt: &OpenOpt) -> Result { match engine { // ---- Sqlite DB ---- diff --git a/src/db/sqlite_adapter.rs b/src/db/sqlite_adapter.rs index a03ee8ef..832dab14 100644 --- a/src/db/sqlite_adapter.rs +++ b/src/db/sqlite_adapter.rs @@ -1,7 +1,7 @@ use core::ops::Bound; use std::marker::PhantomPinned; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::pin::Pin; use std::ptr::NonNull; use std::sync::{Arc, Mutex, RwLock}; @@ -23,7 +23,7 @@ pub use rusqlite; pub(crate) fn open_db(path: &PathBuf, opt: &OpenOpt) -> Result { info!("Opening Sqlite database at: {}", path.display()); let manager = r2d2_sqlite::SqliteConnectionManager::file(path); - Ok(SqliteDb::new(manager, opt.fsync)?) + SqliteDb::open(manager, opt.fsync) } // ---- @@ -62,7 +62,7 @@ pub struct SqliteDb { } impl SqliteDb { - pub fn new(manager: SqliteConnectionManager, sync_mode: bool) -> Result { + pub fn open(manager: SqliteConnectionManager, sync_mode: bool) -> Result { let manager = manager.with_init(move |db| { db.pragma_update(None, "journal_mode", "WAL")?; if sync_mode { @@ -110,7 +110,7 @@ impl IDb for SqliteDb { let name = format!("tree_{}", name.replace(':', "_COLON_")); let mut trees = self.trees.write().unwrap(); - if let Some(i) = trees.iter().position(|x| x.as_ref() == &name) { + if let Some(i) = trees.iter().position(|x| x.as_ref() == name) { Ok(i) } else { let db = self.db.get()?; @@ -150,10 +150,10 @@ impl IDb for SqliteDb { Ok(trees) } - fn snapshot(&self, base_path: &PathBuf) -> Result<()> { + fn snapshot(&self, base_path: &Path) -> Result<()> { std::fs::create_dir_all(base_path)?; let path = Engine::Sqlite - .db_path(&base_path) + .db_path(base_path) .into_os_string() .into_string() .map_err(|_| Error("invalid sqlite path string".into()))?; @@ -308,7 +308,7 @@ impl IDb for SqliteDb { trace!("transaction done"); drop(lock); - return res; + res } } diff --git a/src/db/test.rs b/src/db/test.rs index 977dc965..91ac3522 100644 --- a/src/db/test.rs +++ b/src/db/test.rs @@ -21,7 +21,7 @@ fn test_suite(db: Db) { let res = db.transaction::<_, (), _>(|tx| { assert_eq!(tx.get(&tree, ka).unwrap().unwrap(), va); - assert_eq!(tx.insert(&tree, ka, vb).unwrap(), ()); + let _: () = tx.insert(&tree, ka, vb).unwrap(); assert_eq!(tx.get(&tree, ka).unwrap().unwrap(), vb); @@ -33,7 +33,7 @@ fn test_suite(db: Db) { let res = db.transaction::<(), _, _>(|tx| { assert_eq!(tx.get(&tree, ka).unwrap().unwrap(), vb); - assert_eq!(tx.insert(&tree, ka, vc).unwrap(), ()); + let _: () = tx.insert(&tree, ka, vc).unwrap(); assert_eq!(tx.get(&tree, ka).unwrap().unwrap(), vc); @@ -145,7 +145,7 @@ fn test_sqlite_db() { use crate::sqlite_adapter::SqliteDb; let manager = r2d2_sqlite::SqliteConnectionManager::memory(); - let db = SqliteDb::new(manager, false).unwrap(); + let db = SqliteDb::open(manager, false).unwrap(); test_suite(db); } diff --git a/src/garage/Cargo.toml b/src/garage/Cargo.toml index a4f695a4..8b726a86 100644 --- a/src/garage/Cargo.toml +++ b/src/garage/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "garage" -version = "1.3.1" +version = "2.2.0" authors = ["Alex Auvolat "] edition = "2018" license = "AGPL-3.0" @@ -26,6 +26,7 @@ garage_db.workspace = true garage_api_admin.workspace = true garage_api_s3.workspace = true garage_api_k2v = { workspace = true, optional = true } +garage_api_common.workspace = true garage_block.workspace = true garage_model.workspace = true garage_net.workspace = true @@ -37,6 +38,7 @@ garage_web.workspace = true backtrace.workspace = true bytes.workspace = true bytesize.workspace = true +chrono.workspace = true timeago.workspace = true parse_duration.workspace = true hex.workspace = true @@ -47,8 +49,8 @@ sha1.workspace = true sodiumoxide.workspace = true structopt.workspace = true git-version.workspace = true - -serde.workspace = true +utoipa.workspace = true +serde_json.workspace = true futures.workspace = true tokio.workspace = true @@ -78,38 +80,42 @@ static_init.workspace = true assert-json-diff.workspace = true serde_json.workspace = true base64.workspace = true -crc32fast.workspace = true +crc-fast.workspace = true k2v-client.workspace = true [features] -default = [ "bundled-libs", "metrics", "lmdb", "sqlite", "k2v" ] +default = ["bundled-libs", "metrics", "lmdb", "sqlite", "k2v"] -k2v = [ "garage_util/k2v", "garage_api_k2v" ] +k2v = ["garage_util/k2v", "garage_api_k2v", "garage_api_admin/k2v"] # Database engines -lmdb = [ "garage_model/lmdb" ] -sqlite = [ "garage_model/sqlite" ] -fjall = [ "garage_model/fjall" ] +lmdb = ["garage_model/lmdb"] +sqlite = ["garage_model/sqlite"] +fjall = ["garage_model/fjall"] # Automatic registration and discovery via Consul API -consul-discovery = [ "garage_rpc/consul-discovery" ] +consul-discovery = ["garage_rpc/consul-discovery"] # Automatic registration and discovery via Kubernetes API -kubernetes-discovery = [ "garage_rpc/kubernetes-discovery" ] +kubernetes-discovery = ["garage_rpc/kubernetes-discovery"] # Prometheus exporter (/metrics endpoint). -metrics = [ "garage_api_admin/metrics", "opentelemetry-prometheus" ] +metrics = ["garage_api_admin/metrics", "opentelemetry-prometheus"] # Exporter for the OpenTelemetry Collector. -telemetry-otlp = [ "opentelemetry-otlp" ] +telemetry-otlp = ["opentelemetry-otlp"] # Logging to syslog -syslog = [ "syslog-tracing" ] +syslog = ["syslog-tracing"] # Logging to journald -journald = [ "tracing-journald" ] +journald = ["tracing-journald"] # NOTE: bundled-libs and system-libs should be treat as mutually exclusive; # exactly one of them should be enabled. # Use bundled libsqlite instead of linking against system-provided. -bundled-libs = [ "garage_db/bundled-libs" ] +bundled-libs = ["garage_db/bundled-libs"] # Link against system-provided libsodium and libzstd. -system-libs = [ "garage_block/system-libs", "garage_rpc/system-libs", "sodiumoxide/use-pkg-config" ] +system-libs = [ + "garage_block/system-libs", + "garage_rpc/system-libs", + "sodiumoxide/use-pkg-config", +] diff --git a/src/garage/admin/block.rs b/src/garage/admin/block.rs deleted file mode 100644 index 5f908ce4..00000000 --- a/src/garage/admin/block.rs +++ /dev/null @@ -1,243 +0,0 @@ -use garage_util::data::*; - -use garage_table::*; - -use garage_model::helper::error::{Error, OkOrBadRequest}; -use garage_model::s3::object_table::*; -use garage_model::s3::version_table::*; - -use crate::cli::*; - -use super::*; - -impl AdminRpcHandler { - pub(super) async fn handle_block_cmd(&self, cmd: &BlockOperation) -> Result { - match cmd { - BlockOperation::ListErrors => Ok(AdminRpc::BlockErrorList( - self.garage.block_manager.list_resync_errors()?, - )), - BlockOperation::Info { hash } => self.handle_block_info(hash).await, - BlockOperation::RetryNow { all, blocks } => { - self.handle_block_retry_now(*all, blocks).await - } - BlockOperation::Purge { yes, blocks } => self.handle_block_purge(*yes, blocks).await, - } - } - - async fn handle_block_info(&self, hash: &String) -> Result { - let hash = self.find_block_hash_by_prefix(hash)?; - let refcount = self.garage.block_manager.get_block_rc(&hash)?; - let block_refs = self - .garage - .block_ref_table - .get_range(&hash, None, None, 10000, Default::default()) - .await?; - let mut versions = vec![]; - let mut uploads = vec![]; - for br in block_refs { - if let Some(v) = self - .garage - .version_table - .get(&br.version, &EmptyKey) - .await? - { - if let VersionBacklink::MultipartUpload { upload_id } = &v.backlink { - if let Some(u) = self.garage.mpu_table.get(upload_id, &EmptyKey).await? { - uploads.push(u); - } - } - versions.push(Ok(v)); - } else { - versions.push(Err(br.version)); - } - } - Ok(AdminRpc::BlockInfo { - hash, - refcount, - versions, - uploads, - }) - } - - async fn handle_block_retry_now( - &self, - all: bool, - blocks: &[String], - ) -> Result { - if all { - if !blocks.is_empty() { - return Err(Error::BadRequest( - "--all was specified, cannot also specify blocks".into(), - )); - } - let blocks = self.garage.block_manager.list_resync_errors()?; - for b in blocks.iter() { - self.garage.block_manager.resync.clear_backoff(&b.hash)?; - } - Ok(AdminRpc::Ok(format!( - "{} blocks returned in queue for a retry now (check logs to see results)", - blocks.len() - ))) - } else { - for hash in blocks { - let hash = hex::decode(hash).ok_or_bad_request("invalid hash")?; - let hash = Hash::try_from(&hash).ok_or_bad_request("invalid hash")?; - self.garage.block_manager.resync.clear_backoff(&hash)?; - } - Ok(AdminRpc::Ok(format!( - "{} blocks returned in queue for a retry now (check logs to see results)", - blocks.len() - ))) - } - } - - async fn handle_block_purge(&self, yes: bool, blocks: &[String]) -> Result { - if !yes { - return Err(Error::BadRequest( - "Pass the --yes flag to confirm block purge operation.".into(), - )); - } - - let mut obj_dels = 0; - let mut mpu_dels = 0; - let mut ver_dels = 0; - let mut br_dels = 0; - - for hash in blocks { - let hash = hex::decode(hash).ok_or_bad_request("invalid hash")?; - let hash = Hash::try_from(&hash).ok_or_bad_request("invalid hash")?; - let block_refs = self - .garage - .block_ref_table - .get_range(&hash, None, None, 10000, Default::default()) - .await?; - - for br in block_refs { - if let Some(version) = self - .garage - .version_table - .get(&br.version, &EmptyKey) - .await? - { - self.handle_block_purge_version_backlink( - &version, - &mut obj_dels, - &mut mpu_dels, - ) - .await?; - - if !version.deleted.get() { - let deleted_version = Version::new(version.uuid, version.backlink, true); - self.garage.version_table.insert(&deleted_version).await?; - ver_dels += 1; - } - } - if !br.deleted.get() { - let mut br = br; - br.deleted.set(); - self.garage.block_ref_table.insert(&br).await?; - br_dels += 1; - } - } - } - - Ok(AdminRpc::Ok(format!( - "Purged {} blocks: marked {} block refs, {} versions, {} objects and {} multipart uploads as deleted", - blocks.len(), - br_dels, - ver_dels, - obj_dels, - mpu_dels, - ))) - } - - async fn handle_block_purge_version_backlink( - &self, - version: &Version, - obj_dels: &mut usize, - mpu_dels: &mut usize, - ) -> Result<(), Error> { - let (bucket_id, key, ov_id) = match &version.backlink { - VersionBacklink::Object { bucket_id, key } => (*bucket_id, key.clone(), version.uuid), - VersionBacklink::MultipartUpload { upload_id } => { - if let Some(mut mpu) = self.garage.mpu_table.get(upload_id, &EmptyKey).await? { - if !mpu.deleted.get() { - mpu.parts.clear(); - mpu.deleted.set(); - self.garage.mpu_table.insert(&mpu).await?; - *mpu_dels += 1; - } - (mpu.bucket_id, mpu.key.clone(), *upload_id) - } else { - return Ok(()); - } - } - }; - - if let Some(object) = self.garage.object_table.get(&bucket_id, &key).await? { - let ov = object.versions().iter().rev().find(|v| v.is_complete()); - if let Some(ov) = ov { - if ov.uuid == ov_id { - let del_uuid = gen_uuid(); - let deleted_object = Object::new( - bucket_id, - key, - vec![ObjectVersion { - uuid: del_uuid, - timestamp: ov.timestamp + 1, - state: ObjectVersionState::Complete(ObjectVersionData::DeleteMarker), - }], - ); - self.garage.object_table.insert(&deleted_object).await?; - *obj_dels += 1; - } - } - } - - Ok(()) - } - - // ---- helper function ---- - fn find_block_hash_by_prefix(&self, prefix: &str) -> Result { - if prefix.len() < 4 { - return Err(Error::BadRequest( - "Please specify at least 4 characters of the block hash".into(), - )); - } - - let prefix_bin = - hex::decode(&prefix[..prefix.len() & !1]).ok_or_bad_request("invalid hash")?; - - let iter = self - .garage - .block_ref_table - .data - .store - .range(&prefix_bin[..]..) - .map_err(GarageError::from)?; - let mut found = None; - for item in iter { - let (k, _v) = item.map_err(GarageError::from)?; - let hash = Hash::try_from(&k[..32]).unwrap(); - if &hash.as_slice()[..prefix_bin.len()] != prefix_bin { - break; - } - if hex::encode(hash.as_slice()).starts_with(prefix) { - match &found { - Some(x) if *x == hash => (), - Some(_) => { - return Err(Error::BadRequest(format!( - "Several blocks match prefix `{}`", - prefix - ))); - } - None => { - found = Some(hash); - } - } - } - } - - found.ok_or_else(|| Error::BadRequest("No matching block found".into())) - } -} diff --git a/src/garage/admin/bucket.rs b/src/garage/admin/bucket.rs deleted file mode 100644 index 073329c1..00000000 --- a/src/garage/admin/bucket.rs +++ /dev/null @@ -1,500 +0,0 @@ -use std::collections::HashMap; -use std::fmt::Write; - -use garage_util::crdt::*; -use garage_util::time::*; - -use garage_table::*; - -use garage_model::bucket_alias_table::*; -use garage_model::bucket_table::*; -use garage_model::helper::error::{Error, OkOrBadRequest}; -use garage_model::permission::*; - -use crate::cli::*; - -use super::*; - -impl AdminRpcHandler { - pub(super) async fn handle_bucket_cmd(&self, cmd: &BucketOperation) -> Result { - match cmd { - BucketOperation::List => self.handle_list_buckets().await, - BucketOperation::Info(query) => self.handle_bucket_info(query).await, - BucketOperation::Create(query) => self.handle_create_bucket(&query.name).await, - BucketOperation::Delete(query) => self.handle_delete_bucket(query).await, - BucketOperation::Alias(query) => self.handle_alias_bucket(query).await, - BucketOperation::Unalias(query) => self.handle_unalias_bucket(query).await, - BucketOperation::Allow(query) => self.handle_bucket_allow(query).await, - BucketOperation::Deny(query) => self.handle_bucket_deny(query).await, - BucketOperation::Website(query) => self.handle_bucket_website(query).await, - BucketOperation::SetQuotas(query) => self.handle_bucket_set_quotas(query).await, - BucketOperation::CleanupIncompleteUploads(query) => { - self.handle_bucket_cleanup_incomplete_uploads(query).await - } - } - } - - async fn handle_list_buckets(&self) -> Result { - let buckets = self - .garage - .bucket_table - .get_range( - &EmptyKey, - None, - Some(DeletedFilter::NotDeleted), - 10000, - EnumerationOrder::Forward, - ) - .await?; - - Ok(AdminRpc::BucketList(buckets)) - } - - async fn handle_bucket_info(&self, query: &BucketOpt) -> Result { - let bucket_id = self - .garage - .bucket_helper() - .admin_get_existing_matching_bucket(&query.name) - .await?; - - let bucket = self - .garage - .bucket_helper() - .get_existing_bucket(bucket_id) - .await?; - - let counters = self - .garage - .object_counter_table - .table - .get(&bucket_id, &EmptyKey) - .await? - .map(|x| x.filtered_values(&self.garage.system.cluster_layout())) - .unwrap_or_default(); - - let mpu_counters = self - .garage - .mpu_counter_table - .table - .get(&bucket_id, &EmptyKey) - .await? - .map(|x| x.filtered_values(&self.garage.system.cluster_layout())) - .unwrap_or_default(); - - let mut relevant_keys = HashMap::new(); - for (k, _) in bucket - .state - .as_option() - .unwrap() - .authorized_keys - .items() - .iter() - { - if let Some(key) = self - .garage - .key_table - .get(&EmptyKey, k) - .await? - .filter(|k| !k.is_deleted()) - { - relevant_keys.insert(k.clone(), key); - } - } - for ((k, _), _, _) in bucket - .state - .as_option() - .unwrap() - .local_aliases - .items() - .iter() - { - if relevant_keys.contains_key(k) { - continue; - } - if let Some(key) = self.garage.key_table.get(&EmptyKey, k).await? { - relevant_keys.insert(k.clone(), key); - } - } - - Ok(AdminRpc::BucketInfo { - bucket, - relevant_keys, - counters, - mpu_counters, - }) - } - - #[allow(clippy::ptr_arg)] - async fn handle_create_bucket(&self, name: &String) -> Result { - if !is_valid_bucket_name(name, self.garage.config.allow_punycode) { - return Err(Error::BadRequest(format!( - "{}: {}", - name, INVALID_BUCKET_NAME_MESSAGE - ))); - } - - let helper = self.garage.locked_helper().await; - - if let Some(alias) = self.garage.bucket_alias_table.get(&EmptyKey, name).await? { - if alias.state.get().is_some() { - return Err(Error::BadRequest(format!("Bucket {} already exists", name))); - } - } - - // ---- done checking, now commit ---- - - let bucket = Bucket::new(); - self.garage.bucket_table.insert(&bucket).await?; - - helper.set_global_bucket_alias(bucket.id, name).await?; - - Ok(AdminRpc::Ok(format!("Bucket {} was created.", name))) - } - - async fn handle_delete_bucket(&self, query: &DeleteBucketOpt) -> Result { - let helper = self.garage.locked_helper().await; - - let bucket_id = helper - .bucket() - .admin_get_existing_matching_bucket(&query.name) - .await?; - - // Get the alias, but keep in minde here the bucket name - // given in parameter can also be directly the bucket's ID. - // In that case bucket_alias will be None, and - // we can still delete the bucket if it has zero aliases - // (a condition which we try to prevent but that could still happen somehow). - // We just won't try to delete an alias entry because there isn't one. - let bucket_alias = self - .garage - .bucket_alias_table - .get(&EmptyKey, &query.name) - .await?; - - // Check bucket doesn't have other aliases - let mut bucket = helper.bucket().get_existing_bucket(bucket_id).await?; - let bucket_state = bucket.state.as_option().unwrap(); - if bucket_state - .aliases - .items() - .iter() - .filter(|(_, _, active)| *active) - .any(|(name, _, _)| name != &query.name) - { - return Err(Error::BadRequest(format!("Bucket {} still has other global aliases. Use `bucket unalias` to delete them one by one.", query.name))); - } - if bucket_state - .local_aliases - .items() - .iter() - .any(|(_, _, active)| *active) - { - return Err(Error::BadRequest(format!("Bucket {} still has other local aliases. Use `bucket unalias` to delete them one by one.", query.name))); - } - - // Check bucket is empty - if !helper.bucket().is_bucket_empty(bucket_id).await? { - return Err(Error::BadRequest(format!( - "Bucket {} is not empty", - query.name - ))); - } - - if !query.yes { - return Err(Error::BadRequest( - "Add --yes flag to really perform this operation".to_string(), - )); - } - - // --- done checking, now commit --- - // 1. delete authorization from keys that had access - for (key_id, _) in bucket.authorized_keys() { - helper - .set_bucket_key_permissions(bucket.id, key_id, BucketKeyPerm::NO_PERMISSIONS) - .await?; - } - - // 2. delete bucket alias - if bucket_alias.is_some() { - helper - .purge_global_bucket_alias(bucket_id, &query.name) - .await?; - } - - // 3. delete bucket - bucket.state = Deletable::delete(); - self.garage.bucket_table.insert(&bucket).await?; - - Ok(AdminRpc::Ok(format!("Bucket {} was deleted.", query.name))) - } - - async fn handle_alias_bucket(&self, query: &AliasBucketOpt) -> Result { - let helper = self.garage.locked_helper().await; - - let bucket_id = helper - .bucket() - .admin_get_existing_matching_bucket(&query.existing_bucket) - .await?; - - if let Some(key_pattern) = &query.local { - let key = helper.key().get_existing_matching_key(key_pattern).await?; - - helper - .set_local_bucket_alias(bucket_id, &key.key_id, &query.new_name) - .await?; - Ok(AdminRpc::Ok(format!( - "Alias {} now points to bucket {:?} in namespace of key {}", - query.new_name, bucket_id, key.key_id - ))) - } else { - helper - .set_global_bucket_alias(bucket_id, &query.new_name) - .await?; - Ok(AdminRpc::Ok(format!( - "Alias {} now points to bucket {:?}", - query.new_name, bucket_id - ))) - } - } - - async fn handle_unalias_bucket(&self, query: &UnaliasBucketOpt) -> Result { - let helper = self.garage.locked_helper().await; - - if let Some(key_pattern) = &query.local { - let key = helper.key().get_existing_matching_key(key_pattern).await?; - - let bucket_id = key - .state - .as_option() - .unwrap() - .local_aliases - .get(&query.name) - .cloned() - .flatten() - .ok_or_bad_request("Bucket not found")?; - - helper - .unset_local_bucket_alias(bucket_id, &key.key_id, &query.name) - .await?; - - Ok(AdminRpc::Ok(format!( - "Alias {} no longer points to bucket {:?} in namespace of key {}", - &query.name, bucket_id, key.key_id - ))) - } else { - let bucket_id = helper - .bucket() - .resolve_global_bucket_name(&query.name) - .await? - .ok_or_bad_request("Bucket not found")?; - - helper - .unset_global_bucket_alias(bucket_id, &query.name) - .await?; - - Ok(AdminRpc::Ok(format!( - "Alias {} no longer points to bucket {:?}", - &query.name, bucket_id - ))) - } - } - - async fn handle_bucket_allow(&self, query: &PermBucketOpt) -> Result { - let helper = self.garage.locked_helper().await; - - let bucket_id = helper - .bucket() - .admin_get_existing_matching_bucket(&query.bucket) - .await?; - let key = helper - .key() - .get_existing_matching_key(&query.key_pattern) - .await?; - - let allow_read = query.read || key.allow_read(&bucket_id); - let allow_write = query.write || key.allow_write(&bucket_id); - let allow_owner = query.owner || key.allow_owner(&bucket_id); - - helper - .set_bucket_key_permissions( - bucket_id, - &key.key_id, - BucketKeyPerm { - timestamp: now_msec(), - allow_read, - allow_write, - allow_owner, - }, - ) - .await?; - - Ok(AdminRpc::Ok(format!( - "New permissions for {} on {}: read {}, write {}, owner {}.", - &key.key_id, &query.bucket, allow_read, allow_write, allow_owner - ))) - } - - async fn handle_bucket_deny(&self, query: &PermBucketOpt) -> Result { - let helper = self.garage.locked_helper().await; - - let bucket_id = helper - .bucket() - .admin_get_existing_matching_bucket(&query.bucket) - .await?; - let key = helper - .key() - .get_existing_matching_key(&query.key_pattern) - .await?; - - let allow_read = !query.read && key.allow_read(&bucket_id); - let allow_write = !query.write && key.allow_write(&bucket_id); - let allow_owner = !query.owner && key.allow_owner(&bucket_id); - - helper - .set_bucket_key_permissions( - bucket_id, - &key.key_id, - BucketKeyPerm { - timestamp: now_msec(), - allow_read, - allow_write, - allow_owner, - }, - ) - .await?; - - Ok(AdminRpc::Ok(format!( - "New permissions for {} on {}: read {}, write {}, owner {}.", - &key.key_id, &query.bucket, allow_read, allow_write, allow_owner - ))) - } - - async fn handle_bucket_website(&self, query: &WebsiteOpt) -> Result { - let bucket_id = self - .garage - .bucket_helper() - .admin_get_existing_matching_bucket(&query.bucket) - .await?; - - let mut bucket = self - .garage - .bucket_helper() - .get_existing_bucket(bucket_id) - .await?; - let bucket_state = bucket.state.as_option_mut().unwrap(); - - if !(query.allow ^ query.deny) { - return Err(Error::BadRequest( - "You must specify exactly one flag, either --allow or --deny".to_string(), - )); - } - - let website = if query.allow { - Some(WebsiteConfig { - index_document: query.index_document.clone(), - error_document: query.error_document.clone(), - }) - } else { - None - }; - - bucket_state.website_config.update(website); - self.garage.bucket_table.insert(&bucket).await?; - - let msg = if query.allow { - format!("Website access allowed for {}", &query.bucket) - } else { - format!("Website access denied for {}", &query.bucket) - }; - - Ok(AdminRpc::Ok(msg)) - } - - async fn handle_bucket_set_quotas(&self, query: &SetQuotasOpt) -> Result { - let bucket_id = self - .garage - .bucket_helper() - .admin_get_existing_matching_bucket(&query.bucket) - .await?; - - let mut bucket = self - .garage - .bucket_helper() - .get_existing_bucket(bucket_id) - .await?; - let bucket_state = bucket.state.as_option_mut().unwrap(); - - if query.max_size.is_none() && query.max_objects.is_none() { - return Err(Error::BadRequest( - "You must specify either --max-size or --max-objects (or both) for this command to do something.".to_string(), - )); - } - - let mut quotas = bucket_state.quotas.get().clone(); - - match query.max_size.as_ref().map(String::as_ref) { - Some("none") => quotas.max_size = None, - Some(v) => { - let bs = v - .parse::() - .ok_or_bad_request(format!("Invalid size specified: {}", v))?; - quotas.max_size = Some(bs.as_u64()); - } - _ => (), - } - - match query.max_objects.as_ref().map(String::as_ref) { - Some("none") => quotas.max_objects = None, - Some(v) => { - let mo = v - .parse::() - .ok_or_bad_request(format!("Invalid number specified: {}", v))?; - quotas.max_objects = Some(mo); - } - _ => (), - } - - bucket_state.quotas.update(quotas); - self.garage.bucket_table.insert(&bucket).await?; - - Ok(AdminRpc::Ok(format!( - "Quotas updated for {}", - &query.bucket - ))) - } - - async fn handle_bucket_cleanup_incomplete_uploads( - &self, - query: &CleanupIncompleteUploadsOpt, - ) -> Result { - let mut bucket_ids = vec![]; - for b in query.buckets.iter() { - bucket_ids.push( - self.garage - .bucket_helper() - .admin_get_existing_matching_bucket(b) - .await?, - ); - } - - let duration = parse_duration::parse::parse(&query.older_than) - .ok_or_bad_request("Invalid duration passed for --older-than parameter")?; - - let mut ret = String::new(); - for bucket in bucket_ids { - let count = self - .garage - .bucket_helper() - .cleanup_incomplete_uploads(&bucket, duration) - .await?; - writeln!( - &mut ret, - "Bucket {:?}: {} incomplete uploads aborted", - bucket, count - ) - .unwrap(); - } - - Ok(AdminRpc::Ok(ret)) - } -} diff --git a/src/garage/admin/key.rs b/src/garage/admin/key.rs deleted file mode 100644 index bd010d2c..00000000 --- a/src/garage/admin/key.rs +++ /dev/null @@ -1,161 +0,0 @@ -use std::collections::HashMap; - -use garage_table::*; - -use garage_model::helper::error::*; -use garage_model::key_table::*; - -use crate::cli::*; - -use super::*; - -impl AdminRpcHandler { - pub(super) async fn handle_key_cmd(&self, cmd: &KeyOperation) -> Result { - match cmd { - KeyOperation::List => self.handle_list_keys().await, - KeyOperation::Info(query) => self.handle_key_info(query).await, - KeyOperation::Create(query) => self.handle_create_key(query).await, - KeyOperation::Rename(query) => self.handle_rename_key(query).await, - KeyOperation::Delete(query) => self.handle_delete_key(query).await, - KeyOperation::Allow(query) => self.handle_allow_key(query).await, - KeyOperation::Deny(query) => self.handle_deny_key(query).await, - KeyOperation::Import(query) => self.handle_import_key(query).await, - } - } - - async fn handle_list_keys(&self) -> Result { - let key_ids = self - .garage - .key_table - .get_range( - &EmptyKey, - None, - Some(KeyFilter::Deleted(DeletedFilter::NotDeleted)), - 10000, - EnumerationOrder::Forward, - ) - .await? - .iter() - .map(|k| (k.key_id.to_string(), k.params().unwrap().name.get().clone())) - .collect::>(); - Ok(AdminRpc::KeyList(key_ids)) - } - - async fn handle_key_info(&self, query: &KeyInfoOpt) -> Result { - let mut key = self - .garage - .key_helper() - .get_existing_matching_key(&query.key_pattern) - .await?; - - if !query.show_secret { - key.state.as_option_mut().unwrap().secret_key = "(redacted)".into(); - } - - self.key_info_result(key).await - } - - async fn handle_create_key(&self, query: &KeyNewOpt) -> Result { - let key = Key::new(&query.name); - self.garage.key_table.insert(&key).await?; - self.key_info_result(key).await - } - - async fn handle_rename_key(&self, query: &KeyRenameOpt) -> Result { - let mut key = self - .garage - .key_helper() - .get_existing_matching_key(&query.key_pattern) - .await?; - key.params_mut() - .unwrap() - .name - .update(query.new_name.clone()); - self.garage.key_table.insert(&key).await?; - self.key_info_result(key).await - } - - async fn handle_delete_key(&self, query: &KeyDeleteOpt) -> Result { - let helper = self.garage.locked_helper().await; - - let mut key = helper - .key() - .get_existing_matching_key(&query.key_pattern) - .await?; - - if !query.yes { - return Err(Error::BadRequest( - "Add --yes flag to really perform this operation".to_string(), - )); - } - - helper.delete_key(&mut key).await?; - - Ok(AdminRpc::Ok(format!( - "Key {} was deleted successfully.", - key.key_id - ))) - } - - async fn handle_allow_key(&self, query: &KeyPermOpt) -> Result { - let mut key = self - .garage - .key_helper() - .get_existing_matching_key(&query.key_pattern) - .await?; - if query.create_bucket { - key.params_mut().unwrap().allow_create_bucket.update(true); - } - self.garage.key_table.insert(&key).await?; - self.key_info_result(key).await - } - - async fn handle_deny_key(&self, query: &KeyPermOpt) -> Result { - let mut key = self - .garage - .key_helper() - .get_existing_matching_key(&query.key_pattern) - .await?; - if query.create_bucket { - key.params_mut().unwrap().allow_create_bucket.update(false); - } - self.garage.key_table.insert(&key).await?; - self.key_info_result(key).await - } - - async fn handle_import_key(&self, query: &KeyImportOpt) -> Result { - if !query.yes { - return Err(Error::BadRequest("This command is intended to re-import keys that were previously generated by Garage. If you want to create a new key, use `garage key new` instead. Add the --yes flag if you really want to re-import a key.".to_string())); - } - - let prev_key = self.garage.key_table.get(&EmptyKey, &query.key_id).await?; - if prev_key.is_some() { - return Err(Error::BadRequest(format!("Key {} already exists in data store. Even if it is deleted, we can't let you create a new key with the same ID. Sorry.", query.key_id))); - } - - let imported_key = Key::import(&query.key_id, &query.secret_key, &query.name) - .ok_or_bad_request("Invalid key format")?; - self.garage.key_table.insert(&imported_key).await?; - - self.key_info_result(imported_key).await - } - - async fn key_info_result(&self, key: Key) -> Result { - let mut relevant_buckets = HashMap::new(); - - for (id, _) in key - .state - .as_option() - .unwrap() - .authorized_buckets - .items() - .iter() - { - if let Some(b) = self.garage.bucket_table.get(&EmptyKey, id).await? { - relevant_buckets.insert(*id, b); - } - } - - Ok(AdminRpc::KeyInfo(key, relevant_buckets)) - } -} diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs deleted file mode 100644 index 6ae8fa88..00000000 --- a/src/garage/admin/mod.rs +++ /dev/null @@ -1,545 +0,0 @@ -mod block; -mod bucket; -mod key; - -use std::collections::HashMap; -use std::fmt::Write; -use std::future::Future; -use std::sync::Arc; - -use futures::future::FutureExt; - -use serde::{Deserialize, Serialize}; - -use format_table::format_table_to_string; - -use garage_util::background::BackgroundRunner; -use garage_util::data::*; -use garage_util::error::Error as GarageError; - -use garage_table::replication::*; -use garage_table::*; - -use garage_rpc::layout::PARTITION_BITS; -use garage_rpc::*; - -use garage_block::manager::BlockResyncErrorInfo; - -use garage_model::bucket_table::*; -use garage_model::garage::Garage; -use garage_model::helper::error::{Error, OkOrBadRequest}; -use garage_model::key_table::*; -use garage_model::s3::mpu_table::MultipartUpload; -use garage_model::s3::version_table::Version; - -use crate::cli::*; -use crate::repair::online::launch_online_repair; - -pub const ADMIN_RPC_PATH: &str = "garage/admin_rpc.rs/Rpc"; - -#[derive(Debug, Serialize, Deserialize)] -#[allow(clippy::large_enum_variant)] -pub enum AdminRpc { - BucketOperation(BucketOperation), - KeyOperation(KeyOperation), - LaunchRepair(RepairOpt), - Stats(StatsOpt), - Worker(WorkerOperation), - BlockOperation(BlockOperation), - MetaOperation(MetaOperation), - - // Replies - Ok(String), - BucketList(Vec), - BucketInfo { - bucket: Bucket, - relevant_keys: HashMap, - counters: HashMap, - mpu_counters: HashMap, - }, - KeyList(Vec<(String, String)>), - KeyInfo(Key, HashMap), - WorkerList( - HashMap, - WorkerListOpt, - ), - WorkerVars(Vec<(Uuid, String, String)>), - WorkerInfo(usize, garage_util::background::WorkerInfo), - BlockErrorList(Vec), - BlockInfo { - hash: Hash, - refcount: u64, - versions: Vec>, - uploads: Vec, - }, -} - -impl Rpc for AdminRpc { - type Response = Result; -} - -pub struct AdminRpcHandler { - garage: Arc, - background: Arc, - endpoint: Arc>, -} - -impl AdminRpcHandler { - pub fn new(garage: Arc, background: Arc) -> Arc { - let endpoint = garage.system.netapp.endpoint(ADMIN_RPC_PATH.into()); - let admin = Arc::new(Self { - garage, - background, - endpoint, - }); - admin.endpoint.set_handler(admin.clone()); - admin - } - - // ================ REPAIR COMMANDS ==================== - - async fn handle_launch_repair(self: &Arc, opt: RepairOpt) -> Result { - if !opt.yes { - return Err(Error::BadRequest( - "Please provide the --yes flag to initiate repair operations.".to_string(), - )); - } - if opt.all_nodes { - let mut opt_to_send = opt.clone(); - opt_to_send.all_nodes = false; - - let mut failures = vec![]; - let all_nodes = self.garage.system.cluster_layout().all_nodes().to_vec(); - for node in all_nodes.iter() { - let node = (*node).into(); - let resp = self - .endpoint - .call( - &node, - AdminRpc::LaunchRepair(opt_to_send.clone()), - PRIO_NORMAL, - ) - .await; - if !matches!(resp, Ok(Ok(_))) { - failures.push(node); - } - } - if failures.is_empty() { - Ok(AdminRpc::Ok("Repair launched on all nodes".to_string())) - } else { - Err(Error::BadRequest(format!( - "Could not launch repair on nodes: {:?} (launched successfully on other nodes)", - failures - ))) - } - } else { - launch_online_repair(&self.garage, &self.background, opt).await?; - Ok(AdminRpc::Ok(format!( - "Repair launched on {:?}", - self.garage.system.id - ))) - } - } - - // ================ STATS COMMANDS ==================== - - async fn handle_stats(&self, opt: StatsOpt) -> Result { - if opt.all_nodes { - let mut ret = String::new(); - let mut all_nodes = self.garage.system.cluster_layout().all_nodes().to_vec(); - for node in self.garage.system.get_known_nodes().iter() { - if node.is_up && !all_nodes.contains(&node.id) { - all_nodes.push(node.id); - } - } - - for node in all_nodes.iter() { - let mut opt = opt.clone(); - opt.all_nodes = false; - opt.skip_global = true; - - writeln!(&mut ret, "\n======================").unwrap(); - writeln!(&mut ret, "Stats for node {:?}:", node).unwrap(); - - let node_id = (*node).into(); - match self - .endpoint - .call(&node_id, AdminRpc::Stats(opt), PRIO_NORMAL) - .await - { - Ok(Ok(AdminRpc::Ok(s))) => writeln!(&mut ret, "{}", s).unwrap(), - Ok(Ok(x)) => writeln!(&mut ret, "Bad answer: {:?}", x).unwrap(), - Ok(Err(e)) => writeln!(&mut ret, "Remote error: {}", e).unwrap(), - Err(e) => writeln!(&mut ret, "Network error: {}", e).unwrap(), - } - } - - writeln!(&mut ret, "\n======================").unwrap(); - write!( - &mut ret, - "Cluster statistics:\n\n{}", - self.gather_cluster_stats() - ) - .unwrap(); - - Ok(AdminRpc::Ok(ret)) - } else { - Ok(AdminRpc::Ok(self.gather_stats_local(opt)?)) - } - } - - fn gather_stats_local(&self, opt: StatsOpt) -> Result { - let mut ret = String::new(); - writeln!( - &mut ret, - "\nGarage version: {} [features: {}]\nRust compiler version: {}", - garage_util::version::garage_version(), - garage_util::version::garage_features() - .map(|list| list.join(", ")) - .unwrap_or_else(|| "(unknown)".into()), - garage_util::version::rust_version(), - ) - .unwrap(); - - writeln!(&mut ret, "\nDatabase engine: {}", self.garage.db.engine()).unwrap(); - - // Gather table statistics - let mut table = vec![" Table\tItems\tMklItems\tMklTodo\tGcTodo".into()]; - table.push(self.gather_table_stats(&self.garage.bucket_table)?); - table.push(self.gather_table_stats(&self.garage.key_table)?); - table.push(self.gather_table_stats(&self.garage.object_table)?); - table.push(self.gather_table_stats(&self.garage.version_table)?); - table.push(self.gather_table_stats(&self.garage.block_ref_table)?); - write!( - &mut ret, - "\nTable stats:\n{}", - format_table_to_string(table) - ) - .unwrap(); - - // Gather block manager statistics - writeln!(&mut ret, "\nBlock manager stats:").unwrap(); - let rc_len = self.garage.block_manager.rc_approximate_len()?.to_string(); - - writeln!( - &mut ret, - " number of RC entries (~= number of blocks): {}", - rc_len - ) - .unwrap(); - writeln!( - &mut ret, - " resync queue length: {}", - self.garage.block_manager.resync.queue_approximate_len()? - ) - .unwrap(); - writeln!( - &mut ret, - " blocks with resync errors: {}", - self.garage.block_manager.resync.errors_approximate_len()? - ) - .unwrap(); - - if !opt.skip_global { - write!(&mut ret, "\n{}", self.gather_cluster_stats()).unwrap(); - } - - Ok(ret) - } - - fn gather_cluster_stats(&self) -> String { - let mut ret = String::new(); - - // Gather storage node and free space statistics for current nodes - let layout = &self.garage.system.cluster_layout(); - let mut node_partition_count = HashMap::::new(); - for short_id in layout.current().ring_assignment_data.iter() { - let id = layout.current().node_id_vec[*short_id as usize]; - *node_partition_count.entry(id).or_default() += 1; - } - let node_info = self - .garage - .system - .get_known_nodes() - .into_iter() - .map(|n| (n.id, n)) - .collect::>(); - - let mut table = vec![" ID\tHostname\tZone\tCapacity\tPart.\tDataAvail\tMetaAvail".into()]; - for (id, parts) in node_partition_count.iter() { - let info = node_info.get(id); - let status = info.map(|x| &x.status); - let role = layout.current().roles.get(id).and_then(|x| x.0.as_ref()); - let hostname = status.and_then(|x| x.hostname.as_deref()).unwrap_or("?"); - let zone = role.map(|x| x.zone.as_str()).unwrap_or("?"); - let capacity = role - .map(|x| x.capacity_string()) - .unwrap_or_else(|| "?".into()); - let avail_str = |x| match x { - Some((avail, total)) => { - let pct = (avail as f64) / (total as f64) * 100.; - let avail = bytesize::ByteSize::b(avail); - let total = bytesize::ByteSize::b(total); - format!("{}/{} ({:.1}%)", avail, total, pct) - } - None => "?".into(), - }; - let data_avail = avail_str(status.and_then(|x| x.data_disk_avail)); - let meta_avail = avail_str(status.and_then(|x| x.meta_disk_avail)); - table.push(format!( - " {:?}\t{}\t{}\t{}\t{}\t{}\t{}", - id, hostname, zone, capacity, parts, data_avail, meta_avail - )); - } - write!( - &mut ret, - "Storage nodes:\n{}", - format_table_to_string(table) - ) - .unwrap(); - - let meta_part_avail = node_partition_count - .iter() - .filter_map(|(id, parts)| { - node_info - .get(id) - .and_then(|x| x.status.meta_disk_avail) - .map(|c| c.0 / *parts) - }) - .collect::>(); - let data_part_avail = node_partition_count - .iter() - .filter_map(|(id, parts)| { - node_info - .get(id) - .and_then(|x| x.status.data_disk_avail) - .map(|c| c.0 / *parts) - }) - .collect::>(); - if !meta_part_avail.is_empty() && !data_part_avail.is_empty() { - let meta_avail = - bytesize::ByteSize(meta_part_avail.iter().min().unwrap() * (1 << PARTITION_BITS)); - let data_avail = - bytesize::ByteSize(data_part_avail.iter().min().unwrap() * (1 << PARTITION_BITS)); - writeln!( - &mut ret, - "\nEstimated available storage space cluster-wide (might be lower in practice):" - ) - .unwrap(); - if meta_part_avail.len() < node_partition_count.len() - || data_part_avail.len() < node_partition_count.len() - { - writeln!(&mut ret, " data: < {}", data_avail).unwrap(); - writeln!(&mut ret, " metadata: < {}", meta_avail).unwrap(); - writeln!(&mut ret, "A precise estimate could not be given as information is missing for some storage nodes.").unwrap(); - } else { - writeln!(&mut ret, " data: {}", data_avail).unwrap(); - writeln!(&mut ret, " metadata: {}", meta_avail).unwrap(); - } - } - - ret - } - - fn gather_table_stats(&self, t: &Arc>) -> Result - where - F: TableSchema + 'static, - R: TableReplication + 'static, - { - let data_len = t - .data - .store - .approximate_len() - .map_err(GarageError::from)? - .to_string(); - let mkl_len = t.merkle_updater.merkle_tree_approximate_len()?.to_string(); - - Ok(format!( - " {}\t{}\t{}\t{}\t{}", - F::TABLE_NAME, - data_len, - mkl_len, - t.merkle_updater.todo_approximate_len()?, - t.data.gc_todo_approximate_len()? - )) - } - - // ================ WORKER COMMANDS ==================== - - async fn handle_worker_cmd(&self, cmd: &WorkerOperation) -> Result { - match cmd { - WorkerOperation::List { opt } => { - let workers = self.background.get_worker_info(); - Ok(AdminRpc::WorkerList(workers, *opt)) - } - WorkerOperation::Info { tid } => { - let info = self - .background - .get_worker_info() - .get(tid) - .ok_or_bad_request(format!("No worker with TID {}", tid))? - .clone(); - Ok(AdminRpc::WorkerInfo(*tid, info)) - } - WorkerOperation::Get { - all_nodes, - variable, - } => self.handle_get_var(*all_nodes, variable).await, - WorkerOperation::Set { - all_nodes, - variable, - value, - } => self.handle_set_var(*all_nodes, variable, value).await, - } - } - - async fn handle_get_var( - &self, - all_nodes: bool, - variable: &Option, - ) -> Result { - if all_nodes { - let mut ret = vec![]; - let all_nodes = self.garage.system.cluster_layout().all_nodes().to_vec(); - for node in all_nodes.iter() { - let node = (*node).into(); - match self - .endpoint - .call( - &node, - AdminRpc::Worker(WorkerOperation::Get { - all_nodes: false, - variable: variable.clone(), - }), - PRIO_NORMAL, - ) - .await?? - { - AdminRpc::WorkerVars(v) => ret.extend(v), - m => return Err(GarageError::unexpected_rpc_message(m).into()), - } - } - Ok(AdminRpc::WorkerVars(ret)) - } else { - #[allow(clippy::collapsible_else_if)] - if let Some(v) = variable { - Ok(AdminRpc::WorkerVars(vec![( - self.garage.system.id, - v.clone(), - self.garage.bg_vars.get(v)?, - )])) - } else { - let mut vars = self.garage.bg_vars.get_all(); - vars.sort(); - Ok(AdminRpc::WorkerVars( - vars.into_iter() - .map(|(k, v)| (self.garage.system.id, k.to_string(), v)) - .collect(), - )) - } - } - } - - async fn handle_set_var( - &self, - all_nodes: bool, - variable: &str, - value: &str, - ) -> Result { - if all_nodes { - let mut ret = vec![]; - let all_nodes = self.garage.system.cluster_layout().all_nodes().to_vec(); - for node in all_nodes.iter() { - let node = (*node).into(); - match self - .endpoint - .call( - &node, - AdminRpc::Worker(WorkerOperation::Set { - all_nodes: false, - variable: variable.to_string(), - value: value.to_string(), - }), - PRIO_NORMAL, - ) - .await?? - { - AdminRpc::WorkerVars(v) => ret.extend(v), - m => return Err(GarageError::unexpected_rpc_message(m).into()), - } - } - Ok(AdminRpc::WorkerVars(ret)) - } else { - self.garage.bg_vars.set(variable, value)?; - Ok(AdminRpc::WorkerVars(vec![( - self.garage.system.id, - variable.to_string(), - value.to_string(), - )])) - } - } - - // ================ META DB COMMANDS ==================== - - async fn handle_meta_cmd(self: &Arc, mo: &MetaOperation) -> Result { - match mo { - MetaOperation::Snapshot { all: true } => { - let to = self.garage.system.cluster_layout().all_nodes().to_vec(); - - let resps = futures::future::join_all(to.iter().map(|to| async move { - let to = (*to).into(); - self.endpoint - .call( - &to, - AdminRpc::MetaOperation(MetaOperation::Snapshot { all: false }), - PRIO_NORMAL, - ) - .await? - })) - .await; - - let mut ret = vec![]; - for (to, resp) in to.iter().zip(resps.iter()) { - let res_str = match resp { - Ok(_) => "ok".to_string(), - Err(e) => format!("error: {}", e), - }; - ret.push(format!("{:?}\t{}", to, res_str)); - } - - if resps.iter().any(Result::is_err) { - Err(GarageError::Message(format_table_to_string(ret)).into()) - } else { - Ok(AdminRpc::Ok(format_table_to_string(ret))) - } - } - MetaOperation::Snapshot { all: false } => { - garage_model::snapshot::async_snapshot_metadata(&self.garage).await?; - Ok(AdminRpc::Ok("Snapshot has been saved.".into())) - } - } - } -} - -impl EndpointHandler for AdminRpcHandler { - fn handle( - self: &Arc, - message: &AdminRpc, - _from: NodeID, - ) -> impl Future> + Send { - let self2 = self.clone(); - async move { - match message { - AdminRpc::BucketOperation(bo) => self2.handle_bucket_cmd(bo).await, - AdminRpc::KeyOperation(ko) => self2.handle_key_cmd(ko).await, - AdminRpc::LaunchRepair(opt) => self2.handle_launch_repair(opt.clone()).await, - AdminRpc::Stats(opt) => self2.handle_stats(opt.clone()).await, - AdminRpc::Worker(wo) => self2.handle_worker_cmd(wo).await, - AdminRpc::BlockOperation(bo) => self2.handle_block_cmd(bo).await, - AdminRpc::MetaOperation(mo) => self2.handle_meta_cmd(mo).await, - m => Err(GarageError::unexpected_rpc_message(m).into()), - } - } - .boxed() - } -} diff --git a/src/garage/cli/cmd.rs b/src/garage/cli/cmd.rs deleted file mode 100644 index 44d3d96c..00000000 --- a/src/garage/cli/cmd.rs +++ /dev/null @@ -1,280 +0,0 @@ -use std::collections::{HashMap, HashSet}; -use std::time::Duration; - -use format_table::format_table; -use garage_util::error::*; - -use garage_rpc::layout::*; -use garage_rpc::system::*; -use garage_rpc::*; - -use garage_model::helper::error::Error as HelperError; - -use crate::admin::*; -use crate::cli::*; - -pub async fn cli_command_dispatch( - cmd: Command, - system_rpc_endpoint: &Endpoint, - admin_rpc_endpoint: &Endpoint, - rpc_host: NodeID, -) -> Result<(), HelperError> { - match cmd { - Command::Status => Ok(cmd_status(system_rpc_endpoint, rpc_host).await?), - Command::Node(NodeOperation::Connect(connect_opt)) => { - Ok(cmd_connect(system_rpc_endpoint, rpc_host, connect_opt).await?) - } - Command::Layout(layout_opt) => { - Ok(cli_layout_command_dispatch(layout_opt, system_rpc_endpoint, rpc_host).await?) - } - Command::Bucket(bo) => { - cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::BucketOperation(bo)).await - } - Command::Key(ko) => { - cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::KeyOperation(ko)).await - } - Command::Repair(ro) => { - cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::LaunchRepair(ro)).await - } - Command::Stats(so) => cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::Stats(so)).await, - Command::Worker(wo) => cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::Worker(wo)).await, - Command::Block(bo) => { - cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::BlockOperation(bo)).await - } - Command::Meta(mo) => { - cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::MetaOperation(mo)).await - } - _ => unreachable!(), - } -} - -pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> Result<(), Error> { - let status = fetch_status(rpc_cli, rpc_host).await?; - let layout = fetch_layout(rpc_cli, rpc_host).await?; - - println!("==== HEALTHY NODES ===="); - let mut healthy_nodes = - vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tDataAvail".to_string()]; - for adv in status.iter().filter(|adv| adv.is_up) { - let host = adv.status.hostname.as_deref().unwrap_or("?"); - let addr = match adv.addr { - Some(addr) => addr.to_string(), - None => "N/A".to_string(), - }; - if let Some(NodeRoleV(Some(cfg))) = layout.current().roles.get(&adv.id) { - let data_avail = match &adv.status.data_disk_avail { - _ if cfg.capacity.is_none() => "N/A".into(), - Some((avail, total)) => { - let pct = (*avail as f64) / (*total as f64) * 100.; - let avail = bytesize::ByteSize::b(*avail); - format!("{} ({:.1}%)", avail, pct) - } - None => "?".into(), - }; - healthy_nodes.push(format!( - "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{data_avail}", - id = adv.id, - host = host, - addr = addr, - tags = cfg.tags.join(","), - zone = cfg.zone, - capacity = cfg.capacity_string(), - data_avail = data_avail, - )); - } else { - let prev_role = layout - .versions - .iter() - .rev() - .find_map(|x| match x.roles.get(&adv.id) { - Some(NodeRoleV(Some(cfg))) => Some(cfg), - _ => None, - }); - if let Some(cfg) = prev_role { - healthy_nodes.push(format!( - "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\tdraining metadata...", - id = adv.id, - host = host, - addr = addr, - tags = cfg.tags.join(","), - zone = cfg.zone, - )); - } else { - let new_role = match layout.staging.get().roles.get(&adv.id) { - Some(NodeRoleV(Some(_))) => "pending...", - _ => "NO ROLE ASSIGNED", - }; - healthy_nodes.push(format!( - "{id:?}\t{h}\t{addr}\t\t\t{new_role}", - id = adv.id, - h = host, - addr = addr, - new_role = new_role, - )); - } - } - } - format_table(healthy_nodes); - - // Determine which nodes are unhealthy and print that to stdout - let status_map = status - .iter() - .map(|adv| (adv.id, adv)) - .collect::>(); - - let tf = timeago::Formatter::new(); - let mut drain_msg = false; - let mut failed_nodes = vec!["ID\tHostname\tTags\tZone\tCapacity\tLast seen".to_string()]; - let mut listed = HashSet::new(); - for ver in layout.versions.iter().rev() { - for (node, _, role) in ver.roles.items().iter() { - let cfg = match role { - NodeRoleV(Some(role)) if role.capacity.is_some() => role, - _ => continue, - }; - - if listed.contains(node) { - continue; - } - listed.insert(*node); - - let adv = status_map.get(node); - if adv.map(|x| x.is_up).unwrap_or(false) { - continue; - } - - // Node is in a layout version, is not a gateway node, and is not up: - // it is in a failed state, add proper line to the output - let (host, last_seen) = match adv { - Some(adv) => ( - adv.status.hostname.as_deref().unwrap_or("?"), - adv.last_seen_secs_ago - .map(|s| tf.convert(Duration::from_secs(s))) - .unwrap_or_else(|| "never seen".into()), - ), - None => ("??", "never seen".into()), - }; - let capacity = if ver.version == layout.current().version { - cfg.capacity_string() - } else { - drain_msg = true; - "draining metadata...".to_string() - }; - failed_nodes.push(format!( - "{id:?}\t{host}\t[{tags}]\t{zone}\t{capacity}\t{last_seen}", - id = node, - host = host, - tags = cfg.tags.join(","), - zone = cfg.zone, - capacity = capacity, - last_seen = last_seen, - )); - } - } - - if failed_nodes.len() > 1 { - println!("\n==== FAILED NODES ===="); - format_table(failed_nodes); - if drain_msg { - println!(); - println!("Your cluster is expecting to drain data from nodes that are currently unavailable."); - println!("If these nodes are definitely dead, please review the layout history with"); - println!( - "`garage layout history` and use `garage layout skip-dead-nodes` to force progress." - ); - } - } - - if print_staging_role_changes(&layout) { - println!(); - println!("Please use `garage layout show` to check the proposed new layout and apply it."); - println!(); - } - - Ok(()) -} - -pub async fn cmd_connect( - rpc_cli: &Endpoint, - rpc_host: NodeID, - args: ConnectNodeOpt, -) -> Result<(), Error> { - match rpc_cli - .call(&rpc_host, SystemRpc::Connect(args.node), PRIO_NORMAL) - .await?? - { - SystemRpc::Ok => { - println!("Success."); - Ok(()) - } - m => Err(Error::unexpected_rpc_message(m)), - } -} - -pub async fn cmd_admin( - rpc_cli: &Endpoint, - rpc_host: NodeID, - args: AdminRpc, -) -> Result<(), HelperError> { - match rpc_cli.call(&rpc_host, args, PRIO_NORMAL).await?? { - AdminRpc::Ok(msg) => { - println!("{}", msg); - } - AdminRpc::BucketList(bl) => { - print_bucket_list(bl); - } - AdminRpc::BucketInfo { - bucket, - relevant_keys, - counters, - mpu_counters, - } => { - print_bucket_info(&bucket, &relevant_keys, &counters, &mpu_counters); - } - AdminRpc::KeyList(kl) => { - print_key_list(kl); - } - AdminRpc::KeyInfo(key, rb) => { - print_key_info(&key, &rb); - } - AdminRpc::WorkerList(wi, wlo) => { - print_worker_list(wi, wlo); - } - AdminRpc::WorkerVars(wv) => { - print_worker_vars(wv); - } - AdminRpc::WorkerInfo(tid, wi) => { - print_worker_info(tid, wi); - } - AdminRpc::BlockErrorList(el) => { - print_block_error_list(el); - } - AdminRpc::BlockInfo { - hash, - refcount, - versions, - uploads, - } => { - print_block_info(hash, refcount, versions, uploads); - } - r => { - error!("Unexpected response: {:?}", r); - } - } - Ok(()) -} - -// ---- utility ---- - -pub async fn fetch_status( - rpc_cli: &Endpoint, - rpc_host: NodeID, -) -> Result, Error> { - match rpc_cli - .call(&rpc_host, SystemRpc::GetKnownNodes, PRIO_NORMAL) - .await?? - { - SystemRpc::ReturnKnownNodes(nodes) => Ok(nodes), - resp => Err(Error::unexpected_rpc_message(resp)), - } -} diff --git a/src/garage/cli/layout.rs b/src/garage/cli/layout.rs deleted file mode 100644 index f053eef4..00000000 --- a/src/garage/cli/layout.rs +++ /dev/null @@ -1,584 +0,0 @@ -use bytesize::ByteSize; - -use format_table::format_table; -use garage_util::crdt::Crdt; -use garage_util::error::*; - -use garage_rpc::layout::*; -use garage_rpc::system::*; -use garage_rpc::*; - -use crate::cli::*; - -pub async fn cli_layout_command_dispatch( - cmd: LayoutOperation, - system_rpc_endpoint: &Endpoint, - rpc_host: NodeID, -) -> Result<(), Error> { - match cmd { - LayoutOperation::Assign(assign_opt) => { - cmd_assign_role(system_rpc_endpoint, rpc_host, assign_opt).await - } - LayoutOperation::Remove(remove_opt) => { - cmd_remove_role(system_rpc_endpoint, rpc_host, remove_opt).await - } - LayoutOperation::Show => cmd_show_layout(system_rpc_endpoint, rpc_host).await, - LayoutOperation::Apply(apply_opt) => { - cmd_apply_layout(system_rpc_endpoint, rpc_host, apply_opt).await - } - LayoutOperation::Revert(revert_opt) => { - cmd_revert_layout(system_rpc_endpoint, rpc_host, revert_opt).await - } - LayoutOperation::Config(config_opt) => { - cmd_config_layout(system_rpc_endpoint, rpc_host, config_opt).await - } - LayoutOperation::History => cmd_layout_history(system_rpc_endpoint, rpc_host).await, - LayoutOperation::SkipDeadNodes(assume_sync_opt) => { - cmd_layout_skip_dead_nodes(system_rpc_endpoint, rpc_host, assume_sync_opt).await - } - } -} - -pub async fn cmd_assign_role( - rpc_cli: &Endpoint, - rpc_host: NodeID, - args: AssignRoleOpt, -) -> Result<(), Error> { - let status = match rpc_cli - .call(&rpc_host, SystemRpc::GetKnownNodes, PRIO_NORMAL) - .await?? - { - SystemRpc::ReturnKnownNodes(nodes) => nodes, - resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), - }; - - let mut layout = fetch_layout(rpc_cli, rpc_host).await?; - let all_nodes = layout.get_all_nodes(); - - let added_nodes = args - .node_ids - .iter() - .map(|node_id| { - find_matching_node( - status - .iter() - .map(|adv| adv.id) - .chain(all_nodes.iter().cloned()), - node_id, - ) - }) - .collect::, _>>()?; - - let mut roles = layout.current().roles.clone(); - roles.merge(&layout.staging.get().roles); - - for replaced in args.replace.iter() { - let replaced_node = find_matching_node(all_nodes.iter().cloned(), replaced)?; - match roles.get(&replaced_node) { - Some(NodeRoleV(Some(_))) => { - layout - .staging - .get_mut() - .roles - .merge(&roles.update_mutator(replaced_node, NodeRoleV(None))); - } - _ => { - return Err(Error::Message(format!( - "Cannot replace node {:?} as it is not currently in planned layout", - replaced_node - ))); - } - } - } - - if args.capacity.is_some() && args.gateway { - return Err(Error::Message( - "-c and -g are mutually exclusive, please configure node either with c>0 to act as a storage node or with -g to act as a gateway node".into())); - } - if args.capacity == Some(ByteSize::b(0)) { - return Err(Error::Message("Invalid capacity value: 0".into())); - } - - for added_node in added_nodes { - let new_entry = match roles.get(&added_node) { - Some(NodeRoleV(Some(old))) => { - let capacity = match args.capacity { - Some(c) => Some(c.as_u64()), - None if args.gateway => None, - None => old.capacity, - }; - let tags = if args.tags.is_empty() { - old.tags.clone() - } else { - args.tags.clone() - }; - NodeRole { - zone: args.zone.clone().unwrap_or_else(|| old.zone.to_string()), - capacity, - tags, - } - } - _ => { - let capacity = match args.capacity { - Some(c) => Some(c.as_u64()), - None if args.gateway => None, - None => return Err(Error::Message( - "Please specify a capacity with the -c flag, or set node explicitly as gateway with -g".into())), - }; - NodeRole { - zone: args - .zone - .clone() - .ok_or("Please specify a zone with the -z flag")?, - capacity, - tags: args.tags.clone(), - } - } - }; - - layout - .staging - .get_mut() - .roles - .merge(&roles.update_mutator(added_node, NodeRoleV(Some(new_entry)))); - } - - send_layout(rpc_cli, rpc_host, layout).await?; - - println!("Role changes are staged but not yet committed."); - println!("Use `garage layout show` to view staged role changes,"); - println!("and `garage layout apply` to enact staged changes."); - Ok(()) -} - -pub async fn cmd_remove_role( - rpc_cli: &Endpoint, - rpc_host: NodeID, - args: RemoveRoleOpt, -) -> Result<(), Error> { - let mut layout = fetch_layout(rpc_cli, rpc_host).await?; - - let mut roles = layout.current().roles.clone(); - roles.merge(&layout.staging.get().roles); - - let deleted_node = - find_matching_node(roles.items().iter().map(|(id, _, _)| *id), &args.node_id)?; - - layout - .staging - .get_mut() - .roles - .merge(&roles.update_mutator(deleted_node, NodeRoleV(None))); - - send_layout(rpc_cli, rpc_host, layout).await?; - - println!("Role removal is staged but not yet committed."); - println!("Use `garage layout show` to view staged role changes,"); - println!("and `garage layout apply` to enact staged changes."); - Ok(()) -} - -pub async fn cmd_show_layout( - rpc_cli: &Endpoint, - rpc_host: NodeID, -) -> Result<(), Error> { - let layout = fetch_layout(rpc_cli, rpc_host).await?; - - println!("==== CURRENT CLUSTER LAYOUT ===="); - print_cluster_layout(layout.current(), "No nodes currently have a role in the cluster.\nSee `garage status` to view available nodes."); - println!(); - println!( - "Current cluster layout version: {}", - layout.current().version - ); - - let has_role_changes = print_staging_role_changes(&layout); - if has_role_changes { - let v = layout.current().version; - let res_apply = layout.apply_staged_changes(Some(v + 1)); - - // this will print the stats of what partitions - // will move around when we apply - match res_apply { - Ok((layout, msg)) => { - println!(); - println!("==== NEW CLUSTER LAYOUT AFTER APPLYING CHANGES ===="); - print_cluster_layout(layout.current(), "No nodes have a role in the new layout."); - println!(); - - for line in msg.iter() { - println!("{}", line); - } - println!("To enact the staged role changes, type:"); - println!(); - println!(" garage layout apply --version {}", v + 1); - println!(); - println!("You can also revert all proposed changes with: garage layout revert"); - } - Err(e) => { - println!("Error while trying to compute the assignment: {}", e); - println!("This new layout cannot yet be applied."); - println!("You can also revert all proposed changes with: garage layout revert"); - } - } - } - - Ok(()) -} - -pub async fn cmd_apply_layout( - rpc_cli: &Endpoint, - rpc_host: NodeID, - apply_opt: ApplyLayoutOpt, -) -> Result<(), Error> { - let layout = fetch_layout(rpc_cli, rpc_host).await?; - - let (layout, msg) = layout.apply_staged_changes(apply_opt.version)?; - for line in msg.iter() { - println!("{}", line); - } - - send_layout(rpc_cli, rpc_host, layout).await?; - - println!("New cluster layout with updated role assignment has been applied in cluster."); - println!("Data will now be moved around between nodes accordingly."); - - Ok(()) -} - -pub async fn cmd_revert_layout( - rpc_cli: &Endpoint, - rpc_host: NodeID, - revert_opt: RevertLayoutOpt, -) -> Result<(), Error> { - if !revert_opt.yes { - return Err(Error::Message( - "Please add the --yes flag to run the layout revert operation".into(), - )); - } - - let layout = fetch_layout(rpc_cli, rpc_host).await?; - - let layout = layout.revert_staged_changes()?; - - send_layout(rpc_cli, rpc_host, layout).await?; - - println!("All proposed role changes in cluster layout have been canceled."); - Ok(()) -} - -pub async fn cmd_config_layout( - rpc_cli: &Endpoint, - rpc_host: NodeID, - config_opt: ConfigLayoutOpt, -) -> Result<(), Error> { - let mut layout = fetch_layout(rpc_cli, rpc_host).await?; - - let mut did_something = false; - match config_opt.redundancy { - None => (), - Some(r_str) => { - let r = r_str - .parse::() - .ok_or_message("invalid zone redundancy value")?; - if let ZoneRedundancy::AtLeast(r_int) = r { - if r_int > layout.current().replication_factor { - return Err(Error::Message(format!( - "The zone redundancy must be smaller or equal to the \ - replication factor ({}).", - layout.current().replication_factor - ))); - } else if r_int < 1 { - return Err(Error::Message( - "The zone redundancy must be at least 1.".into(), - )); - } - } - - layout - .staging - .get_mut() - .parameters - .update(LayoutParameters { zone_redundancy: r }); - println!("The zone redundancy parameter has been set to '{}'.", r); - did_something = true; - } - } - - if !did_something { - return Err(Error::Message( - "Please specify an action for `garage layout config`".into(), - )); - } - - send_layout(rpc_cli, rpc_host, layout).await?; - Ok(()) -} - -pub async fn cmd_layout_history( - rpc_cli: &Endpoint, - rpc_host: NodeID, -) -> Result<(), Error> { - let layout = fetch_layout(rpc_cli, rpc_host).await?; - let min_stored = layout.min_stored(); - - println!("==== LAYOUT HISTORY ===="); - let mut table = vec!["Version\tStatus\tStorage nodes\tGateway nodes".to_string()]; - for ver in layout - .versions - .iter() - .rev() - .chain(layout.old_versions.iter().rev()) - { - let status = if ver.version == layout.current().version { - "current" - } else if ver.version >= min_stored { - "draining" - } else { - "historical" - }; - table.push(format!( - "#{}\t{}\t{}\t{}", - ver.version, - status, - ver.roles - .items() - .iter() - .filter(|(_, _, x)| matches!(x, NodeRoleV(Some(c)) if c.capacity.is_some())) - .count(), - ver.roles - .items() - .iter() - .filter(|(_, _, x)| matches!(x, NodeRoleV(Some(c)) if c.capacity.is_none())) - .count(), - )); - } - format_table(table); - println!(); - - if layout.versions.len() > 1 { - println!("==== UPDATE TRACKERS ===="); - println!("Several layout versions are currently live in the cluster, and data is being migrated."); - println!( - "This is the internal data that Garage stores to know which nodes have what data." - ); - println!(); - let mut table = vec!["Node\tAck\tSync\tSync_ack".to_string()]; - let all_nodes = layout.get_all_nodes(); - for node in all_nodes.iter() { - table.push(format!( - "{:?}\t#{}\t#{}\t#{}", - node, - layout.update_trackers.ack_map.get(node, min_stored), - layout.update_trackers.sync_map.get(node, min_stored), - layout.update_trackers.sync_ack_map.get(node, min_stored), - )); - } - table[1..].sort(); - format_table(table); - - let min_ack = layout - .update_trackers - .ack_map - .min_among(&all_nodes, layout.min_stored()); - - println!(); - println!( - "If some nodes are not catching up to the latest layout version in the update trackers," - ); - println!("it might be because they are offline or unable to complete a sync successfully."); - if min_ack < layout.current().version { - println!( - "You may force progress using `garage layout skip-dead-nodes --version {}`", - layout.current().version - ); - } else { - println!( - "You may force progress using `garage layout skip-dead-nodes --version {} --allow-missing-data`.", - layout.current().version - ); - } - } else { - println!("Your cluster is currently in a stable state with a single live layout version."); - println!("No metadata migration is in progress. Note that the migration of data blocks is not tracked,"); - println!( - "so you might want to keep old nodes online until their data directories become empty." - ); - } - - Ok(()) -} - -pub async fn cmd_layout_skip_dead_nodes( - rpc_cli: &Endpoint, - rpc_host: NodeID, - opt: SkipDeadNodesOpt, -) -> Result<(), Error> { - let status = fetch_status(rpc_cli, rpc_host).await?; - let mut layout = fetch_layout(rpc_cli, rpc_host).await?; - - if layout.versions.len() == 1 { - return Err(Error::Message( - "This command cannot be called when there is only one live cluster layout version" - .into(), - )); - } - - let min_v = layout.min_stored(); - if opt.version <= min_v || opt.version > layout.current().version { - return Err(Error::Message(format!( - "Invalid version, you may use the following version numbers: {}", - (min_v + 1..=layout.current().version) - .map(|x| x.to_string()) - .collect::>() - .join(" ") - ))); - } - - let all_nodes = layout.get_all_nodes(); - let mut did_something = false; - for node in all_nodes.iter() { - // Update ACK tracker for dead nodes or for all nodes if --allow-missing-data - if opt.allow_missing_data || !status.iter().any(|x| x.id == *node && x.is_up) { - if layout.update_trackers.ack_map.set_max(*node, opt.version) { - println!("Increased the ACK tracker for node {:?}", node); - did_something = true; - } - } - - // If --allow-missing-data, update SYNC tracker for all nodes. - if opt.allow_missing_data { - if layout.update_trackers.sync_map.set_max(*node, opt.version) { - println!("Increased the SYNC tracker for node {:?}", node); - did_something = true; - } - } - } - - if did_something { - send_layout(rpc_cli, rpc_host, layout).await?; - println!("Success."); - Ok(()) - } else if !opt.allow_missing_data { - Err(Error::Message("Nothing was done, try passing the `--allow-missing-data` flag to force progress even when not enough nodes can complete a metadata sync.".into())) - } else { - Err(Error::Message( - "Sorry, there is nothing I can do for you. Please wait patiently. If you ask for help, please send the output of the `garage layout history` command.".into(), - )) - } -} - -// --- utility --- - -pub async fn fetch_layout( - rpc_cli: &Endpoint, - rpc_host: NodeID, -) -> Result { - match rpc_cli - .call(&rpc_host, SystemRpc::PullClusterLayout, PRIO_NORMAL) - .await?? - { - SystemRpc::AdvertiseClusterLayout(t) => Ok(t), - resp => Err(Error::unexpected_rpc_message(resp)), - } -} - -pub async fn send_layout( - rpc_cli: &Endpoint, - rpc_host: NodeID, - layout: LayoutHistory, -) -> Result<(), Error> { - rpc_cli - .call( - &rpc_host, - SystemRpc::AdvertiseClusterLayout(layout), - PRIO_NORMAL, - ) - .await??; - Ok(()) -} - -pub fn print_cluster_layout(layout: &LayoutVersion, empty_msg: &str) { - let mut table = vec!["ID\tTags\tZone\tCapacity\tUsable capacity".to_string()]; - for (id, _, role) in layout.roles.items().iter() { - let role = match &role.0 { - Some(r) => r, - _ => continue, - }; - let tags = role.tags.join(","); - let usage = layout.get_node_usage(id).unwrap_or(0); - let capacity = layout.get_node_capacity(id).unwrap_or(0); - if capacity > 0 { - table.push(format!( - "{:?}\t{}\t{}\t{}\t{} ({:.1}%)", - id, - tags, - role.zone, - role.capacity_string(), - ByteSize::b(usage as u64 * layout.partition_size).to_string_as(false), - (100.0 * usage as f32 * layout.partition_size as f32) / (capacity as f32) - )); - } else { - table.push(format!( - "{:?}\t{}\t{}\t{}", - id, - tags, - role.zone, - role.capacity_string() - )); - }; - } - if table.len() > 1 { - format_table(table); - println!(); - println!("Zone redundancy: {}", layout.parameters.zone_redundancy); - } else { - println!("{}", empty_msg); - } -} - -pub fn print_staging_role_changes(layout: &LayoutHistory) -> bool { - let staging = layout.staging.get(); - let has_role_changes = staging - .roles - .items() - .iter() - .any(|(k, _, v)| layout.current().roles.get(k) != Some(v)); - let has_layout_changes = *staging.parameters.get() != layout.current().parameters; - - if has_role_changes || has_layout_changes { - println!(); - println!("==== STAGED ROLE CHANGES ===="); - if has_role_changes { - let mut table = vec!["ID\tTags\tZone\tCapacity".to_string()]; - for (id, _, role) in staging.roles.items().iter() { - if layout.current().roles.get(id) == Some(role) { - continue; - } - if let Some(role) = &role.0 { - let tags = role.tags.join(","); - table.push(format!( - "{:?}\t{}\t{}\t{}", - id, - tags, - role.zone, - role.capacity_string() - )); - } else { - table.push(format!("{:?}\tREMOVED", id)); - } - } - format_table(table); - println!(); - } - if has_layout_changes { - println!( - "Zone redundancy: {}", - staging.parameters.get().zone_redundancy - ); - } - true - } else { - false - } -} diff --git a/src/garage/cli/convert_db.rs b/src/garage/cli/local/convert_db.rs similarity index 94% rename from src/garage/cli/convert_db.rs rename to src/garage/cli/local/convert_db.rs index a40fb61f..6ac34ee0 100644 --- a/src/garage/cli/convert_db.rs +++ b/src/garage/cli/local/convert_db.rs @@ -8,7 +8,7 @@ use garage_db::*; #[derive(StructOpt, Debug)] pub struct ConvertDbOpt { /// Input database path (not the same as metadata_dir, see - /// https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#db-engine-since-v0-8-0) + /// #[structopt(short = "i")] input_path: PathBuf, /// Input database engine (lmdb or sqlite; limited by db engines diff --git a/src/garage/cli/init.rs b/src/garage/cli/local/init.rs similarity index 86% rename from src/garage/cli/init.rs rename to src/garage/cli/local/init.rs index 43ca5c09..683930ca 100644 --- a/src/garage/cli/init.rs +++ b/src/garage/cli/local/init.rs @@ -36,16 +36,6 @@ pub fn node_id_command(config_file: PathBuf, quiet: bool) -> Result<(), Error> { ); eprintln!(" garage [-c ] node connect {}", idstr); eprintln!(); - eprintln!("Or instruct them to connect from here by running:"); - eprintln!( - " garage -c {} -h node connect {}", - config_file.to_string_lossy(), - idstr - ); - eprintln!( - "where is their own node identifier in the format: @:" - ); - eprintln!(); eprintln!("This node identifier can also be added as a bootstrap node in other node's garage.toml files:"); eprintln!(" bootstrap_peers = ["); eprintln!(" \"{}\",", idstr); diff --git a/src/garage/cli/local/mod.rs b/src/garage/cli/local/mod.rs new file mode 100644 index 00000000..476010b8 --- /dev/null +++ b/src/garage/cli/local/mod.rs @@ -0,0 +1,3 @@ +pub(crate) mod convert_db; +pub(crate) mod init; +pub(crate) mod repair; diff --git a/src/garage/repair/offline.rs b/src/garage/cli/local/repair.rs similarity index 100% rename from src/garage/repair/offline.rs rename to src/garage/cli/local/repair.rs diff --git a/src/garage/cli/mod.rs b/src/garage/cli/mod.rs index e131f62c..60e9a5de 100644 --- a/src/garage/cli/mod.rs +++ b/src/garage/cli/mod.rs @@ -1,13 +1,4 @@ -pub(crate) mod cmd; -pub(crate) mod init; -pub(crate) mod layout; -pub(crate) mod structs; -pub(crate) mod util; +pub mod structs; -pub(crate) mod convert_db; - -pub(crate) use cmd::*; -pub(crate) use init::*; -pub(crate) use layout::*; -pub(crate) use structs::*; -pub(crate) use util::*; +pub mod local; +pub mod remote; diff --git a/src/garage/cli/remote/admin_token.rs b/src/garage/cli/remote/admin_token.rs new file mode 100644 index 00000000..5a0b0595 --- /dev/null +++ b/src/garage/cli/remote/admin_token.rs @@ -0,0 +1,254 @@ +use format_table::format_table; + +use chrono::Local; + +use garage_util::error::*; + +use garage_api_admin::api::*; + +use crate::cli::remote::*; +use crate::cli::structs::*; + +impl Cli { + pub async fn cmd_admin_token(&self, cmd: AdminTokenOperation) -> Result<(), Error> { + match cmd { + AdminTokenOperation::List => self.cmd_list_admin_tokens().await, + AdminTokenOperation::Info { api_token } => self.cmd_admin_token_info(api_token).await, + AdminTokenOperation::Create(opt) => self.cmd_create_admin_token(opt).await, + AdminTokenOperation::Rename { + api_token, + new_name, + } => self.cmd_rename_admin_token(api_token, new_name).await, + AdminTokenOperation::Set(opt) => self.cmd_update_admin_token(opt).await, + AdminTokenOperation::Delete { api_token, yes } => { + self.cmd_delete_admin_token(api_token, yes).await + } + AdminTokenOperation::DeleteExpired { yes } => { + self.cmd_delete_expired_admin_tokens(yes).await + } + } + } + + pub async fn cmd_list_admin_tokens(&self) -> Result<(), Error> { + let mut list = self.api_request(ListAdminTokensRequest).await?; + + list.0.sort_by_key(|x| x.created); + + let mut table = vec!["ID\tCreated\tName\tExpiration\tScope".to_string()]; + for tok in list.0.iter() { + let scope = if tok.expired { + String::new() + } else { + table_list_abbr(&tok.scope) + }; + let exp = if tok.expired { + "expired".to_string() + } else { + tok.expiration + .map(|x| x.with_timezone(&Local).to_string()) + .unwrap_or("never".into()) + }; + table.push(format!( + "{}\t{}\t{}\t{}\t{}", + tok.id.as_deref().unwrap_or("-"), + tok.created + .map(|x| x.with_timezone(&Local).date_naive().to_string()) + .unwrap_or("-".into()), + tok.name, + exp, + scope, + )); + } + format_table(table); + + Ok(()) + } + + pub async fn cmd_admin_token_info(&self, search: String) -> Result<(), Error> { + let info = self + .api_request(GetAdminTokenInfoRequest { + id: None, + search: Some(search), + }) + .await?; + + print_token_info(&info); + + Ok(()) + } + + pub async fn cmd_create_admin_token(&self, opt: AdminTokenCreateOp) -> Result<(), Error> { + let res = self + .api_request(CreateAdminTokenRequest(UpdateAdminTokenRequestBody { + name: opt.name, + expiration: parse_expires_in(&opt.expires_in)?, + never_expires: false, + scope: opt.scope.map(|s| { + s.split(",") + .map(|x| x.trim().to_string()) + .collect::>() + }), + })) + .await?; + + if opt.quiet { + println!("{}", res.secret_token); + } else { + println!("This is your secret bearer token, it will not be shown again by Garage:"); + println!("\n {}\n", res.secret_token); + print_token_info(&res.info); + } + + Ok(()) + } + + pub async fn cmd_rename_admin_token(&self, old: String, new: String) -> Result<(), Error> { + let token = self + .api_request(GetAdminTokenInfoRequest { + id: None, + search: Some(old), + }) + .await?; + + let info = self + .api_request(UpdateAdminTokenRequest { + id: token.id.unwrap(), + body: UpdateAdminTokenRequestBody { + name: Some(new), + expiration: None, + never_expires: false, + scope: None, + }, + }) + .await?; + + print_token_info(&info.0); + + Ok(()) + } + + pub async fn cmd_update_admin_token(&self, opt: AdminTokenSetOp) -> Result<(), Error> { + let token = self + .api_request(GetAdminTokenInfoRequest { + id: None, + search: Some(opt.api_token), + }) + .await?; + + let info = self + .api_request(UpdateAdminTokenRequest { + id: token.id.unwrap(), + body: UpdateAdminTokenRequestBody { + name: None, + expiration: parse_expires_in(&opt.expires_in)?, + never_expires: opt.never_expires, + scope: opt.scope.map({ + let mut new_scope = token.scope; + |scope_str| { + if let Some(add) = scope_str.strip_prefix("+") { + for a in add.split(",").map(|x| x.trim().to_string()) { + if !new_scope.contains(&a) { + new_scope.push(a); + } + } + new_scope + } else if let Some(sub) = scope_str.strip_prefix("-") { + for r in sub.split(",").map(|x| x.trim()) { + new_scope.retain(|x| x != r); + } + new_scope + } else { + scope_str + .split(",") + .map(|x| x.trim().to_string()) + .collect::>() + } + } + }), + }, + }) + .await?; + + print_token_info(&info.0); + + Ok(()) + } + + pub async fn cmd_delete_admin_token(&self, token: String, yes: bool) -> Result<(), Error> { + let token = self + .api_request(GetAdminTokenInfoRequest { + id: None, + search: Some(token), + }) + .await?; + + let id = token.id.unwrap(); + + if !yes { + return Err(Error::Message(format!( + "Add the --yes flag to delete API token `{}` ({})", + token.name, id + ))); + } + + self.api_request(DeleteAdminTokenRequest { id }).await?; + + println!("Admin API token has been deleted."); + + Ok(()) + } + + pub async fn cmd_delete_expired_admin_tokens(&self, yes: bool) -> Result<(), Error> { + let mut list = self.api_request(ListAdminTokensRequest).await?.0; + + list.retain(|tok| tok.expired); + + if !yes { + return Err(Error::Message(format!( + "This would delete {} admin API tokens, add the --yes flag to proceed.", + list.len(), + ))); + } + + for token in list.iter() { + let id = token.id.clone().unwrap(); + println!("Deleting token `{}` ({})", token.name, id); + self.api_request(DeleteAdminTokenRequest { id }).await?; + } + + println!("{} admin API tokens have been deleted.", list.len()); + + Ok(()) + } +} + +fn print_token_info(token: &GetAdminTokenInfoResponse) { + println!("==== ADMINISTRATION TOKEN INFORMATION ===="); + let mut table = vec![ + format!("Token ID:\t{}", token.id.as_ref().unwrap()), + format!("Token name:\t{}", token.name), + format!("Created:\t{}", token.created.unwrap().with_timezone(&Local)), + format!( + "Validity:\t{}", + if token.expired { "EXPIRED" } else { "valid" } + ), + format!( + "Expiration:\t{}", + token + .expiration + .map(|x| x.with_timezone(&Local).to_string()) + .unwrap_or("never".into()) + ), + String::new(), + ]; + + for (i, scope) in token.scope.iter().enumerate() { + if i == 0 { + table.push(format!("Scope:\t{}", scope)); + } else { + table.push(format!("\t{}", scope)); + } + } + + format_table(table); +} diff --git a/src/garage/cli/remote/block.rs b/src/garage/cli/remote/block.rs new file mode 100644 index 00000000..613a1a16 --- /dev/null +++ b/src/garage/cli/remote/block.rs @@ -0,0 +1,174 @@ +//use bytesize::ByteSize; +use format_table::format_table; + +use garage_util::error::*; + +use garage_api_admin::api::*; + +use crate::cli::remote::*; +use crate::cli::structs::*; + +impl Cli { + pub async fn cmd_block(&self, cmd: BlockOperation) -> Result<(), Error> { + match cmd { + BlockOperation::ListErrors => self.cmd_list_block_errors().await, + BlockOperation::Info { hash } => self.cmd_get_block_info(hash).await, + BlockOperation::RetryNow { all, blocks } => self.cmd_block_retry_now(all, blocks).await, + BlockOperation::Purge { yes, blocks } => self.cmd_block_purge(yes, blocks).await, + } + } + + pub async fn cmd_list_block_errors(&self) -> Result<(), Error> { + let errors = self.local_api_request(LocalListBlockErrorsRequest).await?.0; + + let tf = timeago::Formatter::new(); + let mut tf2 = timeago::Formatter::new(); + tf2.ago(""); + + let mut table = vec!["Hash\tRC\tErrors\tLast error\tNext try".into()]; + for e in errors { + let next_try = if e.next_try_in_secs > 0 { + tf2.convert(Duration::from_secs(e.next_try_in_secs)) + } else { + "asap".to_string() + }; + table.push(format!( + "{}\t{}\t{}\t{}\tin {}", + e.block_hash, + e.refcount, + e.error_count, + tf.convert(Duration::from_secs(e.last_try_secs_ago)), + next_try + )); + } + format_table(table); + + Ok(()) + } + + pub async fn cmd_get_block_info(&self, hash: String) -> Result<(), Error> { + let info = self + .local_api_request(LocalGetBlockInfoRequest { block_hash: hash }) + .await?; + + println!("==== BLOCK INFORMATION ===="); + format_table(vec![ + format!("Block hash:\t{}", info.block_hash), + format!("Refcount:\t{}", info.refcount), + ]); + println!(); + + println!("==== REFERENCES TO THIS BLOCK ===="); + let mut table = vec!["Status\tVersion\tBucket\tKey\tMPU".into()]; + let mut nondeleted_count = 0; + let mut inconsistent_refs = false; + for ver in info.versions.iter() { + match &ver.backlink { + Some(BlockVersionBacklink::Object { bucket_id, key }) => { + table.push(format!( + "{}\t{:.16}{}\t{:.16}\t{}", + if ver.ref_deleted { "deleted" } else { "active" }, + ver.version_id, + deleted_to_str(ver.version_deleted), + bucket_id, + key + )); + } + Some(BlockVersionBacklink::Upload { + upload_id, + upload_deleted, + upload_garbage_collected: _, + bucket_id, + key, + }) => { + table.push(format!( + "{}\t{:.16}{}\t{:.16}\t{}\t{:.16}{}", + if ver.ref_deleted { "deleted" } else { "active" }, + ver.version_id, + deleted_to_str(ver.version_deleted), + bucket_id.as_deref().unwrap_or(""), + key.as_deref().unwrap_or(""), + upload_id, + deleted_to_str(*upload_deleted), + )); + } + None => { + table.push(format!("{:.16}\t\t\tyes", ver.version_id)); + } + } + if ver.ref_deleted != ver.version_deleted { + inconsistent_refs = true; + } + if !ver.ref_deleted { + nondeleted_count += 1; + } + } + format_table(table); + + if inconsistent_refs { + println!(); + println!("There are inconsistencies between the block_ref and the version tables."); + println!("Fix them by running `garage repair block-refs`"); + } + + if info.refcount != nondeleted_count { + println!(); + println!( + "Warning: refcount does not match number of non-deleted versions, you should try `garage repair block-rc`." + ); + } + + Ok(()) + } + + pub async fn cmd_block_retry_now(&self, all: bool, blocks: Vec) -> Result<(), Error> { + let req = match (all, blocks.len()) { + (true, 0) => LocalRetryBlockResyncRequest::All { all: true }, + (false, n) if n > 0 => LocalRetryBlockResyncRequest::Blocks { + block_hashes: blocks, + }, + _ => { + return Err(Error::Message( + "Please specify block hashes or --all (not both)".into(), + )) + } + }; + + let res = self.local_api_request(req).await?; + + println!( + "{} blocks returned in queue for a retry now (check logs to see results)", + res.count + ); + + Ok(()) + } + + pub async fn cmd_block_purge(&self, yes: bool, blocks: Vec) -> Result<(), Error> { + if !yes { + return Err(Error::Message( + "Pass the --yes flag to confirm block purge operation.".into(), + )); + } + + let res = self + .local_api_request(LocalPurgeBlocksRequest(blocks)) + .await?; + + println!( + "Purged {} blocks: deleted {} block refs, {} versions, {} objects, {} multipart uploads", + res.blocks_purged, res.block_refs_purged, res.versions_deleted, res.objects_deleted, res.uploads_deleted, + ); + + Ok(()) + } +} + +#[must_use] +const fn deleted_to_str(deleted: bool) -> &'static str { + if deleted { + " (deleted)" + } else { + "" + } +} diff --git a/src/garage/cli/remote/bucket.rs b/src/garage/cli/remote/bucket.rs new file mode 100644 index 00000000..cd0ac87f --- /dev/null +++ b/src/garage/cli/remote/bucket.rs @@ -0,0 +1,583 @@ +//use bytesize::ByteSize; +use format_table::format_table; + +use chrono::Local; + +use garage_util::error::*; + +use garage_api_admin::api::*; + +use crate::cli::remote::*; +use crate::cli::structs::*; + +impl Cli { + pub async fn cmd_bucket(&self, cmd: BucketOperation) -> Result<(), Error> { + match cmd { + BucketOperation::List => self.cmd_list_buckets().await, + BucketOperation::Info(query) => self.cmd_bucket_info(query).await, + BucketOperation::Create(query) => self.cmd_create_bucket(query).await, + BucketOperation::Delete(query) => self.cmd_delete_bucket(query).await, + BucketOperation::Alias(query) => self.cmd_alias_bucket(query).await, + BucketOperation::Unalias(query) => self.cmd_unalias_bucket(query).await, + BucketOperation::Allow(query) => self.cmd_bucket_allow(query).await, + BucketOperation::Deny(query) => self.cmd_bucket_deny(query).await, + BucketOperation::Website(query) => self.cmd_bucket_website(query).await, + BucketOperation::SetQuotas(query) => self.cmd_bucket_set_quotas(query).await, + BucketOperation::CleanupIncompleteUploads(query) => { + self.cmd_cleanup_incomplete_uploads(query).await + } + BucketOperation::InspectObject(query) => self.cmd_inspect_object(query).await, + } + } + + pub async fn cmd_list_buckets(&self) -> Result<(), Error> { + let mut buckets = self.api_request(ListBucketsRequest).await?; + + buckets.0.sort_by_key(|x| x.created); + + let mut table = vec!["ID\tCreated\tGlobal aliases\tLocal aliases".to_string()]; + for bucket in buckets.0.iter() { + table.push(format!( + "{:.16}\t{}\t{}\t{}", + bucket.id, + bucket.created.with_timezone(&Local).date_naive(), + table_list_abbr(&bucket.global_aliases), + table_list_abbr( + bucket + .local_aliases + .iter() + .map(|x| format!("{}:{}", x.access_key_id, x.alias)) + ), + )); + } + format_table(table); + + Ok(()) + } + + pub async fn cmd_bucket_info(&self, opt: BucketOpt) -> Result<(), Error> { + let bucket = self + .api_request(GetBucketInfoRequest { + id: None, + global_alias: None, + search: Some(opt.name), + }) + .await?; + + print_bucket_info(&bucket); + + Ok(()) + } + + pub async fn cmd_create_bucket(&self, opt: BucketOpt) -> Result<(), Error> { + let bucket = self + .api_request(CreateBucketRequest { + global_alias: Some(opt.name.clone()), + local_alias: None, + }) + .await?; + + print_bucket_info(&bucket.0); + + Ok(()) + } + + pub async fn cmd_delete_bucket(&self, opt: DeleteBucketOpt) -> Result<(), Error> { + let bucket = self + .api_request(GetBucketInfoRequest { + id: None, + global_alias: None, + search: Some(opt.name.clone()), + }) + .await?; + + // CLI-only checks: the bucket must not have other aliases + if bucket.global_aliases.iter().any(|a| *a != opt.name) { + return Err(Error::Message(format!("Bucket {} still has other global aliases. Use `bucket unalias` to delete them one by one.", opt.name))); + } + + if bucket + .keys + .iter() + .any(|k| !k.bucket_local_aliases.is_empty()) + { + return Err(Error::Message(format!("Bucket {} still has other local aliases. Use `bucket unalias` to delete them one by one.", opt.name))); + } + + if !opt.yes { + println!("About to delete bucket {}.", bucket.id); + return Err(Error::Message( + "Add --yes flag to really perform this operation".to_string(), + )); + } + + self.api_request(DeleteBucketRequest { + id: bucket.id.clone(), + }) + .await?; + + println!("Bucket {} has been deleted.", bucket.id); + + Ok(()) + } + + pub async fn cmd_alias_bucket(&self, opt: AliasBucketOpt) -> Result<(), Error> { + let bucket = self + .api_request(GetBucketInfoRequest { + id: None, + global_alias: None, + search: Some(opt.existing_bucket.clone()), + }) + .await?; + + let res = if let Some(key_pat) = &opt.local { + let key = self + .api_request(GetKeyInfoRequest { + search: Some(key_pat.clone()), + id: None, + show_secret_key: false, + }) + .await?; + + self.api_request(AddBucketAliasRequest { + bucket_id: bucket.id.clone(), + alias: BucketAliasEnum::Local { + local_alias: opt.new_name.clone(), + access_key_id: key.access_key_id.clone(), + }, + }) + .await? + } else { + self.api_request(AddBucketAliasRequest { + bucket_id: bucket.id.clone(), + alias: BucketAliasEnum::Global { + global_alias: opt.new_name.clone(), + }, + }) + .await? + }; + + print_bucket_info(&res.0); + + Ok(()) + } + + pub async fn cmd_unalias_bucket(&self, opt: UnaliasBucketOpt) -> Result<(), Error> { + let res = if let Some(key_pat) = &opt.local { + let key = self + .api_request(GetKeyInfoRequest { + search: Some(key_pat.clone()), + id: None, + show_secret_key: false, + }) + .await?; + + let bucket = key + .buckets + .iter() + .find(|x| x.local_aliases.contains(&opt.name)) + .ok_or_message(format!( + "No bucket called {} in namespace of key {}", + opt.name, key.access_key_id + ))?; + + self.api_request(RemoveBucketAliasRequest { + bucket_id: bucket.id.clone(), + alias: BucketAliasEnum::Local { + access_key_id: key.access_key_id.clone(), + local_alias: opt.name.clone(), + }, + }) + .await? + } else { + let bucket = self + .api_request(GetBucketInfoRequest { + id: None, + global_alias: Some(opt.name.clone()), + search: None, + }) + .await?; + + self.api_request(RemoveBucketAliasRequest { + bucket_id: bucket.id.clone(), + alias: BucketAliasEnum::Global { + global_alias: opt.name.clone(), + }, + }) + .await? + }; + + print_bucket_info(&res.0); + + Ok(()) + } + + pub async fn cmd_bucket_allow(&self, opt: PermBucketOpt) -> Result<(), Error> { + let bucket = self + .api_request(GetBucketInfoRequest { + id: None, + global_alias: None, + search: Some(opt.bucket.clone()), + }) + .await?; + + let key = self + .api_request(GetKeyInfoRequest { + id: None, + search: Some(opt.key_pattern.clone()), + show_secret_key: false, + }) + .await?; + + let res = self + .api_request(AllowBucketKeyRequest(BucketKeyPermChangeRequest { + bucket_id: bucket.id.clone(), + access_key_id: key.access_key_id.clone(), + permissions: ApiBucketKeyPerm { + read: opt.read, + write: opt.write, + owner: opt.owner, + }, + })) + .await?; + + print_bucket_info(&res.0); + + Ok(()) + } + + pub async fn cmd_bucket_deny(&self, opt: PermBucketOpt) -> Result<(), Error> { + let bucket = self + .api_request(GetBucketInfoRequest { + id: None, + global_alias: None, + search: Some(opt.bucket.clone()), + }) + .await?; + + let key = self + .api_request(GetKeyInfoRequest { + id: None, + search: Some(opt.key_pattern.clone()), + show_secret_key: false, + }) + .await?; + + let res = self + .api_request(DenyBucketKeyRequest(BucketKeyPermChangeRequest { + bucket_id: bucket.id.clone(), + access_key_id: key.access_key_id.clone(), + permissions: ApiBucketKeyPerm { + read: opt.read, + write: opt.write, + owner: opt.owner, + }, + })) + .await?; + + print_bucket_info(&res.0); + + Ok(()) + } + + pub async fn cmd_bucket_website(&self, opt: WebsiteOpt) -> Result<(), Error> { + let bucket = self + .api_request(GetBucketInfoRequest { + id: None, + global_alias: None, + search: Some(opt.bucket.clone()), + }) + .await?; + + if !(opt.allow ^ opt.deny) { + return Err(Error::Message( + "You must specify exactly one flag, either --allow or --deny".to_string(), + )); + } + + let wa = if opt.allow { + UpdateBucketWebsiteAccess { + enabled: true, + index_document: Some(opt.index_document.clone()), + error_document: opt + .error_document + .or(bucket.website_config.and_then(|x| x.error_document.clone())), + } + } else { + UpdateBucketWebsiteAccess { + enabled: false, + index_document: None, + error_document: None, + } + }; + + let res = self + .api_request(UpdateBucketRequest { + id: bucket.id, + body: UpdateBucketRequestBody { + website_access: Some(wa), + quotas: None, + }, + }) + .await?; + + print_bucket_info(&res.0); + + Ok(()) + } + + pub async fn cmd_bucket_set_quotas(&self, opt: SetQuotasOpt) -> Result<(), Error> { + let bucket = self + .api_request(GetBucketInfoRequest { + id: None, + global_alias: None, + search: Some(opt.bucket.clone()), + }) + .await?; + + if opt.max_size.is_none() && opt.max_objects.is_none() { + return Err(Error::Message( + "You must specify either --max-size or --max-objects (or both) for this command to do something.".to_string(), + )); + } + + let new_quotas = ApiBucketQuotas { + max_size: match opt.max_size.as_deref() { + Some("none") => None, + Some(v) => Some( + v.parse::() + .ok_or_message(format!("Invalid size specified: {}", v))? + .as_u64(), + ), + None => bucket.quotas.max_size, + }, + max_objects: match opt.max_objects.as_deref() { + Some("none") => None, + Some(v) => Some( + v.parse::() + .ok_or_message(format!("Invalid number: {}", v))?, + ), + None => bucket.quotas.max_objects, + }, + }; + + let res = self + .api_request(UpdateBucketRequest { + id: bucket.id.clone(), + body: UpdateBucketRequestBody { + website_access: None, + quotas: Some(new_quotas), + }, + }) + .await?; + + print_bucket_info(&res.0); + + Ok(()) + } + + pub async fn cmd_cleanup_incomplete_uploads( + &self, + opt: CleanupIncompleteUploadsOpt, + ) -> Result<(), Error> { + let older_than = parse_duration::parse::parse(&opt.older_than) + .ok_or_message("Invalid duration passed for --older-than parameter")?; + + for b in opt.buckets.iter() { + let bucket = self + .api_request(GetBucketInfoRequest { + id: None, + global_alias: None, + search: Some(b.clone()), + }) + .await?; + + let res = self + .api_request(CleanupIncompleteUploadsRequest { + bucket_id: bucket.id.clone(), + older_than_secs: older_than.as_secs(), + }) + .await?; + + if res.uploads_deleted > 0 { + println!("{:.16}: {} uploads deleted", bucket.id, res.uploads_deleted); + } else { + println!("{:.16}: no uploads deleted", bucket.id); + } + } + + Ok(()) + } + + pub async fn cmd_inspect_object(&self, opt: InspectObjectOpt) -> Result<(), Error> { + let bucket = self + .api_request(GetBucketInfoRequest { + id: None, + global_alias: None, + search: Some(opt.bucket), + }) + .await?; + + let info = self + .api_request(InspectObjectRequest { + bucket_id: bucket.id, + key: opt.key, + }) + .await?; + + for ver in info.versions { + println!("==== OBJECT VERSION ===="); + let mut tab = vec![ + format!("Bucket ID:\t{}", info.bucket_id), + format!("Key:\t{}", info.key), + format!("Version ID:\t{}", ver.uuid), + format!("Timestamp:\t{}", ver.timestamp), + ]; + if let Some(size) = ver.size { + let bs = bytesize::ByteSize::b(size); + tab.push(format!( + "Size:\t{} ({})", + bs.to_string_as(true), + bs.to_string_as(false) + )); + tab.push(format!("Size (exact):\t{}", size)); + if !ver.blocks.is_empty() { + tab.push(format!("Number of blocks:\t{:?}", ver.blocks.len())); + } + } + if let Some(etag) = ver.etag { + tab.push(format!("Etag:\t{}", etag)); + } + tab.extend([ + format!("Encrypted:\t{}", ver.encrypted), + format!("Uploading:\t{}", ver.uploading), + format!("Aborted:\t{}", ver.aborted), + format!("Delete marker:\t{}", ver.delete_marker), + format!("Inline data:\t{}", ver.inline), + ]); + if !ver.headers.is_empty() { + tab.push(String::new()); + tab.extend(ver.headers.iter().map(|(k, v)| format!("{}\t{}", k, v))); + } + format_table(tab); + + if !ver.blocks.is_empty() { + let mut tab = vec!["Part#\tOffset\tBlock hash\tSize".to_string()]; + tab.extend(ver.blocks.iter().map(|b| { + format!( + "{:4}\t{:9}\t{}\t{:9}", + b.part_number, b.offset, b.hash, b.size + ) + })); + println!(); + format_table(tab); + } + println!(); + } + + Ok(()) + } +} + +fn print_bucket_info(bucket: &GetBucketInfoResponse) { + println!("==== BUCKET INFORMATION ===="); + + let mut info = vec![ + format!("Bucket:\t{}", bucket.id), + format!("Created:\t{}", bucket.created.with_timezone(&Local)), + String::new(), + { + let size = bytesize::ByteSize::b(bucket.bytes as u64); + format!( + "Size:\t{} ({})", + size.to_string_as(true), + size.to_string_as(false) + ) + }, + format!("Objects:\t{}", bucket.objects), + ]; + + if bucket.unfinished_uploads > 0 { + info.extend([ + format!( + "Unfinished uploads:\t{} multipart uploads", + bucket.unfinished_multipart_uploads + ), + format!("\t{} including regular uploads", bucket.unfinished_uploads), + { + let mpu_size = + bytesize::ByteSize::b(bucket.unfinished_multipart_upload_bytes as u64); + format!( + "Size of unfinished multipart uploads:\t{} ({})", + mpu_size.to_string_as(true), + mpu_size.to_string_as(false), + ) + }, + ]); + } + + info.extend([ + String::new(), + format!("Website access:\t{}", bucket.website_access), + ]); + + if let Some(wc) = &bucket.website_config { + info.extend([ + format!(" index document:\t{}", wc.index_document), + format!( + " error document:\t{}", + wc.error_document.as_deref().unwrap_or("(not defined)") + ), + ]); + } + + if bucket.quotas.max_size.is_some() || bucket.quotas.max_objects.is_some() { + info.push(String::new()); + info.push("Quotas:\tenabled".into()); + if let Some(ms) = bucket.quotas.max_size { + let ms = bytesize::ByteSize::b(ms); + info.push(format!( + " maximum size:\t{} ({})", + ms.to_string_as(true), + ms.to_string_as(false) + )); + } + if let Some(mo) = bucket.quotas.max_objects { + info.push(format!(" maximum number of objects:\t{}", mo)); + } + } + + if !bucket.global_aliases.is_empty() { + info.push(String::new()); + for (i, alias) in bucket.global_aliases.iter().enumerate() { + if i == 0 && bucket.global_aliases.len() > 1 { + info.push(format!("Global aliases:\t{}", alias)); + } else if i == 0 { + info.push(format!("Global alias:\t{}", alias)); + } else { + info.push(format!("\t{}", alias)); + } + } + } + + format_table(info); + + println!(); + println!("==== KEYS FOR THIS BUCKET ===="); + let mut key_info = vec!["Permissions\tAccess key\t\tLocal aliases".to_string()]; + key_info.extend(bucket.keys.iter().map(|key| { + let rflag = if key.permissions.read { "R" } else { " " }; + let wflag = if key.permissions.write { "W" } else { " " }; + let oflag = if key.permissions.owner { "O" } else { " " }; + format!( + "{}{}{}\t{}\t{}\t{}", + rflag, + wflag, + oflag, + key.access_key_id, + key.name, + key.bucket_local_aliases.to_vec().join(","), + ) + })); + format_table(key_info); +} diff --git a/src/garage/cli/remote/cluster.rs b/src/garage/cli/remote/cluster.rs new file mode 100644 index 00000000..284e3690 --- /dev/null +++ b/src/garage/cli/remote/cluster.rs @@ -0,0 +1,160 @@ +use format_table::format_table; + +use garage_util::error::*; + +use garage_api_admin::api::*; + +use crate::cli::remote::layout::*; +use crate::cli::remote::*; +use crate::cli::structs::*; + +impl Cli { + pub async fn cmd_status(&self) -> Result<(), Error> { + let status = self.api_request(GetClusterStatusRequest).await?; + let layout = self.api_request(GetClusterLayoutRequest).await?; + + println!("==== HEALTHY NODES ===="); + + let mut healthy_nodes = + vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tDataAvail\tVersion".to_string()]; + + for adv in status.nodes.iter().filter(|adv| adv.is_up) { + let host = adv.hostname.as_deref().unwrap_or("?"); + let addr = match adv.addr { + Some(addr) => addr.to_string(), + None => "N/A".to_string(), + }; + if let Some(cfg) = &adv.role { + let data_avail = match &adv.data_partition { + _ if cfg.capacity.is_none() => "N/A".into(), + Some(FreeSpaceResp { available, total }) => { + let pct = (*available as f64) / (*total as f64) * 100.; + let avail_str = bytesize::ByteSize::b(*available); + format!("{} ({:.1}%)", avail_str, pct) + } + None => "?".into(), + }; + healthy_nodes.push(format!( + "{id:.16}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{data_avail}\t{version}", + id = adv.id, + host = host, + addr = addr, + tags = cfg.tags.join(","), + zone = cfg.zone, + capacity = capacity_string(cfg.capacity), + data_avail = data_avail, + version = adv.garage_version.as_deref().unwrap_or_default(), + )); + } else { + let status = match layout.staged_role_changes.iter().find(|x| x.id == adv.id) { + Some(NodeRoleChange { + action: NodeRoleChangeEnum::Update { .. }, + .. + }) => "pending...", + _ if adv.draining => "draining metadata..", + _ => "NO ROLE ASSIGNED", + }; + healthy_nodes.push(format!( + "{id:.16}\t{h}\t{addr}\t\t\t{status}\t\t{version}", + id = adv.id, + h = host, + addr = addr, + status = status, + version = adv.garage_version.as_deref().unwrap_or_default(), + )); + } + } + format_table(healthy_nodes); + + let tf = timeago::Formatter::new(); + let mut drain_msg = false; + let mut failed_nodes = vec!["ID\tHostname\tTags\tZone\tCapacity\tLast seen".to_string()]; + for adv in status.nodes.iter().filter(|x| !x.is_up) { + let node = &adv.id; + + let host = adv.hostname.as_deref().unwrap_or("?"); + let last_seen = adv + .last_seen_secs_ago + .map(|s| tf.convert(Duration::from_secs(s))) + .unwrap_or_else(|| "never seen".into()); + + if let Some(cfg) = &adv.role { + let capacity = capacity_string(cfg.capacity); + + failed_nodes.push(format!( + "{id:.16}\t{host}\t[{tags}]\t{zone}\t{capacity}\t{last_seen}", + id = node, + host = host, + tags = cfg.tags.join(","), + zone = cfg.zone, + capacity = capacity, + last_seen = last_seen, + )); + } else { + let status = match layout.staged_role_changes.iter().find(|x| x.id == adv.id) { + Some(NodeRoleChange { + action: NodeRoleChangeEnum::Update { .. }, + .. + }) => "pending...", + _ if adv.draining => { + drain_msg = true; + "draining metadata.." + } + _ => continue, + }; + + failed_nodes.push(format!( + "{id:.16}\t{host}\t\t\t{status}\t{last_seen}", + id = node, + host = host, + status = status, + last_seen = last_seen, + )); + } + } + + if failed_nodes.len() > 1 { + println!("\n==== FAILED NODES ===="); + format_table(failed_nodes); + if drain_msg { + println!(); + println!("Your cluster is expecting to drain data from nodes that are currently unavailable."); + println!( + "If these nodes are definitely dead, please review the layout history with" + ); + println!( + "`garage layout history` and use `garage layout skip-dead-nodes` to force progress." + ); + } + } + + if print_staging_role_changes(&layout) { + println!(); + println!( + "Please use `garage layout show` to check the proposed new layout and apply it." + ); + println!(); + } + + Ok(()) + } + + pub async fn cmd_connect(&self, opt: ConnectNodeOpt) -> Result<(), Error> { + let res = self + .api_request(ConnectClusterNodesRequest(vec![opt.node])) + .await?; + if res.0.len() != 1 { + return Err(Error::Message(format!("unexpected response: {:?}", res))); + } + let res = res.0.into_iter().next().unwrap(); + if res.success { + println!("Success."); + Ok(()) + } else { + Err(Error::Message(format!( + "Failure: {}", + res.error.unwrap_or_default() + ))) + } + } +} diff --git a/src/garage/cli/remote/key.rs b/src/garage/cli/remote/key.rs new file mode 100644 index 00000000..657607ea --- /dev/null +++ b/src/garage/cli/remote/key.rs @@ -0,0 +1,318 @@ +use format_table::format_table; + +use chrono::Local; + +use garage_util::error::*; + +use garage_api_admin::api::*; + +use crate::cli::remote::*; +use crate::cli::structs::*; + +impl Cli { + pub async fn cmd_key(&self, cmd: KeyOperation) -> Result<(), Error> { + match cmd { + KeyOperation::List => self.cmd_list_keys().await, + KeyOperation::Info(query) => self.cmd_key_info(query).await, + KeyOperation::Create(query) => self.cmd_create_key(query).await, + KeyOperation::Rename(query) => self.cmd_rename_key(query).await, + KeyOperation::Set(opt) => self.cmd_update_key(opt).await, + KeyOperation::Delete(query) => self.cmd_delete_key(query).await, + KeyOperation::Allow(query) => self.cmd_allow_key(query).await, + KeyOperation::Deny(query) => self.cmd_deny_key(query).await, + KeyOperation::Import(query) => self.cmd_import_key(query).await, + KeyOperation::DeleteExpired { yes } => self.cmd_delete_expired_keys(yes).await, + } + } + + pub async fn cmd_list_keys(&self) -> Result<(), Error> { + let mut keys = self.api_request(ListKeysRequest).await?; + + keys.0.sort_by_key(|x| x.created); + + let mut table = vec!["ID\tCreated\tName\tExpiration".to_string()]; + for key in keys.0.iter() { + let exp = if key.expired { + "expired".to_string() + } else { + key.expiration + .map(|x| x.with_timezone(&Local).to_string()) + .unwrap_or("never".into()) + }; + table.push(format!( + "{}\t{}\t{}\t{}", + key.id, + key.created + .map(|x| x.with_timezone(&Local).date_naive().to_string()) + .unwrap_or_default(), + key.name, + exp + )); + } + format_table(table); + + Ok(()) + } + + pub async fn cmd_key_info(&self, opt: KeyInfoOpt) -> Result<(), Error> { + let key = self + .api_request(GetKeyInfoRequest { + id: None, + search: Some(opt.key_pattern), + show_secret_key: opt.show_secret, + }) + .await?; + + print_key_info(&key); + + Ok(()) + } + + pub async fn cmd_create_key(&self, opt: KeyNewOpt) -> Result<(), Error> { + let key = self + .api_request(CreateKeyRequest(UpdateKeyRequestBody { + name: Some(opt.name), + expiration: parse_expires_in(&opt.expires_in)?, + never_expires: false, + allow: None, + deny: None, + })) + .await?; + + print_key_info(&key.0); + + Ok(()) + } + + pub async fn cmd_rename_key(&self, opt: KeyRenameOpt) -> Result<(), Error> { + let key = self + .api_request(GetKeyInfoRequest { + id: None, + search: Some(opt.key_pattern), + show_secret_key: false, + }) + .await?; + + let new_key = self + .api_request(UpdateKeyRequest { + id: key.access_key_id, + body: UpdateKeyRequestBody { + name: Some(opt.new_name), + expiration: None, + never_expires: false, + allow: None, + deny: None, + }, + }) + .await?; + + print_key_info(&new_key.0); + + Ok(()) + } + + pub async fn cmd_update_key(&self, opt: KeySetOpt) -> Result<(), Error> { + let key = self + .api_request(GetKeyInfoRequest { + id: None, + search: Some(opt.key_pattern), + show_secret_key: false, + }) + .await?; + + let new_key = self + .api_request(UpdateKeyRequest { + id: key.access_key_id, + body: UpdateKeyRequestBody { + name: None, + expiration: parse_expires_in(&opt.expires_in)?, + never_expires: opt.never_expires, + allow: None, + deny: None, + }, + }) + .await?; + + print_key_info(&new_key.0); + + Ok(()) + } + + pub async fn cmd_delete_key(&self, opt: KeyDeleteOpt) -> Result<(), Error> { + let key = self + .api_request(GetKeyInfoRequest { + id: None, + search: Some(opt.key_pattern), + show_secret_key: false, + }) + .await?; + + if !opt.yes { + println!("About to delete key {}...", key.access_key_id); + return Err(Error::Message( + "Add --yes flag to really perform this operation".to_string(), + )); + } + + self.api_request(DeleteKeyRequest { + id: key.access_key_id.clone(), + }) + .await?; + + println!("Access key {} has been deleted.", key.access_key_id); + + Ok(()) + } + + pub async fn cmd_allow_key(&self, opt: KeyPermOpt) -> Result<(), Error> { + let key = self + .api_request(GetKeyInfoRequest { + id: None, + search: Some(opt.key_pattern), + show_secret_key: false, + }) + .await?; + + let new_key = self + .api_request(UpdateKeyRequest { + id: key.access_key_id, + body: UpdateKeyRequestBody { + name: None, + expiration: None, + never_expires: false, + allow: Some(KeyPerm { + create_bucket: opt.create_bucket, + }), + deny: None, + }, + }) + .await?; + + print_key_info(&new_key.0); + + Ok(()) + } + + pub async fn cmd_deny_key(&self, opt: KeyPermOpt) -> Result<(), Error> { + let key = self + .api_request(GetKeyInfoRequest { + id: None, + search: Some(opt.key_pattern), + show_secret_key: false, + }) + .await?; + + let new_key = self + .api_request(UpdateKeyRequest { + id: key.access_key_id, + body: UpdateKeyRequestBody { + name: None, + expiration: None, + never_expires: false, + allow: None, + deny: Some(KeyPerm { + create_bucket: opt.create_bucket, + }), + }, + }) + .await?; + + print_key_info(&new_key.0); + + Ok(()) + } + + pub async fn cmd_import_key(&self, opt: KeyImportOpt) -> Result<(), Error> { + if !opt.yes { + return Err(Error::Message("This command is intended to re-import keys that were previously generated by Garage. If you want to create a new key, use `garage key new` instead. Add the --yes flag if you really want to re-import a key.".to_string())); + } + + let new_key = self + .api_request(ImportKeyRequest { + name: Some(opt.name), + access_key_id: opt.key_id, + secret_access_key: opt.secret_key, + }) + .await?; + + print_key_info(&new_key.0); + + Ok(()) + } + + pub async fn cmd_delete_expired_keys(&self, yes: bool) -> Result<(), Error> { + let mut list = self.api_request(ListKeysRequest).await?.0; + + list.retain(|key| key.expired); + + if !yes { + return Err(Error::Message(format!( + "This would delete {} access keys, add the --yes flag to proceed.", + list.len(), + ))); + } + + for key in list.iter() { + let id = key.id.clone(); + println!("Deleting access key `{}` ({})", key.name, id); + self.api_request(DeleteKeyRequest { id }).await?; + } + + println!("{} access keys have been deleted.", list.len()); + + Ok(()) + } +} + +fn print_key_info(key: &GetKeyInfoResponse) { + println!("==== ACCESS KEY INFORMATION ===="); + + let mut table = vec![ + format!("Key ID:\t{}", key.access_key_id), + format!("Key name:\t{}", key.name), + format!( + "Secret key:\t{}", + key.secret_access_key.as_deref().unwrap_or("(redacted)") + ), + ]; + + if let Some(c) = key.created { + table.push(format!("Created:\t{}", c.with_timezone(&Local))); + } + + table.extend([ + format!( + "Validity:\t{}", + if key.expired { "EXPIRED" } else { "valid" } + ), + format!( + "Expiration:\t{}", + key.expiration + .map(|x| x.with_timezone(&Local).to_string()) + .unwrap_or("never".into()) + ), + String::new(), + format!("Can create buckets:\t{}", key.permissions.create_bucket), + ]); + format_table(table); + + println!(); + println!("==== BUCKETS FOR THIS KEY ===="); + let mut bucket_info = vec!["Permissions\tID\tGlobal aliases\tLocal aliases".to_string()]; + bucket_info.extend(key.buckets.iter().map(|bucket| { + let rflag = if bucket.permissions.read { "R" } else { " " }; + let wflag = if bucket.permissions.write { "W" } else { " " }; + let oflag = if bucket.permissions.owner { "O" } else { " " }; + format!( + "{}{}{}\t{:.16}\t{}\t{}", + rflag, + wflag, + oflag, + bucket.id, + table_list_abbr(&bucket.global_aliases), + bucket.local_aliases.join(","), + ) + })); + + format_table(bucket_info); +} diff --git a/src/garage/cli/remote/layout.rs b/src/garage/cli/remote/layout.rs new file mode 100644 index 00000000..edf92efc --- /dev/null +++ b/src/garage/cli/remote/layout.rs @@ -0,0 +1,474 @@ +use bytesize::ByteSize; +use format_table::format_table; + +use garage_util::error::*; + +use garage_api_admin::api::*; + +use crate::cli::remote::*; +use crate::cli::structs::*; + +impl Cli { + pub async fn layout_command_dispatch(&self, cmd: LayoutOperation) -> Result<(), Error> { + match cmd { + LayoutOperation::Show => self.cmd_show_layout().await, + LayoutOperation::Assign(assign_opt) => self.cmd_assign_role(assign_opt).await, + LayoutOperation::Remove(remove_opt) => self.cmd_remove_role(remove_opt).await, + LayoutOperation::Config(config_opt) => self.cmd_config_layout(config_opt).await, + LayoutOperation::Apply(apply_opt) => self.cmd_apply_layout(apply_opt).await, + LayoutOperation::Revert(revert_opt) => self.cmd_revert_layout(revert_opt).await, + LayoutOperation::History => self.cmd_layout_history().await, + LayoutOperation::SkipDeadNodes(opt) => self.cmd_skip_dead_nodes(opt).await, + } + } + + pub async fn cmd_show_layout(&self) -> Result<(), Error> { + let layout = self.api_request(GetClusterLayoutRequest).await?; + + println!("==== CURRENT CLUSTER LAYOUT ===="); + print_cluster_layout(&layout, "No nodes currently have a role in the cluster.\nSee `garage status` to view available nodes."); + println!(); + println!("Current cluster layout version: {}", layout.version); + + let has_role_changes = print_staging_role_changes(&layout); + if has_role_changes { + let res_apply = self.api_request(PreviewClusterLayoutChangesRequest).await?; + + // this will print the stats of what partitions + // will move around when we apply + match res_apply { + PreviewClusterLayoutChangesResponse::Success { + message, + new_layout, + } => { + println!(); + println!("==== NEW CLUSTER LAYOUT AFTER APPLYING CHANGES ===="); + print_cluster_layout(&new_layout, "No nodes have a role in the new layout."); + println!(); + + for line in message.iter() { + println!("{}", line); + } + println!("To enact the staged role changes, type:"); + println!(); + println!(" garage layout apply --version {}", new_layout.version); + println!(); + println!("You can also revert all proposed changes with: garage layout revert"); + } + PreviewClusterLayoutChangesResponse::Error { error } => { + println!("Error while trying to compute the assignment: {}", error); + println!("This new layout cannot yet be applied."); + println!("You can also revert all proposed changes with: garage layout revert"); + } + } + } + + Ok(()) + } + + pub async fn cmd_assign_role(&self, opt: AssignRoleOpt) -> Result<(), Error> { + let status = self.api_request(GetClusterStatusRequest).await?; + let layout = self.api_request(GetClusterLayoutRequest).await?; + + let mut actions = vec![]; + + for node in opt.replace.iter() { + let id = find_matching_node(&status, &layout, node)?; + + actions.push(NodeRoleChange { + id, + action: NodeRoleChangeEnum::Remove { remove: true }, + }); + } + + for node in opt.node_ids.iter() { + let id = find_matching_node(&status, &layout, node)?; + + let current = get_staged_or_current_role(&id, &layout); + + let zone = opt + .zone + .clone() + .or_else(|| current.as_ref().map(|c| c.zone.clone())) + .ok_or_message("Please specify a zone with the -z flag")?; + + let capacity = if opt.gateway { + if opt.capacity.is_some() { + return Err(Error::Message("Please specify only -c or -g".into())); + } + None + } else if let Some(cap) = opt.capacity { + Some(cap.as_u64()) + } else { + current.as_ref().ok_or_message("Please specify a capacity with the -c flag, or set node explicitly as gateway with -g")?.capacity + }; + + let tags = if !opt.tags.is_empty() { + opt.tags.clone() + } else if let Some(cur) = current.as_ref() { + cur.tags.clone() + } else { + vec![] + }; + + actions.push(NodeRoleChange { + id, + action: NodeRoleChangeEnum::Update(NodeAssignedRole { + zone, + capacity, + tags, + }), + }); + } + + self.api_request(UpdateClusterLayoutRequest { + roles: actions, + parameters: None, + }) + .await?; + + println!("Role changes are staged but not yet committed."); + println!("Use `garage layout show` to view staged role changes,"); + println!("and `garage layout apply` to enact staged changes."); + Ok(()) + } + + pub async fn cmd_remove_role(&self, opt: RemoveRoleOpt) -> Result<(), Error> { + let status = self.api_request(GetClusterStatusRequest).await?; + let layout = self.api_request(GetClusterLayoutRequest).await?; + + let id = find_matching_node(&status, &layout, &opt.node_id)?; + + let actions = vec![NodeRoleChange { + id, + action: NodeRoleChangeEnum::Remove { remove: true }, + }]; + + self.api_request(UpdateClusterLayoutRequest { + roles: actions, + parameters: None, + }) + .await?; + + println!("Role removal is staged but not yet committed."); + println!("Use `garage layout show` to view staged role changes,"); + println!("and `garage layout apply` to enact staged changes."); + Ok(()) + } + + pub async fn cmd_config_layout(&self, config_opt: ConfigLayoutOpt) -> Result<(), Error> { + let mut did_something = false; + match config_opt.redundancy { + None => (), + Some(r_str) => { + let r = parse_zone_redundancy(&r_str)?; + + self.api_request(UpdateClusterLayoutRequest { + roles: vec![], + parameters: Some(LayoutParameters { zone_redundancy: r }), + }) + .await?; + println!( + "The zone redundancy parameter has been set to '{}'.", + display_zone_redundancy(r) + ); + did_something = true; + } + } + + if !did_something { + return Err(Error::Message( + "Please specify an action for `garage layout config`".into(), + )); + } + + Ok(()) + } + + pub async fn cmd_apply_layout(&self, apply_opt: ApplyLayoutOpt) -> Result<(), Error> { + let missing_version_error = r#" +Please pass the new layout version number to ensure that you are writing the correct version of the cluster layout. +To know the correct value of the new layout version, invoke `garage layout show` and review the proposed changes. + "#; + + let req = ApplyClusterLayoutRequest { + version: apply_opt.version.ok_or_message(missing_version_error)?, + }; + let res = self.api_request(req).await?; + + for line in res.message.iter() { + println!("{}", line); + } + + println!("New cluster layout with updated role assignment has been applied in cluster."); + println!("Data will now be moved around between nodes accordingly."); + + Ok(()) + } + + pub async fn cmd_revert_layout(&self, revert_opt: RevertLayoutOpt) -> Result<(), Error> { + if !revert_opt.yes { + return Err(Error::Message( + "Please add the --yes flag to run the layout revert operation".into(), + )); + } + + self.api_request(RevertClusterLayoutRequest).await?; + + println!("All proposed role changes in cluster layout have been canceled."); + Ok(()) + } + + pub async fn cmd_layout_history(&self) -> Result<(), Error> { + let history = self.api_request(GetClusterLayoutHistoryRequest).await?; + + println!("==== LAYOUT HISTORY ===="); + let mut table = vec!["Version\tStatus\tStorage nodes\tGateway nodes".to_string()]; + for ver in history.versions.iter() { + table.push(format!( + "#{}\t{:?}\t{}\t{}", + ver.version, ver.status, ver.storage_nodes, ver.gateway_nodes, + )); + } + format_table(table); + println!(); + + if let Some(update_trackers) = history.update_trackers { + println!("==== UPDATE TRACKERS ===="); + println!("Several layout versions are currently live in the cluster, and data is being migrated."); + println!( + "This is the internal data that Garage stores to know which nodes have what data." + ); + println!(); + let mut table = vec!["Node\tAck\tSync\tSync_ack".to_string()]; + for (node, trackers) in update_trackers.iter() { + table.push(format!( + "{:.16}\t#{}\t#{}\t#{}", + node, trackers.ack, trackers.sync, trackers.sync_ack, + )); + } + table[1..].sort(); + format_table(table); + + println!(); + println!( + "If some nodes are not catching up to the latest layout version in the update trackers," + ); + println!( + "it might be because they are offline or unable to complete a sync successfully." + ); + if history.min_ack < history.current_version { + println!( + "You may force progress using `garage layout skip-dead-nodes --version {}`", + history.current_version + ); + } else { + println!( + "You may force progress using `garage layout skip-dead-nodes --version {} --allow-missing-data`.", + history.current_version + ); + } + } else { + println!( + "Your cluster is currently in a stable state with a single live layout version." + ); + println!("No metadata migration is in progress. Note that the migration of data blocks is not tracked,"); + println!( + "so you might want to keep old nodes online until their data directories become empty." + ); + } + + Ok(()) + } + + pub async fn cmd_skip_dead_nodes(&self, opt: SkipDeadNodesOpt) -> Result<(), Error> { + let res = self + .api_request(ClusterLayoutSkipDeadNodesRequest { + version: opt.version, + allow_missing_data: opt.allow_missing_data, + }) + .await?; + + if !res.sync_updated.is_empty() || !res.ack_updated.is_empty() { + for node in res.ack_updated.iter() { + println!("Increased the ACK tracker for node {:.16}", node); + } + for node in res.sync_updated.iter() { + println!("Increased the SYNC tracker for node {:.16}", node); + } + Ok(()) + } else if !opt.allow_missing_data { + Err(Error::Message("Nothing was done, try passing the `--allow-missing-data` flag to force progress even when not enough nodes can complete a metadata sync.".into())) + } else { + Err(Error::Message( + "Sorry, there is nothing I can do for you. Please wait patiently. If you ask for help, please send the output of the `garage layout history` command.".into(), + )) + } + } +} + +// -------------------------- +// ---- helper functions ---- +// -------------------------- + +pub fn capacity_string(v: Option) -> String { + match v { + Some(c) => ByteSize::b(c).to_string_as(false), + None => "gateway".to_string(), + } +} + +pub fn get_staged_or_current_role( + id: &str, + layout: &GetClusterLayoutResponse, +) -> Option { + for node in layout.staged_role_changes.iter() { + if node.id == id { + return match &node.action { + NodeRoleChangeEnum::Remove { .. } => None, + NodeRoleChangeEnum::Update(role) => Some(role.clone()), + }; + } + } + + for node in layout.roles.iter() { + if node.id == id { + return Some(NodeAssignedRole { + zone: node.zone.clone(), + capacity: node.capacity, + tags: node.tags.clone(), + }); + } + } + + None +} + +pub fn find_matching_node( + status: &GetClusterStatusResponse, + layout: &GetClusterLayoutResponse, + pattern: &str, +) -> Result { + let all_node_ids_iter = status + .nodes + .iter() + .map(|x| x.id.as_str()) + .chain(layout.roles.iter().map(|x| x.id.as_str())); + + let mut candidates = vec![]; + for c in all_node_ids_iter { + if c.starts_with(pattern) && !candidates.contains(&c) { + candidates.push(c); + } + } + if candidates.len() != 1 { + Err(Error::Message(format!( + "{} nodes match '{}'", + candidates.len(), + pattern, + ))) + } else { + Ok(candidates[0].to_string()) + } +} + +pub fn print_cluster_layout(layout: &GetClusterLayoutResponse, empty_msg: &str) { + let mut table = vec!["ID\tTags\tZone\tCapacity\tUsable capacity".to_string()]; + for role in layout.roles.iter() { + let tags = role.tags.join(","); + if let (Some(capacity), Some(usable_capacity)) = (role.capacity, role.usable_capacity) { + table.push(format!( + "{:.16}\t[{}]\t{}\t{}\t{} ({:.1}%)", + role.id, + tags, + role.zone, + capacity_string(role.capacity), + ByteSize::b(usable_capacity).to_string_as(false), + (100.0 * usable_capacity as f32) / (capacity as f32) + )); + } else { + table.push(format!( + "{:.16}\t[{}]\t{}\t{}", + role.id, + tags, + role.zone, + capacity_string(role.capacity), + )); + }; + } + if table.len() > 1 { + format_table(table); + println!(); + println!( + "Zone redundancy: {}", + display_zone_redundancy(layout.parameters.zone_redundancy), + ); + } else { + println!("{}", empty_msg); + } +} + +pub fn print_staging_role_changes(layout: &GetClusterLayoutResponse) -> bool { + let has_role_changes = !layout.staged_role_changes.is_empty(); + + let has_layout_changes = layout.staged_parameters.is_some(); + + if has_role_changes || has_layout_changes { + println!(); + println!("==== STAGED ROLE CHANGES ===="); + if has_role_changes { + let mut table = vec!["ID\tTags\tZone\tCapacity".to_string()]; + for change in layout.staged_role_changes.iter() { + match &change.action { + NodeRoleChangeEnum::Update(NodeAssignedRole { + tags, + zone, + capacity, + }) => { + let tags = tags.join(","); + table.push(format!( + "{:.16}\t[{}]\t{}\t{}", + change.id, + tags, + zone, + capacity_string(*capacity), + )); + } + NodeRoleChangeEnum::Remove { .. } => { + table.push(format!("{:.16}\tREMOVED", change.id)); + } + } + } + format_table(table); + println!(); + } + if let Some(p) = layout.staged_parameters.as_ref() { + println!( + "Zone redundancy: {}", + display_zone_redundancy(p.zone_redundancy) + ); + } + true + } else { + false + } +} + +pub fn display_zone_redundancy(z: ZoneRedundancy) -> String { + match z { + ZoneRedundancy::Maximum => "maximum".into(), + ZoneRedundancy::AtLeast(x) => x.to_string(), + } +} + +pub fn parse_zone_redundancy(s: &str) -> Result { + match s { + "none" | "max" | "maximum" => Ok(ZoneRedundancy::Maximum), + x => { + let v = x.parse::().map_err(|_| { + Error::Message("zone redundancy must be 'none'/'max' or an integer".into()) + })?; + Ok(ZoneRedundancy::AtLeast(v)) + } + } +} diff --git a/src/garage/cli/remote/mod.rs b/src/garage/cli/remote/mod.rs new file mode 100644 index 00000000..d1a20989 --- /dev/null +++ b/src/garage/cli/remote/mod.rs @@ -0,0 +1,174 @@ +pub mod admin_token; +pub mod bucket; +pub mod cluster; +pub mod key; +pub mod layout; + +pub mod block; +pub mod node; +pub mod worker; + +use std::convert::TryFrom; +use std::sync::Arc; +use std::time::Duration; + +use chrono::{DateTime, Utc}; + +use garage_util::error::*; + +use garage_rpc::*; + +use garage_api_admin::api::*; +use garage_api_admin::api_server::{AdminRpc as ProxyRpc, AdminRpcResponse as ProxyRpcResponse}; +use garage_api_admin::RequestHandler; + +use crate::cli::structs::*; + +pub struct Cli { + pub proxy_rpc_endpoint: Arc>, + pub rpc_host: NodeID, +} + +impl Cli { + pub async fn handle(&self, cmd: Command) -> Result<(), Error> { + match cmd { + Command::Status => self.cmd_status().await, + Command::Node(NodeOperation::Connect(connect_opt)) => { + self.cmd_connect(connect_opt).await + } + Command::Layout(layout_opt) => self.layout_command_dispatch(layout_opt).await, + Command::Bucket(bo) => self.cmd_bucket(bo).await, + Command::AdminToken(to) => self.cmd_admin_token(to).await, + Command::Key(ko) => self.cmd_key(ko).await, + Command::Worker(wo) => self.cmd_worker(wo).await, + Command::Block(bo) => self.cmd_block(bo).await, + Command::Meta(mo) => self.cmd_meta(mo).await, + Command::Stats(so) => self.cmd_stats(so).await, + Command::Repair(ro) => self.cmd_repair(ro).await, + Command::JsonApi { endpoint, payload } => self.cmd_json_api(endpoint, payload).await, + + _ => unreachable!(), + } + } + + pub async fn api_request(&self, req: T) -> Result<::Response, Error> + where + T: RequestHandler, + AdminApiRequest: From, + ::Response: TryFrom, + { + let req = AdminApiRequest::from(req); + let req_name = req.name(); + match self + .proxy_rpc_endpoint + .call(&self.rpc_host, ProxyRpc::Proxy(req), PRIO_NORMAL) + .await?? + { + ProxyRpcResponse::ProxyApiOkResponse(resp) => { + ::Response::try_from(resp).map_err(|_| { + Error::Message(format!("{} returned unexpected response", req_name)) + }) + } + ProxyRpcResponse::ApiErrorResponse { + http_code, + error_code, + message, + } => Err(Error::Message(format!( + "{} returned {} ({}): {}", + req_name, error_code, http_code, message + ))), + m => Err(Error::unexpected_rpc_message(m)), + } + } + + pub async fn local_api_request( + &self, + req: T, + ) -> Result<::Response, Error> + where + T: RequestHandler, + MultiRequest: RequestHandler::Response>>, + AdminApiRequest: From>, + as RequestHandler>::Response: TryFrom, + { + let req = MultiRequest { + node: hex::encode(self.rpc_host), + body: req, + }; + let resp = self.api_request(req).await?; + + if let Some((_, e)) = resp.error.into_iter().next() { + return Err(Error::Message(e)); + } + if resp.success.len() != 1 { + return Err(Error::Message(format!( + "{} responses returned, expected 1", + resp.success.len() + ))); + } + Ok(resp.success.into_iter().next().unwrap().1) + } + + pub async fn cmd_json_api(&self, endpoint: String, payload: String) -> Result<(), Error> { + let payload: serde_json::Value = if payload == "-" { + serde_json::from_reader(&std::io::stdin())? + } else { + serde_json::from_str(&payload)? + }; + + let request: AdminApiRequest = serde_json::from_value(serde_json::json!({ + endpoint.clone(): payload, + }))?; + + let resp = match self + .proxy_rpc_endpoint + .call(&self.rpc_host, ProxyRpc::Proxy(request), PRIO_NORMAL) + .await?? + { + ProxyRpcResponse::ProxyApiOkResponse(resp) => resp, + ProxyRpcResponse::ApiErrorResponse { + http_code, + error_code, + message, + } => { + return Err(Error::Message(format!( + "{} ({}): {}", + error_code, http_code, message + ))) + } + m => return Err(Error::unexpected_rpc_message(m)), + }; + + if let serde_json::Value::Object(map) = serde_json::to_value(&resp)? { + if let Some(inner) = map.get(&endpoint) { + serde_json::to_writer_pretty(std::io::stdout(), &inner)?; + return Ok(()); + } + } + + Err(Error::Message(format!( + "Invalid response: {}", + serde_json::to_string(&resp)? + ))) + } +} + +pub fn table_list_abbr, S: AsRef>(values: T) -> String { + let mut iter = values.into_iter(); + + match iter.next() { + Some(first) => match iter.count() { + 0 => first.as_ref().to_string(), + n => format!("{}, ... ({})", first.as_ref(), n + 1), + }, + None => String::new(), + } +} + +pub fn parse_expires_in(expires_in: &Option) -> Result>, Error> { + expires_in + .as_ref() + .map(|x| parse_duration::parse::parse(x).map(|dur| Utc::now() + dur)) + .transpose() + .ok_or_message("Invalid duration passed for --expires-in parameter") +} diff --git a/src/garage/cli/remote/node.rs b/src/garage/cli/remote/node.rs new file mode 100644 index 00000000..482f239d --- /dev/null +++ b/src/garage/cli/remote/node.rs @@ -0,0 +1,120 @@ +use format_table::format_table; + +use garage_util::error::*; + +use garage_api_admin::api::*; + +use crate::cli::remote::*; +use crate::cli::structs::*; + +impl Cli { + pub async fn cmd_meta(&self, cmd: MetaOperation) -> Result<(), Error> { + let MetaOperation::Snapshot { all } = cmd; + + let res = self + .api_request(CreateMetadataSnapshotRequest { + node: if all { + "*".to_string() + } else { + hex::encode(self.rpc_host) + }, + body: LocalCreateMetadataSnapshotRequest, + }) + .await?; + + let mut table = vec!["Node\tResult".to_string()]; + for (node, _) in res.success.iter() { + table.push(format!("{:.16}\tSnapshot created", node)); + } + for (node, err) in res.error.iter() { + table.push(format!("{:.16}\tError: {}", node, err)); + } + format_table(table); + + if !res.error.is_empty() { + return Err(Error::Message(format!( + "{} nodes returned an error", + res.error.len() + ))); + } + + Ok(()) + } + + pub async fn cmd_stats(&self, cmd: StatsOpt) -> Result<(), Error> { + let res = self + .api_request(GetNodeStatisticsRequest { + node: if cmd.all_nodes { + "*".to_string() + } else { + hex::encode(self.rpc_host) + }, + body: LocalGetNodeStatisticsRequest, + }) + .await?; + + for (node, res) in res.success.iter() { + println!("==== NODE [{:.16}] ====", node); + println!("{}\n", res.freeform); + } + + for (node, err) in res.error.iter() { + println!("==== NODE [{:.16}] ====", node); + println!("Error: {}\n", err); + } + + let res = self.api_request(GetClusterStatisticsRequest).await?; + println!("==== CLUSTER STATISTICS ===="); + println!("{}\n", res.freeform); + + Ok(()) + } + + pub async fn cmd_repair(&self, cmd: RepairOpt) -> Result<(), Error> { + if !cmd.yes { + return Err(Error::Message( + "Please add --yes to start the repair operation".into(), + )); + } + + let repair_type = match cmd.what { + RepairWhat::Tables => RepairType::Tables, + RepairWhat::Blocks => RepairType::Blocks, + RepairWhat::Versions => RepairType::Versions, + RepairWhat::MultipartUploads => RepairType::MultipartUploads, + RepairWhat::BlockRefs => RepairType::BlockRefs, + RepairWhat::BlockRc => RepairType::BlockRc, + RepairWhat::Rebalance => RepairType::Rebalance, + RepairWhat::Scrub { cmd } => RepairType::Scrub(match cmd { + ScrubCmd::Start => ScrubCommand::Start, + ScrubCmd::Cancel => ScrubCommand::Cancel, + ScrubCmd::Pause => ScrubCommand::Pause, + ScrubCmd::Resume => ScrubCommand::Resume, + }), + RepairWhat::Aliases => RepairType::Aliases, + RepairWhat::ClearResyncQueue => RepairType::ClearResyncQueue, + }; + + let res = self + .api_request(LaunchRepairOperationRequest { + node: if cmd.all_nodes { + "*".to_string() + } else { + hex::encode(self.rpc_host) + }, + body: LocalLaunchRepairOperationRequest { repair_type }, + }) + .await?; + + let mut table = vec![]; + for (node, err) in res.error.iter() { + table.push(format!("{:.16}\tError: {}", node, err)); + } + for (node, _) in res.success.iter() { + table.push(format!("{:.16}\tRepair launched", node)); + } + format_table(table); + + Ok(()) + } +} diff --git a/src/garage/cli/remote/worker.rs b/src/garage/cli/remote/worker.rs new file mode 100644 index 00000000..45f0b3cd --- /dev/null +++ b/src/garage/cli/remote/worker.rs @@ -0,0 +1,213 @@ +use format_table::format_table; + +use garage_util::error::*; + +use garage_api_admin::api::*; + +use crate::cli::remote::*; +use crate::cli::structs::*; + +impl Cli { + pub async fn cmd_worker(&self, cmd: WorkerOperation) -> Result<(), Error> { + match cmd { + WorkerOperation::List { opt } => self.cmd_list_workers(opt).await, + WorkerOperation::Info { tid } => self.cmd_worker_info(tid).await, + WorkerOperation::Get { + all_nodes, + variable, + } => self.cmd_get_var(all_nodes, variable).await, + WorkerOperation::Set { + all_nodes, + variable, + value, + } => self.cmd_set_var(all_nodes, variable, value).await, + } + } + + pub async fn cmd_list_workers(&self, opt: WorkerListOpt) -> Result<(), Error> { + let mut list = self + .local_api_request(LocalListWorkersRequest { + busy_only: opt.busy, + error_only: opt.errors, + }) + .await? + .0; + + list.sort_by_key(|info| { + ( + match info.state { + WorkerStateResp::Busy | WorkerStateResp::Throttled { .. } => 0, + WorkerStateResp::Idle => 1, + WorkerStateResp::Done => 2, + }, + info.id, + ) + }); + + let mut table = + vec!["TID\tState\tName\tTranq\tDone\tQueue\tErrors\tConsec\tLast".to_string()]; + let tf = timeago::Formatter::new(); + for info in list.iter() { + let err_ago = info + .last_error + .as_ref() + .map(|x| tf.convert(Duration::from_secs(x.secs_ago))) + .unwrap_or_default(); + let (total_err, consec_err) = if info.errors > 0 { + (info.errors.to_string(), info.consecutive_errors.to_string()) + } else { + ("-".into(), "-".into()) + }; + + table.push(format!( + "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}", + info.id, + format_worker_state(&info.state), + info.name, + info.tranquility + .as_ref() + .map(ToString::to_string) + .unwrap_or_else(|| "-".into()), + info.progress.as_deref().unwrap_or("-"), + info.queue_length + .as_ref() + .map(ToString::to_string) + .unwrap_or_else(|| "-".into()), + total_err, + consec_err, + err_ago, + )); + } + format_table(table); + + Ok(()) + } + + pub async fn cmd_worker_info(&self, tid: usize) -> Result<(), Error> { + let info = self + .local_api_request(LocalGetWorkerInfoRequest { id: tid as u64 }) + .await? + .0; + + let mut table = vec![]; + table.push(format!("Task id:\t{}", info.id)); + table.push(format!("Worker name:\t{}", info.name)); + match &info.state { + WorkerStateResp::Throttled { duration_secs } => { + table.push(format!( + "Worker state:\tBusy (throttled, paused for {:.3}s)", + duration_secs + )); + } + s => { + table.push(format!("Worker state:\t{}", format_worker_state(s))); + } + }; + if let Some(tql) = info.tranquility { + table.push(format!("Tranquility:\t{}", tql)); + } + + table.push("".into()); + table.push(format!("Total errors:\t{}", info.errors)); + table.push(format!("Consecutive errs:\t{}", info.consecutive_errors)); + if let Some(err) = info.last_error { + table.push(format!("Last error:\t{}", err.message)); + let tf = timeago::Formatter::new(); + table.push(format!( + "Last error time:\t{}", + tf.convert(Duration::from_secs(err.secs_ago)) + )); + } + + table.push("".into()); + if let Some(p) = info.progress { + table.push(format!("Progress:\t{}", p)); + } + if let Some(ql) = info.queue_length { + table.push(format!("Queue length:\t{}", ql)); + } + if let Some(pe) = info.persistent_errors { + table.push(format!("Persistent errors:\t{}", pe)); + } + + for (i, s) in info.freeform.iter().enumerate() { + if i == 0 { + if table.last() != Some(&"".into()) { + table.push("".into()); + } + table.push(format!("Message:\t{}", s)); + } else { + table.push(format!("\t{}", s)); + } + } + format_table(table); + + Ok(()) + } + + pub async fn cmd_get_var(&self, all: bool, var: Option) -> Result<(), Error> { + let res = self + .api_request(GetWorkerVariableRequest { + node: if all { + "*".to_string() + } else { + hex::encode(self.rpc_host) + }, + body: LocalGetWorkerVariableRequest { variable: var }, + }) + .await?; + + let mut table = vec![]; + for (node, vars) in res.success.iter() { + for (key, val) in vars.0.iter() { + table.push(format!("{:.16}\t{}\t{}", node, key, val)); + } + } + format_table(table); + + for (node, err) in res.error.iter() { + eprintln!("{:.16}: error: {}", node, err); + } + + Ok(()) + } + + pub async fn cmd_set_var( + &self, + all: bool, + variable: String, + value: String, + ) -> Result<(), Error> { + let res = self + .api_request(SetWorkerVariableRequest { + node: if all { + "*".to_string() + } else { + hex::encode(self.rpc_host) + }, + body: LocalSetWorkerVariableRequest { variable, value }, + }) + .await?; + + let mut table = vec![]; + for (node, kv) in res.success.iter() { + table.push(format!("{:.16}\t{}\t{}", node, kv.variable, kv.value)); + } + format_table(table); + + for (node, err) in res.error.iter() { + eprintln!("{:.16}: error: {}", node, err); + } + + Ok(()) + } +} + +fn format_worker_state(s: &WorkerStateResp) -> &'static str { + match s { + WorkerStateResp::Busy => "Busy", + WorkerStateResp::Throttled { .. } => "Busy*", + WorkerStateResp::Idle => "Idle", + WorkerStateResp::Done => "Done", + } +} diff --git a/src/garage/cli/structs.rs b/src/garage/cli/structs.rs index 386a213b..7fd2defa 100644 --- a/src/garage/cli/structs.rs +++ b/src/garage/cli/structs.rs @@ -1,9 +1,8 @@ -use serde::{Deserialize, Serialize}; use structopt::StructOpt; use garage_util::version::garage_version; -use crate::cli::convert_db; +use crate::cli::local::convert_db; #[derive(StructOpt, Debug)] pub enum Command { @@ -31,6 +30,10 @@ pub enum Command { #[structopt(name = "key", version = garage_version())] Key(KeyOperation), + /// Operations on admin API tokens + #[structopt(name = "admin-token", version = garage_version())] + AdminToken(AdminTokenOperation), + /// Start repair of node data on remote node #[structopt(name = "repair", version = garage_version())] Repair(RepairOpt), @@ -59,8 +62,27 @@ pub enum Command { /// Convert metadata db between database engine formats #[structopt(name = "convert-db", version = garage_version())] ConvertDb(convert_db::ConvertDbOpt), + + /// Output openapi JSON schema for admin api + #[structopt(name = "admin-api-schema", version = garage_version(), setting(structopt::clap::AppSettings::Hidden))] + AdminApiSchema, + + /// Directly invoke the admin API using a JSON payload. + /// The result is printed to `stdout` in JSON format. + #[structopt(name = "json-api", version = garage_version())] + JsonApi { + /// The admin API endpoint to invoke, e.g. GetClusterStatus + endpoint: String, + /// The JSON payload, or `-` to read from `stdin` + #[structopt(default_value = "null")] + payload: String, + }, } +// ------------------------- +// ---- garage node ... ---- +// ------------------------- + #[derive(StructOpt, Debug)] pub enum NodeOperation { /// Print the full node ID (public key) of this Garage node, and its publicly reachable IP @@ -88,6 +110,10 @@ pub struct ConnectNodeOpt { pub(crate) node: String, } +// --------------------------- +// ---- garage layout ... ---- +// --------------------------- + #[derive(StructOpt, Debug)] pub enum LayoutOperation { /// Assign role to Garage node @@ -190,7 +216,11 @@ pub struct SkipDeadNodesOpt { pub(crate) allow_missing_data: bool, } -#[derive(Serialize, Deserialize, StructOpt, Debug)] +// --------------------------- +// ---- garage bucket ... ---- +// --------------------------- + +#[derive(StructOpt, Debug)] pub enum BucketOperation { /// List buckets #[structopt(name = "list", version = garage_version())] @@ -235,9 +265,13 @@ pub enum BucketOperation { /// Clean up (abort) old incomplete multipart uploads #[structopt(name = "cleanup-incomplete-uploads", version = garage_version())] CleanupIncompleteUploads(CleanupIncompleteUploadsOpt), + + /// Inspect an object in a bucket + #[structopt(name = "inspect-object", version = garage_version())] + InspectObject(InspectObjectOpt), } -#[derive(Serialize, Deserialize, StructOpt, Debug)] +#[derive(StructOpt, Debug)] pub struct WebsiteOpt { /// Create #[structopt(long = "allow")] @@ -259,13 +293,13 @@ pub struct WebsiteOpt { pub error_document: Option, } -#[derive(Serialize, Deserialize, StructOpt, Debug)] +#[derive(StructOpt, Debug)] pub struct BucketOpt { /// Bucket name pub name: String, } -#[derive(Serialize, Deserialize, StructOpt, Debug)] +#[derive(StructOpt, Debug)] pub struct DeleteBucketOpt { /// Bucket name pub name: String, @@ -275,7 +309,7 @@ pub struct DeleteBucketOpt { pub yes: bool, } -#[derive(Serialize, Deserialize, StructOpt, Debug)] +#[derive(StructOpt, Debug)] pub struct AliasBucketOpt { /// Existing bucket name (its alias in global namespace or its full hex uuid) pub existing_bucket: String, @@ -288,7 +322,7 @@ pub struct AliasBucketOpt { pub local: Option, } -#[derive(Serialize, Deserialize, StructOpt, Debug)] +#[derive(StructOpt, Debug)] pub struct UnaliasBucketOpt { /// Bucket name pub name: String, @@ -298,7 +332,7 @@ pub struct UnaliasBucketOpt { pub local: Option, } -#[derive(Serialize, Deserialize, StructOpt, Debug)] +#[derive(StructOpt, Debug)] pub struct PermBucketOpt { /// Access key name or ID #[structopt(long = "key")] @@ -321,7 +355,7 @@ pub struct PermBucketOpt { pub bucket: String, } -#[derive(Serialize, Deserialize, StructOpt, Debug)] +#[derive(StructOpt, Debug)] pub struct SetQuotasOpt { /// Bucket name pub bucket: String, @@ -336,7 +370,7 @@ pub struct SetQuotasOpt { pub max_objects: Option, } -#[derive(Serialize, Deserialize, StructOpt, Debug)] +#[derive(StructOpt, Debug)] pub struct CleanupIncompleteUploadsOpt { /// Abort multipart uploads older than this value #[structopt(long = "older-than", default_value = "1d")] @@ -347,7 +381,19 @@ pub struct CleanupIncompleteUploadsOpt { pub buckets: Vec, } -#[derive(Serialize, Deserialize, StructOpt, Debug)] +#[derive(StructOpt, Debug)] +pub struct InspectObjectOpt { + /// Name or ID of bucket + pub bucket: String, + /// Key of object to inspect + pub key: String, +} + +// ------------------------ +// ---- garage key ... ---- +// ------------------------ + +#[derive(StructOpt, Debug)] pub enum KeyOperation { /// List keys #[structopt(name = "list", version = garage_version())] @@ -380,9 +426,21 @@ pub enum KeyOperation { /// Import key #[structopt(name = "import", version = garage_version())] Import(KeyImportOpt), + + /// Set parameters for an access key + #[structopt(name = "set", version = garage_version())] + Set(KeySetOpt), + + /// Delete all expired access keys + #[structopt(name = "delete-expired", version = garage_version())] + DeleteExpired { + /// Confirm deletion + #[structopt(long = "yes")] + yes: bool, + }, } -#[derive(Serialize, Deserialize, StructOpt, Debug)] +#[derive(StructOpt, Debug)] pub struct KeyInfoOpt { /// ID or name of the key pub key_pattern: String, @@ -391,14 +449,32 @@ pub struct KeyInfoOpt { pub show_secret: bool, } -#[derive(Serialize, Deserialize, StructOpt, Debug)] +#[derive(StructOpt, Debug)] pub struct KeyNewOpt { /// Name of the key #[structopt(default_value = "Unnamed key")] pub name: String, + /// Set an expiration time for the access key + /// (see docs.rs/parse_duration for date format) + #[structopt(long = "expires-in")] + pub expires_in: Option, } -#[derive(Serialize, Deserialize, StructOpt, Debug)] +#[derive(StructOpt, Debug)] +pub struct KeySetOpt { + /// ID or name of the key + pub key_pattern: String, + + /// Set an expiration time for the access key + /// (see docs.rs/parse_duration for date format) + #[structopt(long = "expires-in")] + pub expires_in: Option, + /// Set the access key to never expire + #[structopt(long = "never-expires")] + pub never_expires: bool, +} + +#[derive(StructOpt, Debug)] pub struct KeyRenameOpt { /// ID or name of the key pub key_pattern: String, @@ -407,7 +483,7 @@ pub struct KeyRenameOpt { pub new_name: String, } -#[derive(Serialize, Deserialize, StructOpt, Debug)] +#[derive(StructOpt, Debug)] pub struct KeyDeleteOpt { /// ID or name of the key pub key_pattern: String, @@ -417,7 +493,7 @@ pub struct KeyDeleteOpt { pub yes: bool, } -#[derive(Serialize, Deserialize, StructOpt, Debug)] +#[derive(StructOpt, Debug)] pub struct KeyPermOpt { /// ID or name of the key pub key_pattern: String, @@ -427,7 +503,7 @@ pub struct KeyPermOpt { pub create_bucket: bool, } -#[derive(Serialize, Deserialize, StructOpt, Debug)] +#[derive(StructOpt, Debug)] pub struct KeyImportOpt { /// Access key ID pub key_id: String, @@ -444,7 +520,110 @@ pub struct KeyImportOpt { pub yes: bool, } -#[derive(Serialize, Deserialize, StructOpt, Debug, Clone)] +// -------------------------------- +// ---- garage admin-token ... ---- +// -------------------------------- + +#[derive(StructOpt, Debug)] +pub enum AdminTokenOperation { + /// List all admin API tokens + #[structopt(name = "list", version = garage_version())] + List, + + /// Fetch info about a specific admin API token + #[structopt(name = "info", version = garage_version())] + Info { + /// Name or prefix of the ID of the token to look up + api_token: String, + }, + + /// Create new admin API token + #[structopt(name = "create", version = garage_version())] + Create(AdminTokenCreateOp), + + /// Rename an admin API token + #[structopt(name = "rename", version = garage_version())] + Rename { + /// Name or prefix of the ID of the token to rename + api_token: String, + /// New name of the admintoken + new_name: String, + }, + + /// Set parameters for an admin API token + #[structopt(name = "set", version = garage_version())] + Set(AdminTokenSetOp), + + /// Delete an admin API token + #[structopt(name = "delete", version = garage_version())] + Delete { + /// Name or prefix of the ID of the token to delete + api_token: String, + /// Confirm deletion + #[structopt(long = "yes")] + yes: bool, + }, + + /// Delete all expired admin API tokens + #[structopt(name = "delete-expired", version = garage_version())] + DeleteExpired { + /// Confirm deletion + #[structopt(long = "yes")] + yes: bool, + }, +} + +#[derive(StructOpt, Debug, Clone)] +pub struct AdminTokenCreateOp { + /// Set a name for the token + pub name: Option, + /// Set an expiration time for the token (see docs.rs/parse_duration for date + /// format) + #[structopt(long = "expires-in")] + pub expires_in: Option, + /// Set a limited scope for the token, as a comma-separated list of + /// admin API functions (e.g. GetClusterStatus, etc.). The default scope + /// is `*`, which allows access to all admin API functions. + /// Note that granting a scope that allows `CreateAdminToken` or + /// `UpdateAdminToken` allows for privilege escalation, and is therefore + /// equivalent to `*`. + #[structopt(long = "scope")] + pub scope: Option, + /// Print only the newly generated API token to stdout + #[structopt(short = "q", long = "quiet")] + pub quiet: bool, +} + +#[derive(StructOpt, Debug, Clone)] +pub struct AdminTokenSetOp { + /// Name or prefix of the ID of the token to modify + pub api_token: String, + + /// Set an expiration time for the token (see docs.rs/parse_duration for date + /// format) + #[structopt(long = "expires-in")] + pub expires_in: Option, + /// Set the token to never expire + #[structopt(long = "never-expires")] + pub never_expires: bool, + + /// Set a limited scope for the token, as a comma-separated list of + /// admin API functions (e.g. GetClusterStatus, etc.), or `*` to allow + /// all admin API functions. + /// Use `--scope=+Scope1,Scope2` to add scopes to the existing list, + /// and `--scope=-Scope1,Scope2` to remove scopes from the existing list. + /// Note that granting a scope that allows `CreateAdminToken` or + /// `UpdateAdminToken` allows for privilege escalation, and is therefore + /// equivalent to `*`. + #[structopt(long = "scope")] + pub scope: Option, +} + +// --------------------------- +// ---- garage repair ... ---- +// --------------------------- + +#[derive(StructOpt, Debug, Clone)] pub struct RepairOpt { /// Launch repair operation on all nodes #[structopt(short = "a", long = "all-nodes")] @@ -458,7 +637,7 @@ pub struct RepairOpt { pub what: RepairWhat, } -#[derive(Serialize, Deserialize, StructOpt, Debug, Eq, PartialEq, Clone)] +#[derive(StructOpt, Debug, Eq, PartialEq, Clone)] pub enum RepairWhat { /// Do a full sync of metadata tables #[structopt(name = "tables", version = garage_version())] @@ -496,7 +675,7 @@ pub enum RepairWhat { Rebalance, } -#[derive(Serialize, Deserialize, StructOpt, Debug, Eq, PartialEq, Clone)] +#[derive(StructOpt, Debug, Eq, PartialEq, Clone)] pub enum ScrubCmd { /// Start scrub #[structopt(name = "start", version = garage_version())] @@ -510,15 +689,13 @@ pub enum ScrubCmd { /// Cancel scrub in progress #[structopt(name = "cancel", version = garage_version())] Cancel, - /// Set tranquility level for in-progress and future scrubs - #[structopt(name = "set-tranquility", version = garage_version())] - SetTranquility { - #[structopt()] - tranquility: u32, - }, } -#[derive(Serialize, Deserialize, StructOpt, Debug, Clone)] +// ----------------------------------- +// ---- garage offline-repair ... ---- +// ----------------------------------- + +#[derive(StructOpt, Debug, Clone)] pub struct OfflineRepairOpt { /// Confirm the launch of the repair operation #[structopt(long = "yes")] @@ -528,7 +705,7 @@ pub struct OfflineRepairOpt { pub what: OfflineRepairWhat, } -#[derive(Serialize, Deserialize, StructOpt, Debug, Eq, PartialEq, Clone)] +#[derive(StructOpt, Debug, Eq, PartialEq, Clone)] pub enum OfflineRepairWhat { /// Repair K2V item counters #[cfg(feature = "k2v")] @@ -539,19 +716,22 @@ pub enum OfflineRepairWhat { ObjectCounters, } -#[derive(Serialize, Deserialize, StructOpt, Debug, Clone)] +// -------------------------- +// ---- garage stats ... ---- +// -------------------------- + +#[derive(StructOpt, Debug, Clone)] pub struct StatsOpt { /// Gather statistics from all nodes #[structopt(short = "a", long = "all-nodes")] pub all_nodes: bool, - - /// Don't show global cluster stats (internal use in RPC) - #[structopt(skip)] - #[serde(default)] - pub skip_global: bool, } -#[derive(Serialize, Deserialize, StructOpt, Debug, Eq, PartialEq, Clone)] +// --------------------------- +// ---- garage worker ... ---- +// --------------------------- + +#[derive(StructOpt, Debug, Eq, PartialEq, Clone)] pub enum WorkerOperation { /// List all workers on Garage node #[structopt(name = "list", version = garage_version())] @@ -584,7 +764,7 @@ pub enum WorkerOperation { }, } -#[derive(Serialize, Deserialize, StructOpt, Debug, Eq, PartialEq, Clone, Copy)] +#[derive(StructOpt, Debug, Eq, PartialEq, Clone, Copy)] pub struct WorkerListOpt { /// Show only busy workers #[structopt(short = "b", long = "busy")] @@ -594,7 +774,11 @@ pub struct WorkerListOpt { pub errors: bool, } -#[derive(Serialize, Deserialize, StructOpt, Debug, Eq, PartialEq, Clone)] +// -------------------------- +// ---- garage block ... ---- +// -------------------------- + +#[derive(StructOpt, Debug, Eq, PartialEq, Clone)] pub enum BlockOperation { /// List all blocks that currently have a resync error #[structopt(name = "list-errors", version = garage_version())] @@ -626,7 +810,11 @@ pub enum BlockOperation { }, } -#[derive(Serialize, Deserialize, StructOpt, Debug, Eq, PartialEq, Clone, Copy)] +// ------------------------- +// ---- garage meta ... ---- +// ------------------------- + +#[derive(StructOpt, Debug, Eq, PartialEq, Clone, Copy)] pub enum MetaOperation { /// Save a snapshot of the metadata db file #[structopt(name = "snapshot", version = garage_version())] diff --git a/src/garage/cli/util.rs b/src/garage/cli/util.rs deleted file mode 100644 index 21c14f42..00000000 --- a/src/garage/cli/util.rs +++ /dev/null @@ -1,457 +0,0 @@ -use std::collections::HashMap; -use std::time::Duration; - -use format_table::format_table; -use garage_util::background::*; -use garage_util::crdt::*; -use garage_util::data::*; -use garage_util::error::*; -use garage_util::time::*; - -use garage_block::manager::BlockResyncErrorInfo; - -use garage_model::bucket_table::*; -use garage_model::key_table::*; -use garage_model::s3::mpu_table::{self, MultipartUpload}; -use garage_model::s3::object_table; -use garage_model::s3::version_table::*; - -use crate::cli::structs::WorkerListOpt; - -pub fn print_bucket_list(bl: Vec) { - println!("List of buckets:"); - - let mut table = vec![]; - for bucket in bl { - let aliases = bucket - .aliases() - .iter() - .filter(|(_, _, active)| *active) - .map(|(name, _, _)| name.to_string()) - .collect::>(); - let local_aliases_n = match &bucket - .local_aliases() - .iter() - .filter(|(_, _, active)| *active) - .collect::>()[..] - { - [] => "".into(), - [((k, n), _, _)] => format!("{}:{}", k, n), - s => format!("[{} local aliases]", s.len()), - }; - - table.push(format!( - "\t{}\t{}\t{}", - aliases.join(","), - local_aliases_n, - hex::encode(bucket.id), - )); - } - format_table(table); -} - -pub fn print_key_list(kl: Vec<(String, String)>) { - println!("List of keys:"); - let mut table = vec![]; - for key in kl { - table.push(format!("\t{}\t{}", key.0, key.1)); - } - format_table(table); -} - -pub fn print_key_info(key: &Key, relevant_buckets: &HashMap) { - let bucket_global_aliases = |b: &Uuid| { - if let Some(bucket) = relevant_buckets.get(b) { - if let Some(p) = bucket.state.as_option() { - return p - .aliases - .items() - .iter() - .filter(|(_, _, active)| *active) - .map(|(a, _, _)| a.clone()) - .collect::>() - .join(", "); - } - } - - "".to_string() - }; - - match &key.state { - Deletable::Present(p) => { - println!("Key name: {}", p.name.get()); - println!("Key ID: {}", key.key_id); - println!("Secret key: {}", p.secret_key); - println!("Can create buckets: {}", p.allow_create_bucket.get()); - println!("\nKey-specific bucket aliases:"); - let mut table = vec![]; - for (alias_name, _, alias) in p.local_aliases.items().iter() { - if let Some(bucket_id) = alias { - table.push(format!( - "\t{}\t{}\t{}", - alias_name, - bucket_global_aliases(bucket_id), - hex::encode(bucket_id) - )); - } - } - format_table(table); - - println!("\nAuthorized buckets:"); - let mut table = vec![]; - for (bucket_id, perm) in p.authorized_buckets.items().iter() { - if !perm.is_any() { - continue; - } - let rflag = if perm.allow_read { "R" } else { " " }; - let wflag = if perm.allow_write { "W" } else { " " }; - let oflag = if perm.allow_owner { "O" } else { " " }; - let local_aliases = p - .local_aliases - .items() - .iter() - .filter(|(_, _, a)| *a == Some(*bucket_id)) - .map(|(a, _, _)| a.clone()) - .collect::>() - .join(", "); - table.push(format!( - "\t{}{}{}\t{}\t{}\t{:?}", - rflag, - wflag, - oflag, - bucket_global_aliases(bucket_id), - local_aliases, - bucket_id - )); - } - format_table(table); - } - Deletable::Deleted => { - println!("Key {} is deleted.", key.key_id); - } - } -} - -pub fn print_bucket_info( - bucket: &Bucket, - relevant_keys: &HashMap, - counters: &HashMap, - mpu_counters: &HashMap, -) { - let key_name = |k| { - relevant_keys - .get(k) - .map(|k| k.params().unwrap().name.get().as_str()) - .unwrap_or("") - }; - - println!("Bucket: {}", hex::encode(bucket.id)); - match &bucket.state { - Deletable::Deleted => println!("Bucket is deleted."), - Deletable::Present(p) => { - let size = - bytesize::ByteSize::b(*counters.get(object_table::BYTES).unwrap_or(&0) as u64); - println!( - "\nSize: {} ({})", - size.to_string_as(true), - size.to_string_as(false) - ); - println!( - "Objects: {}", - *counters.get(object_table::OBJECTS).unwrap_or(&0) - ); - println!( - "Unfinished uploads (multipart and non-multipart): {}", - *counters.get(object_table::UNFINISHED_UPLOADS).unwrap_or(&0) - ); - println!( - "Unfinished multipart uploads: {}", - *mpu_counters.get(mpu_table::UPLOADS).unwrap_or(&0) - ); - let mpu_size = - bytesize::ByteSize::b(*mpu_counters.get(mpu_table::BYTES).unwrap_or(&0) as u64); - println!( - "Size of unfinished multipart uploads: {} ({})", - mpu_size.to_string_as(true), - mpu_size.to_string_as(false), - ); - - println!("\nWebsite access: {}", p.website_config.get().is_some()); - - let quotas = p.quotas.get(); - if quotas.max_size.is_some() || quotas.max_objects.is_some() { - println!("\nQuotas:"); - if let Some(ms) = quotas.max_size { - let ms = bytesize::ByteSize::b(ms); - println!( - " maximum size: {} ({})", - ms.to_string_as(true), - ms.to_string_as(false) - ); - } - if let Some(mo) = quotas.max_objects { - println!(" maximum number of objects: {}", mo); - } - } - - println!("\nGlobal aliases:"); - for (alias, _, active) in p.aliases.items().iter() { - if *active { - println!(" {}", alias); - } - } - - println!("\nKey-specific aliases:"); - let mut table = vec![]; - for ((key_id, alias), _, active) in p.local_aliases.items().iter() { - if *active { - table.push(format!("\t{} ({})\t{}", key_id, key_name(key_id), alias)); - } - } - format_table(table); - - println!("\nAuthorized keys:"); - let mut table = vec![]; - for (k, perm) in p.authorized_keys.items().iter() { - if !perm.is_any() { - continue; - } - let rflag = if perm.allow_read { "R" } else { " " }; - let wflag = if perm.allow_write { "W" } else { " " }; - let oflag = if perm.allow_owner { "O" } else { " " }; - table.push(format!( - "\t{}{}{}\t{}\t{}", - rflag, - wflag, - oflag, - k, - key_name(k) - )); - } - format_table(table); - } - }; -} - -pub fn find_matching_node( - cand: impl std::iter::Iterator, - pattern: &str, -) -> Result { - let mut candidates = vec![]; - for c in cand { - if hex::encode(c).starts_with(pattern) && !candidates.contains(&c) { - candidates.push(c); - } - } - if candidates.len() != 1 { - Err(Error::Message(format!( - "{} nodes match '{}'", - candidates.len(), - pattern, - ))) - } else { - Ok(candidates[0]) - } -} - -pub fn print_worker_list(wi: HashMap, wlo: WorkerListOpt) { - let mut wi = wi.into_iter().collect::>(); - wi.sort_by_key(|(tid, info)| { - ( - match info.state { - WorkerState::Busy | WorkerState::Throttled(_) => 0, - WorkerState::Idle => 1, - WorkerState::Done => 2, - }, - *tid, - ) - }); - - let mut table = vec!["TID\tState\tName\tTranq\tDone\tQueue\tErrors\tConsec\tLast".to_string()]; - for (tid, info) in wi.iter() { - if wlo.busy && !matches!(info.state, WorkerState::Busy | WorkerState::Throttled(_)) { - continue; - } - if wlo.errors && info.errors == 0 { - continue; - } - - let tf = timeago::Formatter::new(); - let err_ago = info - .last_error - .as_ref() - .map(|(_, t)| tf.convert(Duration::from_millis(now_msec() - t))) - .unwrap_or_default(); - let (total_err, consec_err) = if info.errors > 0 { - (info.errors.to_string(), info.consecutive_errors.to_string()) - } else { - ("-".into(), "-".into()) - }; - - table.push(format!( - "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}", - tid, - info.state, - info.name, - info.status - .tranquility - .as_ref() - .map(ToString::to_string) - .unwrap_or_else(|| "-".into()), - info.status.progress.as_deref().unwrap_or("-"), - info.status - .queue_length - .as_ref() - .map(ToString::to_string) - .unwrap_or_else(|| "-".into()), - total_err, - consec_err, - err_ago, - )); - } - format_table(table); -} - -pub fn print_worker_info(tid: usize, info: WorkerInfo) { - let mut table = vec![]; - table.push(format!("Task id:\t{}", tid)); - table.push(format!("Worker name:\t{}", info.name)); - match info.state { - WorkerState::Throttled(t) => { - table.push(format!( - "Worker state:\tBusy (throttled, paused for {:.3}s)", - t - )); - } - s => { - table.push(format!("Worker state:\t{}", s)); - } - }; - if let Some(tql) = info.status.tranquility { - table.push(format!("Tranquility:\t{}", tql)); - } - - table.push("".into()); - table.push(format!("Total errors:\t{}", info.errors)); - table.push(format!("Consecutive errs:\t{}", info.consecutive_errors)); - if let Some((s, t)) = info.last_error { - table.push(format!("Last error:\t{}", s)); - let tf = timeago::Formatter::new(); - table.push(format!( - "Last error time:\t{}", - tf.convert(Duration::from_millis(now_msec() - t)) - )); - } - - table.push("".into()); - if let Some(p) = info.status.progress { - table.push(format!("Progress:\t{}", p)); - } - if let Some(ql) = info.status.queue_length { - table.push(format!("Queue length:\t{}", ql)); - } - if let Some(pe) = info.status.persistent_errors { - table.push(format!("Persistent errors:\t{}", pe)); - } - - for (i, s) in info.status.freeform.iter().enumerate() { - if i == 0 { - if table.last() != Some(&"".into()) { - table.push("".into()); - } - table.push(format!("Message:\t{}", s)); - } else { - table.push(format!("\t{}", s)); - } - } - format_table(table); -} - -pub fn print_worker_vars(wv: Vec<(Uuid, String, String)>) { - let table = wv - .into_iter() - .map(|(n, k, v)| format!("{:?}\t{}\t{}", n, k, v)) - .collect::>(); - format_table(table); -} - -pub fn print_block_error_list(el: Vec) { - let now = now_msec(); - let tf = timeago::Formatter::new(); - let mut tf2 = timeago::Formatter::new(); - tf2.ago(""); - - let mut table = vec!["Hash\tRC\tErrors\tLast error\tNext try".into()]; - for e in el { - let next_try = if e.next_try > now { - tf2.convert(Duration::from_millis(e.next_try - now)) - } else { - "asap".to_string() - }; - table.push(format!( - "{}\t{}\t{}\t{}\tin {}", - hex::encode(e.hash.as_slice()), - e.refcount, - e.error_count, - tf.convert(Duration::from_millis(now - e.last_try)), - next_try - )); - } - format_table(table); -} - -pub fn print_block_info( - hash: Hash, - refcount: u64, - versions: Vec>, - uploads: Vec, -) { - println!("Block hash: {}", hex::encode(hash.as_slice())); - println!("Refcount: {}", refcount); - println!(); - - let mut table = vec!["Version\tBucket\tKey\tMPU\tDeleted".into()]; - let mut nondeleted_count = 0; - for v in versions.iter() { - match v { - Ok(ver) => { - match &ver.backlink { - VersionBacklink::Object { bucket_id, key } => { - table.push(format!( - "{:?}\t{:?}\t{}\t\t{:?}", - ver.uuid, - bucket_id, - key, - ver.deleted.get() - )); - } - VersionBacklink::MultipartUpload { upload_id } => { - let upload = uploads.iter().find(|x| x.upload_id == *upload_id); - table.push(format!( - "{:?}\t{:?}\t{}\t{:?}\t{:?}", - ver.uuid, - upload.map(|u| u.bucket_id).unwrap_or_default(), - upload.map(|u| u.key.as_str()).unwrap_or_default(), - upload_id, - ver.deleted.get() - )); - } - } - if !ver.deleted.get() { - nondeleted_count += 1; - } - } - Err(vh) => { - table.push(format!("{:?}\t\t\t\tyes", vh)); - } - } - } - format_table(table); - - if refcount != nondeleted_count { - println!(); - println!( - "Warning: refcount does not match number of non-deleted versions, you should try `garage repair block-rc`." - ); - } -} diff --git a/src/garage/main.rs b/src/garage/main.rs index 2703bedd..403ba55c 100644 --- a/src/garage/main.rs +++ b/src/garage/main.rs @@ -4,12 +4,9 @@ #[macro_use] extern crate tracing; -mod admin; mod cli; -mod repair; mod secrets; mod server; -#[cfg(feature = "telemetry-otlp")] mod tracing_setup; #[cfg(not(any(feature = "bundled-libs", feature = "system-libs")))] @@ -25,6 +22,7 @@ use std::net::SocketAddr; use std::path::PathBuf; use structopt::StructOpt; +use utoipa::OpenApi; use garage_net::util::parse_and_resolve_peer_addr; use garage_net::NetworkKey; @@ -34,10 +32,9 @@ use garage_util::error::*; use garage_rpc::system::*; use garage_rpc::*; -use garage_model::helper::error::Error as HelperError; +use garage_api_admin::api_server::{AdminRpc as ProxyRpc, ADMIN_RPC_PATH as PROXY_RPC_PATH}; -use admin::*; -use cli::*; +use cli::structs::*; use secrets::Secrets; #[derive(StructOpt, Debug)] @@ -46,7 +43,7 @@ use secrets::Secrets; about = "S3-compatible object store for self-hosted geo-distributed deployments" )] struct Opt { - /// Host to connect to for admin operations, in the format: @: + /// Host to connect to for admin operations, in the format: `@:` #[structopt(short = "h", long = "rpc-host", env = "GARAGE_RPC_HOST")] pub rpc_host: Option, @@ -70,24 +67,30 @@ struct Opt { async fn main() { // Initialize version and features info let features = &[ - #[cfg(feature = "k2v")] - "k2v", - #[cfg(feature = "lmdb")] - "lmdb", - #[cfg(feature = "sqlite")] - "sqlite", - #[cfg(feature = "consul-discovery")] - "consul-discovery", - #[cfg(feature = "kubernetes-discovery")] - "kubernetes-discovery", - #[cfg(feature = "metrics")] - "metrics", - #[cfg(feature = "telemetry-otlp")] - "telemetry-otlp", #[cfg(feature = "bundled-libs")] "bundled-libs", + #[cfg(feature = "consul-discovery")] + "consul-discovery", + #[cfg(feature = "fjall")] + "fjall", + #[cfg(feature = "journald")] + "journald", + #[cfg(feature = "k2v")] + "k2v", + #[cfg(feature = "kubernetes-discovery")] + "kubernetes-discovery", + #[cfg(feature = "lmdb")] + "lmdb", + #[cfg(feature = "metrics")] + "metrics", + #[cfg(feature = "sqlite")] + "sqlite", + #[cfg(feature = "syslog")] + "syslog", #[cfg(feature = "system-libs")] "system-libs", + #[cfg(feature = "telemetry-otlp")] + "telemetry-otlp", ][..]; if let Some(git_version) = option_env!("GIT_VERSION") { garage_util::version::init_version(git_version); @@ -145,13 +148,22 @@ async fn main() { let res = match opt.cmd { Command::Server => server::run_server(opt.config_file, opt.secrets).await, Command::OfflineRepair(repair_opt) => { - repair::offline::offline_repair(opt.config_file, opt.secrets, repair_opt).await + cli::local::repair::offline_repair(opt.config_file, opt.secrets, repair_opt).await } Command::ConvertDb(conv_opt) => { - cli::convert_db::do_conversion(conv_opt).map_err(From::from) + cli::local::convert_db::do_conversion(conv_opt).map_err(From::from) } Command::Node(NodeOperation::NodeId(node_id_opt)) => { - node_id_command(opt.config_file, node_id_opt.quiet) + cli::local::init::node_id_command(opt.config_file, node_id_opt.quiet) + } + Command::AdminApiSchema => { + println!( + "{}", + garage_api_admin::openapi::ApiDoc::openapi() + .to_pretty_json() + .unwrap() + ); + Ok(()) } _ => cli_command(opt).await, }; @@ -289,7 +301,7 @@ async fn cli_command(opt: Opt) -> Result<(), Error> { (id, addrs[0], false) } else { let node_id = garage_rpc::system::read_node_id(&config.as_ref().unwrap().metadata_dir) - .err_context(READ_KEY_ERROR)?; + .err_context(cli::local::init::READ_KEY_ERROR)?; if let Some(a) = config.as_ref().and_then(|c| c.rpc_public_addr.as_ref()) { use std::net::ToSocketAddrs; let a = a @@ -318,13 +330,12 @@ async fn cli_command(opt: Opt) -> Result<(), Error> { Err(e).err_context("Unable to connect to destination RPC host. Check that you are using the same value of rpc_secret as them, and that you have their correct full-length node ID (public key).")?; } - let system_rpc_endpoint = netapp.endpoint::(SYSTEM_RPC_PATH.into()); - let admin_rpc_endpoint = netapp.endpoint::(ADMIN_RPC_PATH.into()); + let proxy_rpc_endpoint = netapp.endpoint::(PROXY_RPC_PATH.into()); - match cli_command_dispatch(opt.cmd, &system_rpc_endpoint, &admin_rpc_endpoint, id).await { - Err(HelperError::Internal(i)) => Err(Error::Message(format!("Internal error: {}", i))), - Err(HelperError::BadRequest(b)) => Err(Error::Message(b)), - Err(e) => Err(Error::Message(format!("{}", e))), - Ok(x) => Ok(x), - } + let cli = cli::remote::Cli { + proxy_rpc_endpoint, + rpc_host: id, + }; + + cli.handle(opt.cmd).await } diff --git a/src/garage/repair/mod.rs b/src/garage/repair/mod.rs deleted file mode 100644 index 4699ace5..00000000 --- a/src/garage/repair/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod offline; -pub mod online; diff --git a/src/garage/server.rs b/src/garage/server.rs index b81ae334..f9cd5631 100644 --- a/src/garage/server.rs +++ b/src/garage/server.rs @@ -14,10 +14,8 @@ use garage_web::WebServer; #[cfg(feature = "k2v")] use garage_api_k2v::api_server::K2VApiServer; -use crate::admin::*; use crate::secrets::{fill_secrets, Secrets}; -#[cfg(feature = "telemetry-otlp")] -use crate::tracing_setup::*; +use crate::tracing_setup::init_tracing; async fn wait_from(mut chan: watch::Receiver) { while !*chan.borrow() { @@ -53,19 +51,15 @@ pub async fn run_server(config_file: PathBuf, secrets: Secrets) -> Result<(), Er info!("Spawning Garage workers..."); garage.spawn_workers(&background)?; - if config.admin.trace_sink.is_some() { + if let Some(admin_trace_sink) = &config.admin.trace_sink { info!("Initialize tracing..."); - - #[cfg(feature = "telemetry-otlp")] - init_tracing(config.admin.trace_sink.as_ref().unwrap(), garage.system.id)?; - - #[cfg(not(feature = "telemetry-otlp"))] - error!("Garage was built without OTLP exporter, admin.trace_sink is ignored."); + init_tracing(admin_trace_sink, garage.system.id)?; } info!("Initialize Admin API server and metrics collector..."); let admin_server = AdminApiServer::new( garage.clone(), + background.clone(), #[cfg(feature = "metrics")] metrics_exporter, ); @@ -73,9 +67,6 @@ pub async fn run_server(config_file: PathBuf, secrets: Secrets) -> Result<(), Er info!("Launching internal Garage cluster communications..."); let run_system = tokio::spawn(garage.system.clone().run(watch_cancel.clone())); - info!("Create admin RPC handler..."); - AdminRpcHandler::new(garage.clone(), background.clone()); - // ---- Launch public-facing API servers ---- let mut servers = vec![]; @@ -93,7 +84,7 @@ pub async fn run_server(config_file: PathBuf, secrets: Secrets) -> Result<(), Er )); } - if config.k2v_api.is_some() { + if let Some(k2v_api) = &config.k2v_api { #[cfg(feature = "k2v")] { info!("Initializing K2V API server..."); @@ -101,7 +92,7 @@ pub async fn run_server(config_file: PathBuf, secrets: Secrets) -> Result<(), Er "K2V API", tokio::spawn(K2VApiServer::run( garage.clone(), - config.k2v_api.as_ref().unwrap().api_bind_addr.clone(), + k2v_api.api_bind_addr.clone(), config.s3_api.s3_region.clone(), watch_cancel.clone(), )), @@ -113,7 +104,7 @@ pub async fn run_server(config_file: PathBuf, secrets: Secrets) -> Result<(), Er if let Some(web_config) = &config.s3_web { info!("Initializing web server..."); - let web_server = WebServer::new(garage.clone(), &web_config); + let web_server = WebServer::new(garage.clone(), web_config); servers.push(( "Web", tokio::spawn(web_server.run(web_config.bind_addr.clone(), watch_cancel.clone())), diff --git a/src/garage/tests/common/client.rs b/src/garage/tests/common/client.rs index 7a6612cb..164888a4 100644 --- a/src/garage/tests/common/client.rs +++ b/src/garage/tests/common/client.rs @@ -12,7 +12,7 @@ pub fn build_client(key: &Key) -> Client { .endpoint_url(format!("http://127.0.0.1:{}", DEFAULT_PORT)) .region(super::REGION) .credentials_provider(credentials) - .behavior_version(BehaviorVersion::v2024_03_28()) + .behavior_version(BehaviorVersion::latest()) .build(); Client::from_conf(config) diff --git a/src/garage/tests/common/custom_requester.rs b/src/garage/tests/common/custom_requester.rs index 6a8eed38..ee78ad2d 100644 --- a/src/garage/tests/common/custom_requester.rs +++ b/src/garage/tests/common/custom_requester.rs @@ -244,7 +244,7 @@ impl<'a> RequestBuilder<'a> { ); all_headers.insert( HeaderName::from_static("x-amz-trailer"), - HeaderValue::from_str(&trailer_algorithm).unwrap(), + HeaderValue::from_str(trailer_algorithm).unwrap(), ); all_headers.insert( @@ -252,8 +252,8 @@ impl<'a> RequestBuilder<'a> { to_streaming_unsigned_trailer_body( &self.body, *chunk_size, - &trailer_algorithm, - &trailer_value, + trailer_algorithm, + trailer_value, ) .len() .to_string() @@ -330,8 +330,8 @@ impl<'a> RequestBuilder<'a> { } => to_streaming_unsigned_trailer_body( &self.body, *chunk_size, - &trailer_algorithm, - &trailer_value, + trailer_algorithm, + trailer_value, ), _ => self.body.clone(), }; diff --git a/src/garage/tests/common/garage.rs b/src/garage/tests/common/garage.rs index 2b0a381c..45c62d40 100644 --- a/src/garage/tests/common/garage.rs +++ b/src/garage/tests/common/garage.rs @@ -1,7 +1,8 @@ -use std::mem::MaybeUninit; use std::path::{Path, PathBuf}; use std::process; -use std::sync::Once; +use std::sync::{Mutex, OnceLock}; + +use serde_json::json; use super::ext::*; @@ -18,7 +19,7 @@ pub struct Key { } pub struct Instance { - process: process::Child, + process: Mutex, pub path: PathBuf, pub default_key: Key, pub s3_port: u16, @@ -109,7 +110,7 @@ api_bind_addr = "127.0.0.1:{admin_port}" .expect("Could not start garage"); Instance { - process: child, + process: Mutex::new(child), path, default_key: Key::default(), s3_port: port, @@ -160,9 +161,11 @@ api_bind_addr = "127.0.0.1:{admin_port}" .expect_success_status("Could not apply garage node layout"); } - fn terminate(&mut self) { + fn terminate(&self) { // TODO: Terminate "gracefully" the process with SIGTERM instead of directly SIGKILL it. self.process + .lock() + .expect("could not lock access to garage child process mutex") .kill() .expect("Could not terminate garage process"); } @@ -195,57 +198,37 @@ api_bind_addr = "127.0.0.1:{admin_port}" let mut key = Key::default(); let mut cmd = self.command(); - let base = cmd.args(["key", "create"]); + let base = cmd.args(["json-api", "CreateKey"]); let with_name = match maybe_name { - Some(name) => base.args([name]), - None => base, + Some(name) => base.args([serde_json::to_string(&json!({"name": name})).unwrap()]), + None => base.args(["{}"]), }; let output = with_name.expect_success_output("Could not create key"); - let stdout = String::from_utf8(output.stdout).unwrap(); + let stdout: serde_json::Value = serde_json::from_slice(&output.stdout).unwrap(); - for line in stdout.lines() { - if let Some(key_id) = line.strip_prefix("Key ID: ") { - key.id = key_id.to_owned(); - continue; - } - if let Some(key_secret) = line.strip_prefix("Secret key: ") { - key.secret = key_secret.to_owned(); - continue; - } - } - assert!(!key.id.is_empty(), "Invalid key: Key ID is empty"); - assert!(!key.secret.is_empty(), "Invalid key: Key secret is empty"); + key.id = stdout["accessKeyId"].as_str().unwrap().to_string(); + key.secret = stdout["secretAccessKey"].as_str().unwrap().to_string(); key } } -static mut INSTANCE: MaybeUninit = MaybeUninit::uninit(); -static INSTANCE_INIT: Once = Once::new(); +static INSTANCE: OnceLock = OnceLock::new(); #[static_init::destructor] extern "C" fn terminate_instance() { - if INSTANCE_INIT.is_completed() { - // This block is sound as it depends on `INSTANCE_INIT` being completed, meaning `INSTANCE` - // is actually initialized. - unsafe { - INSTANCE.assume_init_mut().terminate(); - } + if let Some(instance) = INSTANCE.get() { + instance.terminate(); } } pub fn instance() -> &'static Instance { - INSTANCE_INIT.call_once(|| unsafe { + INSTANCE.get_or_init(|| { let mut instance = Instance::new(); instance.setup(); - - INSTANCE.write(instance); - }); - - // This block is sound as it depends on `INSTANCE_INIT` being completed by calling `call_once` (blocking), - // meaning `INSTANCE` is actually initialized. - unsafe { INSTANCE.assume_init_ref() } + instance + }) } pub fn command(config_path: &Path) -> process::Command { diff --git a/src/garage/tests/s3/multipart.rs b/src/garage/tests/s3/multipart.rs index cc424f59..cd1c754a 100644 --- a/src/garage/tests/s3/multipart.rs +++ b/src/garage/tests/s3/multipart.rs @@ -314,7 +314,7 @@ async fn test_multipart_with_checksum() { .build(); let expected_checksum = calculate_sha1( - &vec![ + &[ BASE64_STANDARD.decode(&ck1).unwrap(), BASE64_STANDARD.decode(&ck2).unwrap(), BASE64_STANDARD.decode(&ck3).unwrap(), diff --git a/src/garage/tests/s3/ssec.rs b/src/garage/tests/s3/ssec.rs index d8f11950..02c9b14a 100644 --- a/src/garage/tests/s3/ssec.rs +++ b/src/garage/tests/s3/ssec.rs @@ -19,7 +19,7 @@ async fn test_ssec_object() { .map(|x| ((x * 3792) % 256) as u8) .collect::>(); - for data in vec![bytes1, bytes2] { + for data in [bytes1, bytes2] { let stream = ByteStream::new(data.clone().into()); // Write encrypted object @@ -399,6 +399,7 @@ async fn test_multipart_upload() { } } +#[expect(clippy::too_many_arguments)] async fn test_read_encrypted( ctx: &Context, bucket: &str, diff --git a/src/garage/tests/s3/streaming_signature.rs b/src/garage/tests/s3/streaming_signature.rs index a86feefc..abd4f1ec 100644 --- a/src/garage/tests/s3/streaming_signature.rs +++ b/src/garage/tests/s3/streaming_signature.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use base64::prelude::*; -use crc32fast::Hasher as Crc32; +use crc_fast::{checksum as crc_checksum, CrcAlgorithm}; use crate::common; use crate::common::ext::CommandExt; @@ -69,9 +69,8 @@ async fn test_putobject_streaming() { { let etag = "\"46cf18a9b447991b450cad3facf5937e\""; - let mut crc32 = Crc32::new(); - crc32.update(&BODY[..]); - let crc32 = BASE64_STANDARD.encode(&u32::to_be_bytes(crc32.finalize())[..]); + let crc32 = crc_checksum(CrcAlgorithm::Crc32IsoHdlc, &BODY[..]) as u32; + let crc32 = BASE64_STANDARD.encode(&u32::to_be_bytes(crc32)[..]); let mut headers = HashMap::new(); headers.insert("x-amz-checksum-crc32".to_owned(), crc32.clone()); @@ -129,7 +128,8 @@ async fn test_putobject_streaming_unsigned_trailer() { let mut headers = HashMap::new(); headers.insert("content-type".to_owned(), content_type.to_owned()); - let empty_crc32 = BASE64_STANDARD.encode(&u32::to_be_bytes(Crc32::new().finalize())[..]); + let empty_crc32 = crc_checksum(CrcAlgorithm::Crc32IsoHdlc, &[]) as u32; + let empty_crc32 = BASE64_STANDARD.encode(&u32::to_be_bytes(empty_crc32)[..]); let res = ctx .custom_request @@ -180,9 +180,8 @@ async fn test_putobject_streaming_unsigned_trailer() { { let etag = "\"46cf18a9b447991b450cad3facf5937e\""; - let mut crc32 = Crc32::new(); - crc32.update(&BODY[..]); - let crc32 = BASE64_STANDARD.encode(&u32::to_be_bytes(crc32.finalize())[..]); + let crc32 = crc_checksum(CrcAlgorithm::Crc32IsoHdlc, &BODY[..]) as u32; + let crc32 = BASE64_STANDARD.encode(&u32::to_be_bytes(crc32)[..]); // try sending with wrong crc32, check that it fails let err_res = ctx diff --git a/src/garage/tests/s3/website.rs b/src/garage/tests/s3/website.rs index bbac3de5..d31d9a77 100644 --- a/src/garage/tests/s3/website.rs +++ b/src/garage/tests/s3/website.rs @@ -5,7 +5,10 @@ use crate::json_body; use assert_json_diff::assert_json_eq; use aws_sdk_s3::{ primitives::ByteStream, - types::{CorsConfiguration, CorsRule, ErrorDocument, IndexDocument, WebsiteConfiguration}, + types::{ + Condition, CorsConfiguration, CorsRule, ErrorDocument, IndexDocument, Protocol, Redirect, + RoutingRule, WebsiteConfiguration, + }, }; use http::{Request, StatusCode}; use http_body_util::BodyExt; @@ -455,12 +458,18 @@ async fn test_website_check_domain() { res_body, json!({ "code": "InvalidRequest", - "message": "Bad request: No domain query string found", + "message": "Bad request: Missing argument `domain` for endpoint", "region": "garage-integ-test", "path": "/check", }) ); + // FIXME: Edge case with empty domain + // Currently, empty domain is interpreted as an absent parameter + // due to logic in router_macros.rs, so this test fails. + // Maybe we want empty parameters to be acceptable? But that might + // break a lot of S3 stuff. + /* let admin_req = || { Request::builder() .method("GET") @@ -484,6 +493,7 @@ async fn test_website_check_domain() { "path": "/check", }) ); + */ let admin_req = || { Request::builder() @@ -534,6 +544,491 @@ async fn test_website_check_domain() { ); } +#[tokio::test] +async fn test_website_redirect_full_bucket() { + const BCKT_NAME: &str = "my-redirect-full"; + let ctx = common::context(); + let bucket = ctx.create_bucket(BCKT_NAME); + + let conf = WebsiteConfiguration::builder() + .routing_rules( + RoutingRule::builder() + .condition(Condition::builder().key_prefix_equals("").build()) + .redirect( + Redirect::builder() + .protocol(Protocol::Https) + .host_name("other.tld") + .replace_key_prefix_with("") + .build(), + ) + .build(), + ) + .build(); + + ctx.client + .put_bucket_website() + .bucket(&bucket) + .website_configuration(conf) + .send() + .await + .unwrap(); + + { + let req = Request::builder() + .method("GET") + .uri(format!("http://127.0.0.1:{}/my-path", ctx.garage.web_port)) + .header("Host", format!("{}.web.garage", BCKT_NAME)) + .body(Body::new(Bytes::new())) + .unwrap(); + + let client = Client::builder(TokioExecutor::new()).build_http(); + let resp = client.request(req).await.unwrap(); + assert_eq!(resp.status(), StatusCode::FOUND); + assert_eq!( + resp.headers() + .get(hyper::header::LOCATION) + .unwrap() + .to_str() + .unwrap(), + "https://other.tld/my-path" + ); + } + + { + let req = Request::builder() + .method("GET") + .uri(format!("http://127.0.0.1:{}/my-path/", ctx.garage.web_port)) + .header("Host", format!("{}.web.garage", BCKT_NAME)) + .body(Body::new(Bytes::new())) + .unwrap(); + + let client = Client::builder(TokioExecutor::new()).build_http(); + let resp = client.request(req).await.unwrap(); + assert_eq!(resp.status(), StatusCode::FOUND); + assert_eq!( + resp.headers() + .get(hyper::header::LOCATION) + .unwrap() + .to_str() + .unwrap(), + "https://other.tld/my-path/" + ); + } + + { + let req = Request::builder() + .method("GET") + .uri(format!("http://127.0.0.1:{}/", ctx.garage.web_port)) + .header("Host", format!("{}.web.garage", BCKT_NAME)) + .body(Body::new(Bytes::new())) + .unwrap(); + + let client = Client::builder(TokioExecutor::new()).build_http(); + let resp = client.request(req).await.unwrap(); + assert_eq!(resp.status(), StatusCode::FOUND); + assert_eq!( + resp.headers() + .get(hyper::header::LOCATION) + .unwrap() + .to_str() + .unwrap(), + "https://other.tld/" + ); + } +} + +#[tokio::test] +async fn test_website_redirect() { + const BCKT_NAME: &str = "my-redirect"; + let ctx = common::context(); + let bucket = ctx.create_bucket(BCKT_NAME); + + ctx.client + .put_object() + .bucket(&bucket) + .key("index.html") + .body(ByteStream::from_static(b"index")) + .send() + .await + .unwrap(); + ctx.client + .put_object() + .bucket(&bucket) + .key("404.html") + .body(ByteStream::from_static(b"main 404")) + .send() + .await + .unwrap(); + ctx.client + .put_object() + .bucket(&bucket) + .key("static-file") + .body(ByteStream::from_static(b"static file")) + .send() + .await + .unwrap(); + + let mut conf = WebsiteConfiguration::builder() + .index_document( + IndexDocument::builder() + .suffix("home.html") + .build() + .unwrap(), + ) + .error_document(ErrorDocument::builder().key("404.html").build().unwrap()); + + for (prefix, condition) in [("unconditional", false), ("conditional", true)] { + let code = condition.then(|| "404".to_string()); + conf = conf + // simple redirect + .routing_rules( + RoutingRule::builder() + .condition( + Condition::builder() + .set_http_error_code_returned_equals(code.clone()) + .key_prefix_equals(format!("{prefix}/redirect-prefix/")) + .build(), + ) + .redirect( + Redirect::builder() + .http_redirect_code("302") + .replace_key_prefix_with("other-prefix/") + .build(), + ) + .build(), + ) + .routing_rules( + RoutingRule::builder() + .condition( + Condition::builder() + .set_http_error_code_returned_equals(code.clone()) + .key_prefix_equals(format!("{prefix}/redirect-prefix-307/")) + .build(), + ) + .redirect( + Redirect::builder() + .http_redirect_code("307") + .replace_key_prefix_with("other-prefix/") + .build(), + ) + .build(), + ) + // simple redirect + .routing_rules( + RoutingRule::builder() + .condition( + Condition::builder() + .set_http_error_code_returned_equals(code.clone()) + .key_prefix_equals(format!("{prefix}/redirect-fixed/")) + .build(), + ) + .redirect( + Redirect::builder() + .http_redirect_code("302") + .replace_key_with("fixed_key") + .build(), + ) + .build(), + ) + // stream other file + .routing_rules( + RoutingRule::builder() + .condition( + Condition::builder() + .set_http_error_code_returned_equals(code.clone()) + .key_prefix_equals(format!("{prefix}/stream-fixed/")) + .build(), + ) + .redirect( + Redirect::builder() + .http_redirect_code("200") + .replace_key_with("static-file") + .build(), + ) + .build(), + ) + // stream other file as error + .routing_rules( + RoutingRule::builder() + .condition( + Condition::builder() + .set_http_error_code_returned_equals(code.clone()) + .key_prefix_equals(format!("{prefix}/stream-404/")) + .build(), + ) + .redirect( + Redirect::builder() + .http_redirect_code("404") + .replace_key_with("static-file") + .build(), + ) + .build(), + ) + // fail to stream other file + .routing_rules( + RoutingRule::builder() + .condition( + Condition::builder() + .set_http_error_code_returned_equals(code.clone()) + .key_prefix_equals(format!("{prefix}/stream-missing/")) + .build(), + ) + .redirect( + Redirect::builder() + .http_redirect_code("200") + .replace_key_with("missing-file") + .build(), + ) + .build(), + ); + } + let conf = conf.build(); + + ctx.client + .put_bucket_website() + .bucket(&bucket) + .website_configuration(conf.clone()) + .send() + .await + .unwrap(); + + let stored_cfg = ctx + .client + .get_bucket_website() + .bucket(&bucket) + .send() + .await + .unwrap(); + assert_eq!(stored_cfg.index_document, conf.index_document); + assert_eq!(stored_cfg.error_document, conf.error_document); + assert_eq!(stored_cfg.routing_rules, conf.routing_rules); + + let req = |path| { + Request::builder() + .method("GET") + .uri(format!( + "http://127.0.0.1:{}/{}/path", + ctx.garage.web_port, path + )) + .header("Host", format!("{}.web.garage", BCKT_NAME)) + .body(Body::new(Bytes::new())) + .unwrap() + }; + + test_redirect_helper("unconditional", true, &req).await; + test_redirect_helper("conditional", true, &req).await; + for prefix in ["unconditional", "conditional"] { + for rule_path in [ + "redirect-prefix", + "redirect-prefix-307", + "redirect-fixed", + "stream-fixed", + "stream-404", + "stream-missing", + ] { + ctx.client + .put_object() + .bucket(&bucket) + .key(format!("{prefix}/{rule_path}/path")) + .body(ByteStream::from_static(b"i exist")) + .send() + .await + .unwrap(); + } + } + test_redirect_helper("unconditional", true, &req).await; + test_redirect_helper("conditional", false, &req).await; +} + +async fn test_redirect_helper( + prefix: &str, + should_see_redirect: bool, + req: impl Fn(String) -> Request>, +) { + use http::header; + let client = Client::builder(TokioExecutor::new()).build_http(); + let expected_body = b"i exist".as_ref(); + + let resp = client + .request(req(format!("{prefix}/redirect-prefix"))) + .await + .unwrap(); + if should_see_redirect { + assert_eq!(resp.status(), StatusCode::FOUND); + assert_eq!( + resp.headers() + .get(header::LOCATION) + .unwrap() + .to_str() + .unwrap(), + "/other-prefix/path" + ); + assert!(resp + .into_body() + .collect() + .await + .unwrap() + .to_bytes() + .is_empty()); + } else { + assert_eq!(resp.status(), StatusCode::OK); + assert!(resp.headers().get(header::LOCATION).is_none()); + assert_eq!( + resp.into_body().collect().await.unwrap().to_bytes(), + expected_body, + ); + } + + let resp = client + .request(req(format!("{prefix}/redirect-prefix-307"))) + .await + .unwrap(); + if should_see_redirect { + assert_eq!(resp.status(), StatusCode::TEMPORARY_REDIRECT); + assert_eq!( + resp.headers() + .get(header::LOCATION) + .unwrap() + .to_str() + .unwrap(), + "/other-prefix/path" + ); + assert!(resp + .into_body() + .collect() + .await + .unwrap() + .to_bytes() + .is_empty()); + } else { + assert_eq!(resp.status(), StatusCode::OK); + assert!(resp.headers().get(header::LOCATION).is_none()); + assert_eq!( + resp.into_body().collect().await.unwrap().to_bytes(), + expected_body, + ); + } + + let resp = client + .request(req(format!("{prefix}/redirect-fixed"))) + .await + .unwrap(); + if should_see_redirect { + assert_eq!(resp.status(), StatusCode::FOUND); + assert_eq!( + resp.headers() + .get(header::LOCATION) + .unwrap() + .to_str() + .unwrap(), + "/fixed_key" + ); + assert!(resp + .into_body() + .collect() + .await + .unwrap() + .to_bytes() + .is_empty()); + } else { + assert_eq!(resp.status(), StatusCode::OK); + assert!(resp.headers().get(header::LOCATION).is_none()); + assert_eq!( + resp.into_body().collect().await.unwrap().to_bytes(), + expected_body, + ); + } + let resp = client + .request(req(format!("{prefix}/stream-fixed"))) + .await + .unwrap(); + if should_see_redirect { + assert_eq!(resp.status(), StatusCode::OK); + assert!(resp.headers().get(header::LOCATION).is_none()); + assert_eq!( + resp.into_body().collect().await.unwrap().to_bytes(), + b"static file".as_ref(), + ); + } else { + assert_eq!(resp.status(), StatusCode::OK); + assert!(resp.headers().get(header::LOCATION).is_none()); + assert_eq!( + resp.into_body().collect().await.unwrap().to_bytes(), + expected_body, + ); + } + let resp = client + .request(req(format!("{prefix}/stream-404"))) + .await + .unwrap(); + if should_see_redirect { + assert_eq!(resp.status(), StatusCode::NOT_FOUND); + assert!(resp.headers().get(header::LOCATION).is_none()); + assert_eq!( + resp.into_body().collect().await.unwrap().to_bytes(), + b"static file".as_ref(), + ); + } else { + assert_eq!(resp.status(), StatusCode::OK); + assert!(resp.headers().get(header::LOCATION).is_none()); + assert_eq!( + resp.into_body().collect().await.unwrap().to_bytes(), + expected_body, + ); + } + let resp = client + .request(req(format!("{prefix}/stream-404"))) + .await + .unwrap(); + if should_see_redirect { + assert_eq!(resp.status(), StatusCode::NOT_FOUND); + assert!(resp.headers().get(header::LOCATION).is_none()); + assert_eq!( + resp.into_body().collect().await.unwrap().to_bytes(), + b"static file".as_ref(), + ); + } else { + assert_eq!(resp.status(), StatusCode::OK); + assert!(resp.headers().get(header::LOCATION).is_none()); + assert_eq!( + resp.into_body().collect().await.unwrap().to_bytes(), + expected_body, + ); + } +} + +#[tokio::test] +async fn test_website_invalid_redirect() { + const BCKT_NAME: &str = "my-invalid-redirect"; + let ctx = common::context(); + let bucket = ctx.create_bucket(BCKT_NAME); + + let conf = WebsiteConfiguration::builder() + .routing_rules( + RoutingRule::builder() + .condition(Condition::builder().key_prefix_equals("").build()) + .redirect( + Redirect::builder() + .protocol(Protocol::Https) + .host_name("other.tld") + .replace_key_prefix_with("") + // we don't allow 200 with hostname + .http_redirect_code("200") + .build(), + ) + .build(), + ) + .build(); + + ctx.client + .put_bucket_website() + .bucket(&bucket) + .website_configuration(conf) + .send() + .await + .unwrap_err(); +} + #[tokio::test] async fn test_website_puny() { const BCKT_NAME: &str = "xn--pda.eu"; diff --git a/src/garage/tracing_setup.rs b/src/garage/tracing_setup.rs index 55fc4094..d5568c82 100644 --- a/src/garage/tracing_setup.rs +++ b/src/garage/tracing_setup.rs @@ -1,37 +1,53 @@ -use std::time::Duration; +pub use telemetry::init_tracing; -use opentelemetry::sdk::{ - trace::{self, IdGenerator, Sampler}, - Resource, -}; -use opentelemetry::KeyValue; -use opentelemetry_otlp::WithExportConfig; +#[cfg(not(feature = "telemetry-otlp"))] +mod telemetry { + use garage_util::data::Uuid; + use garage_util::error::Error; -use garage_util::data::*; -use garage_util::error::*; - -pub fn init_tracing(export_to: &str, node_id: Uuid) -> Result<(), Error> { - let node_id = hex::encode(&node_id.as_slice()[..8]); - - opentelemetry_otlp::new_pipeline() - .tracing() - .with_exporter( - opentelemetry_otlp::new_exporter() - .tonic() - .with_endpoint(export_to) - .with_timeout(Duration::from_secs(3)), - ) - .with_trace_config( - trace::config() - .with_id_generator(IdGenerator::default()) - .with_sampler(Sampler::AlwaysOn) - .with_resource(Resource::new(vec![ - KeyValue::new("service.name", "garage"), - KeyValue::new("service.instance.id", node_id), - ])), - ) - .install_batch(opentelemetry::runtime::Tokio) - .ok_or_message("Unable to initialize tracing")?; - - Ok(()) + pub fn init_tracing(_: &str, _: Uuid) -> Result<(), Error> { + error!("Garage was built without OTLP exporter, admin.trace_sink is ignored."); + Ok(()) + } +} + +#[cfg(feature = "telemetry-otlp")] +mod telemetry { + use std::time::Duration; + + use opentelemetry::sdk::{ + trace::{self, IdGenerator, Sampler}, + Resource, + }; + use opentelemetry::KeyValue; + use opentelemetry_otlp::WithExportConfig; + + use garage_util::data::*; + use garage_util::error::*; + + pub fn init_tracing(export_to: &str, node_id: Uuid) -> Result<(), Error> { + let node_id = hex::encode(&node_id.as_slice()[..8]); + + opentelemetry_otlp::new_pipeline() + .tracing() + .with_exporter( + opentelemetry_otlp::new_exporter() + .tonic() + .with_endpoint(export_to) + .with_timeout(Duration::from_secs(3)), + ) + .with_trace_config( + trace::config() + .with_id_generator(IdGenerator::default()) + .with_sampler(Sampler::AlwaysOn) + .with_resource(Resource::new(vec![ + KeyValue::new("service.name", "garage"), + KeyValue::new("service.instance.id", node_id), + ])), + ) + .install_batch(opentelemetry::runtime::Tokio) + .ok_or_message("Unable to initialize tracing")?; + + Ok(()) + } } diff --git a/src/k2v-client/Cargo.toml b/src/k2v-client/Cargo.toml index bbd09b19..ccbc7aa3 100644 --- a/src/k2v-client/Cargo.toml +++ b/src/k2v-client/Cargo.toml @@ -1,7 +1,10 @@ [package] name = "k2v-client" version = "0.0.4" -authors = ["Trinity Pointard ", "Alex Auvolat "] +authors = [ + "Trinity Pointard ", + "Alex Auvolat ", +] edition = "2018" license = "AGPL-3.0" description = "Client library for the Garage K2V protocol" diff --git a/src/k2v-client/lib.rs b/src/k2v-client/lib.rs index fe8fd3e0..d521422a 100644 --- a/src/k2v-client/lib.rs +++ b/src/k2v-client/lib.rs @@ -371,7 +371,7 @@ impl K2vClient { use sha2::{Digest, Sha256}; let mut hasher = Sha256::new(); hasher.update(req.body()); - let hash = hex::encode(&hasher.finalize()); + let hash = hex::encode(hasher.finalize()); req.headers_mut() .insert(AMZ_CONTENT_SHA256, hash.try_into().unwrap()); @@ -647,15 +647,15 @@ struct PollRangeResponse { } impl<'a> Filter<'a> { - fn query_params(&self) -> Vec<(&'static str, std::borrow::Cow)> { + fn query_params(&self) -> Vec<(&'static str, std::borrow::Cow<'_, str>)> { let mut res = Vec::<(&'static str, std::borrow::Cow)>::with_capacity(8); - if let Some(start) = self.start.as_deref() { + if let Some(start) = self.start { res.push(("start", start.into())); } - if let Some(end) = self.end.as_deref() { + if let Some(end) = self.end { res.push(("end", end.into())); } - if let Some(prefix) = self.prefix.as_deref() { + if let Some(prefix) = self.prefix { res.push(("prefix", prefix.into())); } if let Some(limit) = &self.limit { diff --git a/src/model/Cargo.toml b/src/model/Cargo.toml index 289c0024..0f8d040c 100644 --- a/src/model/Cargo.toml +++ b/src/model/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "garage_model" -version = "1.3.1" +version = "2.2.0" authors = ["Alex Auvolat "] edition = "2018" license = "AGPL-3.0" @@ -21,6 +21,7 @@ garage_block.workspace = true garage_util.workspace = true garage_net.workspace = true +argon2.workspace = true async-trait.workspace = true blake2.workspace = true chrono.workspace = true @@ -40,8 +41,8 @@ futures.workspace = true tokio.workspace = true [features] -default = [ "lmdb", "sqlite" ] -k2v = [ "garage_util/k2v" ] -lmdb = [ "garage_db/lmdb" ] -sqlite = [ "garage_db/sqlite" ] -fjall = [ "garage_db/fjall" ] +default = ["lmdb", "sqlite"] +k2v = ["garage_util/k2v"] +lmdb = ["garage_db/lmdb"] +sqlite = ["garage_db/sqlite"] +fjall = ["garage_db/fjall"] diff --git a/src/model/admin_token_table.rs b/src/model/admin_token_table.rs new file mode 100644 index 00000000..4946842b --- /dev/null +++ b/src/model/admin_token_table.rs @@ -0,0 +1,191 @@ +use base64::prelude::*; + +use garage_util::crdt::{self, Crdt}; +use garage_util::time::now_msec; + +use garage_table::{EmptyKey, Entry, TableSchema}; + +pub use crate::key_table::KeyFilter; + +mod v2 { + use garage_util::crdt; + use serde::{Deserialize, Serialize}; + + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct AdminApiToken { + /// An admin API token is a bearer token of the following form: + /// `.` + /// Only the prefix is saved here, it is used as an identifier. + /// The entire API token is hashed and saved in `token_hash` in `state`. + pub prefix: String, + + /// If the token is not deleted, its parameters + pub state: crdt::Deletable, + } + + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct AdminApiTokenParams { + /// Creation date + pub created: u64, + + /// The entire API token hashed as a password + pub token_hash: String, + + /// User-defined name + pub name: crdt::Lww, + + /// The optional time of expiration of the token + pub expiration: crdt::Lww>, + + /// The scope of the token, i.e. list of authorized admin API calls + pub scope: crdt::Lww, + } + + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct AdminApiTokenScope(pub Vec); + + impl garage_util::migrate::InitialFormat for AdminApiToken { + const VERSION_MARKER: &'static [u8] = b"G2admtok"; + } +} + +pub use v2::*; + +impl Crdt for AdminApiTokenParams { + fn merge(&mut self, o: &Self) { + self.name.merge(&o.name); + self.expiration.merge(&o.expiration); + self.scope.merge(&o.scope); + } +} + +impl Crdt for AdminApiToken { + fn merge(&mut self, other: &Self) { + self.state.merge(&other.state); + } +} + +impl Crdt for AdminApiTokenScope { + fn merge(&mut self, other: &Self) { + self.0.retain(|x| other.0.contains(x)); + } +} + +impl AdminApiToken { + /// Create a new admin API token. + /// Returns the AdminApiToken object, which contains the hashed bearer token, + /// as well as the plaintext bearer token. + pub fn new(name: &str) -> (Self, String) { + use argon2::{ + password_hash::{rand_core::OsRng, PasswordHasher, SaltString}, + Argon2, + }; + + let prefix = hex::encode(&rand::random::<[u8; 12]>()[..]); + let secret = BASE64_URL_SAFE_NO_PAD.encode(&rand::random::<[u8; 32]>()[..]); + let token = format!("{}.{}", prefix, secret); + + let salt = SaltString::generate(&mut OsRng); + let argon2 = Argon2::default(); + let hashed_token = argon2 + .hash_password(token.as_bytes(), &salt) + .expect("could not hash admin API token") + .to_string(); + + let ret = AdminApiToken { + prefix, + state: crdt::Deletable::present(AdminApiTokenParams { + created: now_msec(), + token_hash: hashed_token, + name: crdt::Lww::new(name.to_string()), + expiration: crdt::Lww::new(None), + scope: crdt::Lww::new(AdminApiTokenScope(vec!["*".to_string()])), + }), + }; + + (ret, token) + } + + pub fn delete(prefix: String) -> Self { + Self { + prefix, + state: crdt::Deletable::Deleted, + } + } + + /// Returns true if this represents a deleted admin token + pub fn is_deleted(&self) -> bool { + self.state.is_deleted() + } + + /// Returns an option representing the params (None if in deleted state) + pub fn params(&self) -> Option<&AdminApiTokenParams> { + self.state.as_option() + } + + /// Mutable version of `.state()` + pub fn params_mut(&mut self) -> Option<&mut AdminApiTokenParams> { + self.state.as_option_mut() + } + + /// Scope, if not deleted, or empty slice + pub fn scope(&self) -> &[String] { + self.state + .as_option() + .map(|x| &x.scope.get().0[..]) + .unwrap_or_default() + } +} + +impl AdminApiTokenParams { + pub fn is_expired(&self, ts_now: u64) -> bool { + match *self.expiration.get() { + None => false, + Some(exp) => ts_now >= exp, + } + } + + pub fn has_scope(&self, endpoint: &str) -> bool { + self.scope.get().0.iter().any(|x| x == "*" || x == endpoint) + } +} + +impl Entry for AdminApiToken { + fn partition_key(&self) -> &EmptyKey { + &EmptyKey + } + fn sort_key(&self) -> &String { + &self.prefix + } + fn is_tombstone(&self) -> bool { + self.is_deleted() + } +} + +pub struct AdminApiTokenTable; + +impl TableSchema for AdminApiTokenTable { + const TABLE_NAME: &'static str = "admin_token"; + + type P = EmptyKey; + type S = String; + type E = AdminApiToken; + type Filter = KeyFilter; + type Precondition = (); + + fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool { + match filter { + KeyFilter::Deleted(df) => df.apply(entry.state.is_deleted()), + KeyFilter::MatchesAndNotDeleted(pat) => { + let pat = pat.to_lowercase(); + entry + .params() + .map(|p| { + entry.prefix.to_lowercase().starts_with(&pat) + || p.name.get().to_lowercase() == pat + }) + .unwrap_or(false) + } + } + } +} diff --git a/src/model/bucket_alias_table.rs b/src/model/bucket_alias_table.rs index 276d0d1c..30ea593c 100644 --- a/src/model/bucket_alias_table.rs +++ b/src/model/bucket_alias_table.rs @@ -61,6 +61,7 @@ impl TableSchema for BucketAliasTable { type S = String; type E = BucketAlias; type Filter = DeletedFilter; + type Precondition = (); fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool { filter.apply(entry.is_deleted()) diff --git a/src/model/bucket_table.rs b/src/model/bucket_table.rs index f1cc032e..7809f5da 100644 --- a/src/model/bucket_table.rs +++ b/src/model/bucket_table.rs @@ -119,7 +119,122 @@ mod v08 { impl garage_util::migrate::InitialFormat for Bucket {} } -pub use v08::*; +mod v2 { + use crate::permission::BucketKeyPerm; + use garage_util::crdt; + use garage_util::data::Uuid; + use serde::{Deserialize, Serialize}; + + use super::v08; + + pub use v08::{BucketQuotas, CorsRule, LifecycleExpiration, LifecycleFilter, LifecycleRule}; + + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct Bucket { + /// ID of the bucket + pub id: Uuid, + /// State, and configuration if not deleted, of the bucket + pub state: crdt::Deletable, + } + + /// Configuration for a bucket + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct BucketParams { + /// Bucket's creation date + pub creation_date: u64, + /// Map of key with access to the bucket, and what kind of access they give + pub authorized_keys: crdt::Map, + + /// Map of aliases that are or have been given to this bucket + /// in the global namespace + /// (not authoritative: this is just used as an indication to + /// map back to aliases when doing ListBuckets) + pub aliases: crdt::LwwMap, + /// Map of aliases that are or have been given to this bucket + /// in namespaces local to keys + /// key = (access key id, alias name) + pub local_aliases: crdt::LwwMap<(String, String), bool>, + + /// Whether this bucket is allowed for website access + /// (under all of its global alias names), + /// and if so, the website configuration XML document + pub website_config: crdt::Lww>, + /// CORS rules + pub cors_config: crdt::Lww>>, + /// Lifecycle configuration + pub lifecycle_config: crdt::Lww>>, + /// Bucket quotas + pub quotas: crdt::Lww, + } + + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct WebsiteConfig { + pub index_document: String, + pub error_document: Option, + // this field is currently unused, but present so adding it in the future doesn't + // need a new migration + pub redirect_all: Option, + pub routing_rules: Vec, + } + + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct RedirectAll { + pub hostname: String, + pub protocol: String, + } + + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct RoutingRule { + pub condition: Option, + pub redirect: Redirect, + } + + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct RedirectCondition { + pub http_error_code: Option, + pub prefix: Option, + } + + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct Redirect { + pub hostname: Option, + pub http_redirect_code: u16, + pub protocol: Option, + pub replace_key_prefix: Option, + pub replace_key: Option, + } + + impl garage_util::migrate::Migrate for Bucket { + const VERSION_MARKER: &'static [u8] = b"G2bkt"; + + type Previous = v08::Bucket; + + fn migrate(old: v08::Bucket) -> Bucket { + Bucket { + id: old.id, + state: old.state.map(|x| BucketParams { + creation_date: x.creation_date, + authorized_keys: x.authorized_keys, + aliases: x.aliases, + local_aliases: x.local_aliases, + website_config: x.website_config.map(|wc_opt| { + wc_opt.map(|wc| WebsiteConfig { + index_document: wc.index_document, + error_document: wc.error_document, + redirect_all: None, + routing_rules: vec![], + }) + }), + cors_config: x.cors_config, + lifecycle_config: x.lifecycle_config, + quotas: x.quotas, + }), + } + } + } +} + +pub use v2::*; impl AutoCrdt for BucketQuotas { const WARN_IF_DIFFERENT: bool = true; @@ -256,6 +371,7 @@ impl TableSchema for BucketTable { type S = Uuid; type E = Bucket; type Filter = DeletedFilter; + type Precondition = (); fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool { filter.apply(entry.is_deleted()) diff --git a/src/model/garage.rs b/src/model/garage.rs index f4f6f693..4ba11bc5 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -24,6 +24,7 @@ use crate::s3::mpu_table::*; use crate::s3::object_table::*; use crate::s3::version_table::*; +use crate::admin_token_table::*; use crate::bucket_alias_table::*; use crate::bucket_table::*; use crate::helper; @@ -50,6 +51,8 @@ pub struct Garage { /// The block manager pub block_manager: Arc, + /// Table containing admin API keys + pub admin_token_table: Arc>, /// Table containing buckets pub bucket_table: Arc>, /// Table containing bucket aliases @@ -147,29 +150,30 @@ impl Garage { info!("Initialize membership management system..."); let system = System::new(network_key, replication_factor, consistency_mode, &config)?; - let data_rep_param = TableShardedReplication { - system: system.clone(), - replication_factor: replication_factor.into(), - write_quorum: replication_factor.write_quorum(consistency_mode), - read_quorum: 1, - }; - let meta_rep_param = TableShardedReplication { - system: system.clone(), - replication_factor: replication_factor.into(), - write_quorum: replication_factor.write_quorum(consistency_mode), - read_quorum: replication_factor.read_quorum(consistency_mode), + layout_manager: system.layout_manager.clone(), + consistency_mode, }; let control_rep_param = TableFullReplication { system: system.clone(), + consistency_mode, }; info!("Initialize block manager..."); - let block_manager = BlockManager::new(&db, &config, data_rep_param, system.clone())?; + let block_write_quorum = replication_factor.write_quorum(consistency_mode); + let block_manager = BlockManager::new(&db, &config, block_write_quorum, system.clone())?; block_manager.register_bg_vars(&mut bg_vars); // ---- admin tables ---- + info!("Initialize admin_token_table..."); + let admin_token_table = Table::new( + AdminApiTokenTable, + control_rep_param.clone(), + system.clone(), + &db, + ); + info!("Initialize bucket_table..."); let bucket_table = Table::new(BucketTable, control_rep_param.clone(), system.clone(), &db); @@ -259,6 +263,7 @@ impl Garage { db, system, block_manager, + admin_token_table, bucket_table, bucket_alias_table, key_table, @@ -278,6 +283,7 @@ impl Garage { pub fn spawn_workers(self: &Arc, bg: &BackgroundRunner) -> Result<(), Error> { self.block_manager.spawn_workers(bg); + self.admin_token_table.spawn_workers(bg); self.bucket_table.spawn_workers(bg); self.bucket_alias_table.spawn_workers(bg); self.key_table.spawn_workers(bg); diff --git a/src/model/helper/bucket.rs b/src/model/helper/bucket.rs index e5506d7e..aab13bac 100644 --- a/src/model/helper/bucket.rs +++ b/src/model/helper/bucket.rs @@ -1,7 +1,7 @@ use std::time::Duration; use garage_util::data::*; -use garage_util::error::OkOrMessage; +use garage_util::error::{Error as GarageError, OkOrMessage}; use garage_util::time::*; use garage_table::util::*; @@ -16,104 +16,172 @@ pub struct BucketHelper<'a>(pub(crate) &'a Garage); #[allow(clippy::ptr_arg)] impl<'a> BucketHelper<'a> { - pub async fn resolve_global_bucket_name( + // ================ + // Local functions to find buckets FAST. + // This is only for the fast path in API requests. + // They do not provide the read-after-write guarantee + // when used in conjunction with other operations that + // modify buckets and bucket aliases. + // ================ + + /// Return bucket corresponding to global bucket name, if it exists + /// (and is not a tombstone entry). + /// + /// The name can be of two forms: + /// 1. A global bucket alias + /// 2. The full ID of a bucket encoded in hex + /// + /// Note that there is no possible ambiguity between the two forms, + /// as the maximum length of a bucket name is 63 characters, and the full + /// hex id is 64 chars long. + /// + /// This will not do any network interaction to check the alias and + /// bucket tables, it will only check the local copy of the table. + /// As a consequence, it does not provide read-after-write guarantees. + pub fn resolve_global_bucket_fast( &self, bucket_name: &String, - ) -> Result, Error> { - // Bucket names in Garage are aliases, true bucket identifiers - // are 32-byte UUIDs. This function resolves bucket names into - // their full identifier by looking up in the bucket_alias_table. - // This function also allows buckets to be identified by their - // full UUID (hex-encoded). Here, if the name to be resolved is a - // hex string of the correct length, it is directly parsed as a bucket - // identifier which is returned. There is no risk of this conflicting - // with an actual bucket name: bucket names are max 63 chars long by - // the AWS spec, and hex-encoded UUIDs are 64 chars long. + ) -> Result, GarageError> { let hexbucket = hex::decode(bucket_name.as_str()) .ok() .and_then(|by| Uuid::try_from(&by)); - if let Some(bucket_id) = hexbucket { - Ok(self - .0 - .bucket_table - .get(&EmptyKey, &bucket_id) - .await? - .filter(|x| !x.state.is_deleted()) - .map(|_| bucket_id)) - } else { - Ok(self - .0 - .bucket_alias_table - .get(&EmptyKey, bucket_name) - .await? - .and_then(|x| *x.state.get())) - } + let bucket_id = match hexbucket { + Some(id) => id, + None => { + let alias = self + .0 + .bucket_alias_table + .get_local(&EmptyKey, bucket_name)? + .and_then(|x| *x.state.get()); + match alias { + Some(id) => id, + None => return Ok(None), + } + } + }; + Ok(self + .0 + .bucket_table + .get_local(&EmptyKey, &bucket_id)? + .filter(|x| !x.state.is_deleted())) } + /// Return bucket corresponding to a bucket name from the perspective of + /// a given access key, if it exists (and is not a tombstone entry). + /// + /// The name can be of three forms: + /// 1. A global bucket alias + /// 2. A local bucket alias + /// 3. The full ID of a bucket encoded in hex + /// + /// This will not do any network interaction, it will only check the local + /// copy of the bucket and global alias table. It will also resolve local + /// aliases directly using the data provided in the `api_key` parameter. + /// As a consequence, it does not provide read-after-write guarantees. + /// + /// In case no such bucket is found, this function returns a NoSuchBucket error. #[allow(clippy::ptr_arg)] - pub async fn resolve_bucket(&self, bucket_name: &String, api_key: &Key) -> Result { + pub fn resolve_bucket_fast( + &self, + bucket_name: &String, + api_key: &Key, + ) -> Result { let api_key_params = api_key .state .as_option() .ok_or_message("Key should not be deleted at this point")?; - if let Some(Some(bucket_id)) = api_key_params.local_aliases.get(bucket_name) { - Ok(*bucket_id) - } else { - Ok(self - .resolve_global_bucket_name(bucket_name) - .await? - .ok_or_else(|| Error::NoSuchBucket(bucket_name.to_string()))?) - } + let bucket_opt = + if let Some(Some(bucket_id)) = api_key_params.local_aliases.get(bucket_name) { + self.0 + .bucket_table + .get_local(&EmptyKey, bucket_id)? + .filter(|x| !x.state.is_deleted()) + } else { + self.resolve_global_bucket_fast(bucket_name)? + }; + bucket_opt.ok_or_else(|| Error::NoSuchBucket(bucket_name.to_string())) } - /// Find a bucket by its global alias or a prefix of its uuid - pub async fn admin_get_existing_matching_bucket( + // ================ + // Global functions that do quorum reads/writes, + // for admin operations. + // ================ + + /// This is the same as `resolve_global_bucket_fast`, + /// except that it does quorum reads to ensure consistency. + pub async fn resolve_global_bucket( &self, - pattern: &String, - ) -> Result { - if let Some(uuid) = self.resolve_global_bucket_name(pattern).await? { - return Ok(uuid); - } else if pattern.len() >= 2 { - let hexdec = pattern - .get(..pattern.len() & !1) - .and_then(|x| hex::decode(x).ok()); - if let Some(hex) = hexdec { - let mut start = [0u8; 32]; - start - .as_mut_slice() - .get_mut(..hex.len()) - .ok_or_bad_request("invalid length")? - .copy_from_slice(&hex); - let mut candidates = self + bucket_name: &String, + ) -> Result, GarageError> { + let hexbucket = hex::decode(bucket_name.as_str()) + .ok() + .and_then(|by| Uuid::try_from(&by)); + let bucket_id = match hexbucket { + Some(id) => id, + None => { + let alias = self .0 - .bucket_table - .get_range( - &EmptyKey, - Some(start.into()), - Some(DeletedFilter::NotDeleted), - 10, - EnumerationOrder::Forward, - ) + .bucket_alias_table + .get(&EmptyKey, bucket_name) .await? - .into_iter() - .collect::>(); - candidates.retain(|x| hex::encode(x.id).starts_with(pattern)); - if candidates.len() == 1 { - return Ok(candidates.into_iter().next().unwrap().id); + .and_then(|x| *x.state.get()); + match alias { + Some(id) => id, + None => return Ok(None), } } + }; + Ok(self + .0 + .bucket_table + .get(&EmptyKey, &bucket_id) + .await? + .filter(|x| !x.state.is_deleted())) + } + + /// Return bucket corresponding to a bucket name from the perspective of + /// a given access key, if it exists (and is not a tombstone entry). + /// + /// This is the same as `resolve_bucket_fast`, with the following differences: + /// + /// - this function does quorum reads to ensure consistency. + /// - this function fetches the Key entry from the key table to ensure up-to-date data + /// - this function returns None if the bucket is not found, instead of HelperError::NoSuchBucket + #[allow(clippy::ptr_arg)] + pub async fn resolve_bucket( + &self, + bucket_name: &String, + key_id: &String, + ) -> Result, GarageError> { + let local_alias = self + .0 + .key_table + .get(&EmptyKey, key_id) + .await? + .and_then(|k| k.state.into_option()) + .ok_or_else(|| GarageError::Message(format!("access key {} has been deleted", key_id)))? + .local_aliases + .get(bucket_name) + .copied() + .flatten(); + + if let Some(bucket_id) = local_alias { + Ok(self + .0 + .bucket_table + .get(&EmptyKey, &bucket_id) + .await? + .filter(|x| !x.state.is_deleted())) + } else { + Ok(self.resolve_global_bucket(bucket_name).await?) } - Err(Error::BadRequest(format!( - "Bucket not found / several matching buckets: {}", - pattern - ))) } /// Returns a Bucket if it is present in bucket table, /// even if it is in deleted state. Querying a non-existing /// bucket ID returns an internal error. - pub async fn get_internal_bucket(&self, bucket_id: Uuid) -> Result { + pub(crate) async fn get_internal_bucket(&self, bucket_id: Uuid) -> Result { Ok(self .0 .bucket_table @@ -159,7 +227,7 @@ impl<'a> BucketHelper<'a> { .0 .system .cluster_layout() - .all_nongateway_nodes() + .all_nongateway_nodes()? .to_vec(); let k2vindexes = self .0 diff --git a/src/model/helper/key.rs b/src/model/helper/key.rs index b8a99d55..00d8d5c6 100644 --- a/src/model/helper/key.rs +++ b/src/model/helper/key.rs @@ -3,7 +3,7 @@ use garage_util::error::OkOrMessage; use crate::garage::Garage; use crate::helper::error::*; -use crate::key_table::{Key, KeyFilter}; +use crate::key_table::Key; pub struct KeyHelper<'a>(pub(crate) &'a Garage); @@ -33,33 +33,4 @@ impl<'a> KeyHelper<'a> { .filter(|b| !b.state.is_deleted()) .ok_or_else(|| Error::NoSuchAccessKey(key_id.to_string())) } - - /// Returns a Key if it is present in key table, - /// looking it up by key ID or by a match on its name, - /// only if it is in non-deleted state. - /// Querying a non-existing key ID or a deleted key - /// returns a bad request error. - pub async fn get_existing_matching_key(&self, pattern: &str) -> Result { - let candidates = self - .0 - .key_table - .get_range( - &EmptyKey, - None, - Some(KeyFilter::MatchesAndNotDeleted(pattern.to_string())), - 10, - EnumerationOrder::Forward, - ) - .await? - .into_iter() - .collect::>(); - if candidates.len() != 1 { - Err(Error::BadRequest(format!( - "{} matching keys", - candidates.len() - ))) - } else { - Ok(candidates.into_iter().next().unwrap()) - } - } } diff --git a/src/model/index_counter.rs b/src/model/index_counter.rs index aa13ee7b..bb583e91 100644 --- a/src/model/index_counter.rs +++ b/src/model/index_counter.rs @@ -84,17 +84,16 @@ impl Entry for CounterEntry { impl CounterEntry { pub fn filtered_values(&self, layout: &LayoutHelper) -> HashMap { - let nodes = layout.all_nongateway_nodes(); - self.filtered_values_with_nodes(&nodes) + self.filtered_values_internal(layout.all_nongateway_nodes().ok()) } - pub fn filtered_values_with_nodes(&self, nodes: &[Uuid]) -> HashMap { + fn filtered_values_internal(&self, nodes_opt: Option<&[Uuid]>) -> HashMap { let mut ret = HashMap::new(); for (name, vals) in self.values.iter() { let new_vals = vals .node_values .iter() - .filter(|(n, _)| nodes.contains(n)) + .filter(|(n, _)| nodes_opt.map(|nodes| nodes.contains(n)).unwrap_or(true)) .map(|(_, (_, v))| *v) .collect::>(); if !new_vals.is_empty() { @@ -146,6 +145,7 @@ impl TableSchema for CounterTable { type S = T::CS; type E = CounterEntry; type Filter = (DeletedFilter, Vec); + type Precondition = (); fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool { if filter.0 == DeletedFilter::Any { @@ -153,7 +153,7 @@ impl TableSchema for CounterTable { } let is_tombstone = entry - .filtered_values_with_nodes(&filter.1[..]) + .filtered_values_internal(Some(&filter.1[..])) .iter() .all(|(_, v)| *v == 0); filter.0.apply(is_tombstone) diff --git a/src/model/k2v/item_table.rs b/src/model/k2v/item_table.rs index 9e3ba5a5..4e92c5f6 100644 --- a/src/model/k2v/item_table.rs +++ b/src/model/k2v/item_table.rs @@ -219,6 +219,7 @@ impl TableSchema for K2VItemTable { type S = String; type E = K2VItem; type Filter = ItemFilter; + type Precondition = (); fn updated( &self, diff --git a/src/model/k2v/rpc.rs b/src/model/k2v/rpc.rs index 821f4549..8fcf8309 100644 --- a/src/model/k2v/rpc.rs +++ b/src/model/k2v/rpc.rs @@ -126,7 +126,7 @@ impl K2VRpcHandler { .item_table .data .replication - .storage_nodes(&partition.hash()); + .storage_nodes(&partition.hash())?; who.sort(); self.system @@ -165,7 +165,7 @@ impl K2VRpcHandler { .item_table .data .replication - .storage_nodes(&partition.hash()); + .storage_nodes(&partition.hash())?; who.sort(); call_list.entry(who).or_default().push(InsertedItem { @@ -222,7 +222,7 @@ impl K2VRpcHandler { .item_table .data .replication - .storage_nodes(&poll_key.partition.hash()); + .storage_nodes(&poll_key.partition.hash())?; let rpc = self.system.rpc_helper().try_call_many( &self.endpoint, @@ -233,7 +233,7 @@ impl K2VRpcHandler { timeout_msec, }, RequestStrategy::with_priority(PRIO_NORMAL) - .with_quorum(self.item_table.data.replication.read_quorum()) + .with_quorum(self.item_table.data.replication.read_quorum()?) .send_all_at_once(true) .without_timeout(), ); @@ -283,8 +283,8 @@ impl K2VRpcHandler { .item_table .data .replication - .storage_nodes(&range.partition.hash()); - let quorum = self.item_table.data.replication.read_quorum(); + .storage_nodes(&range.partition.hash())?; + let quorum = self.item_table.data.replication.read_quorum()?; let msg = K2VRpc::PollRange { range, seen_str, @@ -451,10 +451,7 @@ impl K2VRpcHandler { let mut value = self .item_table - .data - .read_entry(&key.partition, &key.sort_key)? - .map(|bytes| self.item_table.data.decode_entry(&bytes[..])) - .transpose()? + .get_local(&key.partition, &key.sort_key)? .unwrap_or_else(|| { K2VItem::new( key.partition.bucket_id, diff --git a/src/model/key_table.rs b/src/model/key_table.rs index efb95f08..addc6ac7 100644 --- a/src/model/key_table.rs +++ b/src/model/key_table.rs @@ -2,6 +2,7 @@ use serde::{Deserialize, Serialize}; use garage_util::crdt::{self, Crdt}; use garage_util::data::*; +use garage_util::time::now_msec; use garage_table::{DeletedFilter, EmptyKey, Entry, TableSchema}; @@ -48,13 +49,82 @@ mod v08 { impl garage_util::migrate::InitialFormat for Key {} } -pub use v08::*; +mod v2 { + use crate::permission::BucketKeyPerm; + use garage_util::crdt; + use garage_util::data::Uuid; + use serde::{Deserialize, Serialize}; + + use super::v08; + + /// An api key + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct Key { + /// The id of the key (immutable), used as partition key + pub key_id: String, + + /// Internal state of the key + pub state: crdt::Deletable, + } + + /// Configuration for a key + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct KeyParams { + /// Key's creation date, if known (older versions of Garage didn't keep track + /// of this information) + pub created: Option, + /// The secret_key associated (immutable) + pub secret_key: String, + + /// Name for the key + pub name: crdt::Lww, + /// The optional time of expiration of the key + pub expiration: crdt::Lww>, + + /// Flag to allow users having this key to create buckets + pub allow_create_bucket: crdt::Lww, + + /// If the key is present: it gives some permissions, + /// a map of bucket IDs (uuids) to permissions. + /// Otherwise no permissions are granted to key + pub authorized_buckets: crdt::Map, + + /// A key can have a local view of buckets names it is + /// the only one to see, this is the namespace for these aliases + pub local_aliases: crdt::LwwMap>, + } + + impl garage_util::migrate::Migrate for Key { + const VERSION_MARKER: &'static [u8] = b"G2key"; + + type Previous = v08::Key; + + fn migrate(old: v08::Key) -> Key { + Key { + key_id: old.key_id, + state: old.state.map(|x| KeyParams { + created: None, + secret_key: x.secret_key, + name: x.name, + expiration: crdt::Lww::raw(0, None), + allow_create_bucket: x.allow_create_bucket, + authorized_buckets: x.authorized_buckets, + local_aliases: x.local_aliases, + }), + } + } + } +} + +pub use v2::*; impl KeyParams { fn new(secret_key: &str, name: &str) -> Self { KeyParams { + created: Some(now_msec()), secret_key: secret_key.to_string(), name: crdt::Lww::new(name.to_string()), + expiration: crdt::Lww::new(None), allow_create_bucket: crdt::Lww::new(false), authorized_buckets: crdt::Map::new(), local_aliases: crdt::LwwMap::new(), @@ -65,6 +135,7 @@ impl KeyParams { impl Crdt for KeyParams { fn merge(&mut self, o: &Self) { self.name.merge(&o.name); + self.expiration.merge(&o.expiration); self.allow_create_bucket.merge(&o.allow_create_bucket); self.authorized_buckets.merge(&o.authorized_buckets); self.local_aliases.merge(&o.local_aliases); @@ -88,7 +159,7 @@ impl Key { return Err("The specified key ID is not a valid Garage key ID (starts with `GK`, followed by 12 hex-encoded bytes)"); } - if secret_key.len() != 64 || hex::decode(&secret_key).is_err() { + if secret_key.len() != 64 || hex::decode(secret_key).is_err() { return Err("The specified secret key is not a valid Garage secret key (composed of 32 hex-encoded bytes)"); } @@ -145,6 +216,15 @@ impl Key { } } +impl KeyParams { + pub fn is_expired(&self, ts_now: u64) -> bool { + match *self.expiration.get() { + None => false, + Some(exp) => ts_now >= exp, + } + } +} + impl Entry for Key { fn partition_key(&self) -> &EmptyKey { &EmptyKey @@ -175,6 +255,7 @@ impl TableSchema for KeyTable { type S = String; type E = Key; type Filter = KeyFilter; + type Precondition = (); fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool { match filter { diff --git a/src/model/lib.rs b/src/model/lib.rs index 1939a7a9..b4dc1e81 100644 --- a/src/model/lib.rs +++ b/src/model/lib.rs @@ -5,6 +5,7 @@ pub mod permission; pub mod index_counter; +pub mod admin_token_table; pub mod bucket_alias_table; pub mod bucket_table; pub mod key_table; diff --git a/src/model/s3/block_ref_table.rs b/src/model/s3/block_ref_table.rs index 57eb7b16..4da920d0 100644 --- a/src/model/s3/block_ref_table.rs +++ b/src/model/s3/block_ref_table.rs @@ -65,6 +65,7 @@ impl TableSchema for BlockRefTable { type S = Uuid; type E = BlockRef; type Filter = DeletedFilter; + type Precondition = (); fn updated( &self, @@ -98,7 +99,7 @@ pub fn block_ref_recount_fn( .upgrade() .ok_or_message("cannot upgrade weak ptr to block_ref_table") .map_err(db::TxError::Abort)?; - Ok(calculate_refcount(&table, tx, block)?) + calculate_refcount(&table, tx, block) }) } diff --git a/src/model/s3/lifecycle_worker.rs b/src/model/s3/lifecycle_worker.rs index af00437e..a400b5bf 100644 --- a/src/model/s3/lifecycle_worker.rs +++ b/src/model/s3/lifecycle_worker.rs @@ -41,6 +41,7 @@ pub struct LifecycleWorker { persister: PersisterShared, } +#[expect(clippy::large_enum_variant)] enum State { Completed(NaiveDate), Running { @@ -368,6 +369,7 @@ async fn process_object( Ok(Skip::NextObject) } +#[expect(clippy::nonminimal_bool)] fn check_size_filter(version_data: &ObjectVersionData, filter: &LifecycleFilter) -> bool { let size = match version_data { ObjectVersionData::Inline(meta, _) | ObjectVersionData::FirstBlock(meta, _) => meta.size, diff --git a/src/model/s3/mpu_table.rs b/src/model/s3/mpu_table.rs index c9f79caf..6353dd8e 100644 --- a/src/model/s3/mpu_table.rs +++ b/src/model/s3/mpu_table.rs @@ -160,8 +160,8 @@ impl Crdt for MpuPart { (x, _) => x, }; self.checksum = match (self.checksum.take(), &other.checksum) { - (None, Some(_)) => other.checksum.clone(), - (Some(x), Some(y)) if x < *y => other.checksum.clone(), + (None, Some(_)) => other.checksum, + (Some(x), Some(y)) if x < *y => other.checksum, (x, _) => x, }; } @@ -179,6 +179,7 @@ impl TableSchema for MultipartUploadTable { type S = EmptyKey; type E = MultipartUpload; type Filter = DeletedFilter; + type Precondition = (); fn updated( &self, diff --git a/src/model/s3/object_table.rs b/src/model/s3/object_table.rs index 6c33b79b..303487bf 100644 --- a/src/model/s3/object_table.rs +++ b/src/model/s3/object_table.rs @@ -257,6 +257,13 @@ mod v010 { /// (compression happens before encryption, whereas for non-encrypted /// objects, compression is handled at the level of the block manager) compressed: bool, + /// Whether the encryption uses an Object Encryption Key derived + /// from the master SSE-C key, instead of the master SSE-C key itself. + /// This is the case of objects created in Garage v2+. + /// This field is kept for compatibility with Garage v2.0.0-beta1, + /// which did not yet implement the v2 module below. + #[serde(default)] + use_oek: bool, }, Plaintext { /// Plain-text headers @@ -277,6 +284,7 @@ mod v010 { pub enum ChecksumAlgorithm { Crc32, Crc32c, + Crc64Nvme, Sha1, Sha256, } @@ -286,6 +294,7 @@ mod v010 { pub enum ChecksumValue { Crc32(#[serde(with = "serde_bytes")] [u8; 4]), Crc32c(#[serde(with = "serde_bytes")] [u8; 4]), + Crc64Nvme(#[serde(with = "serde_bytes")] [u8; 8]), Sha1(#[serde(with = "serde_bytes")] [u8; 20]), Sha256(#[serde(with = "serde_bytes")] [u8; 32]), } @@ -371,7 +380,222 @@ mod v010 { } } -pub use v010::*; +mod v2 { + use garage_util::data::{Hash, Uuid}; + use garage_util::migrate::Migrate; + use serde::{Deserialize, Serialize}; + + use super::v010; + pub use v010::{ChecksumAlgorithm, ChecksumValue}; + + /// An object + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct Object { + /// The bucket in which the object is stored, used as partition key + pub bucket_id: Uuid, + + /// The key at which the object is stored in its bucket, used as sorting key + pub key: String, + + /// The list of currently stored versions of the object + pub(super) versions: Vec, + } + + /// Information about a version of an object + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct ObjectVersion { + /// Id of the version + pub uuid: Uuid, + /// Timestamp of when the object was created + pub timestamp: u64, + /// State of the version + pub state: ObjectVersionState, + } + + /// State of an object version + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub enum ObjectVersionState { + /// The version is being received + Uploading { + /// Indicates whether this is a multipart upload + multipart: bool, + /// Checksum algorithm and algorithm type to use + checksum_algorithm: Option<(ChecksumAlgorithm, ChecksumType)>, + /// Encryption params + headers to be included in the final object + encryption: ObjectVersionEncryption, + }, + /// The version is fully received + Complete(ObjectVersionData), + /// The version uploaded containded errors or the upload was explicitly aborted + Aborted, + } + + /// Data stored in object version + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)] + pub enum ObjectVersionData { + /// The object was deleted, this Version is a tombstone to mark it as such + DeleteMarker, + /// The object is short, it's stored inlined. + /// It is never compressed. For encrypted objects, it is encrypted using + /// AES256-GCM, like the encrypted headers. + Inline(ObjectVersionMeta, #[serde(with = "serde_bytes")] Vec), + /// The object is not short, Hash of first block is stored here, next segments hashes are + /// stored in the version table + FirstBlock(ObjectVersionMeta, Hash), + } + + /// Metadata about the object version + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)] + pub struct ObjectVersionMeta { + /// Size of the object. If object is encrypted/compressed, + /// this is always the size of the unencrypted/uncompressed data + pub size: u64, + /// etag of the object + pub etag: String, + /// Encryption params + headers (encrypted or plaintext) + pub encryption: ObjectVersionEncryption, + } + + /// Encryption information + metadata + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)] + pub enum ObjectVersionEncryption { + SseC { + /// Encrypted serialized ObjectVersionInner struct. + /// This is never compressed, just encrypted using AES256-GCM. + #[serde(with = "serde_bytes")] + inner: Vec, + /// Whether data blocks are compressed in addition to being encrypted + /// (compression happens before encryption, whereas for non-encrypted + /// objects, compression is handled at the level of the block manager) + compressed: bool, + /// Whether the encryption uses an Object Encryption Key derived + /// from the master SSE-C key, instead of the master SSE-C key itself. + /// This is the case of objects created in Garage v2+ + use_oek: bool, + }, + Plaintext { + /// Plain-text headers + inner: ObjectVersionMetaInner, + }, + } + + /// Vector of headers, as tuples of the format (header name, header value) + /// Note: checksum can be Some(_) with checksum_type = None for objects that + /// have been migrated from Garage version before v2.0, as the distinction between + /// full-object and composite checksums was not implemented yet. + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)] + pub struct ObjectVersionMetaInner { + pub headers: HeaderList, + pub checksum: Option, + // checksum_type has to be stored separately, because when migrating + // from older versions of Garage, we can't know the correct value in + // ObjectVersionMetaInner::migrate (because it cannot take an argument + // that says whether the object was multipart or not) + pub checksum_type: Option, + } + + pub type HeaderList = Vec<(String, String)>; + + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Debug, Serialize, Deserialize)] + pub enum ChecksumType { + FullObject, + Composite, + } + + impl garage_util::migrate::Migrate for Object { + const VERSION_MARKER: &'static [u8] = b"G2s3ob"; + + type Previous = v010::Object; + + fn migrate(old: v010::Object) -> Object { + Object { + bucket_id: old.bucket_id, + key: old.key, + versions: old.versions.into_iter().map(migrate_version).collect(), + } + } + } + + fn migrate_version(old: v010::ObjectVersion) -> ObjectVersion { + ObjectVersion { + uuid: old.uuid, + timestamp: old.timestamp, + state: match old.state { + v010::ObjectVersionState::Uploading { + multipart, + checksum_algorithm, + encryption, + } => ObjectVersionState::Uploading { + multipart, + checksum_algorithm: checksum_algorithm.map(|algo| match multipart { + false => (algo, ChecksumType::FullObject), + true => (algo, ChecksumType::Composite), + }), + encryption: migrate_encryption(encryption), + }, + v010::ObjectVersionState::Complete(d) => { + ObjectVersionState::Complete(migrate_data(d)) + } + v010::ObjectVersionState::Aborted => ObjectVersionState::Aborted, + }, + } + } + + fn migrate_data(old: v010::ObjectVersionData) -> ObjectVersionData { + match old { + v010::ObjectVersionData::DeleteMarker => ObjectVersionData::DeleteMarker, + v010::ObjectVersionData::Inline(meta, data) => { + ObjectVersionData::Inline(migrate_meta(meta), data) + } + v010::ObjectVersionData::FirstBlock(meta, fb) => { + ObjectVersionData::FirstBlock(migrate_meta(meta), fb) + } + } + } + + fn migrate_meta(old: v010::ObjectVersionMeta) -> ObjectVersionMeta { + ObjectVersionMeta { + size: old.size, + etag: old.etag, + encryption: migrate_encryption(old.encryption), + } + } + + fn migrate_encryption(old: v010::ObjectVersionEncryption) -> ObjectVersionEncryption { + match old { + v010::ObjectVersionEncryption::SseC { + inner, + compressed, + use_oek, + } => ObjectVersionEncryption::SseC { + inner, + compressed, + use_oek, + }, + v010::ObjectVersionEncryption::Plaintext { inner } => { + ObjectVersionEncryption::Plaintext { + inner: ObjectVersionMetaInner::migrate(inner), + } + } + } + } + + impl Migrate for ObjectVersionMetaInner { + const VERSION_MARKER: &'static [u8] = b"G2s3om"; + + type Previous = v010::ObjectVersionMetaInner; + + fn migrate(old: v010::ObjectVersionMetaInner) -> ObjectVersionMetaInner { + ObjectVersionMetaInner { + headers: old.headers, + checksum: old.checksum, + checksum_type: None, + } + } + } +} + +pub use v2::*; impl Object { /// Initialize an Object struct from parts @@ -487,6 +711,7 @@ impl ChecksumValue { match self { ChecksumValue::Crc32(_) => ChecksumAlgorithm::Crc32, ChecksumValue::Crc32c(_) => ChecksumAlgorithm::Crc32c, + ChecksumValue::Crc64Nvme(_) => ChecksumAlgorithm::Crc64Nvme, ChecksumValue::Sha1(_) => ChecksumAlgorithm::Sha1, ChecksumValue::Sha256(_) => ChecksumAlgorithm::Sha256, } @@ -544,6 +769,16 @@ pub enum ObjectFilter { IsUploading { check_multipart: Option }, } +#[derive(Clone, Debug, Serialize, Deserialize)] +pub enum ObjectPrecondition { + /// Match if the object doesn't exist (or is a tombstone), and this "new" + /// version is newer than the local copy + IsAbsent, + /// Match if the object's last version has a matching etag, and this "new" + /// version is newer than the local copy + HasEtag(String), +} + impl TableSchema for ObjectTable { const TABLE_NAME: &'static str = "object"; @@ -551,6 +786,7 @@ impl TableSchema for ObjectTable { type S = String; type E = Object; type Filter = ObjectFilter; + type Precondition = ObjectPrecondition; fn updated( &self, @@ -647,6 +883,47 @@ impl TableSchema for ObjectTable { .any(|v| v.is_uploading(*check_multipart)), } } + + fn matches_condition(local_entry: Option<&Self::E>, new_entry: &Self::E, condition: &Self::Precondition) -> bool { + let Some(last_new_version) = new_entry.versions().into_iter().rev().find(|version| version.is_complete()) else { + // transactional update must be made with a complete version + return false; + }; + + let last_stored_version = local_entry.and_then(|local_entry| local_entry.versions() + .into_iter() + .rev() + .find(|version| version.is_complete())); + + if last_stored_version.map_or(false, |last_stored_version| last_stored_version.cmp_key() > last_new_version.cmp_key()) { + // our update is older than the newest complete version, we can discard it + return false + } + + match condition { + ObjectPrecondition::IsAbsent => { + let Some(last_stored_version) = last_stored_version else { + // no version stored, the object doesn't exist + return true + }; + !last_stored_version.is_data() + } + ObjectPrecondition::HasEtag(etag) => { + let Some(last_stored_version) = last_stored_version else { + // no version stored, the object doesn't exist + return false + }; + match &last_stored_version.state { + ObjectVersionState::Complete(ObjectVersionData::Inline(meta, _)) + | ObjectVersionState::Complete(ObjectVersionData::FirstBlock(meta, _)) => { + &meta.etag == etag + }, + // last version was a tombstone + _ => false + } + } + } + } } impl CountedItem for Object { diff --git a/src/model/s3/version_table.rs b/src/model/s3/version_table.rs index 45be5af8..485c7298 100644 --- a/src/model/s3/version_table.rs +++ b/src/model/s3/version_table.rs @@ -205,6 +205,7 @@ impl TableSchema for VersionTable { type S = EmptyKey; type E = Version; type Filter = DeletedFilter; + type Precondition = (); fn updated( &self, diff --git a/src/model/snapshot.rs b/src/model/snapshot.rs index 8e8995f9..d66e7935 100644 --- a/src/model/snapshot.rs +++ b/src/model/snapshot.rs @@ -1,5 +1,5 @@ use std::fs; -use std::path::PathBuf; +use std::path::Path; use std::sync::Arc; use std::sync::Mutex; use std::time::{Duration, Instant}; @@ -67,9 +67,9 @@ pub fn snapshot_metadata(garage: &Garage) -> Result<(), Error> { Ok(()) } -fn cleanup_snapshots(snapshots_dir: &PathBuf) -> Result<(), Error> { +fn cleanup_snapshots(snapshots_dir: &Path) -> Result<(), Error> { let mut snapshots = - fs::read_dir(&snapshots_dir)?.collect::, std::io::Error>>()?; + fs::read_dir(snapshots_dir)?.collect::, std::io::Error>>()?; snapshots.retain(|x| x.file_name().len() > 8); snapshots.sort_by_key(|x| x.file_name()); diff --git a/src/net/Cargo.toml b/src/net/Cargo.toml index 71f42c68..0d146ae1 100644 --- a/src/net/Cargo.toml +++ b/src/net/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "garage_net" -version = "1.3.1" +version = "2.2.0" authors = ["Alex Auvolat "] edition = "2018" license = "AGPL-3.0" diff --git a/src/net/bytes_buf.rs b/src/net/bytes_buf.rs index 1d928ffb..e96184ae 100644 --- a/src/net/bytes_buf.rs +++ b/src/net/bytes_buf.rs @@ -130,7 +130,7 @@ impl BytesBuf { /// Return the content as a stream of individual chunks pub fn into_stream(self) -> ByteStream { use futures::stream::StreamExt; - Box::pin(futures::stream::iter(self.buf).map(|x| Ok(x))) + Box::pin(futures::stream::iter(self.buf).map(Ok)) } } @@ -161,7 +161,6 @@ mod test { #[test] fn test_bytes_buf() { let mut buf = BytesBuf::new(); - assert!(buf.len() == 0); assert!(buf.is_empty()); buf.extend(Bytes::from(b"Hello, world!".to_vec())); @@ -176,7 +175,6 @@ mod test { buf.take_all(), Bytes::from(b"Hello, world!1234567890".to_vec()) ); - assert!(buf.len() == 0); assert!(buf.is_empty()); buf.extend(Bytes::from(b"1234567890".to_vec())); @@ -193,7 +191,6 @@ mod test { buf.take_exact(11), Some(Bytes::from(b"llo, world!".to_vec())) ); - assert!(buf.len() == 0); assert!(buf.is_empty()); } } diff --git a/src/net/client.rs b/src/net/client.rs index 20e1dacd..bdb1e6c4 100644 --- a/src/net/client.rs +++ b/src/net/client.rs @@ -180,8 +180,7 @@ impl ClientConn { "Too many inflight requests! RequestID collision. Interrupting previous request." ); let _ = old_ch.send(Box::pin(futures::stream::once(async move { - Err(std::io::Error::new( - std::io::ErrorKind::Other, + Err(std::io::Error::other( "RequestID collision, too many inflight requests", )) }))); diff --git a/src/net/endpoint.rs b/src/net/endpoint.rs index 3ab1048a..af484231 100644 --- a/src/net/endpoint.rs +++ b/src/net/endpoint.rs @@ -87,7 +87,7 @@ where { pub(crate) fn new(netapp: Arc, path: String) -> Self { Self { - _phantom: PhantomData::default(), + _phantom: PhantomData, netapp, path, handler: ArcSwapOption::from(None), diff --git a/src/net/lib.rs b/src/net/lib.rs index 8e30e40f..47cac5d6 100644 --- a/src/net/lib.rs +++ b/src/net/lib.rs @@ -10,7 +10,7 @@ //! //! Of particular interest, read the documentation for the `netapp::NetApp` type, //! the `message::Message` trait, and `proto::RequestPriority` to learn more -//! about message priorization. +//! about message prioritization. //! Also check out the examples to learn how to use this crate. pub mod bytes_buf; diff --git a/src/net/message.rs b/src/net/message.rs index 59afb058..3126d144 100644 --- a/src/net/message.rs +++ b/src/net/message.rs @@ -14,7 +14,7 @@ use crate::util::*; /// Priority of a request (click to read more about priorities). /// -/// This priority value is used to priorize messages +/// This priority value is used to prioritize messages /// in the send queue of the client, and their responses in the send queue of the /// server. Lower values mean higher priority. /// @@ -100,9 +100,9 @@ pub trait Message: Serialize + for<'de> Deserialize<'de> + Send + Sync + 'static // ---- -/// The Req is a helper object used to create requests and attach them +/// The `Req` is a helper object used to create requests and attach them /// a stream of data. If the stream is a fixed Bytes and not a ByteStream, -/// Req is cheaply cloneable to allow the request to be sent to different +/// `Req` is cheaply cloneable to allow the request to be sent to different /// peers (Clone will panic if the stream is a ByteStream). pub struct Req { pub(crate) msg: Arc, @@ -260,7 +260,7 @@ where // ---- -/// The Resp represents a full response from a RPC that may have +/// The `Resp` represents a full response from a RPC that may have /// an attached stream. pub struct Resp { pub(crate) _phantom: PhantomData, @@ -458,11 +458,13 @@ impl ReqEnc { } /// Encoding for responses into a ByteStream: +/// /// IF SUCCESS: /// - 0: u8 /// - msg len: u32 /// - msg [u8; ..] /// - the attached stream as the rest of the encoded stream +/// /// IF ERROR: /// - message length + 1: u8 /// - error code: u8 @@ -493,10 +495,7 @@ impl RespEnc { (res_stream, order_tag) } Err(err) => { - let err = std::io::Error::new( - std::io::ErrorKind::Other, - format!("netapp error: {}", err), - ); + let err = std::io::Error::other(format!("netapp error: {}", err)); ( Box::pin(futures::stream::once(async move { Err(err) })), None, diff --git a/src/net/peering.rs b/src/net/peering.rs index 08378a08..5d942b5c 100644 --- a/src/net/peering.rs +++ b/src/net/peering.rs @@ -81,7 +81,7 @@ impl PeerInfoInternal { // we want to retry connecting self.state = match self.state { PeerConnState::Trying(_) => PeerConnState::Trying(0), - PeerConnState::Waiting(_, _) | PeerConnState::Abandonned => { + PeerConnState::Waiting(_, _) | PeerConnState::Abandoned => { PeerConnState::Waiting(0, Instant::now()) } x @ (PeerConnState::Ourself | PeerConnState::Connected { .. }) => x, @@ -138,7 +138,7 @@ pub enum PeerConnState { Trying(usize), /// We abandoned trying to connect to this peer (too many failed attempts) - Abandonned, + Abandoned, } impl PeerConnState { @@ -236,7 +236,7 @@ impl PeeringManager { ); known_hosts.update_hash(); - let strat = Arc::new(Self { + let strategy = Arc::new(Self { netapp: netapp.clone(), known_hosts: RwLock::new(known_hosts), public_peer_list: ArcSwap::new(Arc::new(Vec::new())), @@ -246,22 +246,22 @@ impl PeeringManager { ping_timeout_millis: DEFAULT_PING_TIMEOUT_MILLIS.into(), }); - strat.update_public_peer_list(&strat.known_hosts.read().unwrap()); + strategy.update_public_peer_list(&strategy.known_hosts.read().unwrap()); - strat.ping_endpoint.set_handler(strat.clone()); - strat.peer_list_endpoint.set_handler(strat.clone()); + strategy.ping_endpoint.set_handler(strategy.clone()); + strategy.peer_list_endpoint.set_handler(strategy.clone()); - let strat2 = strat.clone(); + let strategy2 = strategy.clone(); netapp.on_connected(move |id: NodeID, addr: SocketAddr, is_incoming: bool| { - strat2.on_connected(id, addr, is_incoming); + strategy2.on_connected(id, addr, is_incoming); }); - let strat2 = strat.clone(); + let strategy2 = strategy.clone(); netapp.on_disconnected(move |id: NodeID, is_incoming: bool| { - strat2.on_disconnected(id, is_incoming); + strategy2.on_disconnected(id, is_incoming); }); - strat + strategy } /// Run the full mesh peering strategy. @@ -525,7 +525,7 @@ impl PeeringManager { host.state = match host.state { PeerConnState::Trying(i) => { if i >= CONN_MAX_RETRIES { - PeerConnState::Abandonned + PeerConnState::Abandoned } else { PeerConnState::Waiting(i + 1, Instant::now() + CONN_RETRY_INTERVAL) } diff --git a/src/net/recv.rs b/src/net/recv.rs index 35a6d71a..24961903 100644 --- a/src/net/recv.rs +++ b/src/net/recv.rs @@ -62,7 +62,7 @@ pub(crate) trait RecvLoop: Sync + 'static { trace!( "recv_loop({}): in_progress = {:?}", debug_name, - streams.iter().map(|(id, _)| id).collect::>() + streams.keys().collect::>() ); let mut header_id = [0u8; RequestID::BITS as usize / 8]; @@ -79,10 +79,7 @@ pub(crate) trait RecvLoop: Sync + 'static { if size == CANCEL_REQUEST { if let Some(mut stream) = streams.remove(&id) { - let _ = stream.send(Err(std::io::Error::new( - std::io::ErrorKind::Other, - "netapp: cancel requested", - ))); + stream.send(Err(std::io::Error::other("netapp: cancel requested"))); stream.end(); } self.cancel_handler(id); @@ -92,7 +89,7 @@ pub(crate) trait RecvLoop: Sync + 'static { let has_cont = (size & CHUNK_FLAG_HAS_CONTINUATION) != 0; let is_error = (size & CHUNK_FLAG_ERROR) != 0; let size = (size & CHUNK_LENGTH_MASK) as usize; - let mut next_slice = vec![0; size as usize]; + let mut next_slice = vec![0; size]; read.read_exact(&mut next_slice[..]).await?; let packet = if is_error { @@ -135,7 +132,7 @@ pub(crate) trait RecvLoop: Sync + 'static { // If we cannot put packet in channel, it means that the // receiving end of the channel is disconnected. // We still need to reach eos before dropping this sender - let _ = sender.send(packet); + sender.send(packet); } if has_cont { diff --git a/src/rpc/Cargo.toml b/src/rpc/Cargo.toml index e23f4bca..1c35dcd9 100644 --- a/src/rpc/Cargo.toml +++ b/src/rpc/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "garage_rpc" -version = "1.3.1" +version = "2.2.0" authors = ["Alex Auvolat "] edition = "2018" license = "AGPL-3.0" @@ -48,6 +48,6 @@ tokio.workspace = true opentelemetry.workspace = true [features] -kubernetes-discovery = [ "kube", "k8s-openapi", "schemars" ] -consul-discovery = [ "reqwest", "thiserror" ] -system-libs = [ "sodiumoxide/use-pkg-config" ] +kubernetes-discovery = ["kube", "k8s-openapi", "schemars"] +consul-discovery = ["reqwest", "thiserror"] +system-libs = ["sodiumoxide/use-pkg-config"] diff --git a/src/rpc/consul.rs b/src/rpc/consul.rs index 760e9fcb..9391e220 100644 --- a/src/rpc/consul.rs +++ b/src/rpc/consul.rs @@ -113,7 +113,7 @@ impl ConsulDiscovery { let mut headers = reqwest::header::HeaderMap::new(); headers.insert( "x-consul-token", - reqwest::header::HeaderValue::from_str(&token)?, + reqwest::header::HeaderValue::from_str(token)?, ); builder = builder.default_headers(headers); } @@ -126,38 +126,61 @@ impl ConsulDiscovery { } // ---- READING FROM CONSUL CATALOG ---- - + /// Query Consul for Garage nodes registered under the configured service name. + /// + /// This method supports querying multiple Consul datacenters for WAN or + /// multi-datacenter deployments. If `config.datacenters` is set and non-empty, + /// each listed datacenter is queried and the results are aggregated. Otherwise, + /// only the local datacenter is queried. `config.datacenters` does not need to be set + /// when all the datacenters are on the same LAN, in this case service discovery works normally + /// + /// # Returns + /// A list of `(NodeID, SocketAddr)` pairs corresponding to all valid discovered + /// nodes across the queried datacenters. pub async fn get_consul_nodes(&self) -> Result, ConsulError> { - let url = format!( - "{}/v1/catalog/service/{}", - self.config.consul_http_addr, self.config.service_name - ); - - let http = self.client.get(&url).send().await?; - let entries: Vec = http.json().await?; - let mut ret = vec![]; - for ent in entries { - let ip = ent.address.parse::().ok(); - let pubkey = ent - .meta - .get(&format!("{}-pubkey", META_PREFIX)) - .and_then(|k| hex::decode(k).ok()) - .and_then(|k| NodeID::from_slice(&k[..])); - if let (Some(ip), Some(pubkey)) = (ip, pubkey) { - ret.push((pubkey, SocketAddr::new(ip, ent.service_port))); - } else { - warn!( - "Could not process node spec from Consul: {:?} (invalid IP address or node ID/pubkey)", - ent - ); + + let dcs_to_query: Vec> = match &self.config.datacenters { + dcs if !dcs.is_empty() => dcs.iter().map(|dc| Some(dc.as_str())).collect(), + _ => vec![None], + }; + + for dc in dcs_to_query { + let url = match dc { + Some(datacenter) => format!( + "{}/v1/catalog/service/{}?dc={}", + self.config.consul_http_addr, self.config.service_name, datacenter + ), + None => format!( + "{}/v1/catalog/service/{}", + self.config.consul_http_addr, self.config.service_name + ), + }; + + let http = self.client.get(&url).send().await?; + let entries: Vec = http.json().await?; + + for ent in entries { + let ip = ent.address.parse::().ok(); + let pubkey = ent + .meta + .get(&format!("{}-pubkey", META_PREFIX)) + .and_then(|k| hex::decode(k).ok()) + .and_then(|k| NodeID::from_slice(&k[..])); + if let (Some(ip), Some(pubkey)) = (ip, pubkey) { + ret.push((pubkey, SocketAddr::new(ip, ent.service_port))); + } else { + warn!( + "Could not process node spec from Consul: {:?} (invalid IP address or node ID/pubkey)", + ent + ); + } } } - debug!("Got nodes from Consul: {:?}", ret); + debug!("Got {} nodes from Consul", ret.len()); Ok(ret) } - // ---- PUBLISHING TO CONSUL CATALOG ---- pub async fn publish_consul_service( diff --git a/src/rpc/layout/graph_algo.rs b/src/rpc/layout/graph_algo.rs index 29d4a043..3450b8a1 100644 --- a/src/rpc/layout/graph_algo.rs +++ b/src/rpc/layout/graph_algo.rs @@ -355,7 +355,7 @@ impl Graph { // Remark that the cycle in prev is in the reverse order compared to the cycle // in the graph. Thus the .rev(). - return cycles_prev + cycles_prev .iter() .map(|cycle| { cycle @@ -364,7 +364,7 @@ impl Graph { .map(|id| self.id_to_vertex[*id]) .collect() }) - .collect(); + .collect() } } diff --git a/src/rpc/layout/helper.rs b/src/rpc/layout/helper.rs index c08a5629..7ee0721a 100644 --- a/src/rpc/layout/helper.rs +++ b/src/rpc/layout/helper.rs @@ -4,6 +4,7 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use serde::{Deserialize, Serialize}; use garage_util::data::*; +use garage_util::error::Error; use super::*; use crate::replication_mode::*; @@ -28,7 +29,6 @@ pub struct SyncLayoutDigest { } pub struct LayoutHelper { - replication_factor: ReplicationFactor, consistency_mode: ConsistencyMode, layout: Option, @@ -51,7 +51,6 @@ pub struct LayoutHelper { impl LayoutHelper { pub fn new( - replication_factor: ReplicationFactor, consistency_mode: ConsistencyMode, mut layout: LayoutHistory, mut ack_lock: HashMap, @@ -97,8 +96,7 @@ impl LayoutHelper { // consistency on those). // This value is calculated using quorums to allow progress even // if not all nodes have successfully completed a sync. - let sync_map_min = - layout.calculate_sync_map_min_with_quorum(replication_factor, &all_nongateway_nodes); + let sync_map_min = layout.calculate_sync_map_min_with_quorum(&all_nongateway_nodes); let trackers_hash = layout.calculate_trackers_hash(); let staging_hash = layout.calculate_staging_hash(); @@ -111,7 +109,6 @@ impl LayoutHelper { let is_check_ok = layout.check().is_ok(); LayoutHelper { - replication_factor, consistency_mode, layout: Some(layout), ack_map_min, @@ -134,7 +131,6 @@ impl LayoutHelper { let changed = f(self.layout.as_mut().unwrap()); if changed { *self = Self::new( - self.replication_factor, self.consistency_mode, self.layout.take().unwrap(), std::mem::take(&mut self.ack_lock), @@ -149,12 +145,32 @@ impl LayoutHelper { self.layout.as_ref().unwrap() } - pub fn current(&self) -> &LayoutVersion { - self.inner().current() + /// Returns the current layout version + pub fn current(&self) -> Result<&LayoutVersion, Error> { + if !self.is_check_ok { + return Err(Error::LayoutNotReady); + } + Ok(self.inner().current()) } - pub fn versions(&self) -> &[LayoutVersion] { - &self.inner().versions + /// Returns all layout versions currently active in the cluster + pub fn versions(&self) -> Result<&[LayoutVersion], Error> { + if !self.is_check_ok { + return Err(Error::LayoutNotReady); + } + Ok(&self.inner().versions) + } + + /// Returns the latest layout version for which it is safe to read data from, + /// i.e. the version whose version number is sync_map_min + pub fn read_version(&self) -> Result<&LayoutVersion, Error> { + let sync_min = self.sync_map_min; + let versions = self.versions()?; + Ok(versions + .iter() + .find(|x| x.version == sync_min) + .or(versions.last()) + .unwrap()) } pub fn is_check_ok(&self) -> bool { @@ -163,14 +179,20 @@ impl LayoutHelper { /// Return all nodes that have a role (gateway or storage) /// in one of the currently active layout versions - pub fn all_nodes(&self) -> &[Uuid] { - &self.all_nodes + pub fn all_nodes(&self) -> Result<&[Uuid], Error> { + if !self.is_check_ok { + return Err(Error::LayoutNotReady); + } + Ok(&self.all_nodes) } /// Return all nodes that are configured to store data /// in one of the currently active layout versions - pub fn all_nongateway_nodes(&self) -> &[Uuid] { - &self.all_nongateway_nodes + pub fn all_nongateway_nodes(&self) -> Result<&[Uuid], Error> { + if !self.is_check_ok { + return Err(Error::LayoutNotReady); + } + Ok(&self.all_nongateway_nodes) } pub fn ack_map_min(&self) -> u64 { @@ -181,61 +203,20 @@ impl LayoutHelper { self.sync_map_min } + // ---- helpers for layout synchronization ---- + pub fn sync_digest(&self) -> SyncLayoutDigest { SyncLayoutDigest { - current: self.current().version, + current: self.inner().current().version, ack_map_min: self.ack_map_min(), min_stored: self.inner().min_stored(), } } - pub fn read_nodes_of(&self, position: &Hash) -> Vec { - let sync_min = self.sync_map_min; - let version = self - .versions() - .iter() - .find(|x| x.version == sync_min) - .or(self.versions().last()) - .unwrap(); - version - .nodes_of(position, version.replication_factor) - .collect() - } - - pub fn storage_sets_of(&self, position: &Hash) -> Vec> { - self.versions() - .iter() - .map(|x| x.nodes_of(position, x.replication_factor).collect()) - .collect() - } - - pub fn storage_nodes_of(&self, position: &Hash) -> Vec { - let mut ret = vec![]; - for version in self.versions().iter() { - ret.extend(version.nodes_of(position, version.replication_factor)); - } - ret.sort(); - ret.dedup(); - ret - } - - pub fn current_storage_nodes_of(&self, position: &Hash) -> Vec { - let ver = self.current(); - ver.nodes_of(position, ver.replication_factor).collect() - } - - pub fn trackers_hash(&self) -> Hash { - self.trackers_hash - } - - pub fn staging_hash(&self) -> Hash { - self.staging_hash - } - - pub fn digest(&self) -> RpcLayoutDigest { + pub(crate) fn digest(&self) -> RpcLayoutDigest { RpcLayoutDigest { - current_version: self.current().version, - active_versions: self.versions().len(), + current_version: self.inner().current().version, + active_versions: self.inner().versions.len(), trackers_hash: self.trackers_hash, staging_hash: self.staging_hash, } @@ -279,17 +260,18 @@ impl LayoutHelper { pub(crate) fn update_ack_to_max_free(&mut self, local_node_id: Uuid) -> bool { let max_free = self - .versions() + .inner() + .versions .iter() .map(|x| x.version) - .skip_while(|v| { - self.ack_lock + .find(|v| { + !self + .ack_lock .get(v) .map(|x| x.load(Ordering::Relaxed) == 0) .unwrap_or(true) }) - .next() - .unwrap_or(self.current().version); + .unwrap_or(self.inner().current().version); let changed = self.update(|layout| { layout .update_trackers diff --git a/src/rpc/layout/history.rs b/src/rpc/layout/history.rs index 574c50c2..b4659543 100644 --- a/src/rpc/layout/history.rs +++ b/src/rpc/layout/history.rs @@ -10,7 +10,7 @@ use crate::replication_mode::*; impl LayoutHistory { pub fn new(replication_factor: ReplicationFactor) -> Self { - let version = LayoutVersion::new(replication_factor.into()); + let version = LayoutVersion::new(replication_factor); let staging = LayoutStaging { parameters: Lww::::new(version.parameters), @@ -123,13 +123,9 @@ impl LayoutHistory { } } - pub(crate) fn calculate_sync_map_min_with_quorum( - &self, - replication_factor: ReplicationFactor, - all_nongateway_nodes: &[Uuid], - ) -> u64 { - // This function calculates the minimum layout version from which - // it is safe to read if we want to maintain read-after-write consistency. + /// This function calculates the minimum layout version from which + /// it is safe to read if we want to maintain read-after-write consistency. + pub(crate) fn calculate_sync_map_min_with_quorum(&self, all_nongateway_nodes: &[Uuid]) -> u64 { // In the general case the computation can be a bit expensive so // we try to optimize it in several ways. @@ -139,8 +135,6 @@ impl LayoutHistory { return self.current().version; } - let quorum = replication_factor.write_quorum(ConsistencyMode::Consistent); - let min_version = self.min_stored(); let global_min = self .update_trackers @@ -153,7 +147,16 @@ impl LayoutHistory { // This is represented by reading from the layout with version // number global_min, the smallest layout version for which all nodes // have completed a sync. - if quorum == self.current().replication_factor { + // + // While we currently do not support changing the replication factor + // between layout versions, this calculation is future-proofing for the + // case where this might be possible. + if self + .versions + .iter() + .filter(|v| v.version >= global_min) + .all(|v| v.write_quorum(ConsistencyMode::Consistent) == v.replication_factor) + { return global_min; } @@ -180,9 +183,7 @@ impl LayoutHistory { // Determine set of nodes for partition p in layout version v. // Sort the node set to avoid duplicate computations. - let mut set = v - .nodes_of(&p_hash, v.replication_factor) - .collect::>(); + let mut set = v.nodes_of(&p_hash).collect::>(); set.sort(); // If this set was already processed, skip it. @@ -197,7 +198,8 @@ impl LayoutHistory { .map(|x| self.update_trackers.sync_map.get(x, min_version)) .collect::>(); sync_values.sort(); - let set_min = sync_values[sync_values.len() - quorum]; + let set_min = + sync_values[sync_values.len() - v.write_quorum(ConsistencyMode::Consistent)]; if set_min < current_min { current_min = set_min; } @@ -267,20 +269,9 @@ impl LayoutHistory { changed } - pub fn apply_staged_changes(mut self, version: Option) -> Result<(Self, Message), Error> { - match version { - None => { - let error = r#" -Please pass the new layout version number to ensure that you are writing the correct version of the cluster layout. -To know the correct value of the new layout version, invoke `garage layout show` and review the proposed changes. - "#; - return Err(Error::Message(error.into())); - } - Some(v) => { - if v != self.current().version + 1 { - return Err(Error::Message("Invalid new layout version".into())); - } - } + pub fn apply_staged_changes(mut self, version: u64) -> Result<(Self, Message), Error> { + if version != self.current().version + 1 { + return Err(Error::Message("Invalid new layout version".into())); } // Compute new version and add it to history diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs index bb8000bd..44746caa 100644 --- a/src/rpc/layout/manager.rs +++ b/src/rpc/layout/manager.rs @@ -46,11 +46,11 @@ impl LayoutManager { let cluster_layout = match persist_cluster_layout.load() { Ok(x) => { - if x.current().replication_factor != replication_factor.replication_factor() { + if x.current().replication_factor() != replication_factor { return Err(Error::Message(format!( "Previous cluster layout has replication factor {}, which is different than the one specified in the config file ({}). The previous cluster layout can be purged, if you know what you are doing, simply by deleting the `cluster_layout` file in your metadata directory.", - x.current().replication_factor, - replication_factor.replication_factor() + x.current().replication_factor(), + replication_factor, ))); } x @@ -64,12 +64,8 @@ impl LayoutManager { } }; - let mut cluster_layout = LayoutHelper::new( - replication_factor, - consistency_mode, - cluster_layout, - Default::default(), - ); + let mut cluster_layout = + LayoutHelper::new(consistency_mode, cluster_layout, Default::default()); cluster_layout.update_update_trackers(node_id.into()); let layout = Arc::new(RwLock::new(cluster_layout)); @@ -109,7 +105,7 @@ impl LayoutManager { } pub fn add_table(&self, table_name: &'static str) { - let first_version = self.layout().versions().first().unwrap().version; + let first_version = self.layout().inner().versions.first().unwrap().version; self.table_sync_version .lock() @@ -120,7 +116,7 @@ impl LayoutManager { pub fn sync_table_until(self: &Arc, table_name: &'static str, version: u64) { let mut table_sync_version = self.table_sync_version.lock().unwrap(); *table_sync_version.get_mut(table_name).unwrap() = version; - let sync_until = table_sync_version.iter().map(|(_, v)| *v).min().unwrap(); + let sync_until = *table_sync_version.values().min().unwrap(); drop(table_sync_version); let mut layout = self.layout.write().unwrap(); @@ -143,16 +139,20 @@ impl LayoutManager { // ---- ACK LOCKING ---- - pub fn write_sets_of(self: &Arc, position: &Hash) -> WriteLock>> { + pub fn write_lock_with(self: &Arc, f: F) -> Result, Error> + where + F: FnOnce(&[LayoutVersion]) -> T, + { let layout = self.layout(); - let version = layout.current().version; - let nodes = layout.storage_sets_of(position); + let current_version = layout.current()?.version; + let versions = layout.versions()?; + let value = f(versions); layout .ack_lock - .get(&version) + .get(¤t_version) .unwrap() .fetch_add(1, Ordering::Relaxed); - WriteLock::new(version, self, nodes) + Ok(WriteLock::new(current_version, self, value)) } // ---- INTERNALS --- @@ -163,7 +163,8 @@ impl LayoutManager { let prev_layout_check = layout.is_check_ok(); if !prev_layout_check || adv.check().is_ok() { - if layout.update(|l| l.merge(adv)) { + let changed = layout.update(|l| l.merge(adv)); + if changed { layout.update_update_trackers(self.node_id); if prev_layout_check && !layout.is_check_ok() { panic!("Merged two correct layouts and got an incorrect layout."); @@ -181,7 +182,8 @@ impl LayoutManager { let prev_digest = layout.digest(); if layout.inner().update_trackers != *adv { - if layout.update(|l| l.update_trackers.merge(adv)) { + let changed = layout.update(|l| l.update_trackers.merge(adv)); + if changed { layout.update_update_trackers(self.node_id); assert!(layout.digest() != prev_digest); return Some(layout.inner().update_trackers.clone()); @@ -296,11 +298,11 @@ impl LayoutManager { adv.update_trackers ); - if adv.current().replication_factor != self.replication_factor.replication_factor() { + if adv.current().replication_factor() != self.replication_factor { let msg = format!( "Received a cluster layout from another node with replication factor {}, which is different from what we have in our configuration ({}). Discarding the cluster layout we received.", - adv.current().replication_factor, - self.replication_factor.replication_factor() + adv.current().replication_factor(), + self.replication_factor, ); error!("{}", msg); return Err(Error::Message(msg)); @@ -368,7 +370,7 @@ impl Drop for WriteLock { let layout = self.layout_manager.layout(); // acquire read lock if let Some(counter) = layout.ack_lock.get(&self.layout_version) { let prev_lock = counter.fetch_sub(1, Ordering::Relaxed); - if prev_lock == 1 && layout.current().version > self.layout_version { + if prev_lock == 1 && layout.current().unwrap().version > self.layout_version { drop(layout); // release read lock, write lock will be acquired self.layout_manager.ack_new_version(); } diff --git a/src/rpc/layout/mod.rs b/src/rpc/layout/mod.rs index ce21a524..d18c603d 100644 --- a/src/rpc/layout/mod.rs +++ b/src/rpc/layout/mod.rs @@ -1,5 +1,3 @@ -use std::fmt; - use bytesize::ByteSize; use garage_util::crdt::{AutoCrdt, Crdt}; @@ -295,7 +293,7 @@ mod v010 { pub roles: LwwMap, } - /// The tracker of acknowlegments and data syncs around the cluster + /// The tracker of acknowledgments and data syncs around the cluster #[derive(Clone, Debug, Serialize, Deserialize, Default, PartialEq)] pub struct UpdateTrackers { /// The highest layout version number each node has ack'ed @@ -397,30 +395,6 @@ impl NodeRole { } } -impl fmt::Display for ZoneRedundancy { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - ZoneRedundancy::Maximum => write!(f, "maximum"), - ZoneRedundancy::AtLeast(x) => write!(f, "{}", x), - } - } -} - -impl core::str::FromStr for ZoneRedundancy { - type Err = &'static str; - fn from_str(s: &str) -> Result { - match s { - "none" | "max" | "maximum" => Ok(ZoneRedundancy::Maximum), - x => { - let v = x - .parse::() - .map_err(|_| "zone redundancy must be 'none'/'max' or an integer")?; - Ok(ZoneRedundancy::AtLeast(v)) - } - } - } -} - impl UpdateTracker { fn merge(&mut self, other: &UpdateTracker) -> bool { let mut changed = false; @@ -455,7 +429,7 @@ impl UpdateTracker { } } - pub fn min_among(&self, storage_nodes: &[Uuid], min_version: u64) -> u64 { + fn min_among(&self, storage_nodes: &[Uuid], min_version: u64) -> u64 { storage_nodes .iter() .map(|x| self.get(x, min_version)) diff --git a/src/rpc/layout/test.rs b/src/rpc/layout/test.rs index 5462160b..e6505d87 100644 --- a/src/rpc/layout/test.rs +++ b/src/rpc/layout/test.rs @@ -35,8 +35,8 @@ fn check_against_naive(cl: &LayoutVersion) -> Result { zone_token.insert(z.clone(), 0); } for uuid in cl.nongateway_nodes() { - let z = cl.expect_get_node_zone(&uuid); - let c = cl.expect_get_node_capacity(&uuid); + let z = cl.expect_get_node_zone(uuid); + let c = cl.expect_get_node_capacity(uuid); zone_token.insert( z.to_string(), zone_token[z] + min(NB_PARTITIONS, (c / over_size) as usize), @@ -77,7 +77,7 @@ fn check_against_naive(cl: &LayoutVersion) -> Result { } } - return Ok(false); + Ok(false) } fn show_msg(msg: &Message) { @@ -124,7 +124,7 @@ fn test_assignment() { let mut cl = LayoutHistory::new(ReplicationFactor::new(3).unwrap()); update_layout(&mut cl, &node_capacity_vec, &node_zone_vec, 3); let v = cl.current().version; - let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap(); + let (mut cl, msg) = cl.apply_staged_changes(v + 1).unwrap(); show_msg(&msg); assert_eq!(cl.check(), Ok(())); assert!(check_against_naive(cl.current()).unwrap()); @@ -133,7 +133,7 @@ fn test_assignment() { node_zone_vec = vec!["A", "B", "C", "C", "C", "B", "G", "H", "I"]; update_layout(&mut cl, &node_capacity_vec, &node_zone_vec, 2); let v = cl.current().version; - let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap(); + let (mut cl, msg) = cl.apply_staged_changes(v + 1).unwrap(); show_msg(&msg); assert_eq!(cl.check(), Ok(())); assert!(check_against_naive(cl.current()).unwrap()); @@ -141,7 +141,7 @@ fn test_assignment() { node_capacity_vec = vec![4000, 1000, 2000, 7000, 1000, 1000, 2000, 10000, 2000]; update_layout(&mut cl, &node_capacity_vec, &node_zone_vec, 3); let v = cl.current().version; - let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap(); + let (mut cl, msg) = cl.apply_staged_changes(v + 1).unwrap(); show_msg(&msg); assert_eq!(cl.check(), Ok(())); assert!(check_against_naive(cl.current()).unwrap()); @@ -151,7 +151,7 @@ fn test_assignment() { ]; update_layout(&mut cl, &node_capacity_vec, &node_zone_vec, 1); let v = cl.current().version; - let (cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap(); + let (cl, msg) = cl.apply_staged_changes(v + 1).unwrap(); show_msg(&msg); assert_eq!(cl.check(), Ok(())); assert!(check_against_naive(cl.current()).unwrap()); diff --git a/src/rpc/layout/version.rs b/src/rpc/layout/version.rs index a02fce89..f307f86b 100644 --- a/src/rpc/layout/version.rs +++ b/src/rpc/layout/version.rs @@ -11,12 +11,13 @@ use garage_util::error::*; use super::graph_algo::*; use super::*; +use crate::replication_mode::*; // The Message type will be used to collect information on the algorithm. pub type Message = Vec; impl LayoutVersion { - pub fn new(replication_factor: usize) -> Self { + pub fn new(replication_factor: ReplicationFactor) -> Self { // We set the default zone redundancy to be Maximum, meaning that the maximum // possible value will be used depending on the cluster topology let parameters = LayoutParameters { @@ -25,7 +26,7 @@ impl LayoutVersion { LayoutVersion { version: 0, - replication_factor, + replication_factor: usize::from(replication_factor), partition_size: 0, roles: LwwMap::new(), node_id_vec: Vec::new(), @@ -114,26 +115,35 @@ impl LayoutVersion { } /// Return the n servers in which data for this hash should be replicated - pub fn nodes_of(&self, position: &Hash, n: usize) -> impl Iterator + '_ { - assert_eq!(n, self.replication_factor); - + pub fn nodes_of(&self, position: &Hash) -> impl Iterator + '_ { let data = &self.ring_assignment_data; - let partition_nodes = if data.len() == self.replication_factor * (1 << PARTITION_BITS) { - let partition_idx = self.partition_of(position) as usize; - let partition_start = partition_idx * self.replication_factor; - let partition_end = (partition_idx + 1) * self.replication_factor; - &data[partition_start..partition_end] - } else { - warn!("Ring not yet ready, read/writes will be lost!"); - &[] - }; + if data.len() != self.replication_factor * (1 << PARTITION_BITS) { + panic!(".nodes_of() called on invalid LayoutVersion (this is a bug)"); + } + + let partition_idx = self.partition_of(position) as usize; + let partition_start = partition_idx * self.replication_factor; + let partition_end = (partition_idx + 1) * self.replication_factor; + let partition_nodes = &data[partition_start..partition_end]; partition_nodes .iter() .map(move |i| self.node_id_vec[*i as usize]) } + pub fn replication_factor(&self) -> ReplicationFactor { + ReplicationFactor::new(self.replication_factor).unwrap() + } + + pub fn read_quorum(&self, consistency_mode: ConsistencyMode) -> usize { + self.replication_factor().read_quorum(consistency_mode) + } + + pub fn write_quorum(&self, consistency_mode: ConsistencyMode) -> usize { + self.replication_factor().write_quorum(consistency_mode) + } + // ===================== internal information extractors ====================== pub(crate) fn expect_get_node_capacity(&self, uuid: &Uuid) -> u64 { @@ -823,7 +833,7 @@ impl LayoutVersion { let total_cap_n = self.expect_get_node_capacity(&self.node_id_vec[*n]); let tags_n = (self.node_role(&self.node_id_vec[*n]).ok_or(""))?.tags_string(); table.push(format!( - " {:?}\t{}\t{} ({} new)\t{}\t{} ({:.1}%)", + " {:?}\t[{}]\t{} ({} new)\t{}\t{} ({:.1}%)", self.node_id_vec[*n], tags_n, stored_partitions[*n], diff --git a/src/rpc/replication_mode.rs b/src/rpc/replication_mode.rs index a3a94085..7bb91978 100644 --- a/src/rpc/replication_mode.rs +++ b/src/rpc/replication_mode.rs @@ -38,14 +38,10 @@ impl ReplicationFactor { } } - pub fn replication_factor(&self) -> usize { - self.0 - } - pub fn read_quorum(&self, consistency_mode: ConsistencyMode) -> usize { match consistency_mode { ConsistencyMode::Dangerous | ConsistencyMode::Degraded => 1, - ConsistencyMode::Consistent => self.replication_factor().div_ceil(2), + ConsistencyMode::Consistent => usize::from(*self).div_ceil(2), } } @@ -53,7 +49,7 @@ impl ReplicationFactor { match consistency_mode { ConsistencyMode::Dangerous => 1, ConsistencyMode::Degraded | ConsistencyMode::Consistent => { - (self.replication_factor() + 1) - self.read_quorum(ConsistencyMode::Consistent) + (usize::from(*self) + 1) - self.read_quorum(ConsistencyMode::Consistent) } } } @@ -65,30 +61,28 @@ impl std::convert::From for usize { } } +impl std::fmt::Display for ReplicationFactor { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + self.0.fmt(f) + } +} + pub fn parse_replication_mode( config: &Config, ) -> Result<(ReplicationFactor, ConsistencyMode), Error> { match (&config.replication_mode, config.replication_factor, config.consistency_mode.as_str()) { - (Some(replication_mode), None, "consistent") => { - tracing::warn!("Legacy config option replication_mode in use. Please migrate to replication_factor and consistency_mode"); - let parsed_replication_mode = match replication_mode.as_str() { - "1" | "none" => Some((ReplicationFactor(1), ConsistencyMode::Consistent)), - "2" => Some((ReplicationFactor(2), ConsistencyMode::Consistent)), - "2-dangerous" => Some((ReplicationFactor(2), ConsistencyMode::Dangerous)), - "3" => Some((ReplicationFactor(3), ConsistencyMode::Consistent)), - "3-degraded" => Some((ReplicationFactor(3), ConsistencyMode::Degraded)), - "3-dangerous" => Some((ReplicationFactor(3), ConsistencyMode::Dangerous)), - _ => None, - }; - Some(parsed_replication_mode.ok_or_message("Invalid replication_mode in config file.")?) - }, - (None, Some(replication_factor), consistency_mode) => { - let replication_factor = ReplicationFactor::new(replication_factor) - .ok_or_message("Invalid replication_factor in config file.")?; - let consistency_mode = ConsistencyMode::parse(consistency_mode) - .ok_or_message("Invalid consistency_mode in config file.")?; - Some((replication_factor, consistency_mode)) - } - _ => None, - }.ok_or_message("Either the legacy replication_mode or replication_level and consistency_mode can be set, not both.") + (Some(_replication_mode), _, _) => { + Err(Error::Message("The legacy replication_mode is no longer supported. Use replication_factor and consistency_mode instead.".into())) + } + (None, Some(replication_factor), consistency_mode) => { + let replication_factor = ReplicationFactor::new(replication_factor) + .ok_or_message("Invalid replication_factor in config file.")?; + let consistency_mode = ConsistencyMode::parse(consistency_mode) + .ok_or_message("Invalid consistency_mode in config file.")?; + Ok((replication_factor, consistency_mode)) + } + (None, None, _) => { + Err(Error::Message("The option replication_factor is required.".into())) + } + } } diff --git a/src/rpc/rpc_helper.rs b/src/rpc/rpc_helper.rs index 2505c2ce..0f744f5b 100644 --- a/src/rpc/rpc_helper.rs +++ b/src/rpc/rpc_helper.rs @@ -162,7 +162,7 @@ impl RpcHelper { endpoint: &Endpoint, to: Uuid, msg: N, - strat: RequestStrategy<()>, + strategy: RequestStrategy<()>, ) -> Result where M: Rpc>, @@ -185,12 +185,12 @@ impl RpcHelper { let node_id = to.into(); let rpc_call = endpoint - .call_streaming(&node_id, msg, strat.rs_priority) + .call_streaming(&node_id, msg, strategy.rs_priority) .with_context(Context::current_with_span(span)) .record_duration(&self.0.metrics.rpc_duration, &metric_tags); let timeout = async { - match strat.rs_timeout { + match strategy.rs_timeout { Timeout::None => futures::future::pending().await, Timeout::Default => tokio::time::sleep(self.0.rpc_timeout).await, Timeout::Custom(t) => tokio::time::sleep(t).await, @@ -222,7 +222,7 @@ impl RpcHelper { endpoint: &Endpoint, to: &[Uuid], msg: N, - strat: RequestStrategy<()>, + strategy: RequestStrategy<()>, ) -> Result)>, Error> where M: Rpc>, @@ -237,7 +237,7 @@ impl RpcHelper { let resps = join_all( to.iter() - .map(|to| self.call(endpoint, *to, msg.clone(), strat.clone())), + .map(|to| self.call(endpoint, *to, msg.clone(), strategy.clone())), ) .with_context(Context::current_with_span(span)) .await; @@ -252,7 +252,7 @@ impl RpcHelper { &self, endpoint: &Endpoint, msg: N, - strat: RequestStrategy<()>, + strategy: RequestStrategy<()>, ) -> Result)>, Error> where M: Rpc>, @@ -266,7 +266,7 @@ impl RpcHelper { .iter() .map(|p| p.id.into()) .collect::>(); - self.call_many(endpoint, &to[..], msg, strat).await + self.call_many(endpoint, &to[..], msg, strategy).await } /// Make a RPC call to multiple servers, returning either a Vec of responses, @@ -336,16 +336,16 @@ impl RpcHelper { { // Once quorum is reached, other requests don't matter. // What we do here is only send the required number of requests - // to reach a quorum, priorizing nodes with the lowest latency. + // to reach a quorum, prioritizing nodes with the lowest latency. // When there are errors, we start new requests to compensate. // TODO: this could be made more aggressive, e.g. if after 2x the // average ping of a given request, the response is not yet received, // preemptively send an additional request to any remaining nodes. - // Reorder requests to priorize closeness / low latency + // Reorder requests to prioritize closeness / low latency let request_order = - self.request_order(&self.0.layout.read().unwrap().current(), to.iter().copied()); + self.request_order(self.0.layout.read().unwrap().current()?, to.iter().copied()); let send_all_at_once = strategy.rs_send_all_at_once.unwrap_or(false); // Build future for each request @@ -540,6 +540,8 @@ impl RpcHelper { // ---- functions not related to MAKING RPCs, but just determining to what nodes // they should be made and in which order ---- + #[expect(clippy::doc_overindented_list_items)] + #[expect(clippy::doc_lazy_continuation)] /// Determine to what nodes, and in what order, requests to read a data block /// should be sent. All nodes in the Vec returned by this function are tried /// one by one until there is one that returns the block (in block/manager.rs). @@ -558,7 +560,7 @@ impl RpcHelper { /// /// 1. ask first all nodes of all currently active layout versions /// -> ask the preferred node in all layout versions (older to newer), - /// then the second preferred onde in all verions, etc. + /// then the second preferred onde in all versions, etc. /// -> we start by the oldest active layout version first, because a majority /// of blocks will have been saved before the layout change /// 2. ask all nodes of historical layout versions, for blocks which have not @@ -567,28 +569,32 @@ impl RpcHelper { /// The preference order, for each layout version, is given by `request_order`, /// based on factors such as nodes being in the same datacenter, /// having low ping, etc. - pub fn block_read_nodes_of(&self, position: &Hash, rpc_helper: &RpcHelper) -> Vec { + pub fn block_read_nodes_of( + &self, + position: &Hash, + rpc_helper: &RpcHelper, + ) -> Result, Error> { let layout = self.0.layout.read().unwrap(); + let current_layout = layout.current()?; // Compute, for each layout version, the set of nodes that might store // the block, and put them in their preferred order as of `request_order`. - let mut vernodes = layout.versions().iter().map(|ver| { - let nodes = ver.nodes_of(position, ver.replication_factor); - rpc_helper.request_order(layout.current(), nodes) - }); + let mut vernodes = vec![]; + for ver in layout.versions()?.iter() { + let nodes = ver.nodes_of(position); + vernodes.push(rpc_helper.request_order(current_layout, nodes)) + } - let mut ret = if layout.versions().len() == 1 { + let mut ret = if vernodes.len() == 1 { // If we have only one active layout version, then these are the // only nodes we ask in step 1 - vernodes.next().unwrap() + vernodes.into_iter().next().unwrap() } else { - let vernodes = vernodes.collect::>(); - let mut nodes = Vec::::with_capacity(12); - for i in 0..layout.current().replication_factor { + for i in 0..current_layout.replication_factor { for vn in vernodes.iter() { if let Some(n) = vn.get(i) { - if !nodes.contains(&n) { + if !nodes.contains(n) { if *n == self.0.our_node_id { // it's always fast (almost free) to ask locally, // so always put that as first choice @@ -607,15 +613,15 @@ impl RpcHelper { // Second step: add nodes of older layout versions let old_ver_iter = layout.inner().old_versions.iter().rev(); for ver in old_ver_iter { - let nodes = ver.nodes_of(position, ver.replication_factor); - for node in rpc_helper.request_order(layout.current(), nodes) { + let nodes = ver.nodes_of(position); + for node in rpc_helper.request_order(current_layout, nodes) { if !ret.contains(&node) { ret.push(node); } } } - ret + Ok(ret) } fn request_order( @@ -631,8 +637,8 @@ impl RpcHelper { // The tuples are as follows: // (is another node?, is another zone?, latency, node ID, request future) // We store all of these tuples in a vec that we can sort. - // By sorting this vec, we priorize ourself, then nodes in the same zone, - // and within a same zone we priorize nodes with the lowest latency. + // By sorting this vec, we prioritize ourself, then nodes in the same zone, + // and within a same zone we prioritize nodes with the lowest latency. let mut nodes = nodes .map(|to| { let peer_zone = layout.get_node_zone(&to).unwrap_or(""); @@ -650,7 +656,7 @@ impl RpcHelper { }) .collect::>(); - // Sort requests by (priorize ourself, priorize same zone, priorize low latency) + // Sort requests by (prioritize ourself, prioritize same zone, prioritize low latency) nodes.sort_by_key(|(diffnode, diffzone, ping, _to)| (*diffnode, *diffzone, *ping)); nodes diff --git a/src/rpc/system.rs b/src/rpc/system.rs index 2a52ae5d..6adb13a1 100644 --- a/src/rpc/system.rs +++ b/src/rpc/system.rs @@ -45,7 +45,7 @@ const STATUS_EXCHANGE_INTERVAL: Duration = Duration::from_secs(10); /// Version tag used for version check upon Netapp connection. /// Cluster nodes with different version tags are deemed /// incompatible and will refuse to connect. -pub const GARAGE_VERSION_TAG: u64 = 0x6761726167650010; // garage 0x0010 (1.0) +pub const GARAGE_VERSION_TAG: u64 = 0x6761726167650020; // garage 0x0020 (2.0) /// RPC endpoint used for calls related to membership pub const SYSTEM_RPC_PATH: &str = "garage_rpc/system.rs/SystemRpc"; @@ -55,7 +55,7 @@ pub const SYSTEM_RPC_PATH: &str = "garage_rpc/system.rs/SystemRpc"; pub enum SystemRpc { /// Response to successful advertisements Ok, - /// Request to connect to a specific node (in @: format, pubkey = full-length node ID) + /// Request to connect to a specific node (in `@:` format, pubkey = full-length node ID) Connect(String), /// Advertise Garage status. Answered with another AdvertiseStatus. /// Exchanged with every node on a regular basis. @@ -124,6 +124,9 @@ pub struct NodeStatus { /// Hostname of the node pub hostname: Option, + /// Garage version of the node + pub garage_version: Option, + /// Replication factor configured on the node pub replication_factor: usize, @@ -369,6 +372,10 @@ impl System { &self.layout_manager.rpc_helper } + pub fn local_status(&self) -> NodeStatus { + self.local_status.read().unwrap().clone() + } + // ---- Administrative operations (directly available and // also available through RPC) ---- @@ -446,11 +453,28 @@ impl System { // Acquire a rwlock read-lock to the current cluster layout let layout = self.cluster_layout(); + let layout_versions = match layout.versions() { + Ok(v) => v, + Err(_) => { + // Layout not yet configured, special case + return ClusterHealth { + status: ClusterHealthStatus::Unavailable, + known_nodes: nodes.len(), + connected_nodes, + storage_nodes: 0, + storage_nodes_ok: 0, + partitions: 0, + partitions_quorum: 0, + partitions_all_ok: 0, + }; + } + }; + let current_layout = layout_versions.last().unwrap(); // Obtain information about nodes that have a role as storage nodes // in one of the active layout versions let mut storage_nodes = HashSet::::with_capacity(16); - for ver in layout.versions().iter() { + for ver in layout_versions.iter() { storage_nodes.extend( ver.roles .items() @@ -464,14 +488,11 @@ impl System { // Determine the number of partitions that have: // - a quorum of up nodes for all write sets (i.e. are available) // - for which all nodes in all write sets are up (i.e. are fully healthy) - let partitions = layout.current().partitions().collect::>(); + let partitions = current_layout.partitions().collect::>(); let mut partitions_quorum = 0; let mut partitions_all_ok = 0; for (_, hash) in partitions.iter() { - let mut write_sets = layout - .versions() - .iter() - .map(|x| x.nodes_of(hash, x.replication_factor)); + let mut write_sets = layout_versions.iter().map(|x| x.nodes_of(hash)); let has_quorum = write_sets .clone() .all(|set| set.filter(|x| node_up(x)).count() >= quorum); @@ -626,21 +647,37 @@ impl System { async fn discovery_loop(self: &Arc, mut stop_signal: watch::Receiver) { while !*stop_signal.borrow() { - let n_connected = self + let peers_up = self .peering .get_peer_list() .iter() .filter(|p| p.is_up()) - .count(); + .map(|p| Uuid::from(p.id)) + .collect::>(); - let not_configured = !self.cluster_layout().is_check_ok(); - let no_peers = n_connected < self.replication_factor.into(); - let expected_n_nodes = self.cluster_layout().all_nodes().len(); - let bad_peers = n_connected != expected_n_nodes; - - if not_configured || no_peers || bad_peers { - info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers); + let do_bootstrap = match self.cluster_layout().all_nodes() { + Err(_) => { + debug!("doing bootstrap/discovery step (layout not configured)"); + true + } + Ok(all_nodes) => { + // Do bootstrap if we have fewer peers than the replication + // factor, + // or if some peers in the layout are not connected + let do_bootstrap = peers_up.len() < self.replication_factor.into() + || all_nodes.iter().any(|x| !peers_up.contains(x)); + if do_bootstrap { + debug!( + "doing bootstrap/discovery step (peers_up: {}, all_nodes: {})", + peers_up.len(), + all_nodes.len() + ); + } + do_bootstrap + } + }; + if do_bootstrap { let mut ping_list = resolve_peers(&self.bootstrap_peers).await; // Add peer list from list stored on disk @@ -683,12 +720,13 @@ impl System { } } - if !not_configured && !no_peers { - // If the layout is configured, and we already have some connections - // to other nodes in the cluster, we can skip trying to connect to - // nodes that are not in the cluster layout. - let layout = self.cluster_layout(); - ping_list.retain(|(id, _)| layout.all_nodes().contains(&(*id).into())); + if let Ok(all_nodes) = self.cluster_layout().all_nodes() { + if peers_up.len() >= self.replication_factor.into() { + // If the layout is configured, and we already have some connections + // to other nodes in the cluster, we can skip trying to connect to + // nodes that are not in the cluster layout. + ping_list.retain(|(id, _)| all_nodes.contains(&(*id).into())); + } } for (node_id, node_addr) in ping_list { @@ -786,6 +824,7 @@ impl NodeStatus { .into_string() .unwrap_or_else(|_| "".to_string()), ), + garage_version: Some(garage_util::version::garage_version().to_string()), replication_factor: replication_factor.into(), layout_digest: layout_manager.layout().digest(), meta_disk_avail: None, @@ -796,6 +835,7 @@ impl NodeStatus { fn unknown() -> Self { NodeStatus { hostname: None, + garage_version: None, replication_factor: 0, layout_digest: Default::default(), meta_disk_avail: None, @@ -816,6 +856,7 @@ impl NodeStatus { }; let mount_avail = |path: &Path| match statvfs(path) { + #[allow(clippy::unnecessary_cast)] Ok(x) => { let avail = x.blocks_available() as u64 * x.fragment_size() as u64; let total = x.blocks() as u64 * x.fragment_size() as u64; @@ -911,13 +952,13 @@ fn get_rpc_public_addr(config: &Config) -> Option { let filter_subnet: Option = config .rpc_public_addr_subnet .as_ref() - .and_then(|filter_subnet_str| match filter_subnet_str.parse::() { + .map(|filter_subnet_str| match filter_subnet_str.parse::() { Ok(filter_subnet) => { let filter_subnet_trunc = filter_subnet.trunc(); if filter_subnet_trunc != filter_subnet { warn!("`rpc_public_addr_subnet` changed after applying netmask, continuing with {}", filter_subnet.trunc()); } - Some(filter_subnet_trunc) + filter_subnet_trunc } Err(e) => { panic!( diff --git a/src/rpc/system_metrics.rs b/src/rpc/system_metrics.rs index a64daec8..937c6db2 100644 --- a/src/rpc/system_metrics.rs +++ b/src/rpc/system_metrics.rs @@ -68,7 +68,7 @@ impl SystemMetrics { let replication_factor = system.replication_factor; meter .u64_value_observer("garage_replication_factor", move |observer| { - observer.observe(replication_factor.replication_factor() as u64, &[]) + observer.observe(usize::from(replication_factor) as u64, &[]) }) .with_description("Garage replication factor setting") .init() @@ -216,10 +216,13 @@ impl SystemMetrics { .u64_value_observer("cluster_layout_node_connected", move |observer| { let layout = system.cluster_layout(); let nodes = system.get_known_nodes(); - for id in layout.all_nodes().iter() { + for id in layout.all_nodes().unwrap_or_default().iter() { let mut kv = vec![KeyValue::new("id", format!("{:?}", id))]; - if let Some(role) = - layout.current().roles.get(id).and_then(|r| r.0.as_ref()) + if let Some(role) = layout + .current() + .ok() + .and_then(|l| l.roles.get(id)) + .and_then(|r| r.0.as_ref()) { kv.push(KeyValue::new("role_zone", role.zone.clone())); match role.capacity { @@ -260,10 +263,13 @@ impl SystemMetrics { .u64_value_observer("cluster_layout_node_disconnected_time", move |observer| { let layout = system.cluster_layout(); let nodes = system.get_known_nodes(); - for id in layout.all_nodes().iter() { + for id in layout.all_nodes().unwrap_or_default().iter() { let mut kv = vec![KeyValue::new("id", format!("{:?}", id))]; - if let Some(role) = - layout.current().roles.get(id).and_then(|r| r.0.as_ref()) + if let Some(role) = layout + .current() + .ok() + .and_then(|l| l.roles.get(id)) + .and_then(|r| r.0.as_ref()) { kv.push(KeyValue::new("role_zone", role.zone.clone())); match role.capacity { diff --git a/src/table/Cargo.toml b/src/table/Cargo.toml index 478dbd18..3495380f 100644 --- a/src/table/Cargo.toml +++ b/src/table/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "garage_table" -version = "1.3.1" +version = "2.2.0" authors = ["Alex Auvolat "] edition = "2018" license = "AGPL-3.0" diff --git a/src/table/data.rs b/src/table/data.rs index 1d0308ce..86bf7b93 100644 --- a/src/table/data.rs +++ b/src/table/data.rs @@ -66,6 +66,7 @@ impl TableData { store.clone(), merkle_tree.clone(), merkle_todo.clone(), + insert_queue.clone(), gc_todo.clone(), ); @@ -253,7 +254,7 @@ impl TableData { // any node of the partition is unavailable. let pk_hash = Hash::try_from(&tree_key[..32]).unwrap(); // TODO: this probably breaks when the layout changes - let nodes = self.replication.storage_nodes(&pk_hash); + let nodes = self.replication.storage_nodes(&pk_hash)?; if nodes.first() == Some(&self.system.id) { GcTodoEntry::new(tree_key, new_bytes_hash).save(&self.gc_todo)?; } @@ -367,6 +368,10 @@ impl TableData { } } + pub fn insert_queue_approximate_len(&self) -> Result { + Ok(self.insert_queue.approximate_len()?) + } + pub fn gc_todo_approximate_len(&self) -> Result { Ok(self.gc_todo.approximate_len()?) } diff --git a/src/table/gc.rs b/src/table/gc.rs index 1f30bd76..1ef7d471 100644 --- a/src/table/gc.rs +++ b/src/table/gc.rs @@ -153,7 +153,7 @@ impl TableGc { let mut partitions = HashMap::new(); for entry in entries { let pkh = Hash::try_from(&entry.key[..32]).unwrap(); - let mut nodes = self.data.replication.storage_nodes(&pkh); + let mut nodes = self.data.replication.storage_nodes(&pkh)?; nodes.retain(|x| *x != self.system.id); nodes.sort(); @@ -339,12 +339,11 @@ impl Worker for GcWorker { /// such entry in the db /// /// Format of an entry: -/// - key = 8 bytes: timestamp of tombstone -/// (used to implement GC delay) -/// n bytes: key in the main data table +/// - key = 8 bytes: timestamp of tombstone (used to implement GC delay) +/// n bytes: key in the main data table /// - value = hash of the table entry to delete (the tombstone) -/// for verification purpose, because we don't want to delete -/// things that aren't tombstones +/// for verification purpose, because we don't want to delete +/// things that aren't tombstones pub(crate) struct GcTodoEntry { tombstone_timestamp: u64, key: Vec, diff --git a/src/table/merkle.rs b/src/table/merkle.rs index 7ba1f007..a162b225 100644 --- a/src/table/merkle.rs +++ b/src/table/merkle.rs @@ -102,7 +102,7 @@ impl MerkleUpdater { partition: self .data .replication - .partition_of(&Hash::try_from(&k[0..32]).unwrap()), + .partition_of(&Hash::try_from(&k[0..32]).unwrap())?, prefix: vec![], }; self.data diff --git a/src/table/metrics.rs b/src/table/metrics.rs index 78593202..8d2047e2 100644 --- a/src/table/metrics.rs +++ b/src/table/metrics.rs @@ -7,6 +7,7 @@ pub struct TableMetrics { pub(crate) _table_size: ValueObserver, pub(crate) _merkle_tree_size: ValueObserver, pub(crate) _merkle_todo_len: ValueObserver, + pub(crate) _insert_queue_len: ValueObserver, pub(crate) _gc_todo_len: ValueObserver, pub(crate) get_request_counter: BoundCounter, @@ -26,6 +27,7 @@ impl TableMetrics { store: db::Tree, merkle_tree: db::Tree, merkle_todo: db::Tree, + insert_queue: db::Tree, gc_todo: db::Tree, ) -> Self { let meter = global::meter(table_name); @@ -72,6 +74,20 @@ impl TableMetrics { ) .with_description("Merkle tree updater TODO queue length") .init(), + _insert_queue_len: meter + .u64_value_observer( + "table.insert_queue_length", + move |observer| { + if let Ok(v) = insert_queue.approximate_len() { + observer.observe( + v as u64, + &[KeyValue::new("table_name", table_name)], + ); + } + }, + ) + .with_description("Table insert queue length") + .init(), _gc_todo_len: meter .u64_value_observer( "table.gc_todo_queue_length", diff --git a/src/table/replication/fullcopy.rs b/src/table/replication/fullcopy.rs index 1e52bb47..f3ee51fe 100644 --- a/src/table/replication/fullcopy.rs +++ b/src/table/replication/fullcopy.rs @@ -1,8 +1,10 @@ use std::sync::Arc; +use std::time::Duration; use garage_rpc::layout::*; -use garage_rpc::system::System; +use garage_rpc::{replication_mode::ConsistencyMode, system::System}; use garage_util::data::*; +use garage_util::error::Error; use crate::replication::*; @@ -21,53 +23,96 @@ use crate::replication::*; pub struct TableFullReplication { /// The membership manager of this node pub system: Arc, + pub consistency_mode: ConsistencyMode, } impl TableReplication for TableFullReplication { - type WriteSets = Vec>; + type WriteSets = WriteLock>>; - fn storage_nodes(&self, _hash: &Hash) -> Vec { - let layout = self.system.cluster_layout(); - layout.current().all_nodes().to_vec() + // Do anti-entropy every 10 seconds. + // Compared to sharded tables, anti-entropy is much less costly as there is + // a single partition hash to exchange. + // Also, it's generally a much bigger problem for fullcopy tables to be out of sync. + const ANTI_ENTROPY_INTERVAL: Duration = Duration::from_secs(10); + + fn storage_nodes(&self, _hash: &Hash) -> Result, Error> { + Ok(self.system.cluster_layout().all_nodes()?.to_vec()) } - fn read_nodes(&self, _hash: &Hash) -> Vec { - vec![self.system.id] + fn read_nodes(&self, _hash: &Hash) -> Result, Error> { + Ok(self + .system + .cluster_layout() + .read_version()? + .all_nodes() + .to_vec()) } - fn read_quorum(&self) -> usize { - 1 - } - - fn write_sets(&self, hash: &Hash) -> Self::WriteSets { - vec![self.storage_nodes(hash)] - } - fn write_quorum(&self) -> usize { - let nmembers = self.system.cluster_layout().current().all_nodes().len(); - - let max_faults = if nmembers > 1 { 1 } else { 0 }; - - if nmembers > max_faults { - nmembers - max_faults - } else { - 1 + fn read_quorum(&self) -> Result { + match self.consistency_mode { + ConsistencyMode::Dangerous | ConsistencyMode::Degraded => Ok(1), + ConsistencyMode::Consistent => { + let layout = self.system.cluster_layout(); + let nodes = layout.read_version()?.all_nodes(); + Ok(nodes.len().div_ceil(2)) + } } } - fn partition_of(&self, _hash: &Hash) -> Partition { - 0u16 + fn write_sets(&self, _hash: &Hash) -> Result { + self.system.layout_manager.write_lock_with(write_sets) + } + fn write_quorum(&self) -> Result { + match self.consistency_mode { + ConsistencyMode::Dangerous => Ok(1), + ConsistencyMode::Degraded | ConsistencyMode::Consistent => { + let layout = self.system.cluster_layout(); + let min_len = layout + .versions()? + .iter() + .map(|x| x.all_nodes().len()) + .min() + .unwrap(); + let max_quorum = layout + .versions()? + .iter() + .map(|x| x.all_nodes().len().div_euclid(2) + 1) + .max() + .unwrap(); + if min_len < max_quorum { + warn!("Write quorum will not be respected for TableFullReplication operations due to multiple active layout versions with vastly different number of nodes"); + Ok(std::cmp::max(1, min_len)) + } else { + Ok(max_quorum) + } + } + } } - fn sync_partitions(&self) -> SyncPartitions { + fn partition_of(&self, _hash: &Hash) -> Result { + Ok(0u16) + } + + fn sync_partitions(&self) -> Result { let layout = self.system.cluster_layout(); - let layout_version = layout.current().version; - SyncPartitions { + let layout_version = layout.ack_map_min(); + + let partitions = vec![SyncPartition { + partition: 0u16, + first_hash: [0u8; 32].into(), + last_hash: [0xff; 32].into(), + storage_sets: write_sets(layout.versions()?), + }]; + + Ok(SyncPartitions { layout_version, - partitions: vec![SyncPartition { - partition: 0u16, - first_hash: [0u8; 32].into(), - last_hash: [0xff; 32].into(), - storage_sets: vec![layout.current().all_nodes().to_vec()], - }], - } + partitions, + }) } } + +fn write_sets(layout_versions: &[LayoutVersion]) -> Vec> { + layout_versions + .iter() + .map(|x| x.all_nodes().to_vec()) + .collect() +} diff --git a/src/table/replication/parameters.rs b/src/table/replication/parameters.rs index 3649fad3..0bdfebd0 100644 --- a/src/table/replication/parameters.rs +++ b/src/table/replication/parameters.rs @@ -1,31 +1,36 @@ +use std::time::Duration; + use garage_rpc::layout::*; use garage_util::data::*; +use garage_util::error::Error; /// Trait to describe how a table shall be replicated pub trait TableReplication: Send + Sync + 'static { type WriteSets: AsRef>> + AsMut>> + Send + Sync + 'static; + const ANTI_ENTROPY_INTERVAL: Duration; + // See examples in table_sharded.rs and table_fullcopy.rs // To understand various replication methods /// The entire list of all nodes that store a partition - fn storage_nodes(&self, hash: &Hash) -> Vec; + fn storage_nodes(&self, hash: &Hash) -> Result, Error>; /// Which nodes to send read requests to - fn read_nodes(&self, hash: &Hash) -> Vec; + fn read_nodes(&self, hash: &Hash) -> Result, Error>; /// Responses needed to consider a read successful - fn read_quorum(&self) -> usize; + fn read_quorum(&self) -> Result; /// Which nodes to send writes to - fn write_sets(&self, hash: &Hash) -> Self::WriteSets; + fn write_sets(&self, hash: &Hash) -> Result; /// Responses needed to consider a write successful in each set - fn write_quorum(&self) -> usize; + fn write_quorum(&self) -> Result; // Accessing partitions, for Merkle tree & sync /// Get partition for data with given hash - fn partition_of(&self, hash: &Hash) -> Partition; + fn partition_of(&self, hash: &Hash) -> Result; /// List of partitions and nodes to sync with in current layout - fn sync_partitions(&self) -> SyncPartitions; + fn sync_partitions(&self) -> Result; } #[derive(Debug)] diff --git a/src/table/replication/sharded.rs b/src/table/replication/sharded.rs index e0245949..b8983a4a 100644 --- a/src/table/replication/sharded.rs +++ b/src/table/replication/sharded.rs @@ -1,9 +1,12 @@ use std::sync::Arc; +use std::time::Duration; use garage_rpc::layout::*; -use garage_rpc::system::System; +use garage_rpc::replication_mode::ConsistencyMode; use garage_util::data::*; +use garage_util::error::Error; +use crate::replication::sharded::manager::LayoutManager; use crate::replication::*; /// Sharded replication schema: @@ -15,54 +18,74 @@ use crate::replication::*; #[derive(Clone)] pub struct TableShardedReplication { /// The membership manager of this node - pub system: Arc, - /// How many time each data should be replicated - pub replication_factor: usize, - /// How many nodes to contact for a read, should be at most `replication_factor` - pub read_quorum: usize, - /// How many nodes to contact for a write, should be at most `replication_factor` - pub write_quorum: usize, + pub layout_manager: Arc, + pub consistency_mode: ConsistencyMode, } impl TableReplication for TableShardedReplication { + // Do anti-entropy every 10 minutes + const ANTI_ENTROPY_INTERVAL: Duration = Duration::from_secs(10 * 60); + type WriteSets = WriteLock>>; - fn storage_nodes(&self, hash: &Hash) -> Vec { - self.system.cluster_layout().storage_nodes_of(hash) + fn storage_nodes(&self, hash: &Hash) -> Result, Error> { + let mut ret = vec![]; + for version in self.layout_manager.layout().versions()?.iter() { + ret.extend(version.nodes_of(hash)); + } + ret.sort(); + ret.dedup(); + Ok(ret) } - fn read_nodes(&self, hash: &Hash) -> Vec { - self.system.cluster_layout().read_nodes_of(hash) - } - fn read_quorum(&self) -> usize { - self.read_quorum + fn read_nodes(&self, hash: &Hash) -> Result, Error> { + Ok(self + .layout_manager + .layout() + .read_version()? + .nodes_of(hash) + .collect()) } - fn write_sets(&self, hash: &Hash) -> Self::WriteSets { - self.system.layout_manager.write_sets_of(hash) - } - fn write_quorum(&self) -> usize { - self.write_quorum + fn read_quorum(&self) -> Result { + Ok(self + .layout_manager + .layout() + .read_version()? + .read_quorum(self.consistency_mode)) } - fn partition_of(&self, hash: &Hash) -> Partition { - self.system.cluster_layout().current().partition_of(hash) + fn write_sets(&self, hash: &Hash) -> Result { + self.layout_manager + .write_lock_with(|lvs| write_sets(lvs, hash)) } - fn sync_partitions(&self) -> SyncPartitions { - let layout = self.system.cluster_layout(); + fn write_quorum(&self) -> Result { + Ok(self + .layout_manager + .layout() + .current()? + .write_quorum(self.consistency_mode)) + } + + fn partition_of(&self, hash: &Hash) -> Result { + Ok(self.layout_manager.layout().current()?.partition_of(hash)) + } + + fn sync_partitions(&self) -> Result { + let layout = self.layout_manager.layout(); + let layout_versions = layout.versions()?; let layout_version = layout.ack_map_min(); let mut partitions = layout - .current() + .current()? .partitions() .map(|(partition, first_hash)| { - let storage_sets = layout.storage_sets_of(&first_hash); SyncPartition { partition, first_hash, last_hash: [0u8; 32].into(), // filled in just after - storage_sets, + storage_sets: write_sets(layout_versions, &first_hash), } }) .collect::>(); @@ -75,9 +98,16 @@ impl TableReplication for TableShardedReplication { }; } - SyncPartitions { + Ok(SyncPartitions { layout_version, partitions, - } + }) } } + +fn write_sets(layout_versions: &[LayoutVersion], hash: &Hash) -> Vec> { + layout_versions + .iter() + .map(|x| x.nodes_of(hash).collect()) + .collect() +} diff --git a/src/table/schema.rs b/src/table/schema.rs index fc1a465e..999ee2c5 100644 --- a/src/table/schema.rs +++ b/src/table/schema.rs @@ -85,6 +85,9 @@ pub trait TableSchema: Send + Sync + 'static { /// (e.g. filter out deleted entries) type Filter: Clone + Serialize + for<'de> Deserialize<'de> + Send + Sync + 'static; + /// A precondition that should be checked before some update operation + type Precondition: Clone + Serialize + for<'de> Deserialize<'de> + Send + Sync + 'static; + /// Actions triggered by data changing in a table. If such actions /// include updates to the local database that should be applied /// atomically with the item update itself, a db transaction is @@ -100,4 +103,9 @@ pub trait TableSchema: Send + Sync + 'static { } fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool; + + fn matches_condition(local_entry: Option<&Self::E>, new_entry: &Self::E, condition: &Self::Precondition) -> bool { + let _ = (local_entry, new_entry, condition); + false + } } diff --git a/src/table/sync.rs b/src/table/sync.rs index 2d43b9fc..a4c558bb 100644 --- a/src/table/sync.rs +++ b/src/table/sync.rs @@ -27,9 +27,6 @@ use crate::merkle::*; use crate::replication::*; use crate::*; -// Do anti-entropy every 10 minutes -const ANTI_ENTROPY_INTERVAL: Duration = Duration::from_secs(10 * 60); - pub struct TableSyncer { system: Arc, data: Arc>, @@ -118,7 +115,7 @@ impl TableSyncer { ); let mut result_tracker = QuorumSetResultTracker::new( &partition.storage_sets, - self.data.replication.write_quorum(), + self.data.replication.write_quorum()?, ); let mut sync_futures = result_tracker @@ -182,7 +179,7 @@ impl TableSyncer { } if !items.is_empty() { - let nodes = self.data.replication.storage_nodes(begin); + let nodes = self.data.replication.storage_nodes(begin)?; if nodes.contains(&self.system.id) { warn!( "({}) Interrupting offload as partitions seem to have changed", @@ -190,7 +187,7 @@ impl TableSyncer { ); break; } - if nodes.len() < self.data.replication.write_quorum() { + if nodes.len() < self.data.replication.write_quorum()? { return Err(Error::Message( "Not offloading as we don't have a quorum of nodes to write to." .to_string(), @@ -505,16 +502,22 @@ impl SyncWorker { } fn add_full_sync(&mut self) { - let mut partitions = self.syncer.data.replication.sync_partitions(); - info!( - "{}: Adding full sync for ack layout version {}", - F::TABLE_NAME, - partitions.layout_version - ); + match self.syncer.data.replication.sync_partitions() { + Ok(mut partitions) => { + debug!( + "{}: Adding full sync for ack layout version {}", + F::TABLE_NAME, + partitions.layout_version + ); - partitions.partitions.shuffle(&mut thread_rng()); - self.todo = Some(partitions); - self.next_full_sync = Instant::now() + ANTI_ENTROPY_INTERVAL; + partitions.partitions.shuffle(&mut thread_rng()); + self.todo = Some(partitions); + } + Err(e) => { + debug!("{}: Not adding full sync: {}", F::TABLE_NAME, e); + } + } + self.next_full_sync = Instant::now() + R::ANTI_ENTROPY_INTERVAL; } } @@ -556,7 +559,7 @@ impl Worker for SyncWorker { } if todo.partitions.is_empty() { - info!( + debug!( "{}: Completed full sync for ack layout version {}", F::TABLE_NAME, todo.layout_version diff --git a/src/table/table.rs b/src/table/table.rs index c96f4731..4f8719de 100644 --- a/src/table/table.rs +++ b/src/table/table.rs @@ -18,6 +18,7 @@ use garage_util::data::*; use garage_util::error::Error; use garage_util::metrics::RecordDuration; use garage_util::migrate::Migrate; +use garage_util::keyed_mutex::KeyedMutex; use garage_rpc::rpc_helper::QuorumSetResultTracker; use garage_rpc::system::System; @@ -40,6 +41,7 @@ pub struct Table { pub syncer: Arc>, gc: Arc>, endpoint: Arc, Self>>, + keyed_mutex: KeyedMutex<(Hash, Vec)>, } #[derive(Serialize, Deserialize)] @@ -59,6 +61,19 @@ pub(crate) enum TableRpc { }, Update(Vec>), + CompareUpdate(CompareUpdate) +} + +#[derive(Serialize, Deserialize, Clone)] +pub(crate) struct CompareUpdate { + condition: Precondition, + /// Nodes that were not contacted yet, and should be + node_ids: Vec, + /// number of success still required, when this reach zero, enough nodes + /// agree that we can persist the transaction + success_required: usize, + /// Actual value to store + value: Arc, } impl Rpc for TableRpc { @@ -89,6 +104,7 @@ impl Table { gc, syncer, endpoint, + keyed_mutex: KeyedMutex::new(), }); table.endpoint.set_handler(table.clone()); @@ -119,7 +135,7 @@ impl Table { async fn insert_internal(&self, e: &F::E) -> Result<(), Error> { let hash = e.partition_key().hash(); - let who = self.data.replication.write_sets(&hash); + let who = self.data.replication.write_sets(&hash)?; let e_enc = Arc::new(ByteBuf::from(e.encode()?)); let rpc = TableRpc::::Update(vec![e_enc]); @@ -131,7 +147,7 @@ impl Table { who.as_ref(), rpc, RequestStrategy::with_priority(PRIO_NORMAL) - .with_quorum(self.data.replication.write_quorum()), + .with_quorum(self.data.replication.write_quorum()?), ) .await?; @@ -180,7 +196,7 @@ impl Table { // a quorum of nodes has answered OK, then the insert has succeeded and // consistency properties (read-after-write) are preserved. - let quorum = self.data.replication.write_quorum(); + let quorum = self.data.replication.write_quorum()?; // Serialize all entries and compute the write sets for each of them. // In the case of sharded table replication, this also takes an "ack lock" @@ -193,7 +209,7 @@ impl Table { for entry in entries.into_iter() { let entry = entry.borrow(); let hash = entry.partition_key().hash(); - let mut write_sets = self.data.replication.write_sets(&hash); + let mut write_sets = self.data.replication.write_sets(&hash)?; for set in write_sets.as_mut().iter_mut() { // Sort nodes in each write sets to merge write sets with same // nodes but in possibly different orders @@ -309,7 +325,7 @@ impl Table { sort_key: &F::S, ) -> Result, Error> { let hash = partition_key.hash(); - let who = self.data.replication.read_nodes(&hash); + let who = self.data.replication.read_nodes(&hash)?; let rpc = TableRpc::::ReadEntry(partition_key.clone(), sort_key.clone()); let resps = self @@ -320,7 +336,7 @@ impl Table { &who, rpc, RequestStrategy::with_priority(PRIO_NORMAL) - .with_quorum(self.data.replication.read_quorum()), + .with_quorum(self.data.replication.read_quorum()?), ) .await?; @@ -397,7 +413,7 @@ impl Table { enumeration_order: EnumerationOrder, ) -> Result, Error> { let hash = partition_key.hash(); - let who = self.data.replication.read_nodes(&hash); + let who = self.data.replication.read_nodes(&hash)?; let rpc = TableRpc::::ReadRange { partition: partition_key.clone(), @@ -415,7 +431,7 @@ impl Table { &who, rpc, RequestStrategy::with_priority(PRIO_NORMAL) - .with_quorum(self.data.replication.read_quorum()), + .with_quorum(self.data.replication.read_quorum()?), ) .await?; @@ -482,6 +498,66 @@ impl Table { Ok(ret_vec) } + pub fn get_local( + self: &Arc, + partition_key: &F::P, + sort_key: &F::S, + ) -> Result, Error> { + let bytes = self.data.read_entry(partition_key, sort_key)?; + bytes.map(|b| self.data.decode_entry(&b)).transpose() + } + + async fn handle_compare_update(self: &Arc, update: &CompareUpdate) -> Result<(), Error> { + let mut update = update.clone(); + + let new_entry = self.data.decode_entry(update.value.as_slice())?; + let mutex_handle = self.keyed_mutex.lock((new_entry.partition_key().hash(), new_entry.sort_key().sort_key().to_vec())); + let local_value = self.get_local(new_entry.partition_key(), new_entry.sort_key())?; + if F::matches_condition(local_value.as_ref(), &new_entry, &update.condition) { + update.success_required -= 1; + } else { + // there is no point in maintaining a lock, the condition didn't hold for us + // it might still hold for enough nodes that this is a valid update though + drop(mutex_handle) + } + if update.success_required == 0 { + self.data.update_entry(update.value.as_slice())?; + let this = self.clone(); + tokio::spawn(async move { + this.system + .rpc_helper() + .try_call_many( + &this.endpoint, + &update.node_ids, + TableRpc::::Update(vec![update.value]), + RequestStrategy::with_priority(PRIO_NORMAL), + ).await + }); + return Ok(()) + } else { + // the node that called us thought this could succeed, + // the only way we don't do a single loop iteration + // is the condition was false for us, so set that as default + // exit condition + let mut last_error = Error::PreconditionFailed; + while update.node_ids.len() >= update.success_required { + let next_node = update.node_ids.pop().unwrap(/* node_ids >= success_required > 0, pop always succeed*/); + match self.system.rpc_helper().call(&self.endpoint, next_node, TableRpc::::CompareUpdate(update.clone()), RequestStrategy::with_priority(PRIO_NORMAL)).await { + Ok(_) => { + self.data.update_entry(update.value.as_slice())?; + }, + Err(Error::PreconditionFailed) => { + return Err(Error::PreconditionFailed); + } + Err(e) => { + last_error = e; + }, + } + } + return Err(last_error) + } + } + // =============== UTILITY FUNCTION FOR CLIENT OPERATIONS =============== async fn repair_on_read(&self, who: &[Uuid], what: F::E) -> Result<(), Error> { @@ -530,6 +606,10 @@ impl EndpointHandler> for Table self.data.update_many(pairs)?; Ok(TableRpc::Ok) } + TableRpc::CompareUpdate(compare_update) => { + self.handle_compare_update(compare_update).await?; + Ok(TableRpc::Ok) + } m => Err(Error::unexpected_rpc_message(m)), } } diff --git a/src/util/Cargo.toml b/src/util/Cargo.toml index 46fa6590..48d39872 100644 --- a/src/util/Cargo.toml +++ b/src/util/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "garage_util" -version = "1.3.1" +version = "2.2.0" authors = ["Alex Auvolat "] edition = "2018" license = "AGPL-3.0" diff --git a/src/util/background/mod.rs b/src/util/background/mod.rs index 607cd7a3..1fc70d19 100644 --- a/src/util/background/mod.rs +++ b/src/util/background/mod.rs @@ -6,7 +6,6 @@ pub mod worker; use std::collections::HashMap; use std::sync::Arc; -use serde::{Deserialize, Serialize}; use tokio::sync::{mpsc, watch}; use worker::WorkerProcessor; @@ -18,7 +17,7 @@ pub struct BackgroundRunner { worker_info: Arc>>, } -#[derive(Clone, Serialize, Deserialize, Debug)] +#[derive(Clone, Debug)] pub struct WorkerInfo { pub name: String, pub status: WorkerStatus, @@ -30,7 +29,7 @@ pub struct WorkerInfo { /// WorkerStatus is a struct returned by the worker with a bunch of canonical /// fields to indicate their status to CLI users. All fields are optional. -#[derive(Clone, Serialize, Deserialize, Debug, Default)] +#[derive(Clone, Debug, Default)] pub struct WorkerStatus { pub tranquility: Option, pub progress: Option, @@ -69,7 +68,6 @@ impl BackgroundRunner { { self.send_worker .send(Box::new(worker)) - .ok() .expect("Could not put worker in queue"); } } diff --git a/src/util/background/worker.rs b/src/util/background/worker.rs index 3c938b7e..9028a052 100644 --- a/src/util/background/worker.rs +++ b/src/util/background/worker.rs @@ -6,7 +6,6 @@ use async_trait::async_trait; use futures::future::*; use futures::stream::FuturesUnordered; use futures::StreamExt; -use serde::{Deserialize, Serialize}; use tokio::select; use tokio::sync::{mpsc, watch}; @@ -18,7 +17,7 @@ use crate::time::now_msec; // will be interrupted in the middle of whatever they are doing. const EXIT_DEADLINE: Duration = Duration::from_secs(8); -#[derive(PartialEq, Copy, Clone, Serialize, Deserialize, Debug)] +#[derive(PartialEq, Copy, Clone, Debug)] pub enum WorkerState { Busy, Throttled(f32), @@ -26,17 +25,6 @@ pub enum WorkerState { Done, } -impl std::fmt::Display for WorkerState { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - WorkerState::Busy => write!(f, "Busy"), - WorkerState::Throttled(_) => write!(f, "Busy*"), - WorkerState::Idle => write!(f, "Idle"), - WorkerState::Done => write!(f, "Done"), - } - } -} - #[async_trait] pub trait Worker: Send { fn name(&self) -> String; @@ -115,39 +103,32 @@ impl WorkerProcessor { trace!("{} (TID {}): {:?}", worker.worker.name(), worker.task_id, worker.state); // Save worker info - { - let mut wi = self.worker_info.lock().unwrap(); - match wi.get_mut(&worker.task_id) { - Some(i) => { - i.state = worker.state; - i.status = worker.worker.status(); - i.errors = worker.errors; - i.consecutive_errors = worker.consecutive_errors; - if worker.last_error.is_some() { - i.last_error = worker.last_error.take(); - } - } - None => { - wi.insert(worker.task_id, WorkerInfo { - name: worker.worker.name(), - state: worker.state, - status: worker.worker.status(), - errors: worker.errors, - consecutive_errors: worker.consecutive_errors, - last_error: worker.last_error.take(), - }); + let mut wi = self.worker_info.lock().unwrap(); + match wi.get_mut(&worker.task_id) { + Some(i) => { + i.state = worker.state; + i.status = worker.worker.status(); + i.errors = worker.errors; + i.consecutive_errors = worker.consecutive_errors; + if worker.last_error.is_some() { + i.last_error = worker.last_error.take(); } } + None => { + wi.insert(worker.task_id, WorkerInfo { + name: worker.worker.name(), + state: worker.state, + status: worker.worker.status(), + errors: worker.errors, + consecutive_errors: worker.consecutive_errors, + last_error: worker.last_error.take(), + }); + } } if worker.state == WorkerState::Done { info!("Worker {} (TID {}) exited", worker.worker.name(), worker.task_id); } else { - // Yield to the Tokio scheduler between consecutive Busy steps so - // that a worker which never suspends on its own cannot starve other tasks. - if worker.state == WorkerState::Busy { - tokio::task::yield_now().await; - } workers.push(async move { worker.step().await; worker diff --git a/src/util/config.rs b/src/util/config.rs index eb889ebe..bc476a35 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -215,6 +215,9 @@ pub struct AdminConfig { pub metrics_token: Option, /// File to read metrics token from pub metrics_token_file: Option, + /// Whether to require an access token for accessing the metrics endpoint + #[serde(default)] + pub metrics_require_token: bool, /// Bearer token to use to access Admin API endpoints pub admin_token: Option, @@ -259,6 +262,8 @@ pub struct ConsulDiscoveryConfig { /// Additional service metadata to add #[serde(default)] pub meta: Option>, + #[serde(default)] + pub datacenters: Vec, } #[derive(Deserialize, Debug, Clone)] diff --git a/src/util/crdt/crdt.rs b/src/util/crdt/crdt.rs index fdf63084..f2cfd464 100644 --- a/src/util/crdt/crdt.rs +++ b/src/util/crdt/crdt.rs @@ -26,14 +26,14 @@ pub trait Crdt { fn merge(&mut self, other: &Self); } -/// Option implements Crdt for any type T, even if T doesn't implement CRDT itself: when +/// `Option` implements Crdt for any type T, even if T doesn't implement CRDT itself: when /// different values are detected, they are always merged to None. This can be used for value /// types which shoulnd't be merged, instead of trying to merge things when we know we don't want /// to merge them (which is what the AutoCrdt trait is used for most of the time). This cases /// arises very often, for example with a Lww or a LwwMap: the value type has to be a CRDT so that /// we have a rule for what to do when timestamps aren't enough to disambiguate (in a distributed /// system, anything can happen!), and with AutoCrdt the rule is to make an arbitrary (but -/// deterministic) choice between the two. When using an Option instead with this impl, ambiguity +/// deterministic) choice between the two. When using an `Option` instead with this impl, ambiguity /// cases are explicitly stored as None, which allows us to detect the ambiguity and handle it in /// the way we want. (this can only work if we are happy with losing the value when an ambiguity /// arises) diff --git a/src/util/crdt/deletable.rs b/src/util/crdt/deletable.rs index e771aceb..0594d850 100644 --- a/src/util/crdt/deletable.rs +++ b/src/util/crdt/deletable.rs @@ -9,6 +9,16 @@ pub enum Deletable { Deleted, } +impl Deletable { + /// Map value, used for migrations + pub fn map U>(self, f: F) -> Deletable { + match self { + Self::Present(x) => Deletable::::Present(f(x)), + Self::Deleted => Deletable::::Deleted, + } + } +} + impl Deletable { /// Create a new deletable object that isn't deleted pub fn present(v: T) -> Self { diff --git a/src/util/crdt/lww.rs b/src/util/crdt/lww.rs index 80747406..f8b03b85 100644 --- a/src/util/crdt/lww.rs +++ b/src/util/crdt/lww.rs @@ -43,6 +43,16 @@ pub struct Lww { v: T, } +impl Lww { + /// Map value, used for migrations + pub fn map U>(self, f: F) -> Lww { + Lww:: { + ts: self.ts, + v: f(self.v), + } + } +} + impl Lww where T: Crdt, diff --git a/src/util/data.rs b/src/util/data.rs index 1fe7dfe0..640e58c9 100644 --- a/src/util/data.rs +++ b/src/util/data.rs @@ -90,11 +90,11 @@ impl FixedBytes32 { if *byte == u8::MAX { *byte = 0; } else { - *byte = *byte + 1; + *byte += 1; return Some(ret); } } - return None; + None } } diff --git a/src/util/encode.rs b/src/util/encode.rs index c6815d49..3d989b7e 100644 --- a/src/util/encode.rs +++ b/src/util/encode.rs @@ -18,7 +18,7 @@ where /// data formats) pub fn nonversioned_decode(bytes: &[u8]) -> Result where - T: for<'de> Deserialize<'de> + ?Sized, + T: for<'de> Deserialize<'de>, { rmp_serde::decode::from_slice::<_>(bytes) } diff --git a/src/util/error.rs b/src/util/error.rs index 170d2687..26377ab9 100644 --- a/src/util/error.rs +++ b/src/util/error.rs @@ -54,6 +54,9 @@ pub enum Error { #[error("Timeout")] Timeout, + #[error("Layout not ready")] + LayoutNotReady, + #[error("Could not reach quorum of {0} (sets={1:?}). {2} of {3} request succeeded, others returned errors: {4:?}")] Quorum(usize, Option, usize, usize, Vec), @@ -68,6 +71,9 @@ pub enum Error { #[error("{0}")] Message(String), + + #[error("Precondition failed")] + PreconditionFailed, } impl Error { diff --git a/src/util/keyed_mutex.rs b/src/util/keyed_mutex.rs new file mode 100644 index 00000000..824f1b89 --- /dev/null +++ b/src/util/keyed_mutex.rs @@ -0,0 +1,41 @@ +use std::collections::HashSet; +use std::hash::Hash; + +use tokio::sync::watch::Sender as WatchSender; + +pub struct KeyedMutex { + state: WatchSender>, +} + +impl KeyedMutex { + pub fn new() -> Self { + KeyedMutex { + state: WatchSender::new(HashSet::new()), + } + } + + pub async fn lock(&self, key: K) -> LockGuard<'_, K> { + let mut receiver = self.state.subscribe(); + loop { + if self.state.send_if_modified(|set| set.insert(key.clone())) { + return LockGuard { + lock: self, + key, + } + } + // this can't error because we still hold a sender + let _ = receiver.wait_for(|set| !set.contains(&key)).await; + } + } +} + +pub struct LockGuard<'a, K: Hash + Eq > { + lock: &'a KeyedMutex, + key: K, +} + +impl<'a, K: Hash + Eq> Drop for LockGuard<'a, K> { + fn drop(&mut self) { + self.lock.state.send_modify(|set| assert!(set.remove(&self.key), "unlocked mutex that wasn't locked")) + } +} diff --git a/src/util/lib.rs b/src/util/lib.rs index 8b035ff0..81c673b4 100644 --- a/src/util/lib.rs +++ b/src/util/lib.rs @@ -10,6 +10,7 @@ pub mod data; pub mod encode; pub mod error; pub mod forwarded_headers; +pub mod keyed_mutex; pub mod metrics; pub mod migrate; pub mod persister; diff --git a/src/web/Cargo.toml b/src/web/Cargo.toml index e0cb317f..ba0ecc04 100644 --- a/src/web/Cargo.toml +++ b/src/web/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "garage_web" -version = "1.3.1" +version = "2.2.0" authors = ["Alex Auvolat ", "Quentin Dufour "] edition = "2018" license = "AGPL-3.0" diff --git a/src/web/web_server.rs b/src/web/web_server.rs index ea02ab0f..f50e4ca7 100644 --- a/src/web/web_server.rs +++ b/src/web/web_server.rs @@ -25,12 +25,14 @@ use garage_api_common::cors::{ }; use garage_api_common::generic_server::{server_loop, UnixListenerOn}; use garage_api_common::helpers::*; +use garage_api_s3::api_server::ResBody; use garage_api_s3::error::{ CommonErrorDerivative, Error as ApiError, OkOrBadRequest, OkOrInternalError, }; use garage_api_s3::get::{handle_get_without_ctx, handle_head_without_ctx}; use garage_api_s3::website::X_AMZ_WEBSITE_REDIRECT_LOCATION; +use garage_model::bucket_table::{self, RoutingRule}; use garage_model::garage::Garage; use garage_table::*; @@ -153,8 +155,8 @@ impl WebServer { .span_builder(format!("Web {} request", req.method())) .with_trace_id(gen_trace_id()) .with_attributes(vec![ - KeyValue::new("host", format!("{}", host_header.clone())), - KeyValue::new("method", format!("{}", req.method())), + KeyValue::new("host", host_header.clone()), + KeyValue::new("method", req.method().to_string()), KeyValue::new("uri", req.uri().to_string()), ]) .start(&tracer); @@ -260,45 +262,71 @@ impl WebServer { // Get path let path = req.uri().path().to_string(); let index = &website_config.index_document; - let (key, may_redirect) = path_to_keys(&path, index)?; + let routing_result = path_to_keys(&path, index, &website_config.routing_rules)?; debug!( - "Selected bucket: \"{}\" {:?}, target key: \"{}\", may redirect to: {:?}", - bucket_name, bucket_id, key, may_redirect + "Selected bucket: \"{}\" {:?}, routing to {:?}", + bucket_name, bucket_id, routing_result, ); - let ret_doc = match *req.method() { - Method::OPTIONS => handle_options_for_bucket(req, &bucket_params) + let ret_doc = match (req.method(), routing_result.main_target()) { + (&Method::OPTIONS, _) => handle_options_for_bucket(req, &bucket_params) .map_err(ApiError::from) .map(|res| res.map(|_empty_body: EmptyBody| empty_body())), - Method::HEAD => { - handle_head_without_ctx(self.garage.clone(), req, bucket_id, &key, None).await + (_, Err((url, code))) => Ok(Response::builder() + .status(code) + .header("Location", url) + .body(empty_body()) + .unwrap()), + (_, Ok((key, code))) => { + handle_inner(self.garage.clone(), req, bucket_id, key, code).await } - Method::GET => { - handle_get_without_ctx( + }; + + // Try handling errors if bucket configuration provided fallbacks + let ret_doc_with_redir = match (&ret_doc, &routing_result) { + ( + Err(ApiError::NoSuchKey), + RoutingResult::LoadOrRedirect { + redirect_if_exists, + redirect_url, + redirect_code, + .. + }, + ) => { + let redirect = if let Some(redirect_key) = redirect_if_exists { + self.check_key_exists(bucket_id, redirect_key.as_str()) + .await? + } else { + true + }; + if redirect { + Ok(Response::builder() + .status(redirect_code) + .header("Location", redirect_url) + .body(empty_body()) + .unwrap()) + } else { + ret_doc + } + } + ( + Err(ApiError::NoSuchKey), + RoutingResult::LoadOrAlternativeError { + redirect_key, + redirect_code, + .. + }, + ) => { + handle_inner( self.garage.clone(), req, bucket_id, - &key, - None, - Default::default(), + redirect_key, + *redirect_code, ) .await } - _ => Err(ApiError::bad_request("HTTP method not supported")), - }; - - // Try implicit redirect on error - let ret_doc_with_redir = match (&ret_doc, may_redirect) { - (Err(ApiError::NoSuchKey), ImplicitRedirect::To { key, url }) - if self.check_key_exists(bucket_id, key.as_str()).await? => - { - Ok(Response::builder() - .status(StatusCode::FOUND) - .header(LOCATION, url) - .body(empty_body()) - .unwrap()) - } (Ok(ret), _) if ret.headers().contains_key(X_AMZ_WEBSITE_REDIRECT_LOCATION) => { let redirect_location = ret.headers().get(X_AMZ_WEBSITE_REDIRECT_LOCATION).unwrap(); Ok(Response::builder() @@ -332,17 +360,17 @@ impl WebServer { // We want to return the error document // Create a fake HTTP request with path = the error document let req2 = Request::builder() + .method("GET") .uri(format!("http://{}/{}", host, &error_document)) .body(()) .unwrap(); - match handle_get_without_ctx( + match handle_inner( self.garage.clone(), &req2, bucket_id, &error_document, - None, - Default::default(), + error.http_status_code(), ) .await { @@ -357,8 +385,6 @@ impl WebServer { error ); - *error_doc.status_mut() = error.http_status_code(); - // Preserve error message in a special header for error_line in error.to_string().split('\n') { if let Ok(v) = HeaderValue::from_bytes(error_line.as_bytes()) { @@ -389,6 +415,52 @@ impl WebServer { } } +async fn handle_inner( + garage: Arc, + req: &Request<()>, + bucket_id: Uuid, + key: &str, + status_code: StatusCode, +) -> Result, ApiError> { + if status_code != StatusCode::OK { + // If we are returning an error document, discard all headers from + // the original request that would have influenced the result: + // - Range header, we don't want to return a subrange of the error document + // - Caching directives such as If-None-Match, etc, which are not relevant + let cleaned_req = Request::builder().uri(req.uri()).body(()).unwrap(); + + let mut ret = match *req.method() { + Method::HEAD => { + handle_head_without_ctx(garage, &cleaned_req, bucket_id, key, None).await? + } + Method::GET => { + handle_get_without_ctx( + garage, + &cleaned_req, + bucket_id, + key, + None, + Default::default(), + ) + .await? + } + _ => return Err(ApiError::bad_request("HTTP method not supported")), + }; + + *ret.status_mut() = status_code; + + Ok(ret) + } else { + match *req.method() { + Method::HEAD => handle_head_without_ctx(garage, req, bucket_id, key, None).await, + Method::GET => { + handle_get_without_ctx(garage, req, bucket_id, key, None, Default::default()).await + } + _ => Err(ApiError::bad_request("HTTP method not supported")), + } + } +} + fn error_to_res(e: Error) -> Response> { // If we are here, it is either that: // - there was an error before trying to get the requested URL @@ -425,9 +497,44 @@ fn error_to_res(e: Error) -> Response> { } #[derive(Debug, PartialEq)] -enum ImplicitRedirect { - No, - To { key: String, url: String }, +enum RoutingResult { + // Load a key and use `code` as status, or fallback to normal 404 handler if not found + LoadKey { + key: String, + code: StatusCode, + }, + // Load a key and use `200` as status, or fallback with a redirection using `redirect_code` + // as status + LoadOrRedirect { + key: String, + redirect_if_exists: Option, + redirect_url: String, + redirect_code: StatusCode, + }, + // Load a key and use `200` as status, or fallback by loading a different key and use + // `redirect_code` as status + LoadOrAlternativeError { + key: String, + redirect_key: String, + redirect_code: StatusCode, + }, + // Send an http redirect with `code` as status + Redirect { + url: String, + code: StatusCode, + }, +} + +impl RoutingResult { + // return Ok((key_to_deref, status_code)) or Err((redirect_target, status_code)) + fn main_target(&self) -> Result<(&str, StatusCode), (&str, StatusCode)> { + match self { + RoutingResult::LoadKey { key, code } => Ok((key, *code)), + RoutingResult::LoadOrRedirect { key, .. } => Ok((key, StatusCode::OK)), + RoutingResult::LoadOrAlternativeError { key, .. } => Ok((key, StatusCode::OK)), + RoutingResult::Redirect { url, code } => Err((url, *code)), + } + } } /// Path to key @@ -436,36 +543,155 @@ enum ImplicitRedirect { /// When a path ends with "/", we append the index name to match traditional web server behavior /// which is also AWS S3 behavior. /// -/// Check: https://docs.aws.amazon.com/AmazonS3/latest/userguide/IndexDocumentSupport.html -fn path_to_keys<'a>(path: &'a str, index: &str) -> Result<(String, ImplicitRedirect), Error> { +/// Check: +fn path_to_keys( + path: &str, + index: &str, + routing_rules: &[RoutingRule], +) -> Result { let path_utf8 = percent_encoding::percent_decode_str(path).decode_utf8()?; let base_key = match path_utf8.strip_prefix("/") { Some(bk) => bk, None => return Err(Error::BadRequest("Path must start with a / (slash)".into())), }; - let is_bucket_root = base_key.len() == 0; + + let is_bucket_root = base_key.is_empty(); let is_trailing_slash = path_utf8.ends_with("/"); - match (is_bucket_root, is_trailing_slash) { - // It is not possible to store something at the root of the bucket (ie. empty key), - // the only option is to fetch the index - (true, _) => Ok((index.to_string(), ImplicitRedirect::No)), + let key = if is_bucket_root || is_trailing_slash { + // we can't store anything at the root, so we need to query the index + // if the key end with a slash, we always query the index + format!("{base_key}{index}") + } else { + // if the key doesn't end with `/`, leave it unmodified + base_key.to_string() + }; - // "If you create a folder structure in your bucket, you must have an index document at each level. In each folder, the index document must have the same name, for example, index.html. When a user specifies a URL that resembles a folder lookup, the presence or absence of a trailing slash determines the behavior of the website. For example, the following URL, with a trailing slash, returns the photos/index.html index document." - (false, true) => Ok((format!("{base_key}{index}"), ImplicitRedirect::No)), + let mut routing_rules_iter = routing_rules.iter(); + let key = loop { + let Some(routing_rule) = routing_rules_iter.next() else { + break key; + }; - // "However, if you exclude the trailing slash from the preceding URL, Amazon S3 first looks for an object photos in the bucket. If the photos object is not found, it searches for an index document, photos/index.html. If that document is found, Amazon S3 returns a 302 Found message and points to the photos/ key. For subsequent requests to photos/, Amazon S3 returns photos/index.html. If the index document is not found, Amazon S3 returns an error." - (false, false) => Ok(( - base_key.to_string(), - ImplicitRedirect::To { - key: format!("{base_key}/{index}"), - url: format!("{path}/"), - }, - )), + let Ok(status_code) = StatusCode::from_u16(routing_rule.redirect.http_redirect_code) else { + continue; + }; + if let Some(condition) = &routing_rule.condition { + let suffix = if let Some(prefix) = &condition.prefix { + let Some(suffix) = base_key.strip_prefix(prefix) else { + continue; + }; + Some(suffix) + } else { + None + }; + let mut target = compute_redirect_target(&routing_rule.redirect, suffix); + let query_alternative_key = + status_code == StatusCode::OK || status_code == StatusCode::NOT_FOUND; + let redirect_on_error = + condition.http_error_code == Some(StatusCode::NOT_FOUND.as_u16()); + match (query_alternative_key, redirect_on_error) { + (false, false) => { + return Ok(RoutingResult::Redirect { + url: target, + code: status_code, + }) + } + (true, false) => { + // we need to remove the leading / + target.remove(0); + if status_code == StatusCode::OK { + break target; + } else { + return Ok(RoutingResult::LoadKey { + key: target, + code: status_code, + }); + } + } + (false, true) => { + return Ok(RoutingResult::LoadOrRedirect { + key, + redirect_if_exists: None, + redirect_url: target, + redirect_code: status_code, + }); + } + (true, true) => { + target.remove(0); + return Ok(RoutingResult::LoadOrAlternativeError { + key, + redirect_key: target, + redirect_code: status_code, + }); + } + } + } else { + let target = compute_redirect_target(&routing_rule.redirect, None); + return Ok(RoutingResult::Redirect { + url: target, + code: status_code, + }); + } + }; + + if is_bucket_root || is_trailing_slash { + Ok(RoutingResult::LoadKey { + key, + code: StatusCode::OK, + }) + } else { + Ok(RoutingResult::LoadOrRedirect { + redirect_if_exists: Some(format!("{key}/{index}")), + // we can't use `path` because key might have changed substantially in case of + // routing rules + redirect_url: percent_encoding::percent_encode( + format!("/{key}/").as_bytes(), + PATH_ENCODING_SET, + ) + .to_string(), + key, + redirect_code: StatusCode::FOUND, + }) } } +// per https://url.spec.whatwg.org/#path-percent-encode-set +const PATH_ENCODING_SET: &percent_encoding::AsciiSet = &percent_encoding::CONTROLS + .add(b' ') + .add(b'"') + .add(b'#') + .add(b'<') + .add(b'>') + .add(b'?') + .add(b'`') + .add(b'{') + .add(b'}'); + +fn compute_redirect_target(redirect: &bucket_table::Redirect, suffix: Option<&str>) -> String { + let mut res = String::new(); + if let Some(hostname) = &redirect.hostname { + if let Some(protocol) = &redirect.protocol { + res.push_str(protocol); + res.push_str("://"); + } else { + res.push_str("//"); + } + res.push_str(hostname); + } + res.push('/'); + if let Some(replace_key_prefix) = &redirect.replace_key_prefix { + res.push_str(replace_key_prefix); + if let Some(suffix) = suffix { + res.push_str(suffix) + } + } else if let Some(replace_key) = &redirect.replace_key { + res.push_str(replace_key) + } + res +} + #[cfg(test)] mod tests { use super::*; @@ -473,35 +699,39 @@ mod tests { #[test] fn path_to_keys_test() -> Result<(), Error> { assert_eq!( - path_to_keys("/file%20.jpg", "index.html")?, - ( - "file .jpg".to_string(), - ImplicitRedirect::To { - key: "file .jpg/index.html".to_string(), - url: "/file%20.jpg/".to_string() - } - ) + path_to_keys("/file%20.jpg", "index.html", &[])?, + RoutingResult::LoadOrRedirect { + key: "file .jpg".to_string(), + redirect_url: "/file%20.jpg/".to_string(), + redirect_if_exists: Some("file .jpg/index.html".to_string()), + redirect_code: StatusCode::FOUND, + } ); assert_eq!( - path_to_keys("/%20t/", "index.html")?, - (" t/index.html".to_string(), ImplicitRedirect::No) + path_to_keys("/%20t/", "index.html", &[])?, + RoutingResult::LoadKey { + key: " t/index.html".to_string(), + code: StatusCode::OK + } ); assert_eq!( - path_to_keys("/", "index.html")?, - ("index.html".to_string(), ImplicitRedirect::No) + path_to_keys("/", "index.html", &[])?, + RoutingResult::LoadKey { + key: "index.html".to_string(), + code: StatusCode::OK + } ); assert_eq!( - path_to_keys("/hello", "index.html")?, - ( - "hello".to_string(), - ImplicitRedirect::To { - key: "hello/index.html".to_string(), - url: "/hello/".to_string() - } - ) + path_to_keys("/hello", "index.html", &[])?, + RoutingResult::LoadOrRedirect { + key: "hello".to_string(), + redirect_url: "/hello/".to_string(), + redirect_if_exists: Some("hello/index.html".to_string()), + redirect_code: StatusCode::FOUND, + } ); - assert!(path_to_keys("", "index.html").is_err()); - assert!(path_to_keys("i/am/relative", "index.html").is_err()); + assert!(path_to_keys("", "index.html", &[]).is_err()); + assert!(path_to_keys("i/am/relative", "index.html", &[]).is_err()); Ok(()) } } diff --git a/taplo.toml b/taplo.toml new file mode 100644 index 00000000..05486ef4 --- /dev/null +++ b/taplo.toml @@ -0,0 +1,7 @@ +include = ["**/Cargo.toml", "taplo.toml"] + +[formatting] +indent_string = " " +compact_inline_tables = false +compact_arrays = true +inline_table_expand = false diff --git a/typos.toml b/typos.toml new file mode 100644 index 00000000..f76d002d --- /dev/null +++ b/typos.toml @@ -0,0 +1,6 @@ +[default.extend-words] +PN = "PN" +substituters = "substituters" + +[files] +extend-exclude = ["CHANGELOG.md", "**.js", "**.svg", "doc/talks/*"]