mirror of
https://git.deuxfleurs.fr/Deuxfleurs/garage.git
synced 2026-05-15 13:46:54 -04:00
Compare commits
14 commits
main-v1
...
feat-delay
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
85aca61860 |
||
|
|
46ebfdba66 |
||
|
|
ee8fa687ad |
||
|
|
fa457328c8 |
||
|
|
f34558af07 |
||
|
|
d78e5f8a1b |
||
|
|
3172f875ae |
||
|
|
11a6417d11 |
||
|
|
b0a9e007bd |
||
|
|
904548d1d1 |
||
|
|
6cc79bc696 |
||
|
|
60b3d28f93 |
||
|
|
7fddf0af9c |
||
|
|
78882f4040 |
109 changed files with 1824 additions and 3343 deletions
|
|
@ -1,6 +1,3 @@
|
||||||
labels:
|
|
||||||
nix: "enabled"
|
|
||||||
|
|
||||||
when:
|
when:
|
||||||
event:
|
event:
|
||||||
- push
|
- push
|
||||||
|
|
@ -12,32 +9,27 @@ when:
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: check formatting
|
- name: check formatting
|
||||||
image: nixpkgs/nix:nixos-24.05
|
image: nixpkgs/nix:nixos-22.05
|
||||||
commands:
|
commands:
|
||||||
- nix-build -j4 --attr flakePackages.fmt
|
- nix-shell --attr devShell --run "cargo fmt -- --check"
|
||||||
|
|
||||||
- name: build
|
- name: build
|
||||||
image: nixpkgs/nix:nixos-24.05
|
image: nixpkgs/nix:nixos-22.05
|
||||||
commands:
|
commands:
|
||||||
- nix-build -j4 --attr flakePackages.dev
|
- nix-build -j4 --attr flakePackages.dev
|
||||||
|
|
||||||
- name: unit + func tests (lmdb)
|
- name: unit + func tests (lmdb)
|
||||||
image: nixpkgs/nix:nixos-24.05
|
image: nixpkgs/nix:nixos-22.05
|
||||||
commands:
|
commands:
|
||||||
- nix-build -j4 --attr flakePackages.tests-lmdb
|
- nix-build -j4 --attr flakePackages.tests-lmdb
|
||||||
|
|
||||||
- name: unit + func tests (sqlite)
|
- name: unit + func tests (sqlite)
|
||||||
image: nixpkgs/nix:nixos-24.05
|
image: nixpkgs/nix:nixos-22.05
|
||||||
commands:
|
commands:
|
||||||
- nix-build -j4 --attr flakePackages.tests-sqlite
|
- nix-build -j4 --attr flakePackages.tests-sqlite
|
||||||
|
|
||||||
- name: unit + func tests (fjall)
|
|
||||||
image: nixpkgs/nix:nixos-24.05
|
|
||||||
commands:
|
|
||||||
- nix-build -j4 --attr flakePackages.tests-fjall
|
|
||||||
|
|
||||||
- name: integration tests
|
- name: integration tests
|
||||||
image: nixpkgs/nix:nixos-24.05
|
image: nixpkgs/nix:nixos-22.05
|
||||||
commands:
|
commands:
|
||||||
- nix-build -j4 --attr flakePackages.dev
|
- nix-build -j4 --attr flakePackages.dev
|
||||||
- nix-shell --attr ci --run ./script/test-smoke.sh || (cat /tmp/garage.log; false)
|
- nix-shell --attr ci --run ./script/test-smoke.sh || (cat /tmp/garage.log; false)
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,3 @@
|
||||||
labels:
|
|
||||||
nix: "enabled"
|
|
||||||
|
|
||||||
when:
|
when:
|
||||||
event:
|
event:
|
||||||
- deployment
|
- deployment
|
||||||
|
|
@ -11,7 +8,7 @@ depends_on:
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: refresh-index
|
- name: refresh-index
|
||||||
image: nixpkgs/nix:nixos-24.05
|
image: nixpkgs/nix:nixos-22.05
|
||||||
environment:
|
environment:
|
||||||
AWS_ACCESS_KEY_ID:
|
AWS_ACCESS_KEY_ID:
|
||||||
from_secret: garagehq_aws_access_key_id
|
from_secret: garagehq_aws_access_key_id
|
||||||
|
|
@ -22,7 +19,7 @@ steps:
|
||||||
- nix-shell --attr ci --run "refresh_index"
|
- nix-shell --attr ci --run "refresh_index"
|
||||||
|
|
||||||
- name: multiarch-docker
|
- name: multiarch-docker
|
||||||
image: nixpkgs/nix:nixos-24.05
|
image: nixpkgs/nix:nixos-22.05
|
||||||
environment:
|
environment:
|
||||||
DOCKER_AUTH:
|
DOCKER_AUTH:
|
||||||
from_secret: docker_auth
|
from_secret: docker_auth
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,3 @@
|
||||||
labels:
|
|
||||||
nix: "enabled"
|
|
||||||
|
|
||||||
when:
|
when:
|
||||||
event:
|
event:
|
||||||
- deployment
|
- deployment
|
||||||
|
|
@ -19,17 +16,17 @@ matrix:
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: build
|
- name: build
|
||||||
image: nixpkgs/nix:nixos-24.05
|
image: nixpkgs/nix:nixos-22.05
|
||||||
commands:
|
commands:
|
||||||
- nix-build --attr releasePackages.${ARCH} --argstr git_version ${CI_COMMIT_TAG:-$CI_COMMIT_SHA}
|
- nix-build --attr releasePackages.${ARCH} --argstr git_version ${CI_COMMIT_TAG:-$CI_COMMIT_SHA}
|
||||||
|
|
||||||
- name: check is static binary
|
- name: check is static binary
|
||||||
image: nixpkgs/nix:nixos-24.05
|
image: nixpkgs/nix:nixos-22.05
|
||||||
commands:
|
commands:
|
||||||
- nix-shell --attr ci --run "./script/not-dynamic.sh result/bin/garage"
|
- nix-shell --attr ci --run "./script/not-dynamic.sh result/bin/garage"
|
||||||
|
|
||||||
- name: integration tests
|
- name: integration tests
|
||||||
image: nixpkgs/nix:nixos-24.05
|
image: nixpkgs/nix:nixos-22.05
|
||||||
commands:
|
commands:
|
||||||
- nix-shell --attr ci --run ./script/test-smoke.sh || (cat /tmp/garage.log; false)
|
- nix-shell --attr ci --run ./script/test-smoke.sh || (cat /tmp/garage.log; false)
|
||||||
when:
|
when:
|
||||||
|
|
@ -39,7 +36,7 @@ steps:
|
||||||
ARCH: i386
|
ARCH: i386
|
||||||
|
|
||||||
- name: upgrade tests
|
- name: upgrade tests
|
||||||
image: nixpkgs/nix:nixos-24.05
|
image: nixpkgs/nix:nixos-22.05
|
||||||
commands:
|
commands:
|
||||||
- nix-shell --attr ci --run "./script/test-upgrade.sh v0.8.4 x86_64-unknown-linux-musl" || (cat /tmp/garage.log; false)
|
- nix-shell --attr ci --run "./script/test-upgrade.sh v0.8.4 x86_64-unknown-linux-musl" || (cat /tmp/garage.log; false)
|
||||||
when:
|
when:
|
||||||
|
|
@ -47,7 +44,7 @@ steps:
|
||||||
ARCH: amd64
|
ARCH: amd64
|
||||||
|
|
||||||
- name: push static binary
|
- name: push static binary
|
||||||
image: nixpkgs/nix:nixos-24.05
|
image: nixpkgs/nix:nixos-22.05
|
||||||
environment:
|
environment:
|
||||||
TARGET: "${TARGET}"
|
TARGET: "${TARGET}"
|
||||||
AWS_ACCESS_KEY_ID:
|
AWS_ACCESS_KEY_ID:
|
||||||
|
|
@ -58,7 +55,7 @@ steps:
|
||||||
- nix-shell --attr ci --run "to_s3"
|
- nix-shell --attr ci --run "to_s3"
|
||||||
|
|
||||||
- name: docker build and publish
|
- name: docker build and publish
|
||||||
image: nixpkgs/nix:nixos-24.05
|
image: nixpkgs/nix:nixos-22.05
|
||||||
environment:
|
environment:
|
||||||
DOCKER_PLATFORM: "linux/${ARCH}"
|
DOCKER_PLATFORM: "linux/${ARCH}"
|
||||||
CONTAINER_NAME: "dxflrs/${ARCH}_garage"
|
CONTAINER_NAME: "dxflrs/${ARCH}_garage"
|
||||||
|
|
|
||||||
1966
Cargo.lock
generated
1966
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
47
Cargo.toml
47
Cargo.toml
|
|
@ -24,18 +24,18 @@ default-members = ["src/garage"]
|
||||||
|
|
||||||
# Internal Garage crates
|
# Internal Garage crates
|
||||||
format_table = { version = "0.1.1", path = "src/format-table" }
|
format_table = { version = "0.1.1", path = "src/format-table" }
|
||||||
garage_api_common = { version = "1.3.1", path = "src/api/common" }
|
garage_api_common = { version = "1.1.0", path = "src/api/common" }
|
||||||
garage_api_admin = { version = "1.3.1", path = "src/api/admin" }
|
garage_api_admin = { version = "1.1.0", path = "src/api/admin" }
|
||||||
garage_api_s3 = { version = "1.3.1", path = "src/api/s3" }
|
garage_api_s3 = { version = "1.1.0", path = "src/api/s3" }
|
||||||
garage_api_k2v = { version = "1.3.1", path = "src/api/k2v" }
|
garage_api_k2v = { version = "1.1.0", path = "src/api/k2v" }
|
||||||
garage_block = { version = "1.3.1", path = "src/block" }
|
garage_block = { version = "1.1.0", path = "src/block" }
|
||||||
garage_db = { version = "1.3.1", path = "src/db", default-features = false }
|
garage_db = { version = "1.1.0", path = "src/db", default-features = false }
|
||||||
garage_model = { version = "1.3.1", path = "src/model", default-features = false }
|
garage_model = { version = "1.1.0", path = "src/model", default-features = false }
|
||||||
garage_net = { version = "1.3.1", path = "src/net" }
|
garage_net = { version = "1.1.0", path = "src/net" }
|
||||||
garage_rpc = { version = "1.3.1", path = "src/rpc" }
|
garage_rpc = { version = "1.1.0", path = "src/rpc" }
|
||||||
garage_table = { version = "1.3.1", path = "src/table" }
|
garage_table = { version = "1.1.0", path = "src/table" }
|
||||||
garage_util = { version = "1.3.1", path = "src/util" }
|
garage_util = { version = "1.1.0", path = "src/util" }
|
||||||
garage_web = { version = "1.3.1", path = "src/web" }
|
garage_web = { version = "1.1.0", path = "src/web" }
|
||||||
k2v-client = { version = "0.0.4", path = "src/k2v-client" }
|
k2v-client = { version = "0.0.4", path = "src/k2v-client" }
|
||||||
|
|
||||||
# External crates from crates.io
|
# External crates from crates.io
|
||||||
|
|
@ -52,11 +52,13 @@ chrono = "0.4"
|
||||||
crc32fast = "1.4"
|
crc32fast = "1.4"
|
||||||
crc32c = "0.6"
|
crc32c = "0.6"
|
||||||
crypto-common = "0.1"
|
crypto-common = "0.1"
|
||||||
|
err-derive = "0.3"
|
||||||
gethostname = "0.4"
|
gethostname = "0.4"
|
||||||
git-version = "0.3.4"
|
git-version = "0.3.4"
|
||||||
hex = "0.4"
|
hex = "0.4"
|
||||||
hexdump = "0.1"
|
hexdump = "0.1"
|
||||||
hmac = "0.12"
|
hmac = "0.12"
|
||||||
|
idna = "0.5"
|
||||||
itertools = "0.12"
|
itertools = "0.12"
|
||||||
ipnet = "2.9.0"
|
ipnet = "2.9.0"
|
||||||
lazy_static = "1.4"
|
lazy_static = "1.4"
|
||||||
|
|
@ -64,7 +66,6 @@ md-5 = "0.10"
|
||||||
mktemp = "0.5"
|
mktemp = "0.5"
|
||||||
nix = { version = "0.29", default-features = false, features = ["fs"] }
|
nix = { version = "0.29", default-features = false, features = ["fs"] }
|
||||||
nom = "7.1"
|
nom = "7.1"
|
||||||
parking_lot = "0.12"
|
|
||||||
parse_duration = "2.1"
|
parse_duration = "2.1"
|
||||||
pin-project = "1.0.12"
|
pin-project = "1.0.12"
|
||||||
pnet_datalink = "0.34"
|
pnet_datalink = "0.34"
|
||||||
|
|
@ -83,14 +84,12 @@ pretty_env_logger = "0.5"
|
||||||
structopt = { version = "0.3", default-features = false }
|
structopt = { version = "0.3", default-features = false }
|
||||||
syslog-tracing = "0.3"
|
syslog-tracing = "0.3"
|
||||||
tracing = "0.1"
|
tracing = "0.1"
|
||||||
tracing-journald = "0.3.1"
|
|
||||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||||
|
|
||||||
heed = { version = "0.11", default-features = false, features = ["lmdb"] }
|
heed = { version = "0.11", default-features = false, features = ["lmdb"] }
|
||||||
rusqlite = "0.37"
|
rusqlite = "0.31.0"
|
||||||
r2d2 = "0.8"
|
r2d2 = "0.8"
|
||||||
r2d2_sqlite = "0.31"
|
r2d2_sqlite = "0.24"
|
||||||
fjall = "2.4"
|
|
||||||
|
|
||||||
async-compression = { version = "0.4", features = ["tokio", "zstd"] }
|
async-compression = { version = "0.4", features = ["tokio", "zstd"] }
|
||||||
zstd = { version = "0.13", default-features = false }
|
zstd = { version = "0.13", default-features = false }
|
||||||
|
|
@ -136,7 +135,7 @@ prometheus = "0.13"
|
||||||
aws-sigv4 = { version = "1.1", default-features = false }
|
aws-sigv4 = { version = "1.1", default-features = false }
|
||||||
hyper-rustls = { version = "0.26", default-features = false, features = ["http1", "http2", "ring", "rustls-native-certs"] }
|
hyper-rustls = { version = "0.26", default-features = false, features = ["http1", "http2", "ring", "rustls-native-certs"] }
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
thiserror = "2.0"
|
thiserror = "1.0"
|
||||||
|
|
||||||
# ---- used only as build / dev dependencies ----
|
# ---- used only as build / dev dependencies ----
|
||||||
assert-json-diff = "2.0"
|
assert-json-diff = "2.0"
|
||||||
|
|
@ -146,8 +145,12 @@ aws-smithy-runtime = { version = "1.8", default-features = false, features = ["t
|
||||||
aws-sdk-config = { version = "1.62", default-features = false }
|
aws-sdk-config = { version = "1.62", default-features = false }
|
||||||
aws-sdk-s3 = { version = "1.79", default-features = false, features = ["rt-tokio"] }
|
aws-sdk-s3 = { version = "1.79", default-features = false, features = ["rt-tokio"] }
|
||||||
|
|
||||||
|
[profile.dev]
|
||||||
|
#lto = "thin" # disabled for now, adds 2-4 min to each CI build
|
||||||
|
lto = "off"
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
lto = "thin"
|
lto = true
|
||||||
codegen-units = 16
|
codegen-units = 1
|
||||||
opt-level = 3
|
opt-level = "s"
|
||||||
strip = "debuginfo"
|
strip = true
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ In this section, we cover the following web applications:
|
||||||
| [Mastodon](#mastodon) | ✅ | Natively supported |
|
| [Mastodon](#mastodon) | ✅ | Natively supported |
|
||||||
| [Matrix](#matrix) | ✅ | Tested with `synapse-s3-storage-provider` |
|
| [Matrix](#matrix) | ✅ | Tested with `synapse-s3-storage-provider` |
|
||||||
| [ejabberd](#ejabberd) | ✅ | `mod_s3_upload` |
|
| [ejabberd](#ejabberd) | ✅ | `mod_s3_upload` |
|
||||||
| [Pixelfed](#pixelfed) | ✅ | Natively supported |
|
| [Pixelfed](#pixelfed) | ❓ | Not yet tested |
|
||||||
| [Pleroma](#pleroma) | ❓ | Not yet tested |
|
| [Pleroma](#pleroma) | ❓ | Not yet tested |
|
||||||
| [Lemmy](#lemmy) | ✅ | Supported with pict-rs |
|
| [Lemmy](#lemmy) | ✅ | Supported with pict-rs |
|
||||||
| [Funkwhale](#funkwhale) | ❓ | Not yet tested |
|
| [Funkwhale](#funkwhale) | ❓ | Not yet tested |
|
||||||
|
|
@ -191,10 +191,10 @@ garage key create peertube-key
|
||||||
|
|
||||||
Keep the Key ID and the Secret key in a pad, they will be needed later.
|
Keep the Key ID and the Secret key in a pad, they will be needed later.
|
||||||
|
|
||||||
We need two buckets, one for normal videos (named peertube-videos) and one for webtorrent videos (named peertube-playlists).
|
We need two buckets, one for normal videos (named peertube-video) and one for webtorrent videos (named peertube-playlist).
|
||||||
```bash
|
```bash
|
||||||
garage bucket create peertube-videos
|
garage bucket create peertube-videos
|
||||||
garage bucket create peertube-playlists
|
garage bucket create peertube-playlist
|
||||||
```
|
```
|
||||||
|
|
||||||
Now we allow our key to read and write on these buckets:
|
Now we allow our key to read and write on these buckets:
|
||||||
|
|
@ -253,7 +253,7 @@ object_storage:
|
||||||
proxify_private_files: false
|
proxify_private_files: false
|
||||||
|
|
||||||
streaming_playlists:
|
streaming_playlists:
|
||||||
bucket_name: 'peertube-playlists'
|
bucket_name: 'peertube-playlist'
|
||||||
|
|
||||||
# Keep it empty for our example
|
# Keep it empty for our example
|
||||||
prefix: ''
|
prefix: ''
|
||||||
|
|
|
||||||
|
|
@ -161,49 +161,3 @@ kopia repository validate-provider
|
||||||
|
|
||||||
You can then run all the standard kopia commands: `kopia snapshot create`, `kopia mount`...
|
You can then run all the standard kopia commands: `kopia snapshot create`, `kopia mount`...
|
||||||
Everything should work out-of-the-box.
|
Everything should work out-of-the-box.
|
||||||
|
|
||||||
## Plakar
|
|
||||||
|
|
||||||
Create your key and bucket on Garage server:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
garage key create my-plakar-key
|
|
||||||
garage bucket create plakar-backups
|
|
||||||
garage bucket allow plakar-backups --read --write --key my-plakar-key
|
|
||||||
…
|
|
||||||
```
|
|
||||||
|
|
||||||
On Plakar server, add your Garage as a storage location:
|
|
||||||
```bash
|
|
||||||
plakar store add garageS3 s3://my-garage.tld/plakar-backups \
|
|
||||||
region=garage # Or as you've specified in garage.toml \
|
|
||||||
access_key=<Key ID from "garage key info my-plakar-key"> \
|
|
||||||
secret_access_key=<Secret key from "garage key info my-plakar-key">
|
|
||||||
```
|
|
||||||
|
|
||||||
Then create the repository.
|
|
||||||
```bash
|
|
||||||
plakar at @garageS3 create -plaintext # Unencrypted
|
|
||||||
# or
|
|
||||||
plakar at @garageS3 create #encrypted
|
|
||||||
```
|
|
||||||
|
|
||||||
If you encrypt your backups (Plakar default), you will need to define a strong passphrase. Do not forget to save your password safely. It will be needed to decrypt your backups.
|
|
||||||
|
|
||||||
|
|
||||||
After the repository has been created, check that everything works as expected (that might give an empty result as no file has been added yet, but no error message):
|
|
||||||
```bash
|
|
||||||
plakar at @garageS3 check
|
|
||||||
```
|
|
||||||
|
|
||||||
Now that everything is configured, you can use Garage as your backup storage. For instance, sync it with a local backup storage:
|
|
||||||
```bash
|
|
||||||
$ plakar at ~/backups sync to @garageS3
|
|
||||||
```
|
|
||||||
|
|
||||||
Or list the S3 storage content:
|
|
||||||
```bash
|
|
||||||
$ plakar at @garageS3 ls
|
|
||||||
```
|
|
||||||
|
|
||||||
More information in Plakar documentation: https://www.plakar.io/docs/main/quickstart/
|
|
||||||
|
|
|
||||||
|
|
@ -8,18 +8,18 @@ have published Ansible roles. We list them and compare them below.
|
||||||
|
|
||||||
## Comparison of Ansible roles
|
## Comparison of Ansible roles
|
||||||
|
|
||||||
| Feature | [ansible-role-garage](#zorun-ansible-role-garage) | [garage-docker-ansible-deploy](#moan0s-garage-docker-ansible-deploy) | [eddster ansible-role-garage](#eddster-ansible-role-garage) |
|
| Feature | [ansible-role-garage](#zorun-ansible-role-garage) | [garage-docker-ansible-deploy](#moan0s-garage-docker-ansible-deploy) |
|
||||||
|------------------------------------|---------------------------------------------|---------------------------------------------------------------|---------------------------------|
|
|------------------------------------|---------------------------------------------|---------------------------------------------------------------|
|
||||||
| **Runtime** | Systemd | Docker | Systemd |
|
| **Runtime** | Systemd | Docker |
|
||||||
| **Target OS** | Any Linux | Any Linux | Any Linux |
|
| **Target OS** | Any Linux | Any Linux |
|
||||||
| **Architecture** | amd64, arm64, i686 | amd64, arm64 | arm64, arm, 386, amd64 |
|
| **Architecture** | amd64, arm64, i686 | amd64, arm64 |
|
||||||
| **Additional software** | None | Traefik | Ngnix and Keepalived (optional) |
|
| **Additional software** | None | Traefik |
|
||||||
| **Automatic node connection** | ❌ | ✅ | ✅ |
|
| **Automatic node connection** | ❌ | ✅ |
|
||||||
| **Layout management** | ❌ | ✅ | ✅ |
|
| **Layout management** | ❌ | ✅ |
|
||||||
| **Manage buckets & keys** | ❌ | ✅ (basic) | ✅ |
|
| **Manage buckets & keys** | ❌ | ✅ (basic) |
|
||||||
| **Allow custom Garage config** | ✅ | ❌ | ❌ |
|
| **Allow custom Garage config** | ✅ | ❌ |
|
||||||
| **Facilitate Garage upgrades** | ✅ | ❌ | ✅ |
|
| **Facilitate Garage upgrades** | ✅ | ❌ |
|
||||||
| **Multiple instances on one host** | ✅ | ✅ | ❌ |
|
| **Multiple instances on one host** | ✅ | ✅ |
|
||||||
|
|
||||||
|
|
||||||
## zorun/ansible-role-garage
|
## zorun/ansible-role-garage
|
||||||
|
|
@ -49,15 +49,3 @@ structured DNS names, etc).
|
||||||
|
|
||||||
As a result, this role makes it easier to start with Garage on Ansible,
|
As a result, this role makes it easier to start with Garage on Ansible,
|
||||||
but is less flexible.
|
but is less flexible.
|
||||||
|
|
||||||
## eddster2309/ansible-role-garage
|
|
||||||
|
|
||||||
[Source code](https://github.com/eddster2309/ansible-role-garage), [Ansible galaxy](https://galaxy.ansible.com/ui/standalone/roles/eddster2309/garage/)
|
|
||||||
|
|
||||||
This role is an opinionated but customisable role using the official Garage
|
|
||||||
static binaries and only requires Systemd. As such it should work on any
|
|
||||||
Linux-based host. It includes all the necessary configuration to
|
|
||||||
automatically setup a clustered Garage deployment. Most Garage
|
|
||||||
configuration options are exposed through Ansible variables so while you
|
|
||||||
can't provide a custom config you can get very close. It can optionally
|
|
||||||
install an HA nginx deployment with Keepalived.
|
|
||||||
|
|
|
||||||
|
|
@ -15,10 +15,9 @@ Alpine Linux repositories (available since v3.17):
|
||||||
apk add garage
|
apk add garage
|
||||||
```
|
```
|
||||||
|
|
||||||
The default configuration file is installed to `/etc/garage/garage.toml`. You can run
|
The default configuration file is installed to `/etc/garage.toml`. You can run
|
||||||
Garage using: `rc-service garage start`.
|
Garage using: `rc-service garage start`. If you don't specify `rpc_secret`, it
|
||||||
|
will be automatically replaced with a random string on the first start.
|
||||||
If you don't specify `rpc_secret`, it will be automatically replaced with a random string on the first start.
|
|
||||||
|
|
||||||
Please note that this package is built without Consul discovery, Kubernetes
|
Please note that this package is built without Consul discovery, Kubernetes
|
||||||
discovery, OpenTelemetry exporter, and K2V features (K2V will be enabled once
|
discovery, OpenTelemetry exporter, and K2V features (K2V will be enabled once
|
||||||
|
|
@ -27,7 +26,7 @@ it's stable).
|
||||||
|
|
||||||
## Arch Linux
|
## Arch Linux
|
||||||
|
|
||||||
Garage is available in the official repositories under [extra](https://archlinux.org/packages/extra/x86_64/garage).
|
Garage is available in the [AUR](https://aur.archlinux.org/packages/garage).
|
||||||
|
|
||||||
## FreeBSD
|
## FreeBSD
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ Firstly clone the repository:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git clone https://git.deuxfleurs.fr/Deuxfleurs/garage
|
git clone https://git.deuxfleurs.fr/Deuxfleurs/garage
|
||||||
cd garage/script/helm
|
cd garage/scripts/helm
|
||||||
```
|
```
|
||||||
|
|
||||||
Deploy with default options:
|
Deploy with default options:
|
||||||
|
|
@ -26,13 +26,6 @@ Or deploy with custom values:
|
||||||
helm install --create-namespace --namespace garage garage ./garage -f values.override.yaml
|
helm install --create-namespace --namespace garage garage ./garage -f values.override.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
If you want to manage the CustomResourceDefinition used by garage for its `kubernetes_discovery` outside of the helm chart, add `garage.kubernetesSkipCrd: true` to your custom values and use the kustomization before deploying the helm chart:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
kubectl apply -k ../k8s/crd
|
|
||||||
helm install --create-namespace --namespace garage garage ./garage -f values.override.yaml
|
|
||||||
```
|
|
||||||
|
|
||||||
After deploying, cluster layout must be configured manually as described in [Creating a cluster layout](@/documentation/quick-start/_index.md#creating-a-cluster-layout). Use the following command to access garage CLI:
|
After deploying, cluster layout must be configured manually as described in [Creating a cluster layout](@/documentation/quick-start/_index.md#creating-a-cluster-layout). Use the following command to access garage CLI:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
|
||||||
|
|
@ -96,14 +96,14 @@ to store 2 TB of data in total.
|
||||||
## Get a Docker image
|
## Get a Docker image
|
||||||
|
|
||||||
Our docker image is currently named `dxflrs/garage` and is stored on the [Docker Hub](https://hub.docker.com/r/dxflrs/garage/tags?page=1&ordering=last_updated).
|
Our docker image is currently named `dxflrs/garage` and is stored on the [Docker Hub](https://hub.docker.com/r/dxflrs/garage/tags?page=1&ordering=last_updated).
|
||||||
We encourage you to use a fixed tag (eg. `v1.3.0`) and not the `latest` tag.
|
We encourage you to use a fixed tag (eg. `v1.1.0`) and not the `latest` tag.
|
||||||
For this example, we will use the latest published version at the time of the writing which is `v1.3.0` but it's up to you
|
For this example, we will use the latest published version at the time of the writing which is `v1.1.0` but it's up to you
|
||||||
to check [the most recent versions on the Docker Hub](https://hub.docker.com/r/dxflrs/garage/tags?page=1&ordering=last_updated).
|
to check [the most recent versions on the Docker Hub](https://hub.docker.com/r/dxflrs/garage/tags?page=1&ordering=last_updated).
|
||||||
|
|
||||||
For example:
|
For example:
|
||||||
|
|
||||||
```
|
```
|
||||||
sudo docker pull dxflrs/garage:v1.3.0
|
sudo docker pull dxflrs/garage:v1.1.0
|
||||||
```
|
```
|
||||||
|
|
||||||
## Deploying and configuring Garage
|
## Deploying and configuring Garage
|
||||||
|
|
@ -171,7 +171,7 @@ docker run \
|
||||||
-v /etc/garage.toml:/etc/garage.toml \
|
-v /etc/garage.toml:/etc/garage.toml \
|
||||||
-v /var/lib/garage/meta:/var/lib/garage/meta \
|
-v /var/lib/garage/meta:/var/lib/garage/meta \
|
||||||
-v /var/lib/garage/data:/var/lib/garage/data \
|
-v /var/lib/garage/data:/var/lib/garage/data \
|
||||||
dxflrs/garage:v1.3.0
|
dxflrs/garage:v1.1.0
|
||||||
```
|
```
|
||||||
|
|
||||||
With this command line, Garage should be started automatically at each boot.
|
With this command line, Garage should be started automatically at each boot.
|
||||||
|
|
@ -185,7 +185,7 @@ If you want to use `docker-compose`, you may use the following `docker-compose.y
|
||||||
version: "3"
|
version: "3"
|
||||||
services:
|
services:
|
||||||
garage:
|
garage:
|
||||||
image: dxflrs/garage:v1.3.0
|
image: dxflrs/garage:v1.1.0
|
||||||
network_mode: "host"
|
network_mode: "host"
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
volumes:
|
volumes:
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,6 @@ StateDirectory=garage
|
||||||
DynamicUser=true
|
DynamicUser=true
|
||||||
ProtectHome=true
|
ProtectHome=true
|
||||||
NoNewPrivileges=true
|
NoNewPrivileges=true
|
||||||
LimitNOFILE=42000
|
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
|
|
|
||||||
|
|
@ -132,7 +132,7 @@ docker run \
|
||||||
-v /path/to/garage.toml:/etc/garage.toml \
|
-v /path/to/garage.toml:/etc/garage.toml \
|
||||||
-v /path/to/garage/meta:/var/lib/garage/meta \
|
-v /path/to/garage/meta:/var/lib/garage/meta \
|
||||||
-v /path/to/garage/data:/var/lib/garage/data \
|
-v /path/to/garage/data:/var/lib/garage/data \
|
||||||
dxflrs/garage:v1.3.0
|
dxflrs/garage:v1.1.0
|
||||||
```
|
```
|
||||||
|
|
||||||
Under Linux, you can substitute `--network host` for `-p 3900:3900 -p 3901:3901 -p 3902:3902 -p 3903:3903`
|
Under Linux, you can substitute `--network host` for `-p 3900:3900 -p 3901:3901 -p 3902:3902 -p 3903:3903`
|
||||||
|
|
@ -182,12 +182,11 @@ ID Hostname Address Tag Zone Capacit
|
||||||
## Creating a cluster layout
|
## Creating a cluster layout
|
||||||
|
|
||||||
Creating a cluster layout for a Garage deployment means informing Garage
|
Creating a cluster layout for a Garage deployment means informing Garage
|
||||||
of the disk space available on each node of the cluster, `-c`,
|
of the disk space available on each node of the cluster
|
||||||
as well as the name of the zone (e.g. datacenter), `-z`, each machine is located in.
|
as well as the zone (e.g. datacenter) each machine is located in.
|
||||||
|
|
||||||
For our test deployment, we have only one node with a zone named `dc1` and a
|
For our test deployment, we are using only one node. The way in which we configure
|
||||||
capacity of `1G`, though the capacity is ignored for a single node deployment
|
it does not matter, you can simply write:
|
||||||
and can be changed later when adding new nodes.
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
garage layout assign -z dc1 -c 1G <node_id>
|
garage layout assign -z dc1 -c 1G <node_id>
|
||||||
|
|
|
||||||
|
|
@ -24,8 +24,7 @@ db_engine = "lmdb"
|
||||||
|
|
||||||
block_size = "1M"
|
block_size = "1M"
|
||||||
block_ram_buffer_max = "256MiB"
|
block_ram_buffer_max = "256MiB"
|
||||||
block_max_concurrent_reads = 16
|
|
||||||
block_max_concurrent_writes_per_request =10
|
|
||||||
lmdb_map_size = "1T"
|
lmdb_map_size = "1T"
|
||||||
|
|
||||||
compression_level = 1
|
compression_level = 1
|
||||||
|
|
@ -47,7 +46,6 @@ bootstrap_peers = [
|
||||||
"212fd62eeaca72c122b45a7f4fa0f55e012aa5e24ac384a72a3016413fa724ff@[fc00:F::1]:3901",
|
"212fd62eeaca72c122b45a7f4fa0f55e012aa5e24ac384a72a3016413fa724ff@[fc00:F::1]:3901",
|
||||||
]
|
]
|
||||||
|
|
||||||
allow_punycode = false
|
|
||||||
|
|
||||||
[consul_discovery]
|
[consul_discovery]
|
||||||
api = "catalog"
|
api = "catalog"
|
||||||
|
|
@ -94,32 +92,29 @@ The following gives details about each available configuration option.
|
||||||
|
|
||||||
[Environment variables](#env_variables).
|
[Environment variables](#env_variables).
|
||||||
|
|
||||||
Top-level configuration options, in alphabetical order:
|
Top-level configuration options:
|
||||||
[`allow_punycode`](#allow_punycode),
|
|
||||||
[`allow_world_readable_secrets`](#allow_world_readable_secrets),
|
[`allow_world_readable_secrets`](#allow_world_readable_secrets),
|
||||||
[`block_max_concurrent_reads`](`block_max_concurrent_reads),
|
|
||||||
[`block_ram_buffer_max`](#block_ram_buffer_max),
|
[`block_ram_buffer_max`](#block_ram_buffer_max),
|
||||||
[`block_max_concurrent_writes_per_request`](#block_max_concurrent_writes_per_request),
|
|
||||||
[`block_size`](#block_size),
|
[`block_size`](#block_size),
|
||||||
[`bootstrap_peers`](#bootstrap_peers),
|
[`bootstrap_peers`](#bootstrap_peers),
|
||||||
[`compression_level`](#compression_level),
|
[`compression_level`](#compression_level),
|
||||||
[`consistency_mode`](#consistency_mode),
|
|
||||||
[`data_dir`](#data_dir),
|
[`data_dir`](#data_dir),
|
||||||
[`data_fsync`](#data_fsync),
|
[`data_fsync`](#data_fsync),
|
||||||
[`db_engine`](#db_engine),
|
[`db_engine`](#db_engine),
|
||||||
[`disable_scrub`](#disable_scrub),
|
[`disable_scrub`](#disable_scrub),
|
||||||
|
[`use_local_tz`](#use_local_tz),
|
||||||
[`lmdb_map_size`](#lmdb_map_size),
|
[`lmdb_map_size`](#lmdb_map_size),
|
||||||
[`metadata_auto_snapshot_interval`](#metadata_auto_snapshot_interval),
|
[`metadata_auto_snapshot_interval`](#metadata_auto_snapshot_interval),
|
||||||
[`metadata_dir`](#metadata_dir),
|
[`metadata_dir`](#metadata_dir),
|
||||||
[`metadata_fsync`](#metadata_fsync),
|
[`metadata_fsync`](#metadata_fsync),
|
||||||
[`metadata_snapshots_dir`](#metadata_snapshots_dir),
|
[`metadata_snapshots_dir`](#metadata_snapshots_dir),
|
||||||
[`replication_factor`](#replication_factor),
|
[`replication_factor`](#replication_factor),
|
||||||
|
[`consistency_mode`](#consistency_mode),
|
||||||
[`rpc_bind_addr`](#rpc_bind_addr),
|
[`rpc_bind_addr`](#rpc_bind_addr),
|
||||||
[`rpc_bind_outgoing`](#rpc_bind_outgoing),
|
[`rpc_bind_outgoing`](#rpc_bind_outgoing),
|
||||||
[`rpc_public_addr`](#rpc_public_addr),
|
[`rpc_public_addr`](#rpc_public_addr),
|
||||||
[`rpc_public_addr_subnet`](#rpc_public_addr_subnet)
|
[`rpc_public_addr_subnet`](#rpc_public_addr_subnet)
|
||||||
[`rpc_secret`/`rpc_secret_file`](#rpc_secret),
|
[`rpc_secret`/`rpc_secret_file`](#rpc_secret).
|
||||||
[`use_local_tz`](#use_local_tz).
|
|
||||||
|
|
||||||
The `[consul_discovery]` section:
|
The `[consul_discovery]` section:
|
||||||
[`api`](#consul_api),
|
[`api`](#consul_api),
|
||||||
|
|
@ -156,17 +151,13 @@ The `[admin]` section:
|
||||||
|
|
||||||
### Environment variables {#env_variables}
|
### Environment variables {#env_variables}
|
||||||
|
|
||||||
The following configuration parameters must be specified as environment variables,
|
The following configuration parameter must be specified as an environment
|
||||||
they do not exist in the configuration file:
|
variable, it does not exist in the configuration file:
|
||||||
|
|
||||||
- `GARAGE_LOG_TO_SYSLOG` (since `v0.9.4`): set this to `1` or `true` to make the
|
- `GARAGE_LOG_TO_SYSLOG` (since `v0.9.4`): set this to `1` or `true` to make the
|
||||||
Garage daemon send its logs to `syslog` (using the libc `syslog` function)
|
Garage daemon send its logs to `syslog` (using the libc `syslog` function)
|
||||||
instead of printing to stderr.
|
instead of printing to stderr.
|
||||||
|
|
||||||
- `GARAGE_LOG_TO_JOURNALD` (since `v1.2.0`): set this to `1` or `true` to make the
|
|
||||||
Garage daemon send its logs to `journald` (using the native protocol of `systemd-journald`)
|
|
||||||
instead of printing to stderr.
|
|
||||||
|
|
||||||
The following environment variables can be used to override the corresponding
|
The following environment variables can be used to override the corresponding
|
||||||
values in the configuration file:
|
values in the configuration file:
|
||||||
|
|
||||||
|
|
@ -178,7 +169,7 @@ values in the configuration file:
|
||||||
|
|
||||||
### Top-level configuration options
|
### Top-level configuration options
|
||||||
|
|
||||||
#### `replication_factor` (since `v1.0.0`) {#replication_factor}
|
#### `replication_factor` {#replication_factor}
|
||||||
|
|
||||||
The replication factor can be any positive integer smaller or equal the node count in your cluster.
|
The replication factor can be any positive integer smaller or equal the node count in your cluster.
|
||||||
The chosen replication factor has a big impact on the cluster's failure tolerancy and performance characteristics.
|
The chosen replication factor has a big impact on the cluster's failure tolerancy and performance characteristics.
|
||||||
|
|
@ -226,7 +217,7 @@ is in progress. In theory, no data should be lost as rebalancing is a
|
||||||
routine operation for Garage, although we cannot guarantee you that everything
|
routine operation for Garage, although we cannot guarantee you that everything
|
||||||
will go right in such an extreme scenario.
|
will go right in such an extreme scenario.
|
||||||
|
|
||||||
#### `consistency_mode` (since `v1.0.0`) {#consistency_mode}
|
#### `consistency_mode` {#consistency_mode}
|
||||||
|
|
||||||
The consistency mode setting determines the read and write behaviour of your cluster.
|
The consistency mode setting determines the read and write behaviour of your cluster.
|
||||||
|
|
||||||
|
|
@ -336,7 +327,6 @@ Since `v0.8.0`, Garage can use alternative storage backends as follows:
|
||||||
| --------- | ----------------- | ------------- |
|
| --------- | ----------------- | ------------- |
|
||||||
| [LMDB](https://www.symas.com/lmdb) (since `v0.8.0`, default since `v0.9.0`) | `"lmdb"` | `<metadata_dir>/db.lmdb/` |
|
| [LMDB](https://www.symas.com/lmdb) (since `v0.8.0`, default since `v0.9.0`) | `"lmdb"` | `<metadata_dir>/db.lmdb/` |
|
||||||
| [Sqlite](https://sqlite.org) (since `v0.8.0`) | `"sqlite"` | `<metadata_dir>/db.sqlite` |
|
| [Sqlite](https://sqlite.org) (since `v0.8.0`) | `"sqlite"` | `<metadata_dir>/db.sqlite` |
|
||||||
| [Fjall](https://github.com/fjall-rs/fjall) (**experimental support** since `v1.3.0`) | `"fjall"` | `<metadata_dir>/db.fjall/` |
|
|
||||||
| [Sled](https://sled.rs) (old default, removed since `v1.0`) | `"sled"` | `<metadata_dir>/db/` |
|
| [Sled](https://sled.rs) (old default, removed since `v1.0`) | `"sled"` | `<metadata_dir>/db/` |
|
||||||
|
|
||||||
Sled was supported until Garage v0.9.x, and was removed in Garage v1.0.
|
Sled was supported until Garage v0.9.x, and was removed in Garage v1.0.
|
||||||
|
|
@ -373,14 +363,6 @@ LMDB works very well, but is known to have the following limitations:
|
||||||
so it is not the best choice for high-performance storage clusters,
|
so it is not the best choice for high-performance storage clusters,
|
||||||
but it should work fine in many cases.
|
but it should work fine in many cases.
|
||||||
|
|
||||||
- Fjall: a storage engine based on LSM trees, which theoretically allow for
|
|
||||||
higher write throughput than other storage engines that are based on B-trees.
|
|
||||||
Using Fjall could potentially improve Garage's performance significantly in
|
|
||||||
write-heavy workloads. **Support for Fjall is experimental at this point**,
|
|
||||||
we have added it to Garage for evaluation purposes only. **Do not use it for
|
|
||||||
production-critical workloads.**
|
|
||||||
|
|
||||||
|
|
||||||
It is possible to convert Garage's metadata directory from one format to another
|
It is possible to convert Garage's metadata directory from one format to another
|
||||||
using the `garage convert-db` command, which should be used as follows:
|
using the `garage convert-db` command, which should be used as follows:
|
||||||
|
|
||||||
|
|
@ -418,7 +400,6 @@ Here is how this option impacts the different database engines:
|
||||||
|----------|------------------------------------|-------------------------------|
|
|----------|------------------------------------|-------------------------------|
|
||||||
| Sqlite | `PRAGMA synchronous = OFF` | `PRAGMA synchronous = NORMAL` |
|
| Sqlite | `PRAGMA synchronous = OFF` | `PRAGMA synchronous = NORMAL` |
|
||||||
| LMDB | `MDB_NOMETASYNC` + `MDB_NOSYNC` | `MDB_NOMETASYNC` |
|
| LMDB | `MDB_NOMETASYNC` + `MDB_NOSYNC` | `MDB_NOMETASYNC` |
|
||||||
| Fjall | default options | not supported |
|
|
||||||
|
|
||||||
Note that the Sqlite database is always ran in `WAL` mode (`PRAGMA journal_mode = WAL`).
|
Note that the Sqlite database is always ran in `WAL` mode (`PRAGMA journal_mode = WAL`).
|
||||||
|
|
||||||
|
|
@ -525,37 +506,6 @@ node.
|
||||||
|
|
||||||
The default value is 256MiB.
|
The default value is 256MiB.
|
||||||
|
|
||||||
#### `block_max_concurrent_reads` (since `v1.3.0` / `v2.1.0`) {#block_max_concurrent_reads}
|
|
||||||
|
|
||||||
The maximum number of blocks (individual files in the data directory) open
|
|
||||||
simultaneously for reading.
|
|
||||||
|
|
||||||
Reducing this number does not limit the number of data blocks that can be
|
|
||||||
transferred through the network simultaneously. This mechanism was just added
|
|
||||||
as a backpressure mechanism for HDD read speed: it helps avoid a situation
|
|
||||||
where too many requests are coming in and Garage is reading too many block
|
|
||||||
files simultaneously, thus not making timely progress on any of the reads.
|
|
||||||
|
|
||||||
When a request to read a data block comes in through the network, the requests
|
|
||||||
awaits for one of the `block_max_concurrent_reads` slots to be available
|
|
||||||
(internally implemented using a Semaphore object). Once it acquired a read
|
|
||||||
slot, it reads the entire block file to RAM and frees the slot as soon as the
|
|
||||||
block file is finished reading. Only after the slot is released will the
|
|
||||||
block's data start being transferred over the network. If the request fails to
|
|
||||||
acquire a reading slot wihtin 15 seconds, it fails with a timeout error.
|
|
||||||
Timeout events can be monitored through the `block_read_semaphore_timeouts`
|
|
||||||
metric in Prometheus: a non-zero number of such events indicates an I/O
|
|
||||||
bottleneck on HDD read speed.
|
|
||||||
|
|
||||||
|
|
||||||
#### `block_max_concurrent_writes_per_request` (since `v2.1.0`) {#block_max_concurrent_writes_per_request}
|
|
||||||
|
|
||||||
This parameter is designed to adapt to the concurrent write performance of
|
|
||||||
different storage media.Maximum number of parallel block writes per put request
|
|
||||||
Higher values improve throughput but increase memory usage.
|
|
||||||
|
|
||||||
Default: 3, Recommended: 10-30 for NVMe, 3-10 for HDD
|
|
||||||
|
|
||||||
#### `lmdb_map_size` {#lmdb_map_size}
|
#### `lmdb_map_size` {#lmdb_map_size}
|
||||||
|
|
||||||
This parameters can be used to set the map size used by LMDB,
|
This parameters can be used to set the map size used by LMDB,
|
||||||
|
|
@ -654,7 +604,7 @@ be obtained by running `garage node id` and then included directly in the
|
||||||
key will be returned by `garage node id` and you will have to add the IP
|
key will be returned by `garage node id` and you will have to add the IP
|
||||||
yourself.
|
yourself.
|
||||||
|
|
||||||
#### `allow_world_readable_secrets` or `GARAGE_ALLOW_WORLD_READABLE_SECRETS` (env) {#allow_world_readable_secrets}
|
### `allow_world_readable_secrets` or `GARAGE_ALLOW_WORLD_READABLE_SECRETS` (env) {#allow_world_readable_secrets}
|
||||||
|
|
||||||
Garage checks the permissions of your secret files to make sure they're not
|
Garage checks the permissions of your secret files to make sure they're not
|
||||||
world-readable. In some cases, the check might fail and consider your files as
|
world-readable. In some cases, the check might fail and consider your files as
|
||||||
|
|
@ -666,13 +616,6 @@ permission verification.
|
||||||
Alternatively, you can set the `GARAGE_ALLOW_WORLD_READABLE_SECRETS`
|
Alternatively, you can set the `GARAGE_ALLOW_WORLD_READABLE_SECRETS`
|
||||||
environment variable to `true` to bypass the permissions check.
|
environment variable to `true` to bypass the permissions check.
|
||||||
|
|
||||||
#### `allow_punycode` {#allow_punycode}
|
|
||||||
|
|
||||||
Allow creating buckets with names containing punycode. When used for buckets served
|
|
||||||
as websites, this allows using almost any unicode character in the domain name.
|
|
||||||
|
|
||||||
Default to `false`.
|
|
||||||
|
|
||||||
### The `[consul_discovery]` section
|
### The `[consul_discovery]` section
|
||||||
|
|
||||||
Garage supports discovering other nodes of the cluster using Consul. For this
|
Garage supports discovering other nodes of the cluster using Consul. For this
|
||||||
|
|
|
||||||
|
|
@ -23,17 +23,17 @@ Feel free to open a PR to suggest fixes this table. Minio is missing because the
|
||||||
- 2022-05-25 - Many Ceph S3 endpoints are not documented but implemented. Following a notification from the Ceph community, we added them.
|
- 2022-05-25 - Many Ceph S3 endpoints are not documented but implemented. Following a notification from the Ceph community, we added them.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## High-level features
|
## High-level features
|
||||||
|
|
||||||
| Feature | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
|
| Feature | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
|
||||||
|------------------------------|----------------------------------|-----------------|---------------|---------|-----|
|
|------------------------------|----------------------------------|-----------------|---------------|---------|-----|
|
||||||
| [signature v2](https://docs.aws.amazon.com/AmazonS3/latest/API/Appendix-Sigv2.html) (deprecated) | ❌ Missing | ✅ | ✅ | ✅ | ✅ |
|
| [signature v2](https://docs.aws.amazon.com/general/latest/gr/signature-version-2.html) (deprecated) | ❌ Missing | ✅ | ✅ | ✅ | ✅ |
|
||||||
| [signature v4](https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-authenticating-requests.html) | ✅ Implemented | ✅ | ✅ | ❌ | ✅ |
|
| [signature v4](https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-authenticating-requests.html) | ✅ Implemented | ✅ | ✅ | ❌ | ✅ |
|
||||||
| [URL path-style](https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#path-style-access) (eg. `host.tld/bucket/key`) | ✅ Implemented | ✅ | ✅ | ❓| ✅ |
|
| [URL path-style](https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#path-style-access) (eg. `host.tld/bucket/key`) | ✅ Implemented | ✅ | ✅ | ❓| ✅ |
|
||||||
| [URL vhost-style](https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#virtual-hosted-style-access) URL (eg. `bucket.host.tld/key`) | ✅ Implemented | ❌| ✅| ✅ | ✅ |
|
| [URL vhost-style](https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#virtual-hosted-style-access) URL (eg. `bucket.host.tld/key`) | ✅ Implemented | ❌| ✅| ✅ | ✅ |
|
||||||
| [Presigned URLs](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ShareObjectPreSignedURL.html) | ✅ Implemented | ❌| ✅ | ✅ | ✅(❓) |
|
| [Presigned URLs](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ShareObjectPreSignedURL.html) | ✅ Implemented | ❌| ✅ | ✅ | ✅(❓) |
|
||||||
| [SSE-C encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html) | ✅ Implemented | ❓ | ✅ | ❌ | ✅ |
|
| [SSE-C encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html) | ✅ Implemented | ❓ | ✅ | ❌ | ✅ |
|
||||||
| [Bucket versioning](https://docs.aws.amazon.com/AmazonS3/latest/userguide/Versioning.html) | ❌ Missing | ✅ | ✅ | ❌ | ✅ |
|
|
||||||
|
|
||||||
*Note:* OpenIO does not says if it supports presigned URLs. Because it is part
|
*Note:* OpenIO does not says if it supports presigned URLs. Because it is part
|
||||||
of signature v4 and they claim they support it without additional precisions,
|
of signature v4 and they claim they support it without additional precisions,
|
||||||
|
|
|
||||||
|
|
@ -70,7 +70,7 @@ Example response body:
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"node": "b10c110e4e854e5aa3f4637681befac755154b20059ec163254ddbfae86b09df",
|
"node": "b10c110e4e854e5aa3f4637681befac755154b20059ec163254ddbfae86b09df",
|
||||||
"garageVersion": "v1.3.0",
|
"garageVersion": "v1.1.0",
|
||||||
"garageFeatures": [
|
"garageFeatures": [
|
||||||
"k2v",
|
"k2v",
|
||||||
"lmdb",
|
"lmdb",
|
||||||
|
|
|
||||||
16
flake.lock
generated
16
flake.lock
generated
|
|
@ -50,17 +50,17 @@
|
||||||
},
|
},
|
||||||
"nixpkgs": {
|
"nixpkgs": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1763977559,
|
"lastModified": 1736692550,
|
||||||
"narHash": "sha256-g4MKqsIRy5yJwEsI+fYODqLUnAqIY4kZai0nldAP6EM=",
|
"narHash": "sha256-7tk8xH+g0sJkKLTJFOxphJxxOjMDFMWv24nXslaU2ro=",
|
||||||
"owner": "NixOS",
|
"owner": "NixOS",
|
||||||
"repo": "nixpkgs",
|
"repo": "nixpkgs",
|
||||||
"rev": "cfe2c7d5b5d3032862254e68c37a6576b633d632",
|
"rev": "7c4869c47090dd7f9f1bdfb49a22aea026996815",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"owner": "NixOS",
|
"owner": "NixOS",
|
||||||
"repo": "nixpkgs",
|
"repo": "nixpkgs",
|
||||||
"rev": "cfe2c7d5b5d3032862254e68c37a6576b633d632",
|
"rev": "7c4869c47090dd7f9f1bdfb49a22aea026996815",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
@ -80,17 +80,17 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1763952169,
|
"lastModified": 1738549608,
|
||||||
"narHash": "sha256-+PeDBD8P+NKauH+w7eO/QWCIp8Cx4mCfWnh9sJmy9CM=",
|
"narHash": "sha256-GdyT9QEUSx5k/n8kILuNy83vxxdyUfJ8jL5mMpQZWfw=",
|
||||||
"owner": "oxalica",
|
"owner": "oxalica",
|
||||||
"repo": "rust-overlay",
|
"repo": "rust-overlay",
|
||||||
"rev": "ab726555a9a72e6dc80649809147823a813fa95b",
|
"rev": "35c6f8c4352f995ecd53896200769f80a3e8f22d",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"owner": "oxalica",
|
"owner": "oxalica",
|
||||||
"repo": "rust-overlay",
|
"repo": "rust-overlay",
|
||||||
"rev": "ab726555a9a72e6dc80649809147823a813fa95b",
|
"rev": "35c6f8c4352f995ecd53896200769f80a3e8f22d",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
|
||||||
19
flake.nix
19
flake.nix
|
|
@ -2,13 +2,13 @@
|
||||||
description =
|
description =
|
||||||
"Garage, an S3-compatible distributed object store for self-hosted deployments";
|
"Garage, an S3-compatible distributed object store for self-hosted deployments";
|
||||||
|
|
||||||
# Nixpkgs 25.05 as of 2025-11-24
|
# Nixpkgs 24.11 as of 2025-01-12
|
||||||
inputs.nixpkgs.url =
|
inputs.nixpkgs.url =
|
||||||
"github:NixOS/nixpkgs/cfe2c7d5b5d3032862254e68c37a6576b633d632";
|
"github:NixOS/nixpkgs/7c4869c47090dd7f9f1bdfb49a22aea026996815";
|
||||||
|
|
||||||
# Rust overlay as of 2025-11-24
|
# Rust overlay as of 2025-02-03
|
||||||
inputs.rust-overlay.url =
|
inputs.rust-overlay.url =
|
||||||
"github:oxalica/rust-overlay/ab726555a9a72e6dc80649809147823a813fa95b";
|
"github:oxalica/rust-overlay/35c6f8c4352f995ecd53896200769f80a3e8f22d";
|
||||||
inputs.rust-overlay.inputs.nixpkgs.follows = "nixpkgs";
|
inputs.rust-overlay.inputs.nixpkgs.follows = "nixpkgs";
|
||||||
|
|
||||||
inputs.crane.url = "github:ipetkov/crane";
|
inputs.crane.url = "github:ipetkov/crane";
|
||||||
|
|
@ -30,10 +30,6 @@
|
||||||
inherit system nixpkgs crane rust-overlay extraTestEnv;
|
inherit system nixpkgs crane rust-overlay extraTestEnv;
|
||||||
release = false;
|
release = false;
|
||||||
}).garage-test;
|
}).garage-test;
|
||||||
lints = (compile {
|
|
||||||
inherit system nixpkgs crane rust-overlay;
|
|
||||||
release = false;
|
|
||||||
});
|
|
||||||
in
|
in
|
||||||
{
|
{
|
||||||
packages = {
|
packages = {
|
||||||
|
|
@ -57,13 +53,6 @@
|
||||||
tests-sqlite = testWith {
|
tests-sqlite = testWith {
|
||||||
GARAGE_TEST_INTEGRATION_DB_ENGINE = "sqlite";
|
GARAGE_TEST_INTEGRATION_DB_ENGINE = "sqlite";
|
||||||
};
|
};
|
||||||
tests-fjall = testWith {
|
|
||||||
GARAGE_TEST_INTEGRATION_DB_ENGINE = "fjall";
|
|
||||||
};
|
|
||||||
|
|
||||||
# lints (fmt, clippy)
|
|
||||||
fmt = lints.garage-cargo-fmt;
|
|
||||||
clippy = lints.garage-cargo-clippy;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
# ---- developpment shell, for making native builds only ----
|
# ---- developpment shell, for making native builds only ----
|
||||||
|
|
|
||||||
|
|
@ -48,7 +48,7 @@ let
|
||||||
|
|
||||||
inherit (pkgs) lib stdenv;
|
inherit (pkgs) lib stdenv;
|
||||||
|
|
||||||
toolchainFn = (p: p.rust-bin.stable."1.91.0".default.override {
|
toolchainFn = (p: p.rust-bin.stable."1.82.0".default.override {
|
||||||
targets = lib.optionals (target != null) [ rustTarget ];
|
targets = lib.optionals (target != null) [ rustTarget ];
|
||||||
extensions = [
|
extensions = [
|
||||||
"rust-src"
|
"rust-src"
|
||||||
|
|
@ -68,13 +68,12 @@ let
|
||||||
rootFeatures = if features != null then
|
rootFeatures = if features != null then
|
||||||
features
|
features
|
||||||
else
|
else
|
||||||
([ "bundled-libs" "lmdb" "sqlite" "fjall" "k2v" ] ++ (lib.optionals release [
|
([ "bundled-libs" "lmdb" "sqlite" "k2v" ] ++ (lib.optionals release [
|
||||||
"consul-discovery"
|
"consul-discovery"
|
||||||
"kubernetes-discovery"
|
"kubernetes-discovery"
|
||||||
"metrics"
|
"metrics"
|
||||||
"telemetry-otlp"
|
"telemetry-otlp"
|
||||||
"syslog"
|
"syslog"
|
||||||
"journald"
|
|
||||||
]));
|
]));
|
||||||
|
|
||||||
featuresStr = lib.concatStringsSep "," rootFeatures;
|
featuresStr = lib.concatStringsSep "," rootFeatures;
|
||||||
|
|
@ -190,15 +189,4 @@ in rec {
|
||||||
pkgs.cacert
|
pkgs.cacert
|
||||||
];
|
];
|
||||||
} // extraTestEnv);
|
} // extraTestEnv);
|
||||||
|
|
||||||
# ---- source code linting ----
|
|
||||||
|
|
||||||
garage-cargo-fmt = craneLib.cargoFmt (commonArgs // {
|
|
||||||
cargoExtraArgs = "";
|
|
||||||
});
|
|
||||||
|
|
||||||
garage-cargo-clippy = craneLib.cargoClippy (commonArgs // {
|
|
||||||
cargoArtifacts = garage-deps;
|
|
||||||
cargoClippyExtraArgs = "--all-targets -- -D warnings";
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
export AWS_ACCESS_KEY_ID=`cat /tmp/garage.s3 |cut -d' ' -f1`
|
export AWS_ACCESS_KEY_ID=`cat /tmp/garage.s3 |cut -d' ' -f1`
|
||||||
export AWS_SECRET_ACCESS_KEY=`cat /tmp/garage.s3 |cut -d' ' -f2`
|
export AWS_SECRET_ACCESS_KEY=`cat /tmp/garage.s3 |cut -d' ' -f2`
|
||||||
export AWS_DEFAULT_REGION='garage'
|
export AWS_DEFAULT_REGION='garage'
|
||||||
export AWS_REQUEST_CHECKSUM_CALCULATION='when_required'
|
|
||||||
# FUTUREWORK: set AWS_ENDPOINT_URL instead, once nixpkgs bumps awscli to >=2.13.0.
|
# FUTUREWORK: set AWS_ENDPOINT_URL instead, once nixpkgs bumps awscli to >=2.13.0.
|
||||||
function aws { command aws --endpoint-url http://127.0.0.1:3911 $@ ; }
|
function aws { command aws --endpoint-url http://127.0.0.1:3911 $@ ; }
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,8 @@ apiVersion: v2
|
||||||
name: garage
|
name: garage
|
||||||
description: S3-compatible object store for small self-hosted geo-distributed deployments
|
description: S3-compatible object store for small self-hosted geo-distributed deployments
|
||||||
type: application
|
type: application
|
||||||
version: 0.7.3
|
version: 0.7.0
|
||||||
appVersion: "v1.3.1"
|
appVersion: "v1.1.0"
|
||||||
home: https://garagehq.deuxfleurs.fr/
|
home: https://garagehq.deuxfleurs.fr/
|
||||||
icon: https://garagehq.deuxfleurs.fr/images/garage-logo.svg
|
icon: https://garagehq.deuxfleurs.fr/images/garage-logo.svg
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
# garage
|
# garage
|
||||||
|
|
||||||
  
|
  
|
||||||
|
|
||||||
S3-compatible object store for small self-hosted geo-distributed deployments
|
S3-compatible object store for small self-hosted geo-distributed deployments
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,10 +4,6 @@ metadata:
|
||||||
name: {{ include "garage.fullname" . }}
|
name: {{ include "garage.fullname" . }}
|
||||||
labels:
|
labels:
|
||||||
{{- include "garage.labels" . | nindent 4 }}
|
{{- include "garage.labels" . | nindent 4 }}
|
||||||
{{- with .Values.service.annotations }}
|
|
||||||
annotations:
|
|
||||||
{{- toYaml . | nindent 4 }}
|
|
||||||
{{- end }}
|
|
||||||
spec:
|
spec:
|
||||||
type: {{ .Values.service.type }}
|
type: {{ .Values.service.type }}
|
||||||
ports:
|
ports:
|
||||||
|
|
|
||||||
|
|
@ -124,8 +124,6 @@ service:
|
||||||
# - NodePort (+ Ingress)
|
# - NodePort (+ Ingress)
|
||||||
# - LoadBalancer
|
# - LoadBalancer
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
# -- Annotations to add to the service
|
|
||||||
annotations: {}
|
|
||||||
s3:
|
s3:
|
||||||
api:
|
api:
|
||||||
port: 3900
|
port: 3900
|
||||||
|
|
|
||||||
|
|
@ -1,43 +0,0 @@
|
||||||
apiVersion: apiextensions.k8s.io/v1
|
|
||||||
kind: CustomResourceDefinition
|
|
||||||
metadata:
|
|
||||||
name: garagenodes.deuxfleurs.fr
|
|
||||||
spec:
|
|
||||||
conversion:
|
|
||||||
strategy: None
|
|
||||||
group: deuxfleurs.fr
|
|
||||||
names:
|
|
||||||
kind: GarageNode
|
|
||||||
listKind: GarageNodeList
|
|
||||||
plural: garagenodes
|
|
||||||
singular: garagenode
|
|
||||||
scope: Namespaced
|
|
||||||
versions:
|
|
||||||
- name: v1
|
|
||||||
schema:
|
|
||||||
openAPIV3Schema:
|
|
||||||
description: Auto-generated derived type for Node via `CustomResource`
|
|
||||||
properties:
|
|
||||||
spec:
|
|
||||||
properties:
|
|
||||||
address:
|
|
||||||
format: ip
|
|
||||||
type: string
|
|
||||||
hostname:
|
|
||||||
type: string
|
|
||||||
port:
|
|
||||||
format: uint16
|
|
||||||
minimum: 0
|
|
||||||
type: integer
|
|
||||||
required:
|
|
||||||
- address
|
|
||||||
- hostname
|
|
||||||
- port
|
|
||||||
type: object
|
|
||||||
required:
|
|
||||||
- spec
|
|
||||||
title: GarageNode
|
|
||||||
type: object
|
|
||||||
served: true
|
|
||||||
storage: true
|
|
||||||
subresources: {}
|
|
||||||
|
|
@ -1,5 +0,0 @@
|
||||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
|
||||||
kind: Kustomization
|
|
||||||
|
|
||||||
resources:
|
|
||||||
- garagenodes.deuxfleurs.fr.yaml
|
|
||||||
|
|
@ -34,8 +34,6 @@ in
|
||||||
jq
|
jq
|
||||||
];
|
];
|
||||||
shellHook = ''
|
shellHook = ''
|
||||||
export AWS_REQUEST_CHECKSUM_CALCULATION='when_required'
|
|
||||||
|
|
||||||
function to_s3 {
|
function to_s3 {
|
||||||
aws \
|
aws \
|
||||||
--endpoint-url https://garage.deuxfleurs.fr \
|
--endpoint-url https://garage.deuxfleurs.fr \
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "garage_api_admin"
|
name = "garage_api_admin"
|
||||||
version = "1.3.1"
|
version = "1.1.0"
|
||||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
|
|
@ -22,7 +22,7 @@ garage_api_common.workspace = true
|
||||||
|
|
||||||
argon2.workspace = true
|
argon2.workspace = true
|
||||||
async-trait.workspace = true
|
async-trait.workspace = true
|
||||||
thiserror.workspace = true
|
err-derive.workspace = true
|
||||||
hex.workspace = true
|
hex.workspace = true
|
||||||
tracing.workspace = true
|
tracing.workspace = true
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -277,7 +277,7 @@ pub async fn handle_create_bucket(
|
||||||
let helper = garage.locked_helper().await;
|
let helper = garage.locked_helper().await;
|
||||||
|
|
||||||
if let Some(ga) = &req.global_alias {
|
if let Some(ga) = &req.global_alias {
|
||||||
if !is_valid_bucket_name(ga, garage.config.allow_punycode) {
|
if !is_valid_bucket_name(ga) {
|
||||||
return Err(Error::bad_request(format!(
|
return Err(Error::bad_request(format!(
|
||||||
"{}: {}",
|
"{}: {}",
|
||||||
ga, INVALID_BUCKET_NAME_MESSAGE
|
ga, INVALID_BUCKET_NAME_MESSAGE
|
||||||
|
|
@ -292,7 +292,7 @@ pub async fn handle_create_bucket(
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(la) = &req.local_alias {
|
if let Some(la) = &req.local_alias {
|
||||||
if !is_valid_bucket_name(&la.alias, garage.config.allow_punycode) {
|
if !is_valid_bucket_name(&la.alias) {
|
||||||
return Err(Error::bad_request(format!(
|
return Err(Error::bad_request(format!(
|
||||||
"{}: {}",
|
"{}: {}",
|
||||||
la.alias, INVALID_BUCKET_NAME_MESSAGE
|
la.alias, INVALID_BUCKET_NAME_MESSAGE
|
||||||
|
|
@ -382,7 +382,7 @@ pub async fn handle_delete_bucket(
|
||||||
for ((key_id, alias), _, active) in state.local_aliases.items().iter() {
|
for ((key_id, alias), _, active) in state.local_aliases.items().iter() {
|
||||||
if *active {
|
if *active {
|
||||||
helper
|
helper
|
||||||
.purge_local_bucket_alias(bucket.id, key_id, alias)
|
.unset_local_bucket_alias(bucket.id, key_id, alias)
|
||||||
.await?;
|
.await?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
use std::convert::TryFrom;
|
use std::convert::TryFrom;
|
||||||
|
|
||||||
|
use err_derive::Error;
|
||||||
use hyper::header::HeaderValue;
|
use hyper::header::HeaderValue;
|
||||||
use hyper::{HeaderMap, StatusCode};
|
use hyper::{HeaderMap, StatusCode};
|
||||||
use thiserror::Error;
|
|
||||||
|
|
||||||
pub use garage_model::helper::error::Error as HelperError;
|
pub use garage_model::helper::error::Error as HelperError;
|
||||||
|
|
||||||
|
|
@ -16,17 +16,20 @@ use garage_api_common::helpers::*;
|
||||||
/// Errors of this crate
|
/// Errors of this crate
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
#[error("{0}")]
|
#[error(display = "{}", _0)]
|
||||||
/// Error from common error
|
/// Error from common error
|
||||||
Common(#[from] CommonError),
|
Common(#[error(source)] CommonError),
|
||||||
|
|
||||||
// Category: cannot process
|
// Category: cannot process
|
||||||
/// The API access key does not exist
|
/// The API access key does not exist
|
||||||
#[error("Access key not found: {0}")]
|
#[error(display = "Access key not found: {}", _0)]
|
||||||
NoSuchAccessKey(String),
|
NoSuchAccessKey(String),
|
||||||
|
|
||||||
/// In Import key, the key already exists
|
/// In Import key, the key already exists
|
||||||
#[error("Key {0} already exists in data store. Even if it is deleted, we can't let you create a new key with the same ID. Sorry.")]
|
#[error(
|
||||||
|
display = "Key {} already exists in data store. Even if it is deleted, we can't let you create a new key with the same ID. Sorry.",
|
||||||
|
_0
|
||||||
|
)]
|
||||||
KeyAlreadyExists(String),
|
KeyAlreadyExists(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "garage_api_common"
|
name = "garage_api_common"
|
||||||
version = "1.3.1"
|
version = "1.1.0"
|
||||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
|
|
@ -24,10 +24,11 @@ chrono.workspace = true
|
||||||
crc32fast.workspace = true
|
crc32fast.workspace = true
|
||||||
crc32c.workspace = true
|
crc32c.workspace = true
|
||||||
crypto-common.workspace = true
|
crypto-common.workspace = true
|
||||||
thiserror.workspace = true
|
err-derive.workspace = true
|
||||||
hex.workspace = true
|
hex.workspace = true
|
||||||
hmac.workspace = true
|
hmac.workspace = true
|
||||||
md-5.workspace = true
|
md-5.workspace = true
|
||||||
|
idna.workspace = true
|
||||||
tracing.workspace = true
|
tracing.workspace = true
|
||||||
nom.workspace = true
|
nom.workspace = true
|
||||||
pin-project.workspace = true
|
pin-project.workspace = true
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
use std::convert::TryFrom;
|
use std::convert::TryFrom;
|
||||||
|
|
||||||
|
use err_derive::Error;
|
||||||
use hyper::StatusCode;
|
use hyper::StatusCode;
|
||||||
use thiserror::Error;
|
|
||||||
|
|
||||||
use garage_util::error::Error as GarageError;
|
use garage_util::error::Error as GarageError;
|
||||||
|
|
||||||
|
|
@ -12,48 +12,48 @@ use garage_model::helper::error::Error as HelperError;
|
||||||
pub enum CommonError {
|
pub enum CommonError {
|
||||||
// ---- INTERNAL ERRORS ----
|
// ---- INTERNAL ERRORS ----
|
||||||
/// Error related to deeper parts of Garage
|
/// Error related to deeper parts of Garage
|
||||||
#[error("Internal error: {0}")]
|
#[error(display = "Internal error: {}", _0)]
|
||||||
InternalError(#[from] GarageError),
|
InternalError(#[error(source)] GarageError),
|
||||||
|
|
||||||
/// Error related to Hyper
|
/// Error related to Hyper
|
||||||
#[error("Internal error (Hyper error): {0}")]
|
#[error(display = "Internal error (Hyper error): {}", _0)]
|
||||||
Hyper(#[from] hyper::Error),
|
Hyper(#[error(source)] hyper::Error),
|
||||||
|
|
||||||
/// Error related to HTTP
|
/// Error related to HTTP
|
||||||
#[error("Internal error (HTTP error): {0}")]
|
#[error(display = "Internal error (HTTP error): {}", _0)]
|
||||||
Http(#[from] http::Error),
|
Http(#[error(source)] http::Error),
|
||||||
|
|
||||||
// ---- GENERIC CLIENT ERRORS ----
|
// ---- GENERIC CLIENT ERRORS ----
|
||||||
/// Proper authentication was not provided
|
/// Proper authentication was not provided
|
||||||
#[error("Forbidden: {0}")]
|
#[error(display = "Forbidden: {}", _0)]
|
||||||
Forbidden(String),
|
Forbidden(String),
|
||||||
|
|
||||||
/// Generic bad request response with custom message
|
/// Generic bad request response with custom message
|
||||||
#[error("Bad request: {0}")]
|
#[error(display = "Bad request: {}", _0)]
|
||||||
BadRequest(String),
|
BadRequest(String),
|
||||||
|
|
||||||
/// The client sent a header with invalid value
|
/// The client sent a header with invalid value
|
||||||
#[error("Invalid header value: {0}")]
|
#[error(display = "Invalid header value: {}", _0)]
|
||||||
InvalidHeader(#[from] hyper::header::ToStrError),
|
InvalidHeader(#[error(source)] hyper::header::ToStrError),
|
||||||
|
|
||||||
// ---- SPECIFIC ERROR CONDITIONS ----
|
// ---- SPECIFIC ERROR CONDITIONS ----
|
||||||
// These have to be error codes referenced in the S3 spec here:
|
// These have to be error codes referenced in the S3 spec here:
|
||||||
// https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html#ErrorCodeList
|
// https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html#ErrorCodeList
|
||||||
/// The bucket requested don't exists
|
/// The bucket requested don't exists
|
||||||
#[error("Bucket not found: {0}")]
|
#[error(display = "Bucket not found: {}", _0)]
|
||||||
NoSuchBucket(String),
|
NoSuchBucket(String),
|
||||||
|
|
||||||
/// Tried to create a bucket that already exist
|
/// Tried to create a bucket that already exist
|
||||||
#[error("Bucket already exists")]
|
#[error(display = "Bucket already exists")]
|
||||||
BucketAlreadyExists,
|
BucketAlreadyExists,
|
||||||
|
|
||||||
/// Tried to delete a non-empty bucket
|
/// Tried to delete a non-empty bucket
|
||||||
#[error("Tried to delete a non-empty bucket")]
|
#[error(display = "Tried to delete a non-empty bucket")]
|
||||||
BucketNotEmpty,
|
BucketNotEmpty,
|
||||||
|
|
||||||
// Category: bad request
|
// Category: bad request
|
||||||
/// Bucket name is not valid according to AWS S3 specs
|
/// Bucket name is not valid according to AWS S3 specs
|
||||||
#[error("Invalid bucket name: {0}")]
|
#[error(display = "Invalid bucket name: {}", _0)]
|
||||||
InvalidBucketName(String),
|
InvalidBucketName(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -58,12 +58,6 @@ pub trait ApiHandler: Send + Sync + 'static {
|
||||||
req: Request<IncomingBody>,
|
req: Request<IncomingBody>,
|
||||||
endpoint: Self::Endpoint,
|
endpoint: Self::Endpoint,
|
||||||
) -> impl Future<Output = Result<Response<BoxBody<Self::Error>>, Self::Error>> + Send;
|
) -> impl Future<Output = Result<Response<BoxBody<Self::Error>>, Self::Error>> + Send;
|
||||||
|
|
||||||
/// Returns the key id used to authenticate this request. The ID returned must be safe to
|
|
||||||
/// log.
|
|
||||||
fn key_id_from_request(&self, _req: &Request<IncomingBody>) -> Option<String> {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct ApiServer<A: ApiHandler> {
|
pub struct ApiServer<A: ApiHandler> {
|
||||||
|
|
@ -148,20 +142,19 @@ impl<A: ApiHandler> ApiServer<A> {
|
||||||
) -> Result<Response<BoxBody<A::Error>>, http::Error> {
|
) -> Result<Response<BoxBody<A::Error>>, http::Error> {
|
||||||
let uri = req.uri().clone();
|
let uri = req.uri().clone();
|
||||||
|
|
||||||
let source = if let Ok(forwarded_for_ip_addr) =
|
if let Ok(forwarded_for_ip_addr) =
|
||||||
forwarded_headers::handle_forwarded_for_headers(req.headers())
|
forwarded_headers::handle_forwarded_for_headers(req.headers())
|
||||||
{
|
{
|
||||||
format!("{forwarded_for_ip_addr} (via {addr})")
|
info!(
|
||||||
|
"{} (via {}) {} {}",
|
||||||
|
forwarded_for_ip_addr,
|
||||||
|
addr,
|
||||||
|
req.method(),
|
||||||
|
uri
|
||||||
|
);
|
||||||
} else {
|
} else {
|
||||||
format!("{addr}")
|
info!("{} {} {}", addr, req.method(), uri);
|
||||||
};
|
}
|
||||||
// we only do this to log the access key, so we can discard any error
|
|
||||||
let key = self
|
|
||||||
.api_handler
|
|
||||||
.key_id_from_request(&req)
|
|
||||||
.map(|k| format!("(key {k}) "))
|
|
||||||
.unwrap_or_default();
|
|
||||||
info!("{source} {key}{} {uri}", req.method());
|
|
||||||
debug!("{:?}", req);
|
debug!("{:?}", req);
|
||||||
|
|
||||||
let tracer = opentelemetry::global::tracer("garage");
|
let tracer = opentelemetry::global::tracer("garage");
|
||||||
|
|
@ -350,11 +343,7 @@ where
|
||||||
|
|
||||||
while !*must_exit.borrow() {
|
while !*must_exit.borrow() {
|
||||||
let (stream, client_addr) = tokio::select! {
|
let (stream, client_addr) = tokio::select! {
|
||||||
acc = listener.accept() => match acc {
|
acc = listener.accept() => acc?,
|
||||||
Ok(r) => r,
|
|
||||||
Err(e) if e.kind() == std::io::ErrorKind::ConnectionAborted => continue,
|
|
||||||
Err(e) => return Err(e.into()),
|
|
||||||
},
|
|
||||||
_ = must_exit.changed() => continue,
|
_ = must_exit.changed() => continue,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ use hyper::{
|
||||||
body::{Body, Bytes},
|
body::{Body, Bytes},
|
||||||
Request, Response,
|
Request, Response,
|
||||||
};
|
};
|
||||||
|
use idna::domain_to_unicode;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use garage_model::bucket_table::BucketParams;
|
use garage_model::bucket_table::BucketParams;
|
||||||
|
|
@ -96,7 +97,7 @@ pub fn authority_to_host(authority: &str) -> Result<String, Error> {
|
||||||
authority
|
authority
|
||||||
))),
|
))),
|
||||||
};
|
};
|
||||||
authority.map(|h| h.to_ascii_lowercase())
|
authority.map(|h| domain_to_unicode(h).0)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extract the bucket name and the key name from an HTTP path and possibly a bucket provided in
|
/// Extract the bucket name and the key name from an HTTP path and possibly a bucket provided in
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
use thiserror::Error;
|
use err_derive::Error;
|
||||||
|
|
||||||
use crate::common_error::CommonError;
|
use crate::common_error::CommonError;
|
||||||
pub use crate::common_error::{CommonErrorDerivative, OkOrBadRequest, OkOrInternalError};
|
pub use crate::common_error::{CommonErrorDerivative, OkOrBadRequest, OkOrInternalError};
|
||||||
|
|
@ -6,21 +6,21 @@ pub use crate::common_error::{CommonErrorDerivative, OkOrBadRequest, OkOrInterna
|
||||||
/// Errors of this crate
|
/// Errors of this crate
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
#[error("{0}")]
|
#[error(display = "{}", _0)]
|
||||||
/// Error from common error
|
/// Error from common error
|
||||||
Common(CommonError),
|
Common(CommonError),
|
||||||
|
|
||||||
/// Authorization Header Malformed
|
/// Authorization Header Malformed
|
||||||
#[error("Authorization header malformed, unexpected scope: {0}")]
|
#[error(display = "Authorization header malformed, unexpected scope: {}", _0)]
|
||||||
AuthorizationHeaderMalformed(String),
|
AuthorizationHeaderMalformed(String),
|
||||||
|
|
||||||
// Category: bad request
|
// Category: bad request
|
||||||
/// The request contained an invalid UTF-8 sequence in its path or in other parameters
|
/// The request contained an invalid UTF-8 sequence in its path or in other parameters
|
||||||
#[error("Invalid UTF-8: {0}")]
|
#[error(display = "Invalid UTF-8: {}", _0)]
|
||||||
InvalidUtf8Str(#[from] std::str::Utf8Error),
|
InvalidUtf8Str(#[error(source)] std::str::Utf8Error),
|
||||||
|
|
||||||
/// The provided digest (checksum) value was invalid
|
/// The provided digest (checksum) value was invalid
|
||||||
#[error("Invalid digest: {0}")]
|
#[error(display = "Invalid digest: {}", _0)]
|
||||||
InvalidDigest(String),
|
InvalidDigest(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -104,7 +104,7 @@ async fn check_standard_signature(
|
||||||
// Verify that all necessary request headers are included in signed_headers
|
// Verify that all necessary request headers are included in signed_headers
|
||||||
// The following must be included for all signatures:
|
// The following must be included for all signatures:
|
||||||
// - the Host header (mandatory)
|
// - the Host header (mandatory)
|
||||||
// - all x-amz-* headers used in the request (except x-amz-content-sha256)
|
// - all x-amz-* headers used in the request
|
||||||
// AWS also indicates that the Content-Type header should be signed if
|
// AWS also indicates that the Content-Type header should be signed if
|
||||||
// it is used, but Minio client doesn't sign it so we don't check it for compatibility.
|
// it is used, but Minio client doesn't sign it so we don't check it for compatibility.
|
||||||
let signed_headers = split_signed_headers(&authorization)?;
|
let signed_headers = split_signed_headers(&authorization)?;
|
||||||
|
|
@ -151,7 +151,7 @@ async fn check_presigned_signature(
|
||||||
// Verify that all necessary request headers are included in signed_headers
|
// Verify that all necessary request headers are included in signed_headers
|
||||||
// For AWSv4 pre-signed URLs, the following must be included:
|
// For AWSv4 pre-signed URLs, the following must be included:
|
||||||
// - the Host header (mandatory)
|
// - the Host header (mandatory)
|
||||||
// - all x-amz-* headers used in the request (except x-amz-content-sha256)
|
// - all x-amz-* headers used in the request
|
||||||
let signed_headers = split_signed_headers(&authorization)?;
|
let signed_headers = split_signed_headers(&authorization)?;
|
||||||
verify_signed_headers(request.headers(), &signed_headers)?;
|
verify_signed_headers(request.headers(), &signed_headers)?;
|
||||||
|
|
||||||
|
|
@ -268,9 +268,7 @@ fn verify_signed_headers(headers: &HeaderMap, signed_headers: &[HeaderName]) ->
|
||||||
return Err(Error::bad_request("Header `Host` should be signed"));
|
return Err(Error::bad_request("Header `Host` should be signed"));
|
||||||
}
|
}
|
||||||
for (name, _) in headers.iter() {
|
for (name, _) in headers.iter() {
|
||||||
// Enforce signature of all x-amz-* headers, except x-amz-content-sh256
|
if name.as_str().starts_with("x-amz-") {
|
||||||
// because it is included in the canonical request in all cases
|
|
||||||
if name.as_str().starts_with("x-amz-") && name != X_AMZ_CONTENT_SHA256 {
|
|
||||||
if !signed_headers.contains(name) {
|
if !signed_headers.contains(name) {
|
||||||
return Err(Error::bad_request(format!(
|
return Err(Error::bad_request(format!(
|
||||||
"Header `{}` should be signed",
|
"Header `{}` should be signed",
|
||||||
|
|
@ -419,7 +417,7 @@ pub async fn verify_v4(
|
||||||
// ============ Authorization header, or X-Amz-* query params =========
|
// ============ Authorization header, or X-Amz-* query params =========
|
||||||
|
|
||||||
pub struct Authorization {
|
pub struct Authorization {
|
||||||
pub key_id: String,
|
key_id: String,
|
||||||
scope: String,
|
scope: String,
|
||||||
signed_headers: String,
|
signed_headers: String,
|
||||||
signature: String,
|
signature: String,
|
||||||
|
|
@ -428,7 +426,7 @@ pub struct Authorization {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Authorization {
|
impl Authorization {
|
||||||
pub fn parse_header(headers: &HeaderMap) -> Result<Self, Error> {
|
fn parse_header(headers: &HeaderMap) -> Result<Self, Error> {
|
||||||
let authorization = headers
|
let authorization = headers
|
||||||
.get(AUTHORIZATION)
|
.get(AUTHORIZATION)
|
||||||
.ok_or_bad_request("Missing authorization header")?
|
.ok_or_bad_request("Missing authorization header")?
|
||||||
|
|
@ -470,7 +468,8 @@ impl Authorization {
|
||||||
|
|
||||||
let date = headers
|
let date = headers
|
||||||
.get(X_AMZ_DATE)
|
.get(X_AMZ_DATE)
|
||||||
.ok_or_bad_request("Missing X-Amz-Date field")?
|
.ok_or_bad_request("Missing X-Amz-Date field")
|
||||||
|
.map_err(Error::from)?
|
||||||
.to_str()?;
|
.to_str()?;
|
||||||
let date = parse_date(date)?;
|
let date = parse_date(date)?;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "garage_api_k2v"
|
name = "garage_api_k2v"
|
||||||
version = "1.3.1"
|
version = "1.1.0"
|
||||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
|
|
@ -20,7 +20,7 @@ garage_util = { workspace = true, features = [ "k2v" ] }
|
||||||
garage_api_common.workspace = true
|
garage_api_common.workspace = true
|
||||||
|
|
||||||
base64.workspace = true
|
base64.workspace = true
|
||||||
thiserror.workspace = true
|
err-derive.workspace = true
|
||||||
tracing.workspace = true
|
tracing.workspace = true
|
||||||
|
|
||||||
futures.workspace = true
|
futures.workspace = true
|
||||||
|
|
|
||||||
|
|
@ -176,12 +176,6 @@ impl ApiHandler for K2VApiServer {
|
||||||
|
|
||||||
Ok(resp_ok)
|
Ok(resp_ok)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn key_id_from_request(&self, req: &Request<IncomingBody>) -> Option<String> {
|
|
||||||
garage_api_common::signature::payload::Authorization::parse_header(req.headers())
|
|
||||||
.map(|auth| auth.key_id)
|
|
||||||
.ok()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ApiEndpoint for K2VApiEndpoint {
|
impl ApiEndpoint for K2VApiEndpoint {
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
|
use err_derive::Error;
|
||||||
use hyper::header::HeaderValue;
|
use hyper::header::HeaderValue;
|
||||||
use hyper::{HeaderMap, StatusCode};
|
use hyper::{HeaderMap, StatusCode};
|
||||||
use thiserror::Error;
|
|
||||||
|
|
||||||
use garage_api_common::common_error::{commonErrorDerivative, CommonError};
|
use garage_api_common::common_error::{commonErrorDerivative, CommonError};
|
||||||
pub(crate) use garage_api_common::common_error::{helper_error_as_internal, pass_helper_error};
|
pub(crate) use garage_api_common::common_error::{helper_error_as_internal, pass_helper_error};
|
||||||
|
|
@ -14,38 +14,38 @@ use garage_api_common::signature::error::Error as SignatureError;
|
||||||
/// Errors of this crate
|
/// Errors of this crate
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
#[error("{0}")]
|
#[error(display = "{}", _0)]
|
||||||
/// Error from common error
|
/// Error from common error
|
||||||
Common(#[from] CommonError),
|
Common(#[error(source)] CommonError),
|
||||||
|
|
||||||
// Category: cannot process
|
// Category: cannot process
|
||||||
/// Authorization Header Malformed
|
/// Authorization Header Malformed
|
||||||
#[error("Authorization header malformed, unexpected scope: {0}")]
|
#[error(display = "Authorization header malformed, unexpected scope: {}", _0)]
|
||||||
AuthorizationHeaderMalformed(String),
|
AuthorizationHeaderMalformed(String),
|
||||||
|
|
||||||
/// The provided digest (checksum) value was invalid
|
/// The provided digest (checksum) value was invalid
|
||||||
#[error("Invalid digest: {0}")]
|
#[error(display = "Invalid digest: {}", _0)]
|
||||||
InvalidDigest(String),
|
InvalidDigest(String),
|
||||||
|
|
||||||
/// The object requested don't exists
|
/// The object requested don't exists
|
||||||
#[error("Key not found")]
|
#[error(display = "Key not found")]
|
||||||
NoSuchKey,
|
NoSuchKey,
|
||||||
|
|
||||||
/// Some base64 encoded data was badly encoded
|
/// Some base64 encoded data was badly encoded
|
||||||
#[error("Invalid base64: {0}")]
|
#[error(display = "Invalid base64: {}", _0)]
|
||||||
InvalidBase64(#[from] base64::DecodeError),
|
InvalidBase64(#[error(source)] base64::DecodeError),
|
||||||
|
|
||||||
/// Invalid causality token
|
/// Invalid causality token
|
||||||
#[error("Invalid causality token")]
|
#[error(display = "Invalid causality token")]
|
||||||
InvalidCausalityToken,
|
InvalidCausalityToken,
|
||||||
|
|
||||||
/// The client asked for an invalid return format (invalid Accept header)
|
/// The client asked for an invalid return format (invalid Accept header)
|
||||||
#[error("Not acceptable: {0}")]
|
#[error(display = "Not acceptable: {}", _0)]
|
||||||
NotAcceptable(String),
|
NotAcceptable(String),
|
||||||
|
|
||||||
/// The request contained an invalid UTF-8 sequence in its path or in other parameters
|
/// The request contained an invalid UTF-8 sequence in its path or in other parameters
|
||||||
#[error("Invalid UTF-8: {0}")]
|
#[error(display = "Invalid UTF-8: {}", _0)]
|
||||||
InvalidUtf8Str(#[from] std::str::Utf8Error),
|
InvalidUtf8Str(#[error(source)] std::str::Utf8Error),
|
||||||
}
|
}
|
||||||
|
|
||||||
commonErrorDerivative!(Error);
|
commonErrorDerivative!(Error);
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "garage_api_s3"
|
name = "garage_api_s3"
|
||||||
version = "1.3.1"
|
version = "1.1.0"
|
||||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
|
|
@ -29,7 +29,7 @@ bytes.workspace = true
|
||||||
chrono.workspace = true
|
chrono.workspace = true
|
||||||
crc32fast.workspace = true
|
crc32fast.workspace = true
|
||||||
crc32c.workspace = true
|
crc32c.workspace = true
|
||||||
thiserror.workspace = true
|
err-derive.workspace = true
|
||||||
hex.workspace = true
|
hex.workspace = true
|
||||||
tracing.workspace = true
|
tracing.workspace = true
|
||||||
md-5.workspace = true
|
md-5.workspace = true
|
||||||
|
|
|
||||||
|
|
@ -226,7 +226,6 @@ impl ApiHandler for S3ApiServer {
|
||||||
Endpoint::DeleteBucket {} => handle_delete_bucket(ctx).await,
|
Endpoint::DeleteBucket {} => handle_delete_bucket(ctx).await,
|
||||||
Endpoint::GetBucketLocation {} => handle_get_bucket_location(ctx),
|
Endpoint::GetBucketLocation {} => handle_get_bucket_location(ctx),
|
||||||
Endpoint::GetBucketVersioning {} => handle_get_bucket_versioning(),
|
Endpoint::GetBucketVersioning {} => handle_get_bucket_versioning(),
|
||||||
Endpoint::GetBucketAcl {} => handle_get_bucket_acl(ctx),
|
|
||||||
Endpoint::ListObjects {
|
Endpoint::ListObjects {
|
||||||
delimiter,
|
delimiter,
|
||||||
encoding_type,
|
encoding_type,
|
||||||
|
|
@ -343,12 +342,6 @@ impl ApiHandler for S3ApiServer {
|
||||||
|
|
||||||
Ok(resp_ok)
|
Ok(resp_ok)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn key_id_from_request(&self, req: &Request<IncomingBody>) -> Option<String> {
|
|
||||||
garage_api_common::signature::payload::Authorization::parse_header(req.headers())
|
|
||||||
.map(|auth| auth.key_id)
|
|
||||||
.ok()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ApiEndpoint for S3ApiEndpoint {
|
impl ApiEndpoint for S3ApiEndpoint {
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ use hyper::{Request, Response, StatusCode};
|
||||||
use garage_model::bucket_alias_table::*;
|
use garage_model::bucket_alias_table::*;
|
||||||
use garage_model::bucket_table::Bucket;
|
use garage_model::bucket_table::Bucket;
|
||||||
use garage_model::garage::Garage;
|
use garage_model::garage::Garage;
|
||||||
use garage_model::key_table::{Key, KeyParams};
|
use garage_model::key_table::Key;
|
||||||
use garage_model::permission::BucketKeyPerm;
|
use garage_model::permission::BucketKeyPerm;
|
||||||
use garage_table::util::*;
|
use garage_table::util::*;
|
||||||
use garage_util::crdt::*;
|
use garage_util::crdt::*;
|
||||||
|
|
@ -44,55 +44,6 @@ pub fn handle_get_bucket_versioning() -> Result<Response<ResBody>, Error> {
|
||||||
.body(string_body(xml))?)
|
.body(string_body(xml))?)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn handle_get_bucket_acl(ctx: ReqCtx) -> Result<Response<ResBody>, Error> {
|
|
||||||
let ReqCtx {
|
|
||||||
bucket_id, api_key, ..
|
|
||||||
} = ctx;
|
|
||||||
let key_p = api_key.params().ok_or_internal_error(
|
|
||||||
"Key should not be in deleted state at this point (in handle_get_bucket_acl)",
|
|
||||||
)?;
|
|
||||||
|
|
||||||
let mut grants: Vec<s3_xml::Grant> = vec![];
|
|
||||||
let kp = api_key.bucket_permissions(&bucket_id);
|
|
||||||
|
|
||||||
if kp.allow_owner {
|
|
||||||
grants.push(s3_xml::Grant {
|
|
||||||
grantee: create_grantee(&key_p, &api_key),
|
|
||||||
permission: s3_xml::Value("FULL_CONTROL".to_string()),
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
if kp.allow_read {
|
|
||||||
grants.push(s3_xml::Grant {
|
|
||||||
grantee: create_grantee(&key_p, &api_key),
|
|
||||||
permission: s3_xml::Value("READ".to_string()),
|
|
||||||
});
|
|
||||||
grants.push(s3_xml::Grant {
|
|
||||||
grantee: create_grantee(&key_p, &api_key),
|
|
||||||
permission: s3_xml::Value("READ_ACP".to_string()),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
if kp.allow_write {
|
|
||||||
grants.push(s3_xml::Grant {
|
|
||||||
grantee: create_grantee(&key_p, &api_key),
|
|
||||||
permission: s3_xml::Value("WRITE".to_string()),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let access_control_policy = s3_xml::AccessControlPolicy {
|
|
||||||
xmlns: (),
|
|
||||||
owner: None,
|
|
||||||
acl: s3_xml::AccessControlList { entries: grants },
|
|
||||||
};
|
|
||||||
|
|
||||||
let xml = s3_xml::to_xml_with_header(&access_control_policy)?;
|
|
||||||
trace!("xml: {}", xml);
|
|
||||||
|
|
||||||
Ok(Response::builder()
|
|
||||||
.header("Content-Type", "application/xml")
|
|
||||||
.body(string_body(xml))?)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn handle_list_buckets(
|
pub async fn handle_list_buckets(
|
||||||
garage: &Garage,
|
garage: &Garage,
|
||||||
api_key: &Key,
|
api_key: &Key,
|
||||||
|
|
@ -221,7 +172,7 @@ pub async fn handle_create_bucket(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create the bucket!
|
// Create the bucket!
|
||||||
if !is_valid_bucket_name(&bucket_name, garage.config.allow_punycode) {
|
if !is_valid_bucket_name(&bucket_name) {
|
||||||
return Err(Error::bad_request(format!(
|
return Err(Error::bad_request(format!(
|
||||||
"{}: {}",
|
"{}: {}",
|
||||||
bucket_name, INVALID_BUCKET_NAME_MESSAGE
|
bucket_name, INVALID_BUCKET_NAME_MESSAGE
|
||||||
|
|
@ -290,11 +241,11 @@ pub async fn handle_delete_bucket(ctx: ReqCtx) -> Result<Response<ResBody>, Erro
|
||||||
// 1. delete bucket alias
|
// 1. delete bucket alias
|
||||||
if is_local_alias {
|
if is_local_alias {
|
||||||
helper
|
helper
|
||||||
.purge_local_bucket_alias(*bucket_id, &api_key.key_id, bucket_name)
|
.unset_local_bucket_alias(*bucket_id, &api_key.key_id, bucket_name)
|
||||||
.await?;
|
.await?;
|
||||||
} else {
|
} else {
|
||||||
helper
|
helper
|
||||||
.purge_global_bucket_alias(*bucket_id, bucket_name)
|
.unset_global_bucket_alias(*bucket_id, bucket_name)
|
||||||
.await?;
|
.await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -360,15 +311,6 @@ fn parse_create_bucket_xml(xml_bytes: &[u8]) -> Option<Option<String>> {
|
||||||
Some(ret)
|
Some(ret)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn create_grantee(key_params: &KeyParams, api_key: &Key) -> s3_xml::Grantee {
|
|
||||||
s3_xml::Grantee {
|
|
||||||
xmlns_xsi: (),
|
|
||||||
typ: "CanonicalUser".to_string(),
|
|
||||||
display_name: Some(s3_xml::Value(key_params.name.get().to_string())),
|
|
||||||
id: Some(s3_xml::Value(api_key.key_id.to_string())),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
|
||||||
|
|
@ -26,10 +26,9 @@ use garage_api_common::signature::checksum::*;
|
||||||
use crate::api_server::{ReqBody, ResBody};
|
use crate::api_server::{ReqBody, ResBody};
|
||||||
use crate::encryption::EncryptionParams;
|
use crate::encryption::EncryptionParams;
|
||||||
use crate::error::*;
|
use crate::error::*;
|
||||||
use crate::get::{check_version_not_deleted, full_object_byte_stream, PreconditionHeaders};
|
use crate::get::{full_object_byte_stream, PreconditionHeaders};
|
||||||
use crate::multipart;
|
use crate::multipart;
|
||||||
use crate::put::{extract_metadata_headers, save_stream, ChecksumMode, SaveStreamResult};
|
use crate::put::{extract_metadata_headers, save_stream, ChecksumMode, SaveStreamResult};
|
||||||
use crate::website::X_AMZ_WEBSITE_REDIRECT_LOCATION;
|
|
||||||
use crate::xml::{self as s3_xml, xmlns_tag};
|
use crate::xml::{self as s3_xml, xmlns_tag};
|
||||||
|
|
||||||
pub const X_AMZ_COPY_SOURCE_IF_MATCH: HeaderName =
|
pub const X_AMZ_COPY_SOURCE_IF_MATCH: HeaderName =
|
||||||
|
|
@ -85,18 +84,7 @@ pub async fn handle_copy(
|
||||||
Some(v) if v == hyper::header::HeaderValue::from_static("REPLACE") => {
|
Some(v) if v == hyper::header::HeaderValue::from_static("REPLACE") => {
|
||||||
extract_metadata_headers(req.headers())?
|
extract_metadata_headers(req.headers())?
|
||||||
}
|
}
|
||||||
_ => {
|
_ => source_object_meta_inner.into_owned().headers,
|
||||||
// The x-amz-website-redirect-location header is not copied, instead
|
|
||||||
// it is replaced by the value from the request (or removed if no
|
|
||||||
// value was specified)
|
|
||||||
let is_redirect =
|
|
||||||
|(key, _): &(String, String)| key == X_AMZ_WEBSITE_REDIRECT_LOCATION.as_str();
|
|
||||||
let mut headers: Vec<_> = source_object_meta_inner.headers.clone();
|
|
||||||
headers.retain(|h| !is_redirect(h));
|
|
||||||
let new_headers = extract_metadata_headers(req.headers())?;
|
|
||||||
headers.extend(new_headers.into_iter().filter(is_redirect));
|
|
||||||
headers
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
checksum: source_checksum,
|
checksum: source_checksum,
|
||||||
};
|
};
|
||||||
|
|
@ -237,7 +225,6 @@ async fn handle_copy_metaonly(
|
||||||
.get(&source_version.uuid, &EmptyKey)
|
.get(&source_version.uuid, &EmptyKey)
|
||||||
.await?;
|
.await?;
|
||||||
let source_version = source_version.ok_or(Error::NoSuchKey)?;
|
let source_version = source_version.ok_or(Error::NoSuchKey)?;
|
||||||
check_version_not_deleted(&source_version)?;
|
|
||||||
|
|
||||||
// Write an "uploading" marker in Object table
|
// Write an "uploading" marker in Object table
|
||||||
// This holds a reference to the object in the Version table
|
// This holds a reference to the object in the Version table
|
||||||
|
|
@ -429,7 +416,6 @@ pub async fn handle_upload_part_copy(
|
||||||
.get(&source_object_version.uuid, &EmptyKey)
|
.get(&source_object_version.uuid, &EmptyKey)
|
||||||
.await?
|
.await?
|
||||||
.ok_or(Error::NoSuchKey)?;
|
.ok_or(Error::NoSuchKey)?;
|
||||||
check_version_not_deleted(&source_version)?;
|
|
||||||
|
|
||||||
// We want to reuse blocks from the source version as much as possible.
|
// We want to reuse blocks from the source version as much as possible.
|
||||||
// However, we still need to get the data from these blocks
|
// However, we still need to get the data from these blocks
|
||||||
|
|
@ -561,7 +547,6 @@ pub async fn handle_upload_part_copy(
|
||||||
|
|
||||||
let mut current_offset = 0;
|
let mut current_offset = 0;
|
||||||
let mut next_block = defragmenter.next().await?;
|
let mut next_block = defragmenter.next().await?;
|
||||||
let mut blocks_to_dup = dest_version.clone();
|
|
||||||
|
|
||||||
// TODO this could be optimized similarly to read_and_put_blocks
|
// TODO this could be optimized similarly to read_and_put_blocks
|
||||||
// low priority because uploadpartcopy is rarely used
|
// low priority because uploadpartcopy is rarely used
|
||||||
|
|
@ -591,7 +576,8 @@ pub async fn handle_upload_part_copy(
|
||||||
.unwrap()?;
|
.unwrap()?;
|
||||||
checksummer = checksummer_updated;
|
checksummer = checksummer_updated;
|
||||||
|
|
||||||
let (version_block_key, version_block) = (
|
dest_version.blocks.clear();
|
||||||
|
dest_version.blocks.put(
|
||||||
VersionBlockKey {
|
VersionBlockKey {
|
||||||
part_number,
|
part_number,
|
||||||
offset: current_offset,
|
offset: current_offset,
|
||||||
|
|
@ -603,56 +589,37 @@ pub async fn handle_upload_part_copy(
|
||||||
);
|
);
|
||||||
current_offset += data_len;
|
current_offset += data_len;
|
||||||
|
|
||||||
let next = if let Some(final_data) = data_to_upload {
|
let block_ref = BlockRef {
|
||||||
dest_version.blocks.clear();
|
block: final_hash,
|
||||||
dest_version.blocks.put(version_block_key, version_block);
|
version: dest_version_id,
|
||||||
let block_ref = BlockRef {
|
deleted: false.into(),
|
||||||
block: final_hash,
|
|
||||||
version: dest_version_id,
|
|
||||||
deleted: false.into(),
|
|
||||||
};
|
|
||||||
let (_, _, _, next) = futures::try_join!(
|
|
||||||
// Thing 1: if the block is not exactly a block that existed before,
|
|
||||||
// we need to insert that data as a new block.
|
|
||||||
garage.block_manager.rpc_put_block(
|
|
||||||
final_hash,
|
|
||||||
final_data,
|
|
||||||
dest_encryption.is_encrypted(),
|
|
||||||
None
|
|
||||||
),
|
|
||||||
// Thing 2: we need to insert the block in the version
|
|
||||||
garage.version_table.insert(&dest_version),
|
|
||||||
// Thing 3: we need to add a block reference
|
|
||||||
garage.block_ref_table.insert(&block_ref),
|
|
||||||
// Thing 4: we need to read the next block
|
|
||||||
defragmenter.next(),
|
|
||||||
)?;
|
|
||||||
next
|
|
||||||
} else {
|
|
||||||
blocks_to_dup.blocks.put(version_block_key, version_block);
|
|
||||||
defragmenter.next().await?
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let (_, _, _, next) = futures::try_join!(
|
||||||
|
// Thing 1: if the block is not exactly a block that existed before,
|
||||||
|
// we need to insert that data as a new block.
|
||||||
|
async {
|
||||||
|
if let Some(final_data) = data_to_upload {
|
||||||
|
garage
|
||||||
|
.block_manager
|
||||||
|
.rpc_put_block(final_hash, final_data, dest_encryption.is_encrypted(), None)
|
||||||
|
.await
|
||||||
|
} else {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
},
|
||||||
|
// Thing 2: we need to insert the block in the version
|
||||||
|
garage.version_table.insert(&dest_version),
|
||||||
|
// Thing 3: we need to add a block reference
|
||||||
|
garage.block_ref_table.insert(&block_ref),
|
||||||
|
// Thing 4: we need to read the next block
|
||||||
|
defragmenter.next(),
|
||||||
|
)?;
|
||||||
next_block = next;
|
next_block = next;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert_eq!(current_offset, source_range.length);
|
assert_eq!(current_offset, source_range.length);
|
||||||
|
|
||||||
// Put the duplicated blocks into the version & block_refs tables
|
|
||||||
let block_refs_to_put = blocks_to_dup
|
|
||||||
.blocks
|
|
||||||
.items()
|
|
||||||
.iter()
|
|
||||||
.map(|b| BlockRef {
|
|
||||||
block: b.1.hash,
|
|
||||||
version: dest_version_id,
|
|
||||||
deleted: false.into(),
|
|
||||||
})
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
futures::try_join!(
|
|
||||||
garage.version_table.insert(&blocks_to_dup),
|
|
||||||
garage.block_ref_table.insert_many(&block_refs_to_put[..]),
|
|
||||||
)?;
|
|
||||||
|
|
||||||
let checksums = checksummer.finalize();
|
let checksums = checksummer.finalize();
|
||||||
let etag = dest_encryption.etag_from_md5(&checksums.md5);
|
let etag = dest_encryption.etag_from_md5(&checksums.md5);
|
||||||
let checksum = checksums.extract(dest_object_checksum_algorithm);
|
let checksum = checksums.extract(dest_object_checksum_algorithm);
|
||||||
|
|
|
||||||
|
|
@ -88,9 +88,7 @@ pub async fn handle_put_cors(
|
||||||
pub struct CorsConfiguration {
|
pub struct CorsConfiguration {
|
||||||
#[serde(serialize_with = "xmlns_tag", skip_deserializing)]
|
#[serde(serialize_with = "xmlns_tag", skip_deserializing)]
|
||||||
pub xmlns: (),
|
pub xmlns: (),
|
||||||
// "default" is required to be able to parse an empty list of rules,
|
#[serde(rename = "CORSRule")]
|
||||||
// cf https://docs.rs/quick-xml/latest/quick_xml/de/#sequences-xsall-and-xssequence-xml-schema-types
|
|
||||||
#[serde(rename = "CORSRule", default)]
|
|
||||||
pub cors_rules: Vec<CorsRule>,
|
pub cors_rules: Vec<CorsRule>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -272,26 +270,4 @@ mod tests {
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_deserialize_norules() -> Result<(), Error> {
|
|
||||||
let message = r#"<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<CORSConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/" />"#;
|
|
||||||
let conf: CorsConfiguration = from_str(message).unwrap();
|
|
||||||
let ref_value = CorsConfiguration {
|
|
||||||
xmlns: (),
|
|
||||||
cors_rules: vec![],
|
|
||||||
};
|
|
||||||
assert_eq! {
|
|
||||||
ref_value,
|
|
||||||
conf
|
|
||||||
};
|
|
||||||
|
|
||||||
let message2 = to_xml_with_header(&ref_value)?;
|
|
||||||
|
|
||||||
let cleanup = |c: &str| c.replace(char::is_whitespace, "");
|
|
||||||
assert_eq!(cleanup(message), cleanup(&message2));
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
|
|
||||||
|
use err_derive::Error;
|
||||||
use hyper::header::HeaderValue;
|
use hyper::header::HeaderValue;
|
||||||
use hyper::{HeaderMap, StatusCode};
|
use hyper::{HeaderMap, StatusCode};
|
||||||
use thiserror::Error;
|
|
||||||
|
|
||||||
use garage_model::helper::error::Error as HelperError;
|
use garage_model::helper::error::Error as HelperError;
|
||||||
|
|
||||||
|
|
@ -25,67 +25,67 @@ use crate::xml as s3_xml;
|
||||||
/// Errors of this crate
|
/// Errors of this crate
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
#[error("{0}")]
|
#[error(display = "{}", _0)]
|
||||||
/// Error from common error
|
/// Error from common error
|
||||||
Common(#[from] CommonError),
|
Common(#[error(source)] CommonError),
|
||||||
|
|
||||||
// Category: cannot process
|
// Category: cannot process
|
||||||
/// Authorization Header Malformed
|
/// Authorization Header Malformed
|
||||||
#[error("Authorization header malformed, unexpected scope: {0}")]
|
#[error(display = "Authorization header malformed, unexpected scope: {}", _0)]
|
||||||
AuthorizationHeaderMalformed(String),
|
AuthorizationHeaderMalformed(String),
|
||||||
|
|
||||||
/// The object requested don't exists
|
/// The object requested don't exists
|
||||||
#[error("Key not found")]
|
#[error(display = "Key not found")]
|
||||||
NoSuchKey,
|
NoSuchKey,
|
||||||
|
|
||||||
/// The multipart upload requested don't exists
|
/// The multipart upload requested don't exists
|
||||||
#[error("Upload not found")]
|
#[error(display = "Upload not found")]
|
||||||
NoSuchUpload,
|
NoSuchUpload,
|
||||||
|
|
||||||
/// Precondition failed (e.g. x-amz-copy-source-if-match)
|
/// Precondition failed (e.g. x-amz-copy-source-if-match)
|
||||||
#[error("At least one of the preconditions you specified did not hold")]
|
#[error(display = "At least one of the preconditions you specified did not hold")]
|
||||||
PreconditionFailed,
|
PreconditionFailed,
|
||||||
|
|
||||||
/// Parts specified in CMU request do not match parts actually uploaded
|
/// Parts specified in CMU request do not match parts actually uploaded
|
||||||
#[error("Parts given to CompleteMultipartUpload do not match uploaded parts")]
|
#[error(display = "Parts given to CompleteMultipartUpload do not match uploaded parts")]
|
||||||
InvalidPart,
|
InvalidPart,
|
||||||
|
|
||||||
/// Parts given to CompleteMultipartUpload were not in ascending order
|
/// Parts given to CompleteMultipartUpload were not in ascending order
|
||||||
#[error("Parts given to CompleteMultipartUpload were not in ascending order")]
|
#[error(display = "Parts given to CompleteMultipartUpload were not in ascending order")]
|
||||||
InvalidPartOrder,
|
InvalidPartOrder,
|
||||||
|
|
||||||
/// In CompleteMultipartUpload: not enough data
|
/// In CompleteMultipartUpload: not enough data
|
||||||
/// (here we are more lenient than AWS S3)
|
/// (here we are more lenient than AWS S3)
|
||||||
#[error("Proposed upload is smaller than the minimum allowed object size")]
|
#[error(display = "Proposed upload is smaller than the minimum allowed object size")]
|
||||||
EntityTooSmall,
|
EntityTooSmall,
|
||||||
|
|
||||||
// Category: bad request
|
// Category: bad request
|
||||||
/// The request contained an invalid UTF-8 sequence in its path or in other parameters
|
/// The request contained an invalid UTF-8 sequence in its path or in other parameters
|
||||||
#[error("Invalid UTF-8: {0}")]
|
#[error(display = "Invalid UTF-8: {}", _0)]
|
||||||
InvalidUtf8Str(#[from] std::str::Utf8Error),
|
InvalidUtf8Str(#[error(source)] std::str::Utf8Error),
|
||||||
|
|
||||||
/// The request used an invalid path
|
/// The request used an invalid path
|
||||||
#[error("Invalid UTF-8: {0}")]
|
#[error(display = "Invalid UTF-8: {}", _0)]
|
||||||
InvalidUtf8String(#[from] std::string::FromUtf8Error),
|
InvalidUtf8String(#[error(source)] std::string::FromUtf8Error),
|
||||||
|
|
||||||
/// The client sent invalid XML data
|
/// The client sent invalid XML data
|
||||||
#[error("Invalid XML: {0}")]
|
#[error(display = "Invalid XML: {}", _0)]
|
||||||
InvalidXml(String),
|
InvalidXml(String),
|
||||||
|
|
||||||
/// The client sent a range header with invalid value
|
/// The client sent a range header with invalid value
|
||||||
#[error("Invalid HTTP range: {0:?}")]
|
#[error(display = "Invalid HTTP range: {:?}", _0)]
|
||||||
InvalidRange((http_range::HttpRangeParseError, u64)),
|
InvalidRange(#[error(from)] (http_range::HttpRangeParseError, u64)),
|
||||||
|
|
||||||
/// The client sent a range header with invalid value
|
/// The client sent a range header with invalid value
|
||||||
#[error("Invalid encryption algorithm: {0:?}, should be AES256")]
|
#[error(display = "Invalid encryption algorithm: {:?}, should be AES256", _0)]
|
||||||
InvalidEncryptionAlgorithm(String),
|
InvalidEncryptionAlgorithm(String),
|
||||||
|
|
||||||
/// The provided digest (checksum) value was invalid
|
/// The provided digest (checksum) value was invalid
|
||||||
#[error("Invalid digest: {0}")]
|
#[error(display = "Invalid digest: {}", _0)]
|
||||||
InvalidDigest(String),
|
InvalidDigest(String),
|
||||||
|
|
||||||
/// The client sent a request for an action not supported by garage
|
/// The client sent a request for an action not supported by garage
|
||||||
#[error("Unimplemented action: {0}")]
|
#[error(display = "Unimplemented action: {}", _0)]
|
||||||
NotImplemented(String),
|
NotImplemented(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -99,12 +99,6 @@ impl From<HelperError> for Error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<(http_range::HttpRangeParseError, u64)> for Error {
|
|
||||||
fn from(err: (http_range::HttpRangeParseError, u64)) -> Error {
|
|
||||||
Error::InvalidRange(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<roxmltree::Error> for Error {
|
impl From<roxmltree::Error> for Error {
|
||||||
fn from(err: roxmltree::Error) -> Self {
|
fn from(err: roxmltree::Error) -> Self {
|
||||||
Self::InvalidXml(format!("{}", err))
|
Self::InvalidXml(format!("{}", err))
|
||||||
|
|
|
||||||
|
|
@ -19,13 +19,12 @@ use garage_net::stream::ByteStream;
|
||||||
use garage_rpc::rpc_helper::OrderTag;
|
use garage_rpc::rpc_helper::OrderTag;
|
||||||
use garage_table::EmptyKey;
|
use garage_table::EmptyKey;
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
use garage_util::error::{Error as UtilError, OkOrMessage};
|
use garage_util::error::OkOrMessage;
|
||||||
|
|
||||||
use garage_model::garage::Garage;
|
use garage_model::garage::Garage;
|
||||||
use garage_model::s3::object_table::*;
|
use garage_model::s3::object_table::*;
|
||||||
use garage_model::s3::version_table::*;
|
use garage_model::s3::version_table::*;
|
||||||
|
|
||||||
use garage_api_common::common_error::CommonError;
|
|
||||||
use garage_api_common::helpers::*;
|
use garage_api_common::helpers::*;
|
||||||
use garage_api_common::signature::checksum::{add_checksum_response_headers, X_AMZ_CHECKSUM_MODE};
|
use garage_api_common::signature::checksum::{add_checksum_response_headers, X_AMZ_CHECKSUM_MODE};
|
||||||
|
|
||||||
|
|
@ -216,7 +215,6 @@ pub async fn handle_head_without_ctx(
|
||||||
.get(&object_version.uuid, &EmptyKey)
|
.get(&object_version.uuid, &EmptyKey)
|
||||||
.await?
|
.await?
|
||||||
.ok_or(Error::NoSuchKey)?;
|
.ok_or(Error::NoSuchKey)?;
|
||||||
check_version_not_deleted(&version)?;
|
|
||||||
|
|
||||||
let (part_offset, part_end) =
|
let (part_offset, part_end) =
|
||||||
calculate_part_bounds(&version, pn).ok_or(Error::InvalidPart)?;
|
calculate_part_bounds(&version, pn).ok_or(Error::InvalidPart)?;
|
||||||
|
|
@ -367,21 +365,6 @@ pub async fn handle_get_without_ctx(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn check_version_not_deleted(version: &Version) -> Result<(), Error> {
|
|
||||||
if version.deleted.get() {
|
|
||||||
// the version was deleted between when the object_table was consulted
|
|
||||||
// and now, this could mean the object was deleted, or overriden.
|
|
||||||
// Rather than say the key doesn't exist, return a transient error
|
|
||||||
// to signal the client to try again.
|
|
||||||
return Err(CommonError::InternalError(UtilError::Message(
|
|
||||||
"conflict/inconsistency between object and version state, version is deleted"
|
|
||||||
.to_string(),
|
|
||||||
))
|
|
||||||
.into());
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn handle_get_full(
|
async fn handle_get_full(
|
||||||
garage: Arc<Garage>,
|
garage: Arc<Garage>,
|
||||||
version: &ObjectVersion,
|
version: &ObjectVersion,
|
||||||
|
|
@ -448,7 +431,6 @@ pub fn full_object_byte_stream(
|
||||||
.ok_or_message("channel closed")?;
|
.ok_or_message("channel closed")?;
|
||||||
|
|
||||||
let version = version_fut.await.unwrap()?.ok_or(Error::NoSuchKey)?;
|
let version = version_fut.await.unwrap()?.ok_or(Error::NoSuchKey)?;
|
||||||
check_version_not_deleted(&version)?;
|
|
||||||
for (i, (_, vb)) in version.blocks.items().iter().enumerate().skip(1) {
|
for (i, (_, vb)) in version.blocks.items().iter().enumerate().skip(1) {
|
||||||
let stream_block_i = encryption
|
let stream_block_i = encryption
|
||||||
.get_block(&garage, &vb.hash, Some(order_stream.order(i as u64)))
|
.get_block(&garage, &vb.hash, Some(order_stream.order(i as u64)))
|
||||||
|
|
@ -464,14 +446,6 @@ pub fn full_object_byte_stream(
|
||||||
{
|
{
|
||||||
Ok(()) => (),
|
Ok(()) => (),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
// TODO i think this is a bad idea, we should log
|
|
||||||
// an error and stop there. If the error happens to
|
|
||||||
// be exactly the size of what hasn't been streamed
|
|
||||||
// yet, the client will see the request as a
|
|
||||||
// success
|
|
||||||
// instead truncating the output notify the client
|
|
||||||
// something happened with their download, so that
|
|
||||||
// they can retry it
|
|
||||||
let _ = tx.send(error_stream_item(e)).await;
|
let _ = tx.send(error_stream_item(e)).await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -523,7 +497,7 @@ async fn handle_get_range(
|
||||||
.get(&version.uuid, &EmptyKey)
|
.get(&version.uuid, &EmptyKey)
|
||||||
.await?
|
.await?
|
||||||
.ok_or(Error::NoSuchKey)?;
|
.ok_or(Error::NoSuchKey)?;
|
||||||
check_version_not_deleted(&version)?;
|
|
||||||
let body =
|
let body =
|
||||||
body_from_blocks_range(garage, encryption, version.blocks.items(), begin, end);
|
body_from_blocks_range(garage, encryption, version.blocks.items(), begin, end);
|
||||||
Ok(resp_builder.body(body)?)
|
Ok(resp_builder.body(body)?)
|
||||||
|
|
@ -574,8 +548,6 @@ async fn handle_get_part(
|
||||||
.await?
|
.await?
|
||||||
.ok_or(Error::NoSuchKey)?;
|
.ok_or(Error::NoSuchKey)?;
|
||||||
|
|
||||||
check_version_not_deleted(&version)?;
|
|
||||||
|
|
||||||
let (begin, end) =
|
let (begin, end) =
|
||||||
calculate_part_bounds(&version, part_number).ok_or(Error::InvalidPart)?;
|
calculate_part_bounds(&version, part_number).ok_or(Error::InvalidPart)?;
|
||||||
|
|
||||||
|
|
@ -845,9 +817,7 @@ impl PreconditionHeaders {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn check(&self, v: &ObjectVersion, etag: &str) -> Result<Option<StatusCode>, Error> {
|
fn check(&self, v: &ObjectVersion, etag: &str) -> Result<Option<StatusCode>, Error> {
|
||||||
// we store date with ms precision, but headers are precise to the second: truncate
|
let v_date = UNIX_EPOCH + Duration::from_millis(v.timestamp);
|
||||||
// the timestamp to handle the same-second edge case
|
|
||||||
let v_date = UNIX_EPOCH + Duration::from_secs(v.timestamp / 1000);
|
|
||||||
|
|
||||||
// Implemented from https://datatracker.ietf.org/doc/html/rfc7232#section-6
|
// Implemented from https://datatracker.ietf.org/doc/html/rfc7232#section-6
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,7 @@ pub async fn handle_get_lifecycle(ctx: ReqCtx) -> Result<Response<ResBody>, Erro
|
||||||
.body(string_body(xml))?)
|
.body(string_body(xml))?)
|
||||||
} else {
|
} else {
|
||||||
Ok(Response::builder()
|
Ok(Response::builder()
|
||||||
.status(StatusCode::NOT_FOUND)
|
.status(StatusCode::NO_CONTENT)
|
||||||
.body(empty_body())?)
|
.body(empty_body())?)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -141,26 +141,10 @@ pub async fn handle_post_object(
|
||||||
|
|
||||||
let mut conditions = decoded_policy.into_conditions()?;
|
let mut conditions = decoded_policy.into_conditions()?;
|
||||||
|
|
||||||
// If there are conditions on the bucket name, check these against the actual bucket_name rather
|
|
||||||
// than the one in params, which is allowed to be absent.
|
|
||||||
if let Some(conds) = conditions.params.remove("bucket") {
|
|
||||||
for cond in conds {
|
|
||||||
let ok = match cond {
|
|
||||||
Operation::Equal(s) => s.as_str() == bucket_name,
|
|
||||||
Operation::StartsWith(s) => bucket_name.starts_with(&s),
|
|
||||||
};
|
|
||||||
if !ok {
|
|
||||||
return Err(Error::bad_request(
|
|
||||||
"Key 'bucket' has value not allowed in policy",
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (param_key, value) in params.iter() {
|
for (param_key, value) in params.iter() {
|
||||||
let param_key = param_key.as_str();
|
let param_key = param_key.as_str();
|
||||||
match param_key {
|
match param_key {
|
||||||
"policy" | "x-amz-signature" | "bucket" => (), // this is always accepted, as it's required to validate other fields
|
"policy" | "x-amz-signature" => (), // this is always accepted, as it's required to validate other fields
|
||||||
"content-type" => {
|
"content-type" => {
|
||||||
let conds = conditions.params.remove("content-type").ok_or_else(|| {
|
let conds = conditions.params.remove("content-type").ok_or_else(|| {
|
||||||
Error::bad_request(format!("Key '{}' is not allowed in policy", param_key))
|
Error::bad_request(format!("Key '{}' is not allowed in policy", param_key))
|
||||||
|
|
|
||||||
|
|
@ -39,6 +39,8 @@ use crate::encryption::EncryptionParams;
|
||||||
use crate::error::*;
|
use crate::error::*;
|
||||||
use crate::website::X_AMZ_WEBSITE_REDIRECT_LOCATION;
|
use crate::website::X_AMZ_WEBSITE_REDIRECT_LOCATION;
|
||||||
|
|
||||||
|
const PUT_BLOCKS_MAX_PARALLEL: usize = 3;
|
||||||
|
|
||||||
pub(crate) struct SaveStreamResult {
|
pub(crate) struct SaveStreamResult {
|
||||||
pub(crate) version_uuid: Uuid,
|
pub(crate) version_uuid: Uuid,
|
||||||
pub(crate) version_timestamp: u64,
|
pub(crate) version_timestamp: u64,
|
||||||
|
|
@ -491,7 +493,7 @@ pub(crate) async fn read_and_put_blocks<S: Stream<Item = Result<Bytes, Error>> +
|
||||||
};
|
};
|
||||||
let recv_next = async {
|
let recv_next = async {
|
||||||
// If more than a maximum number of writes are in progress, don't add more for now
|
// If more than a maximum number of writes are in progress, don't add more for now
|
||||||
if currently_running >= ctx.garage.config.block_max_concurrent_writes_per_request {
|
if currently_running >= PUT_BLOCKS_MAX_PARALLEL {
|
||||||
futures::future::pending().await
|
futures::future::pending().await
|
||||||
} else {
|
} else {
|
||||||
block_rx3.recv().await
|
block_rx3.recv().await
|
||||||
|
|
|
||||||
|
|
@ -13,10 +13,6 @@ pub fn xmlns_tag<S: Serializer>(_v: &(), s: S) -> Result<S::Ok, S::Error> {
|
||||||
s.serialize_str("http://s3.amazonaws.com/doc/2006-03-01/")
|
s.serialize_str("http://s3.amazonaws.com/doc/2006-03-01/")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn xmlns_xsi_tag<S: Serializer>(_v: &(), s: S) -> Result<S::Ok, S::Error> {
|
|
||||||
s.serialize_str("http://www.w3.org/2001/XMLSchema-instance")
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
|
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
pub struct Value(#[serde(rename = "$value")] pub String);
|
pub struct Value(#[serde(rename = "$value")] pub String);
|
||||||
|
|
||||||
|
|
@ -323,42 +319,6 @@ pub struct PostObject {
|
||||||
pub etag: Value,
|
pub etag: Value,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize, PartialEq, Eq)]
|
|
||||||
pub struct Grantee {
|
|
||||||
#[serde(rename = "xmlns:xsi", serialize_with = "xmlns_xsi_tag")]
|
|
||||||
pub xmlns_xsi: (),
|
|
||||||
#[serde(rename = "xsi:type")]
|
|
||||||
pub typ: String,
|
|
||||||
#[serde(rename = "DisplayName")]
|
|
||||||
pub display_name: Option<Value>,
|
|
||||||
#[serde(rename = "ID")]
|
|
||||||
pub id: Option<Value>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Serialize, PartialEq, Eq)]
|
|
||||||
pub struct Grant {
|
|
||||||
#[serde(rename = "Grantee")]
|
|
||||||
pub grantee: Grantee,
|
|
||||||
#[serde(rename = "Permission")]
|
|
||||||
pub permission: Value,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Serialize, PartialEq, Eq)]
|
|
||||||
pub struct AccessControlList {
|
|
||||||
#[serde(rename = "Grant")]
|
|
||||||
pub entries: Vec<Grant>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Serialize, PartialEq, Eq)]
|
|
||||||
pub struct AccessControlPolicy {
|
|
||||||
#[serde(serialize_with = "xmlns_tag")]
|
|
||||||
pub xmlns: (),
|
|
||||||
#[serde(rename = "Owner")]
|
|
||||||
pub owner: Option<Owner>,
|
|
||||||
#[serde(rename = "AccessControlList")]
|
|
||||||
pub acl: AccessControlList,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
@ -467,43 +427,6 @@ mod tests {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn get_bucket_acl_result() -> Result<(), ApiError> {
|
|
||||||
let grant = Grant {
|
|
||||||
grantee: Grantee {
|
|
||||||
xmlns_xsi: (),
|
|
||||||
typ: "CanonicalUser".to_string(),
|
|
||||||
display_name: Some(Value("owner_name".to_string())),
|
|
||||||
id: Some(Value("qsdfjklm".to_string())),
|
|
||||||
},
|
|
||||||
permission: Value("FULL_CONTROL".to_string()),
|
|
||||||
};
|
|
||||||
|
|
||||||
let get_bucket_acl = AccessControlPolicy {
|
|
||||||
xmlns: (),
|
|
||||||
owner: None,
|
|
||||||
acl: AccessControlList {
|
|
||||||
entries: vec![grant],
|
|
||||||
},
|
|
||||||
};
|
|
||||||
assert_eq!(
|
|
||||||
to_xml_with_header(&get_bucket_acl)?,
|
|
||||||
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\
|
|
||||||
<AccessControlPolicy xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\">\
|
|
||||||
<AccessControlList>\
|
|
||||||
<Grant>\
|
|
||||||
<Grantee xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:type=\"CanonicalUser\">\
|
|
||||||
<DisplayName>owner_name</DisplayName>\
|
|
||||||
<ID>qsdfjklm</ID>\
|
|
||||||
</Grantee>\
|
|
||||||
<Permission>FULL_CONTROL</Permission>\
|
|
||||||
</Grant>\
|
|
||||||
</AccessControlList>\
|
|
||||||
</AccessControlPolicy>"
|
|
||||||
);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn delete_result() -> Result<(), ApiError> {
|
fn delete_result() -> Result<(), ApiError> {
|
||||||
let delete_result = DeleteResult {
|
let delete_result = DeleteResult {
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "garage_block"
|
name = "garage_block"
|
||||||
version = "1.3.1"
|
version = "1.1.0"
|
||||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@ use opentelemetry::{
|
||||||
Context,
|
Context,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use garage_net::endpoint::RpcInFlightLimiter;
|
||||||
use garage_net::stream::{read_stream_to_end, stream_asyncread, ByteStream};
|
use garage_net::stream::{read_stream_to_end, stream_asyncread, ByteStream};
|
||||||
|
|
||||||
use garage_db as db;
|
use garage_db as db;
|
||||||
|
|
@ -50,8 +51,6 @@ pub const INLINE_THRESHOLD: usize = 3072;
|
||||||
// to delete the block locally.
|
// to delete the block locally.
|
||||||
pub(crate) const BLOCK_GC_DELAY: Duration = Duration::from_secs(600);
|
pub(crate) const BLOCK_GC_DELAY: Duration = Duration::from_secs(600);
|
||||||
|
|
||||||
const BLOCK_READ_SEMAPHORE_TIMEOUT: Duration = Duration::from_secs(15);
|
|
||||||
|
|
||||||
/// RPC messages used to share blocks of data between nodes
|
/// RPC messages used to share blocks of data between nodes
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
pub enum BlockRpc {
|
pub enum BlockRpc {
|
||||||
|
|
@ -89,7 +88,6 @@ pub struct BlockManager {
|
||||||
disable_scrub: bool,
|
disable_scrub: bool,
|
||||||
|
|
||||||
mutation_lock: Vec<Mutex<BlockManagerLocked>>,
|
mutation_lock: Vec<Mutex<BlockManagerLocked>>,
|
||||||
read_semaphore: Semaphore,
|
|
||||||
|
|
||||||
pub rc: BlockRc,
|
pub rc: BlockRc,
|
||||||
pub resync: BlockResyncManager,
|
pub resync: BlockResyncManager,
|
||||||
|
|
@ -179,8 +177,6 @@ impl BlockManager {
|
||||||
.iter()
|
.iter()
|
||||||
.map(|_| Mutex::new(BlockManagerLocked()))
|
.map(|_| Mutex::new(BlockManagerLocked()))
|
||||||
.collect::<Vec<_>>(),
|
.collect::<Vec<_>>(),
|
||||||
|
|
||||||
read_semaphore: Semaphore::new(config.block_max_concurrent_reads),
|
|
||||||
rc,
|
rc,
|
||||||
resync,
|
resync,
|
||||||
system,
|
system,
|
||||||
|
|
@ -300,6 +296,7 @@ impl BlockManager {
|
||||||
&node_id,
|
&node_id,
|
||||||
BlockRpc::GetBlock(*hash, order_tag),
|
BlockRpc::GetBlock(*hash, order_tag),
|
||||||
priority,
|
priority,
|
||||||
|
RpcInFlightLimiter::TableWrite,
|
||||||
);
|
);
|
||||||
tokio::select! {
|
tokio::select! {
|
||||||
res = rpc => {
|
res = rpc => {
|
||||||
|
|
@ -413,8 +410,8 @@ impl BlockManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get number of items in the refcount table
|
/// Get number of items in the refcount table
|
||||||
pub fn rc_approximate_len(&self) -> Result<usize, Error> {
|
pub fn rc_len(&self) -> Result<usize, Error> {
|
||||||
Ok(self.rc.rc_table.approximate_len()?)
|
Ok(self.rc.rc_table.len()?)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Send command to start/stop/manager scrub worker
|
/// Send command to start/stop/manager scrub worker
|
||||||
|
|
@ -432,7 +429,7 @@ impl BlockManager {
|
||||||
|
|
||||||
/// List all resync errors
|
/// List all resync errors
|
||||||
pub fn list_resync_errors(&self) -> Result<Vec<BlockResyncErrorInfo>, Error> {
|
pub fn list_resync_errors(&self) -> Result<Vec<BlockResyncErrorInfo>, Error> {
|
||||||
let mut blocks = Vec::with_capacity(self.resync.errors.approximate_len()?);
|
let mut blocks = Vec::with_capacity(self.resync.errors.len()?);
|
||||||
for ent in self.resync.errors.iter()? {
|
for ent in self.resync.errors.iter()? {
|
||||||
let (hash, cnt) = ent?;
|
let (hash, cnt) = ent?;
|
||||||
let cnt = ErrorCounter::decode(&cnt);
|
let cnt = ErrorCounter::decode(&cnt);
|
||||||
|
|
@ -562,6 +559,9 @@ impl BlockManager {
|
||||||
match self.find_block(hash).await {
|
match self.find_block(hash).await {
|
||||||
Some(p) => self.read_block_from(hash, &p).await,
|
Some(p) => self.read_block_from(hash, &p).await,
|
||||||
None => {
|
None => {
|
||||||
|
// Not found but maybe we should have had it ??
|
||||||
|
self.resync
|
||||||
|
.put_to_resync(hash, 2 * self.system.rpc_helper().rpc_timeout())?;
|
||||||
return Err(Error::Message(format!(
|
return Err(Error::Message(format!(
|
||||||
"block {:?} not found on node",
|
"block {:?} not found on node",
|
||||||
hash
|
hash
|
||||||
|
|
@ -583,15 +583,6 @@ impl BlockManager {
|
||||||
) -> Result<DataBlock, Error> {
|
) -> Result<DataBlock, Error> {
|
||||||
let (header, path) = block_path.as_parts_ref();
|
let (header, path) = block_path.as_parts_ref();
|
||||||
|
|
||||||
let permit = tokio::select! {
|
|
||||||
sem = self.read_semaphore.acquire() => sem.ok_or_message("acquire read semaphore")?,
|
|
||||||
_ = tokio::time::sleep(BLOCK_READ_SEMAPHORE_TIMEOUT) => {
|
|
||||||
self.metrics.block_read_semaphore_timeouts.add(1);
|
|
||||||
debug!("read block {:?}: read_semaphore acquire timeout", hash);
|
|
||||||
return Err(Error::Message("read block: read_semaphore acquire timeout".into()));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut f = fs::File::open(&path).await?;
|
let mut f = fs::File::open(&path).await?;
|
||||||
let mut data = vec![];
|
let mut data = vec![];
|
||||||
f.read_to_end(&mut data).await?;
|
f.read_to_end(&mut data).await?;
|
||||||
|
|
@ -616,8 +607,6 @@ impl BlockManager {
|
||||||
return Err(Error::CorruptData(*hash));
|
return Err(Error::CorruptData(*hash));
|
||||||
}
|
}
|
||||||
|
|
||||||
drop(permit);
|
|
||||||
|
|
||||||
Ok(data)
|
Ok(data)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -783,7 +772,6 @@ impl BlockManagerLocked {
|
||||||
|
|
||||||
let mut f = fs::File::create(&path_tmp).await?;
|
let mut f = fs::File::create(&path_tmp).await?;
|
||||||
f.write_all(data).await?;
|
f.write_all(data).await?;
|
||||||
f.flush().await?;
|
|
||||||
mgr.metrics.bytes_written.add(data.len() as u64);
|
mgr.metrics.bytes_written.add(data.len() as u64);
|
||||||
|
|
||||||
if mgr.data_fsync {
|
if mgr.data_fsync {
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,6 @@ pub struct BlockManagerMetrics {
|
||||||
|
|
||||||
pub(crate) bytes_read: BoundCounter<u64>,
|
pub(crate) bytes_read: BoundCounter<u64>,
|
||||||
pub(crate) block_read_duration: BoundValueRecorder<f64>,
|
pub(crate) block_read_duration: BoundValueRecorder<f64>,
|
||||||
pub(crate) block_read_semaphore_timeouts: BoundCounter<u64>,
|
|
||||||
pub(crate) bytes_written: BoundCounter<u64>,
|
pub(crate) bytes_written: BoundCounter<u64>,
|
||||||
pub(crate) block_write_duration: BoundValueRecorder<f64>,
|
pub(crate) block_write_duration: BoundValueRecorder<f64>,
|
||||||
pub(crate) delete_counter: BoundCounter<u64>,
|
pub(crate) delete_counter: BoundCounter<u64>,
|
||||||
|
|
@ -51,7 +50,7 @@ impl BlockManagerMetrics {
|
||||||
.init(),
|
.init(),
|
||||||
_rc_size: meter
|
_rc_size: meter
|
||||||
.u64_value_observer("block.rc_size", move |observer| {
|
.u64_value_observer("block.rc_size", move |observer| {
|
||||||
if let Ok(value) = rc_tree.approximate_len() {
|
if let Ok(value) = rc_tree.len() {
|
||||||
observer.observe(value as u64, &[])
|
observer.observe(value as u64, &[])
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
@ -59,7 +58,7 @@ impl BlockManagerMetrics {
|
||||||
.init(),
|
.init(),
|
||||||
_resync_queue_len: meter
|
_resync_queue_len: meter
|
||||||
.u64_value_observer("block.resync_queue_length", move |observer| {
|
.u64_value_observer("block.resync_queue_length", move |observer| {
|
||||||
if let Ok(value) = resync_queue.approximate_len() {
|
if let Ok(value) = resync_queue.len() {
|
||||||
observer.observe(value as u64, &[]);
|
observer.observe(value as u64, &[]);
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
@ -69,7 +68,7 @@ impl BlockManagerMetrics {
|
||||||
.init(),
|
.init(),
|
||||||
_resync_errored_blocks: meter
|
_resync_errored_blocks: meter
|
||||||
.u64_value_observer("block.resync_errored_blocks", move |observer| {
|
.u64_value_observer("block.resync_errored_blocks", move |observer| {
|
||||||
if let Ok(value) = resync_errors.approximate_len() {
|
if let Ok(value) = resync_errors.len() {
|
||||||
observer.observe(value as u64, &[]);
|
observer.observe(value as u64, &[]);
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
@ -120,11 +119,6 @@ impl BlockManagerMetrics {
|
||||||
.with_description("Duration of block read operations")
|
.with_description("Duration of block read operations")
|
||||||
.init()
|
.init()
|
||||||
.bind(&[]),
|
.bind(&[]),
|
||||||
block_read_semaphore_timeouts: meter
|
|
||||||
.u64_counter("block.read_semaphore_timeouts")
|
|
||||||
.with_description("Number of block reads that failed due to semaphore acquire timeout")
|
|
||||||
.init()
|
|
||||||
.bind(&[]),
|
|
||||||
bytes_written: meter
|
bytes_written: meter
|
||||||
.u64_counter("block.bytes_written")
|
.u64_counter("block.bytes_written")
|
||||||
.with_description("Number of bytes written to disk")
|
.with_description("Number of bytes written to disk")
|
||||||
|
|
|
||||||
|
|
@ -106,13 +106,13 @@ impl BlockResyncManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get length of resync queue
|
/// Get length of resync queue
|
||||||
pub fn queue_approximate_len(&self) -> Result<usize, Error> {
|
pub fn queue_len(&self) -> Result<usize, Error> {
|
||||||
Ok(self.queue.approximate_len()?)
|
Ok(self.queue.len()?)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get number of blocks that have an error
|
/// Get number of blocks that have an error
|
||||||
pub fn errors_approximate_len(&self) -> Result<usize, Error> {
|
pub fn errors_len(&self) -> Result<usize, Error> {
|
||||||
Ok(self.errors.approximate_len()?)
|
Ok(self.errors.len()?)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Clear the error counter for a block and put it in queue immediately
|
/// Clear the error counter for a block and put it in queue immediately
|
||||||
|
|
@ -133,14 +133,6 @@ impl BlockResyncManager {
|
||||||
)))
|
)))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Clear the entire resync queue and list of errored blocks
|
|
||||||
/// Corresponds to `garage repair clear-resync-queue`
|
|
||||||
pub fn clear_resync_queue(&self) -> Result<(), Error> {
|
|
||||||
self.queue.clear()?;
|
|
||||||
self.errors.clear()?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn register_bg_vars(&self, vars: &mut vars::BgVars) {
|
pub fn register_bg_vars(&self, vars: &mut vars::BgVars) {
|
||||||
let notify = self.notify.clone();
|
let notify = self.notify.clone();
|
||||||
vars.register_rw(
|
vars.register_rw(
|
||||||
|
|
@ -556,11 +548,9 @@ impl Worker for ResyncWorker {
|
||||||
}
|
}
|
||||||
|
|
||||||
WorkerStatus {
|
WorkerStatus {
|
||||||
queue_length: Some(self.manager.resync.queue_approximate_len().unwrap_or(0) as u64),
|
queue_length: Some(self.manager.resync.queue_len().unwrap_or(0) as u64),
|
||||||
tranquility: Some(tranquility),
|
tranquility: Some(tranquility),
|
||||||
persistent_errors: Some(
|
persistent_errors: Some(self.manager.resync.errors_len().unwrap_or(0) as u64),
|
||||||
self.manager.resync.errors_approximate_len().unwrap_or(0) as u64
|
|
||||||
),
|
|
||||||
..Default::default()
|
..Default::default()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "garage_db"
|
name = "garage_db"
|
||||||
version = "1.3.1"
|
version = "1.1.0"
|
||||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
|
|
@ -12,18 +12,14 @@ readme = "../../README.md"
|
||||||
path = "lib.rs"
|
path = "lib.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
thiserror.workspace = true
|
err-derive.workspace = true
|
||||||
tracing.workspace = true
|
tracing.workspace = true
|
||||||
|
|
||||||
heed = { workspace = true, optional = true }
|
heed = { workspace = true, optional = true }
|
||||||
|
|
||||||
rusqlite = { workspace = true, optional = true, features = ["backup"] }
|
rusqlite = { workspace = true, optional = true, features = ["backup"] }
|
||||||
r2d2 = { workspace = true, optional = true }
|
r2d2 = { workspace = true, optional = true }
|
||||||
r2d2_sqlite = { workspace = true, optional = true }
|
r2d2_sqlite = { workspace = true, optional = true }
|
||||||
|
|
||||||
fjall = { workspace = true, optional = true }
|
|
||||||
parking_lot = { workspace = true, optional = true }
|
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
mktemp.workspace = true
|
mktemp.workspace = true
|
||||||
|
|
||||||
|
|
@ -31,5 +27,4 @@ mktemp.workspace = true
|
||||||
default = [ "lmdb", "sqlite" ]
|
default = [ "lmdb", "sqlite" ]
|
||||||
bundled-libs = [ "rusqlite?/bundled" ]
|
bundled-libs = [ "rusqlite?/bundled" ]
|
||||||
lmdb = [ "heed" ]
|
lmdb = [ "heed" ]
|
||||||
fjall = [ "dep:fjall", "dep:parking_lot" ]
|
|
||||||
sqlite = [ "rusqlite", "r2d2", "r2d2_sqlite" ]
|
sqlite = [ "rusqlite", "r2d2", "r2d2_sqlite" ]
|
||||||
|
|
|
||||||
|
|
@ -1,453 +0,0 @@
|
||||||
use core::ops::Bound;
|
|
||||||
|
|
||||||
use std::path::PathBuf;
|
|
||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard};
|
|
||||||
|
|
||||||
use fjall::{
|
|
||||||
PartitionCreateOptions, PersistMode, TransactionalKeyspace, TransactionalPartitionHandle,
|
|
||||||
WriteTransaction,
|
|
||||||
};
|
|
||||||
|
|
||||||
use crate::{
|
|
||||||
open::{Engine, OpenOpt},
|
|
||||||
Db, Error, IDb, ITx, ITxFn, OnCommit, Result, TxError, TxFnResult, TxOpError, TxOpResult,
|
|
||||||
TxResult, TxValueIter, Value, ValueIter,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub use fjall;
|
|
||||||
|
|
||||||
// --
|
|
||||||
|
|
||||||
pub(crate) fn open_db(path: &PathBuf, opt: &OpenOpt) -> Result<Db> {
|
|
||||||
info!("Opening Fjall database at: {}", path.display());
|
|
||||||
if opt.fsync {
|
|
||||||
return Err(Error(
|
|
||||||
"metadata_fsync is not supported with the Fjall database engine".into(),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
let mut config = fjall::Config::new(path);
|
|
||||||
if let Some(block_cache_size) = opt.fjall_block_cache_size {
|
|
||||||
config = config.cache_size(block_cache_size as u64);
|
|
||||||
}
|
|
||||||
let keyspace = config.open_transactional()?;
|
|
||||||
Ok(FjallDb::init(keyspace))
|
|
||||||
}
|
|
||||||
|
|
||||||
// -- err
|
|
||||||
|
|
||||||
impl From<fjall::Error> for Error {
|
|
||||||
fn from(e: fjall::Error) -> Error {
|
|
||||||
Error(format!("fjall: {}", e).into())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<fjall::LsmError> for Error {
|
|
||||||
fn from(e: fjall::LsmError) -> Error {
|
|
||||||
Error(format!("fjall lsm_tree: {}", e).into())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<fjall::Error> for TxOpError {
|
|
||||||
fn from(e: fjall::Error) -> TxOpError {
|
|
||||||
TxOpError(e.into())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// -- db
|
|
||||||
|
|
||||||
pub struct FjallDb {
|
|
||||||
keyspace: TransactionalKeyspace,
|
|
||||||
trees: RwLock<Vec<(String, TransactionalPartitionHandle)>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
type ByteRefRangeBound<'r> = (Bound<&'r [u8]>, Bound<&'r [u8]>);
|
|
||||||
|
|
||||||
impl FjallDb {
|
|
||||||
pub fn init(keyspace: TransactionalKeyspace) -> Db {
|
|
||||||
let s = Self {
|
|
||||||
keyspace,
|
|
||||||
trees: RwLock::new(Vec::new()),
|
|
||||||
};
|
|
||||||
Db(Arc::new(s))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_tree(
|
|
||||||
&self,
|
|
||||||
i: usize,
|
|
||||||
) -> Result<MappedRwLockReadGuard<'_, TransactionalPartitionHandle>> {
|
|
||||||
RwLockReadGuard::try_map(self.trees.read(), |trees: &Vec<_>| {
|
|
||||||
trees.get(i).map(|tup| &tup.1)
|
|
||||||
})
|
|
||||||
.map_err(|_| Error("invalid tree id".into()))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl IDb for FjallDb {
|
|
||||||
fn engine(&self) -> String {
|
|
||||||
"Fjall (EXPERIMENTAL!)".into()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn open_tree(&self, name: &str) -> Result<usize> {
|
|
||||||
let mut trees = self.trees.write();
|
|
||||||
let safe_name = encode_name(name)?;
|
|
||||||
if let Some(i) = trees.iter().position(|(name, _)| *name == safe_name) {
|
|
||||||
Ok(i)
|
|
||||||
} else {
|
|
||||||
let tree = self
|
|
||||||
.keyspace
|
|
||||||
.open_partition(&safe_name, PartitionCreateOptions::default())?;
|
|
||||||
let i = trees.len();
|
|
||||||
trees.push((safe_name, tree));
|
|
||||||
Ok(i)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn list_trees(&self) -> Result<Vec<String>> {
|
|
||||||
Ok(self
|
|
||||||
.keyspace
|
|
||||||
.list_partitions()
|
|
||||||
.iter()
|
|
||||||
.map(|n| decode_name(&n))
|
|
||||||
.collect::<Result<Vec<_>>>()?)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn snapshot(&self, base_path: &PathBuf) -> Result<()> {
|
|
||||||
std::fs::create_dir_all(base_path)?;
|
|
||||||
let path = Engine::Fjall.db_path(base_path);
|
|
||||||
|
|
||||||
let source_state = self.keyspace.read_tx();
|
|
||||||
let copy_keyspace = fjall::Config::new(path).open()?;
|
|
||||||
|
|
||||||
for partition_name in self.keyspace.list_partitions() {
|
|
||||||
let source_partition = self
|
|
||||||
.keyspace
|
|
||||||
.open_partition(&partition_name, PartitionCreateOptions::default())?;
|
|
||||||
let copy_partition =
|
|
||||||
copy_keyspace.open_partition(&partition_name, PartitionCreateOptions::default())?;
|
|
||||||
|
|
||||||
for entry in source_state.iter(&source_partition) {
|
|
||||||
let (key, value) = entry?;
|
|
||||||
copy_partition.insert(key, value)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
copy_keyspace.persist(PersistMode::SyncAll)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----
|
|
||||||
|
|
||||||
fn get(&self, tree_idx: usize, key: &[u8]) -> Result<Option<Value>> {
|
|
||||||
let tree = self.get_tree(tree_idx)?;
|
|
||||||
let tx = self.keyspace.read_tx();
|
|
||||||
let val = tx.get(&tree, key)?;
|
|
||||||
match val {
|
|
||||||
None => Ok(None),
|
|
||||||
Some(v) => Ok(Some(v.to_vec())),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn approximate_len(&self, tree_idx: usize) -> Result<usize> {
|
|
||||||
let tree = self.get_tree(tree_idx)?;
|
|
||||||
Ok(tree.approximate_len())
|
|
||||||
}
|
|
||||||
fn is_empty(&self, tree_idx: usize) -> Result<bool> {
|
|
||||||
let tree = self.get_tree(tree_idx)?;
|
|
||||||
let tx = self.keyspace.read_tx();
|
|
||||||
Ok(tx.is_empty(&tree)?)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn insert(&self, tree_idx: usize, key: &[u8], value: &[u8]) -> Result<()> {
|
|
||||||
let tree = self.get_tree(tree_idx)?;
|
|
||||||
let mut tx = self.keyspace.write_tx();
|
|
||||||
tx.insert(&tree, key, value);
|
|
||||||
tx.commit()?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn remove(&self, tree_idx: usize, key: &[u8]) -> Result<()> {
|
|
||||||
let tree = self.get_tree(tree_idx)?;
|
|
||||||
let mut tx = self.keyspace.write_tx();
|
|
||||||
tx.remove(&tree, key);
|
|
||||||
tx.commit()?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn clear(&self, tree_idx: usize) -> Result<()> {
|
|
||||||
let mut trees = self.trees.write();
|
|
||||||
|
|
||||||
if tree_idx >= trees.len() {
|
|
||||||
return Err(Error("invalid tree id".into()));
|
|
||||||
}
|
|
||||||
let (name, tree) = trees.remove(tree_idx);
|
|
||||||
|
|
||||||
self.keyspace.delete_partition(tree)?;
|
|
||||||
let tree = self
|
|
||||||
.keyspace
|
|
||||||
.open_partition(&name, PartitionCreateOptions::default())?;
|
|
||||||
trees.insert(tree_idx, (name, tree));
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn iter(&self, tree_idx: usize) -> Result<ValueIter<'_>> {
|
|
||||||
let tree = self.get_tree(tree_idx)?;
|
|
||||||
let tx = self.keyspace.read_tx();
|
|
||||||
Ok(Box::new(tx.iter(&tree).map(iterator_remap)))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn iter_rev(&self, tree_idx: usize) -> Result<ValueIter<'_>> {
|
|
||||||
let tree = self.get_tree(tree_idx)?;
|
|
||||||
let tx = self.keyspace.read_tx();
|
|
||||||
Ok(Box::new(tx.iter(&tree).rev().map(iterator_remap)))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn range<'r>(
|
|
||||||
&self,
|
|
||||||
tree_idx: usize,
|
|
||||||
low: Bound<&'r [u8]>,
|
|
||||||
high: Bound<&'r [u8]>,
|
|
||||||
) -> Result<ValueIter<'_>> {
|
|
||||||
let tree = self.get_tree(tree_idx)?;
|
|
||||||
let tx = self.keyspace.read_tx();
|
|
||||||
Ok(Box::new(
|
|
||||||
tx.range::<&'r [u8], ByteRefRangeBound>(&tree, (low, high))
|
|
||||||
.map(iterator_remap),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
fn range_rev<'r>(
|
|
||||||
&self,
|
|
||||||
tree_idx: usize,
|
|
||||||
low: Bound<&'r [u8]>,
|
|
||||||
high: Bound<&'r [u8]>,
|
|
||||||
) -> Result<ValueIter<'_>> {
|
|
||||||
let tree = self.get_tree(tree_idx)?;
|
|
||||||
let tx = self.keyspace.read_tx();
|
|
||||||
Ok(Box::new(
|
|
||||||
tx.range::<&'r [u8], ByteRefRangeBound>(&tree, (low, high))
|
|
||||||
.rev()
|
|
||||||
.map(iterator_remap),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----
|
|
||||||
|
|
||||||
fn transaction(&self, f: &dyn ITxFn) -> TxResult<OnCommit, ()> {
|
|
||||||
let trees = self.trees.read();
|
|
||||||
let mut tx = FjallTx {
|
|
||||||
trees: &trees[..],
|
|
||||||
tx: self.keyspace.write_tx(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let res = f.try_on(&mut tx);
|
|
||||||
match res {
|
|
||||||
TxFnResult::Ok(on_commit) => {
|
|
||||||
tx.tx.commit().map_err(Error::from).map_err(TxError::Db)?;
|
|
||||||
Ok(on_commit)
|
|
||||||
}
|
|
||||||
TxFnResult::Abort => {
|
|
||||||
tx.tx.rollback();
|
|
||||||
Err(TxError::Abort(()))
|
|
||||||
}
|
|
||||||
TxFnResult::DbErr => {
|
|
||||||
tx.tx.rollback();
|
|
||||||
Err(TxError::Db(Error(
|
|
||||||
"(this message will be discarded)".into(),
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----
|
|
||||||
|
|
||||||
struct FjallTx<'a> {
|
|
||||||
trees: &'a [(String, TransactionalPartitionHandle)],
|
|
||||||
tx: WriteTransaction<'a>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> FjallTx<'a> {
|
|
||||||
fn get_tree(&self, i: usize) -> TxOpResult<&TransactionalPartitionHandle> {
|
|
||||||
self.trees.get(i).map(|tup| &tup.1).ok_or_else(|| {
|
|
||||||
TxOpError(Error(
|
|
||||||
"invalid tree id (it might have been openned after the transaction started)".into(),
|
|
||||||
))
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> ITx for FjallTx<'a> {
|
|
||||||
fn get(&self, tree_idx: usize, key: &[u8]) -> TxOpResult<Option<Value>> {
|
|
||||||
let tree = self.get_tree(tree_idx)?;
|
|
||||||
match self.tx.get(tree, key)? {
|
|
||||||
Some(v) => Ok(Some(v.to_vec())),
|
|
||||||
None => Ok(None),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fn len(&self, tree_idx: usize) -> TxOpResult<usize> {
|
|
||||||
let tree = self.get_tree(tree_idx)?;
|
|
||||||
Ok(self.tx.len(tree)? as usize)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn insert(&mut self, tree_idx: usize, key: &[u8], value: &[u8]) -> TxOpResult<()> {
|
|
||||||
let tree = self.get_tree(tree_idx)?.clone();
|
|
||||||
self.tx.insert(&tree, key, value);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
fn remove(&mut self, tree_idx: usize, key: &[u8]) -> TxOpResult<()> {
|
|
||||||
let tree = self.get_tree(tree_idx)?.clone();
|
|
||||||
self.tx.remove(&tree, key);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
fn clear(&mut self, _tree_idx: usize) -> TxOpResult<()> {
|
|
||||||
unimplemented!("LSM tree clearing in cross-partition transaction is not supported")
|
|
||||||
}
|
|
||||||
|
|
||||||
fn iter(&self, tree_idx: usize) -> TxOpResult<TxValueIter<'_>> {
|
|
||||||
let tree = self.get_tree(tree_idx)?.clone();
|
|
||||||
Ok(Box::new(self.tx.iter(&tree).map(iterator_remap_tx)))
|
|
||||||
}
|
|
||||||
fn iter_rev(&self, tree_idx: usize) -> TxOpResult<TxValueIter<'_>> {
|
|
||||||
let tree = self.get_tree(tree_idx)?.clone();
|
|
||||||
Ok(Box::new(self.tx.iter(&tree).rev().map(iterator_remap_tx)))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn range<'r>(
|
|
||||||
&self,
|
|
||||||
tree_idx: usize,
|
|
||||||
low: Bound<&'r [u8]>,
|
|
||||||
high: Bound<&'r [u8]>,
|
|
||||||
) -> TxOpResult<TxValueIter<'_>> {
|
|
||||||
let tree = self.get_tree(tree_idx)?;
|
|
||||||
let low = clone_bound(low);
|
|
||||||
let high = clone_bound(high);
|
|
||||||
Ok(Box::new(
|
|
||||||
self.tx
|
|
||||||
.range::<Vec<u8>, ByteVecRangeBounds>(&tree, (low, high))
|
|
||||||
.map(iterator_remap_tx),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
fn range_rev<'r>(
|
|
||||||
&self,
|
|
||||||
tree_idx: usize,
|
|
||||||
low: Bound<&'r [u8]>,
|
|
||||||
high: Bound<&'r [u8]>,
|
|
||||||
) -> TxOpResult<TxValueIter<'_>> {
|
|
||||||
let tree = self.get_tree(tree_idx)?;
|
|
||||||
let low = clone_bound(low);
|
|
||||||
let high = clone_bound(high);
|
|
||||||
Ok(Box::new(
|
|
||||||
self.tx
|
|
||||||
.range::<Vec<u8>, ByteVecRangeBounds>(&tree, (low, high))
|
|
||||||
.rev()
|
|
||||||
.map(iterator_remap_tx),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// -- maps fjall's (k, v) to ours
|
|
||||||
|
|
||||||
fn iterator_remap(r: fjall::Result<(fjall::Slice, fjall::Slice)>) -> Result<(Value, Value)> {
|
|
||||||
r.map(|(k, v)| (k.to_vec(), v.to_vec()))
|
|
||||||
.map_err(|e| e.into())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn iterator_remap_tx(r: fjall::Result<(fjall::Slice, fjall::Slice)>) -> TxOpResult<(Value, Value)> {
|
|
||||||
r.map(|(k, v)| (k.to_vec(), v.to_vec()))
|
|
||||||
.map_err(|e| e.into())
|
|
||||||
}
|
|
||||||
|
|
||||||
// -- utils to deal with Garage's tightness on Bound lifetimes
|
|
||||||
|
|
||||||
type ByteVecBound = Bound<Vec<u8>>;
|
|
||||||
type ByteVecRangeBounds = (ByteVecBound, ByteVecBound);
|
|
||||||
|
|
||||||
fn clone_bound(bound: Bound<&[u8]>) -> ByteVecBound {
|
|
||||||
let value = match bound {
|
|
||||||
Bound::Excluded(v) | Bound::Included(v) => v.to_vec(),
|
|
||||||
Bound::Unbounded => vec![],
|
|
||||||
};
|
|
||||||
|
|
||||||
match bound {
|
|
||||||
Bound::Included(_) => Bound::Included(value),
|
|
||||||
Bound::Excluded(_) => Bound::Excluded(value),
|
|
||||||
Bound::Unbounded => Bound::Unbounded,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// -- utils to encode table names --
|
|
||||||
|
|
||||||
fn encode_name(s: &str) -> Result<String> {
|
|
||||||
let base = 'A' as u32;
|
|
||||||
|
|
||||||
let mut ret = String::with_capacity(s.len() + 10);
|
|
||||||
for c in s.chars() {
|
|
||||||
if c.is_alphanumeric() || c == '_' || c == '-' || c == '#' {
|
|
||||||
ret.push(c);
|
|
||||||
} else if c <= u8::MAX as char {
|
|
||||||
ret.push('$');
|
|
||||||
let c_hi = c as u32 / 16;
|
|
||||||
let c_lo = c as u32 % 16;
|
|
||||||
ret.push(char::from_u32(base + c_hi).unwrap());
|
|
||||||
ret.push(char::from_u32(base + c_lo).unwrap());
|
|
||||||
} else {
|
|
||||||
return Err(Error(
|
|
||||||
format!("table name {} could not be safely encoded", s).into(),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(ret)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn decode_name(s: &str) -> Result<String> {
|
|
||||||
use std::convert::TryFrom;
|
|
||||||
|
|
||||||
let errfn = || Error(format!("encoded table name {} is invalid", s).into());
|
|
||||||
let c_map = |c: char| {
|
|
||||||
let c = c as u32;
|
|
||||||
let base = 'A' as u32;
|
|
||||||
if (base..base + 16).contains(&c) {
|
|
||||||
Some(c - base)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut ret = String::with_capacity(s.len());
|
|
||||||
let mut it = s.chars();
|
|
||||||
while let Some(c) = it.next() {
|
|
||||||
if c == '$' {
|
|
||||||
let c_hi = it.next().and_then(c_map).ok_or_else(errfn)?;
|
|
||||||
let c_lo = it.next().and_then(c_map).ok_or_else(errfn)?;
|
|
||||||
let c_dec = char::try_from(c_hi * 16 + c_lo).map_err(|_| errfn())?;
|
|
||||||
ret.push(c_dec);
|
|
||||||
} else {
|
|
||||||
ret.push(c);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(ret)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_encdec_name() {
|
|
||||||
for name in [
|
|
||||||
"testname",
|
|
||||||
"test_name",
|
|
||||||
"test name",
|
|
||||||
"test$name",
|
|
||||||
"test:name@help.me$get/this**right",
|
|
||||||
] {
|
|
||||||
let encname = encode_name(name).unwrap();
|
|
||||||
assert!(!encname.contains(' '));
|
|
||||||
assert!(!encname.contains('.'));
|
|
||||||
assert!(!encname.contains('*'));
|
|
||||||
assert_eq!(*name, decode_name(&encname).unwrap());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,8 +1,6 @@
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate tracing;
|
extern crate tracing;
|
||||||
|
|
||||||
#[cfg(feature = "fjall")]
|
|
||||||
pub mod fjall_adapter;
|
|
||||||
#[cfg(feature = "lmdb")]
|
#[cfg(feature = "lmdb")]
|
||||||
pub mod lmdb_adapter;
|
pub mod lmdb_adapter;
|
||||||
#[cfg(feature = "sqlite")]
|
#[cfg(feature = "sqlite")]
|
||||||
|
|
@ -20,7 +18,7 @@ use std::cell::Cell;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use thiserror::Error;
|
use err_derive::Error;
|
||||||
|
|
||||||
pub use open::*;
|
pub use open::*;
|
||||||
|
|
||||||
|
|
@ -44,7 +42,7 @@ pub type TxValueIter<'a> = Box<dyn std::iter::Iterator<Item = TxOpResult<(Value,
|
||||||
// ----
|
// ----
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
#[error("{0}")]
|
#[error(display = "{}", _0)]
|
||||||
pub struct Error(pub Cow<'static, str>);
|
pub struct Error(pub Cow<'static, str>);
|
||||||
|
|
||||||
impl From<std::io::Error> for Error {
|
impl From<std::io::Error> for Error {
|
||||||
|
|
@ -56,7 +54,7 @@ impl From<std::io::Error> for Error {
|
||||||
pub type Result<T> = std::result::Result<T, Error>;
|
pub type Result<T> = std::result::Result<T, Error>;
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
#[error("{0}")]
|
#[error(display = "{}", _0)]
|
||||||
pub struct TxOpError(pub(crate) Error);
|
pub struct TxOpError(pub(crate) Error);
|
||||||
pub type TxOpResult<T> = std::result::Result<T, TxOpError>;
|
pub type TxOpResult<T> = std::result::Result<T, TxOpError>;
|
||||||
|
|
||||||
|
|
@ -106,44 +104,32 @@ impl Db {
|
||||||
result: Cell::new(None),
|
result: Cell::new(None),
|
||||||
};
|
};
|
||||||
let tx_res = self.0.transaction(&f);
|
let tx_res = self.0.transaction(&f);
|
||||||
let fn_res = f.result.into_inner();
|
let ret = f
|
||||||
|
.result
|
||||||
|
.into_inner()
|
||||||
|
.expect("Transaction did not store result");
|
||||||
|
|
||||||
match (tx_res, fn_res) {
|
match tx_res {
|
||||||
(Ok(on_commit), Some(Ok(value))) => {
|
Ok(on_commit) => match ret {
|
||||||
// Transaction succeeded
|
Ok(value) => {
|
||||||
// TxFn stored the value to return to the user in fn_res
|
on_commit.into_iter().for_each(|f| f());
|
||||||
// tx_res contains the on_commit list of callbacks, run them now
|
Ok(value)
|
||||||
on_commit.into_iter().for_each(|f| f());
|
}
|
||||||
Ok(value)
|
_ => unreachable!(),
|
||||||
}
|
},
|
||||||
(Err(TxError::Abort(())), Some(Err(TxError::Abort(e)))) => {
|
Err(TxError::Abort(())) => match ret {
|
||||||
// Transaction was aborted by user code
|
Err(TxError::Abort(e)) => Err(TxError::Abort(e)),
|
||||||
// The abort error value is stored in fn_res
|
_ => unreachable!(),
|
||||||
Err(TxError::Abort(e))
|
},
|
||||||
}
|
Err(TxError::Db(e2)) => match ret {
|
||||||
(Err(TxError::Db(_tx_e)), Some(Err(TxError::Db(fn_e)))) => {
|
// Ok was stored -> the error occurred when finalizing
|
||||||
// Transaction encountered a DB error in user code
|
// transaction
|
||||||
// The error value encountered is the one in fn_res,
|
Ok(_) => Err(TxError::Db(e2)),
|
||||||
// tx_res contains only a dummy error message
|
// An error was already stored: that's the one we want to
|
||||||
Err(TxError::Db(fn_e))
|
// return
|
||||||
}
|
Err(TxError::Db(e)) => Err(TxError::Db(e)),
|
||||||
(Err(TxError::Db(tx_e)), None) => {
|
_ => unreachable!(),
|
||||||
// Transaction encounterred a DB error when initializing the transaction,
|
},
|
||||||
// before user code was called
|
|
||||||
Err(TxError::Db(tx_e))
|
|
||||||
}
|
|
||||||
(Err(TxError::Db(tx_e)), Some(Ok(_))) => {
|
|
||||||
// Transaction encounterred a DB error when commiting the transaction,
|
|
||||||
// after user code was called
|
|
||||||
Err(TxError::Db(tx_e))
|
|
||||||
}
|
|
||||||
(tx_res, fn_res) => {
|
|
||||||
panic!(
|
|
||||||
"unexpected error case: tx_res={:?}, fn_res={:?}",
|
|
||||||
tx_res.map(|_| "..."),
|
|
||||||
fn_res.map(|x| x.map(|_| "...").map_err(|_| "..."))
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -166,7 +152,7 @@ impl Db {
|
||||||
let tree_names = other.list_trees()?;
|
let tree_names = other.list_trees()?;
|
||||||
for name in tree_names {
|
for name in tree_names {
|
||||||
let tree = self.open_tree(&name)?;
|
let tree = self.open_tree(&name)?;
|
||||||
if !tree.is_empty()? {
|
if tree.len()? > 0 {
|
||||||
return Err(Error(format!("tree {} already contains data", name).into()));
|
return Err(Error(format!("tree {} already contains data", name).into()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -208,12 +194,8 @@ impl Tree {
|
||||||
self.0.get(self.1, key.as_ref())
|
self.0.get(self.1, key.as_ref())
|
||||||
}
|
}
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn approximate_len(&self) -> Result<usize> {
|
pub fn len(&self) -> Result<usize> {
|
||||||
self.0.approximate_len(self.1)
|
self.0.len(self.1)
|
||||||
}
|
|
||||||
#[inline]
|
|
||||||
pub fn is_empty(&self) -> Result<bool> {
|
|
||||||
self.0.is_empty(self.1)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
|
@ -351,8 +333,7 @@ pub(crate) trait IDb: Send + Sync {
|
||||||
fn snapshot(&self, path: &PathBuf) -> Result<()>;
|
fn snapshot(&self, path: &PathBuf) -> Result<()>;
|
||||||
|
|
||||||
fn get(&self, tree: usize, key: &[u8]) -> Result<Option<Value>>;
|
fn get(&self, tree: usize, key: &[u8]) -> Result<Option<Value>>;
|
||||||
fn approximate_len(&self, tree: usize) -> Result<usize>;
|
fn len(&self, tree: usize) -> Result<usize>;
|
||||||
fn is_empty(&self, tree: usize) -> Result<bool>;
|
|
||||||
|
|
||||||
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<()>;
|
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<()>;
|
||||||
fn remove(&self, tree: usize, key: &[u8]) -> Result<()>;
|
fn remove(&self, tree: usize, key: &[u8]) -> Result<()>;
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
use core::ops::Bound;
|
use core::ops::Bound;
|
||||||
|
use core::ptr::NonNull;
|
||||||
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
use std::marker::PhantomPinned;
|
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::pin::Pin;
|
use std::pin::Pin;
|
||||||
use std::sync::{Arc, RwLock};
|
use std::sync::{Arc, RwLock};
|
||||||
|
|
@ -11,55 +11,12 @@ use heed::types::ByteSlice;
|
||||||
use heed::{BytesDecode, Env, RoTxn, RwTxn, UntypedDatabase as Database};
|
use heed::{BytesDecode, Env, RoTxn, RwTxn, UntypedDatabase as Database};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
open::{Engine, OpenOpt},
|
|
||||||
Db, Error, IDb, ITx, ITxFn, OnCommit, Result, TxError, TxFnResult, TxOpError, TxOpResult,
|
Db, Error, IDb, ITx, ITxFn, OnCommit, Result, TxError, TxFnResult, TxOpError, TxOpResult,
|
||||||
TxResult, TxValueIter, Value, ValueIter,
|
TxResult, TxValueIter, Value, ValueIter,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub use heed;
|
pub use heed;
|
||||||
|
|
||||||
// ---- top-level open function
|
|
||||||
|
|
||||||
pub(crate) fn open_db(path: &PathBuf, opt: &OpenOpt) -> Result<Db> {
|
|
||||||
info!("Opening LMDB database at: {}", path.display());
|
|
||||||
if let Err(e) = std::fs::create_dir_all(&path) {
|
|
||||||
return Err(Error(
|
|
||||||
format!("Unable to create LMDB data directory: {}", e).into(),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
let map_size = match opt.lmdb_map_size {
|
|
||||||
None => recommended_map_size(),
|
|
||||||
Some(v) => v - (v % 4096),
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut env_builder = heed::EnvOpenOptions::new();
|
|
||||||
env_builder.max_dbs(100);
|
|
||||||
env_builder.map_size(map_size);
|
|
||||||
env_builder.max_readers(2048);
|
|
||||||
unsafe {
|
|
||||||
env_builder.flag(heed::flags::Flags::MdbNoRdAhead);
|
|
||||||
env_builder.flag(heed::flags::Flags::MdbNoMetaSync);
|
|
||||||
if !opt.fsync {
|
|
||||||
env_builder.flag(heed::flags::Flags::MdbNoSync);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
match env_builder.open(&path) {
|
|
||||||
Err(heed::Error::Io(e)) if e.kind() == std::io::ErrorKind::OutOfMemory => {
|
|
||||||
return Err(Error(
|
|
||||||
"OutOfMemory error while trying to open LMDB database. This can happen \
|
|
||||||
if your operating system is not allowing you to use sufficient virtual \
|
|
||||||
memory address space. Please check that no limit is set (ulimit -v). \
|
|
||||||
You may also try to set a smaller `lmdb_map_size` configuration parameter. \
|
|
||||||
On 32-bit machines, you should probably switch to another database engine."
|
|
||||||
.into(),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
Err(e) => Err(Error(format!("Cannot open LMDB database: {}", e).into())),
|
|
||||||
Ok(db) => Ok(LmdbDb::init(db)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// -- err
|
// -- err
|
||||||
|
|
||||||
impl From<heed::Error> for Error {
|
impl From<heed::Error> for Error {
|
||||||
|
|
@ -147,9 +104,10 @@ impl IDb for LmdbDb {
|
||||||
Ok(ret2)
|
Ok(ret2)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn snapshot(&self, base_path: &PathBuf) -> Result<()> {
|
fn snapshot(&self, to: &PathBuf) -> Result<()> {
|
||||||
std::fs::create_dir_all(base_path)?;
|
std::fs::create_dir_all(to)?;
|
||||||
let path = Engine::Lmdb.db_path(base_path);
|
let mut path = to.clone();
|
||||||
|
path.push("data.mdb");
|
||||||
self.db
|
self.db
|
||||||
.copy_to_path(path, heed::CompactionOption::Enabled)?;
|
.copy_to_path(path, heed::CompactionOption::Enabled)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
@ -168,16 +126,11 @@ impl IDb for LmdbDb {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn approximate_len(&self, tree: usize) -> Result<usize> {
|
fn len(&self, tree: usize) -> Result<usize> {
|
||||||
let tree = self.get_tree(tree)?;
|
let tree = self.get_tree(tree)?;
|
||||||
let tx = self.db.read_txn()?;
|
let tx = self.db.read_txn()?;
|
||||||
Ok(tree.len(&tx)?.try_into().unwrap())
|
Ok(tree.len(&tx)?.try_into().unwrap())
|
||||||
}
|
}
|
||||||
fn is_empty(&self, tree: usize) -> Result<bool> {
|
|
||||||
let tree = self.get_tree(tree)?;
|
|
||||||
let tx = self.db.read_txn()?;
|
|
||||||
Ok(tree.is_empty(&tx)?)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<()> {
|
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<()> {
|
||||||
let tree = self.get_tree(tree)?;
|
let tree = self.get_tree(tree)?;
|
||||||
|
|
@ -206,15 +159,13 @@ impl IDb for LmdbDb {
|
||||||
fn iter(&self, tree: usize) -> Result<ValueIter<'_>> {
|
fn iter(&self, tree: usize) -> Result<ValueIter<'_>> {
|
||||||
let tree = self.get_tree(tree)?;
|
let tree = self.get_tree(tree)?;
|
||||||
let tx = self.db.read_txn()?;
|
let tx = self.db.read_txn()?;
|
||||||
// Safety: the cloture does not store its argument anywhere,
|
TxAndIterator::make(tx, |tx| Ok(tree.iter(tx)?))
|
||||||
unsafe { TxAndIterator::make(tx, |tx| Ok(tree.iter(tx)?)) }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn iter_rev(&self, tree: usize) -> Result<ValueIter<'_>> {
|
fn iter_rev(&self, tree: usize) -> Result<ValueIter<'_>> {
|
||||||
let tree = self.get_tree(tree)?;
|
let tree = self.get_tree(tree)?;
|
||||||
let tx = self.db.read_txn()?;
|
let tx = self.db.read_txn()?;
|
||||||
// Safety: the cloture does not store its argument anywhere,
|
TxAndIterator::make(tx, |tx| Ok(tree.rev_iter(tx)?))
|
||||||
unsafe { TxAndIterator::make(tx, |tx| Ok(tree.rev_iter(tx)?)) }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn range<'r>(
|
fn range<'r>(
|
||||||
|
|
@ -225,8 +176,7 @@ impl IDb for LmdbDb {
|
||||||
) -> Result<ValueIter<'_>> {
|
) -> Result<ValueIter<'_>> {
|
||||||
let tree = self.get_tree(tree)?;
|
let tree = self.get_tree(tree)?;
|
||||||
let tx = self.db.read_txn()?;
|
let tx = self.db.read_txn()?;
|
||||||
// Safety: the cloture does not store its argument anywhere,
|
TxAndIterator::make(tx, |tx| Ok(tree.range(tx, &(low, high))?))
|
||||||
unsafe { TxAndIterator::make(tx, |tx| Ok(tree.range(tx, &(low, high))?)) }
|
|
||||||
}
|
}
|
||||||
fn range_rev<'r>(
|
fn range_rev<'r>(
|
||||||
&self,
|
&self,
|
||||||
|
|
@ -236,8 +186,7 @@ impl IDb for LmdbDb {
|
||||||
) -> Result<ValueIter<'_>> {
|
) -> Result<ValueIter<'_>> {
|
||||||
let tree = self.get_tree(tree)?;
|
let tree = self.get_tree(tree)?;
|
||||||
let tx = self.db.read_txn()?;
|
let tx = self.db.read_txn()?;
|
||||||
// Safety: the cloture does not store its argument anywhere,
|
TxAndIterator::make(tx, |tx| Ok(tree.rev_range(tx, &(low, high))?))
|
||||||
unsafe { TxAndIterator::make(tx, |tx| Ok(tree.rev_range(tx, &(low, high))?)) }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ----
|
// ----
|
||||||
|
|
@ -367,41 +316,28 @@ where
|
||||||
{
|
{
|
||||||
tx: RoTxn<'a>,
|
tx: RoTxn<'a>,
|
||||||
iter: Option<I>,
|
iter: Option<I>,
|
||||||
_pin: PhantomPinned,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, I> TxAndIterator<'a, I>
|
impl<'a, I> TxAndIterator<'a, I>
|
||||||
where
|
where
|
||||||
I: Iterator<Item = IteratorItem<'a>> + 'a,
|
I: Iterator<Item = IteratorItem<'a>> + 'a,
|
||||||
{
|
{
|
||||||
fn iter(self: Pin<&mut Self>) -> &mut Option<I> {
|
fn make<F>(tx: RoTxn<'a>, iterfun: F) -> Result<ValueIter<'a>>
|
||||||
// Safety: iter is not structural
|
|
||||||
unsafe { &mut self.get_unchecked_mut().iter }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Safety: iterfun must not store its argument anywhere but in its result.
|
|
||||||
unsafe fn make<F>(tx: RoTxn<'a>, iterfun: F) -> Result<ValueIter<'a>>
|
|
||||||
where
|
where
|
||||||
F: FnOnce(&'a RoTxn<'a>) -> Result<I>,
|
F: FnOnce(&'a RoTxn<'a>) -> Result<I>,
|
||||||
{
|
{
|
||||||
let res = TxAndIterator {
|
let res = TxAndIterator { tx, iter: None };
|
||||||
tx,
|
|
||||||
iter: None,
|
|
||||||
_pin: PhantomPinned,
|
|
||||||
};
|
|
||||||
let mut boxed = Box::pin(res);
|
let mut boxed = Box::pin(res);
|
||||||
|
|
||||||
let tx_lifetime_overextended: &'a RoTxn<'a> = {
|
// This unsafe allows us to bypass lifetime checks
|
||||||
let tx = &boxed.tx;
|
let tx = unsafe { NonNull::from(&boxed.tx).as_ref() };
|
||||||
// Safety: Artificially extending the lifetime because
|
let iter = iterfun(tx)?;
|
||||||
// this reference will only be stored and accessed from the
|
|
||||||
// returned ValueIter which guarantees that it is destroyed
|
|
||||||
// before the tx it is pointing to.
|
|
||||||
unsafe { &*&raw const *tx }
|
|
||||||
};
|
|
||||||
let iter = iterfun(&tx_lifetime_overextended)?;
|
|
||||||
|
|
||||||
*boxed.as_mut().iter() = Some(iter);
|
let mut_ref = Pin::as_mut(&mut boxed);
|
||||||
|
// This unsafe allows us to write in a field of the pinned struct
|
||||||
|
unsafe {
|
||||||
|
Pin::get_unchecked_mut(mut_ref).iter = Some(iter);
|
||||||
|
}
|
||||||
|
|
||||||
Ok(Box::new(TxAndIteratorPin(boxed)))
|
Ok(Box::new(TxAndIteratorPin(boxed)))
|
||||||
}
|
}
|
||||||
|
|
@ -412,10 +348,8 @@ where
|
||||||
I: Iterator<Item = IteratorItem<'a>> + 'a,
|
I: Iterator<Item = IteratorItem<'a>> + 'a,
|
||||||
{
|
{
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
// Safety: `new_unchecked` is okay because we know this value is never
|
// ensure the iterator is dropped before the RoTxn it references
|
||||||
// used again after being dropped.
|
drop(self.iter.take());
|
||||||
let this = unsafe { Pin::new_unchecked(self) };
|
|
||||||
drop(this.iter().take());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -431,12 +365,13 @@ where
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
let mut_ref = Pin::as_mut(&mut self.0);
|
let mut_ref = Pin::as_mut(&mut self.0);
|
||||||
let next = mut_ref.iter().as_mut()?.next()?;
|
// This unsafe allows us to mutably access the iterator field
|
||||||
let res = match next {
|
let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() };
|
||||||
Err(e) => Err(e.into()),
|
match next {
|
||||||
Ok((k, v)) => Ok((k.to_vec(), v.to_vec())),
|
None => None,
|
||||||
};
|
Some(Err(e)) => Some(Err(e.into())),
|
||||||
Some(res)
|
Some(Ok((k, v))) => Some(Ok((k.to_vec(), v.to_vec()))),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,6 @@ use crate::{Db, Error, Result};
|
||||||
pub enum Engine {
|
pub enum Engine {
|
||||||
Lmdb,
|
Lmdb,
|
||||||
Sqlite,
|
Sqlite,
|
||||||
Fjall,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Engine {
|
impl Engine {
|
||||||
|
|
@ -20,26 +19,8 @@ impl Engine {
|
||||||
match self {
|
match self {
|
||||||
Self::Lmdb => "lmdb",
|
Self::Lmdb => "lmdb",
|
||||||
Self::Sqlite => "sqlite",
|
Self::Sqlite => "sqlite",
|
||||||
Self::Fjall => "fjall",
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return engine-specific DB path from base path
|
|
||||||
pub fn db_path(&self, base_path: &PathBuf) -> PathBuf {
|
|
||||||
let mut ret = base_path.clone();
|
|
||||||
match self {
|
|
||||||
Self::Lmdb => {
|
|
||||||
ret.push("db.lmdb");
|
|
||||||
}
|
|
||||||
Self::Sqlite => {
|
|
||||||
ret.push("db.sqlite");
|
|
||||||
}
|
|
||||||
Self::Fjall => {
|
|
||||||
ret.push("db.fjall");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ret
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Display for Engine {
|
impl std::fmt::Display for Engine {
|
||||||
|
|
@ -55,11 +36,10 @@ impl std::str::FromStr for Engine {
|
||||||
match text {
|
match text {
|
||||||
"lmdb" | "heed" => Ok(Self::Lmdb),
|
"lmdb" | "heed" => Ok(Self::Lmdb),
|
||||||
"sqlite" | "sqlite3" | "rusqlite" => Ok(Self::Sqlite),
|
"sqlite" | "sqlite3" | "rusqlite" => Ok(Self::Sqlite),
|
||||||
"fjall" => Ok(Self::Fjall),
|
|
||||||
"sled" => Err(Error("Sled is no longer supported as a database engine. Converting your old metadata db can be done using an older Garage binary (e.g. v0.9.4).".into())),
|
"sled" => Err(Error("Sled is no longer supported as a database engine. Converting your old metadata db can be done using an older Garage binary (e.g. v0.9.4).".into())),
|
||||||
kind => Err(Error(
|
kind => Err(Error(
|
||||||
format!(
|
format!(
|
||||||
"Invalid DB engine: {} (options are: lmdb, sqlite, fjall)",
|
"Invalid DB engine: {} (options are: lmdb, sqlite)",
|
||||||
kind
|
kind
|
||||||
)
|
)
|
||||||
.into(),
|
.into(),
|
||||||
|
|
@ -71,7 +51,6 @@ impl std::str::FromStr for Engine {
|
||||||
pub struct OpenOpt {
|
pub struct OpenOpt {
|
||||||
pub fsync: bool,
|
pub fsync: bool,
|
||||||
pub lmdb_map_size: Option<usize>,
|
pub lmdb_map_size: Option<usize>,
|
||||||
pub fjall_block_cache_size: Option<usize>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for OpenOpt {
|
impl Default for OpenOpt {
|
||||||
|
|
@ -79,7 +58,6 @@ impl Default for OpenOpt {
|
||||||
Self {
|
Self {
|
||||||
fsync: false,
|
fsync: false,
|
||||||
lmdb_map_size: None,
|
lmdb_map_size: None,
|
||||||
fjall_block_cache_size: None,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -88,15 +66,53 @@ pub fn open_db(path: &PathBuf, engine: Engine, opt: &OpenOpt) -> Result<Db> {
|
||||||
match engine {
|
match engine {
|
||||||
// ---- Sqlite DB ----
|
// ---- Sqlite DB ----
|
||||||
#[cfg(feature = "sqlite")]
|
#[cfg(feature = "sqlite")]
|
||||||
Engine::Sqlite => crate::sqlite_adapter::open_db(path, opt),
|
Engine::Sqlite => {
|
||||||
|
info!("Opening Sqlite database at: {}", path.display());
|
||||||
|
let manager = r2d2_sqlite::SqliteConnectionManager::file(path);
|
||||||
|
Ok(crate::sqlite_adapter::SqliteDb::new(manager, opt.fsync)?)
|
||||||
|
}
|
||||||
|
|
||||||
// ---- LMDB DB ----
|
// ---- LMDB DB ----
|
||||||
#[cfg(feature = "lmdb")]
|
#[cfg(feature = "lmdb")]
|
||||||
Engine::Lmdb => crate::lmdb_adapter::open_db(path, opt),
|
Engine::Lmdb => {
|
||||||
|
info!("Opening LMDB database at: {}", path.display());
|
||||||
|
if let Err(e) = std::fs::create_dir_all(&path) {
|
||||||
|
return Err(Error(
|
||||||
|
format!("Unable to create LMDB data directory: {}", e).into(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
// ---- Fjall DB ----
|
let map_size = match opt.lmdb_map_size {
|
||||||
#[cfg(feature = "fjall")]
|
None => crate::lmdb_adapter::recommended_map_size(),
|
||||||
Engine::Fjall => crate::fjall_adapter::open_db(path, opt),
|
Some(v) => v - (v % 4096),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut env_builder = heed::EnvOpenOptions::new();
|
||||||
|
env_builder.max_dbs(100);
|
||||||
|
env_builder.map_size(map_size);
|
||||||
|
env_builder.max_readers(2048);
|
||||||
|
unsafe {
|
||||||
|
env_builder.flag(crate::lmdb_adapter::heed::flags::Flags::MdbNoRdAhead);
|
||||||
|
env_builder.flag(crate::lmdb_adapter::heed::flags::Flags::MdbNoMetaSync);
|
||||||
|
if !opt.fsync {
|
||||||
|
env_builder.flag(heed::flags::Flags::MdbNoSync);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
match env_builder.open(&path) {
|
||||||
|
Err(heed::Error::Io(e)) if e.kind() == std::io::ErrorKind::OutOfMemory => {
|
||||||
|
return Err(Error(
|
||||||
|
"OutOfMemory error while trying to open LMDB database. This can happen \
|
||||||
|
if your operating system is not allowing you to use sufficient virtual \
|
||||||
|
memory address space. Please check that no limit is set (ulimit -v). \
|
||||||
|
You may also try to set a smaller `lmdb_map_size` configuration parameter. \
|
||||||
|
On 32-bit machines, you should probably switch to another database engine."
|
||||||
|
.into(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
Err(e) => Err(Error(format!("Cannot open LMDB database: {}", e).into())),
|
||||||
|
Ok(db) => Ok(crate::lmdb_adapter::LmdbDb::init(db)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Pattern is unreachable when all supported DB engines are compiled into binary. The allow
|
// Pattern is unreachable when all supported DB engines are compiled into binary. The allow
|
||||||
// attribute is added so that we won't have to change this match in case stop building
|
// attribute is added so that we won't have to change this match in case stop building
|
||||||
|
|
|
||||||
|
|
@ -11,23 +11,12 @@ use r2d2_sqlite::SqliteConnectionManager;
|
||||||
use rusqlite::{params, Rows, Statement, Transaction};
|
use rusqlite::{params, Rows, Statement, Transaction};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
open::{Engine, OpenOpt},
|
|
||||||
Db, Error, IDb, ITx, ITxFn, OnCommit, Result, TxError, TxFnResult, TxOpError, TxOpResult,
|
Db, Error, IDb, ITx, ITxFn, OnCommit, Result, TxError, TxFnResult, TxOpError, TxOpResult,
|
||||||
TxResult, TxValueIter, Value, ValueIter,
|
TxResult, TxValueIter, Value, ValueIter,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub use rusqlite;
|
pub use rusqlite;
|
||||||
|
|
||||||
// ---- top-level open function
|
|
||||||
|
|
||||||
pub(crate) fn open_db(path: &PathBuf, opt: &OpenOpt) -> Result<Db> {
|
|
||||||
info!("Opening Sqlite database at: {}", path.display());
|
|
||||||
let manager = r2d2_sqlite::SqliteConnectionManager::file(path);
|
|
||||||
Ok(SqliteDb::new(manager, opt.fsync)?)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----
|
|
||||||
|
|
||||||
type Connection = r2d2::PooledConnection<SqliteConnectionManager>;
|
type Connection = r2d2::PooledConnection<SqliteConnectionManager>;
|
||||||
|
|
||||||
// --- err
|
// --- err
|
||||||
|
|
@ -150,18 +139,17 @@ impl IDb for SqliteDb {
|
||||||
Ok(trees)
|
Ok(trees)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn snapshot(&self, base_path: &PathBuf) -> Result<()> {
|
fn snapshot(&self, to: &PathBuf) -> Result<()> {
|
||||||
std::fs::create_dir_all(base_path)?;
|
fn progress(p: rusqlite::backup::Progress) {
|
||||||
let path = Engine::Sqlite
|
let percent = (p.pagecount - p.remaining) * 100 / p.pagecount;
|
||||||
.db_path(&base_path)
|
info!("Sqlite snapshot progress: {}%", percent);
|
||||||
.into_os_string()
|
}
|
||||||
.into_string()
|
std::fs::create_dir_all(to)?;
|
||||||
.map_err(|_| Error("invalid sqlite path string".into()))?;
|
let mut path = to.clone();
|
||||||
|
path.push("db.sqlite");
|
||||||
info!("Start sqlite VACUUM INTO `{}`", path);
|
self.db
|
||||||
self.db.get()?.execute("VACUUM INTO ?1", params![path])?;
|
.get()?
|
||||||
info!("Finished sqlite VACUUM INTO `{}`", path);
|
.backup(rusqlite::DatabaseName::Main, path, Some(progress))?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -172,7 +160,7 @@ impl IDb for SqliteDb {
|
||||||
self.internal_get(&self.db.get()?, &tree, key)
|
self.internal_get(&self.db.get()?, &tree, key)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn approximate_len(&self, tree: usize) -> Result<usize> {
|
fn len(&self, tree: usize) -> Result<usize> {
|
||||||
let tree = self.get_tree(tree)?;
|
let tree = self.get_tree(tree)?;
|
||||||
let db = self.db.get()?;
|
let db = self.db.get()?;
|
||||||
|
|
||||||
|
|
@ -184,10 +172,6 @@ impl IDb for SqliteDb {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_empty(&self, tree: usize) -> Result<bool> {
|
|
||||||
Ok(self.approximate_len(tree)? == 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<()> {
|
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<()> {
|
||||||
let tree = self.get_tree(tree)?;
|
let tree = self.get_tree(tree)?;
|
||||||
let db = self.db.get()?;
|
let db = self.db.get()?;
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
use crate::*;
|
use crate::*;
|
||||||
|
|
||||||
fn test_suite(db: Db) {
|
fn test_suite(db: Db) {
|
||||||
let tree = db.open_tree("tree:this_is_a_tree").unwrap();
|
let tree = db.open_tree("tree").unwrap();
|
||||||
|
|
||||||
let ka: &[u8] = &b"test"[..];
|
let ka: &[u8] = &b"test"[..];
|
||||||
let kb: &[u8] = &b"zwello"[..];
|
let kb: &[u8] = &b"zwello"[..];
|
||||||
|
|
@ -14,7 +14,7 @@ fn test_suite(db: Db) {
|
||||||
|
|
||||||
assert!(tree.insert(ka, va).is_ok());
|
assert!(tree.insert(ka, va).is_ok());
|
||||||
assert_eq!(tree.get(ka).unwrap().unwrap(), va);
|
assert_eq!(tree.get(ka).unwrap().unwrap(), va);
|
||||||
assert_eq!(tree.iter().unwrap().count(), 1);
|
assert_eq!(tree.len().unwrap(), 1);
|
||||||
|
|
||||||
// ---- test transaction logic ----
|
// ---- test transaction logic ----
|
||||||
|
|
||||||
|
|
@ -148,15 +148,3 @@ fn test_sqlite_db() {
|
||||||
let db = SqliteDb::new(manager, false).unwrap();
|
let db = SqliteDb::new(manager, false).unwrap();
|
||||||
test_suite(db);
|
test_suite(db);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
#[cfg(feature = "fjall")]
|
|
||||||
fn test_fjall_db() {
|
|
||||||
use crate::fjall_adapter::{fjall, FjallDb};
|
|
||||||
|
|
||||||
let path = mktemp::Temp::new_dir().unwrap();
|
|
||||||
let config = fjall::Config::new(path).temporary(true);
|
|
||||||
let keyspace = config.open_transactional().unwrap();
|
|
||||||
let db = FjallDb::init(keyspace);
|
|
||||||
test_suite(db);
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "garage"
|
name = "garage"
|
||||||
version = "1.3.1"
|
version = "1.1.0"
|
||||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
|
|
@ -57,7 +57,6 @@ opentelemetry.workspace = true
|
||||||
opentelemetry-prometheus = { workspace = true, optional = true }
|
opentelemetry-prometheus = { workspace = true, optional = true }
|
||||||
opentelemetry-otlp = { workspace = true, optional = true }
|
opentelemetry-otlp = { workspace = true, optional = true }
|
||||||
syslog-tracing = { workspace = true, optional = true }
|
syslog-tracing = { workspace = true, optional = true }
|
||||||
tracing-journald = { workspace = true, optional = true }
|
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
garage_api_common.workspace = true
|
garage_api_common.workspace = true
|
||||||
|
|
@ -91,7 +90,6 @@ k2v = [ "garage_util/k2v", "garage_api_k2v" ]
|
||||||
# Database engines
|
# Database engines
|
||||||
lmdb = [ "garage_model/lmdb" ]
|
lmdb = [ "garage_model/lmdb" ]
|
||||||
sqlite = [ "garage_model/sqlite" ]
|
sqlite = [ "garage_model/sqlite" ]
|
||||||
fjall = [ "garage_model/fjall" ]
|
|
||||||
|
|
||||||
# Automatic registration and discovery via Consul API
|
# Automatic registration and discovery via Consul API
|
||||||
consul-discovery = [ "garage_rpc/consul-discovery" ]
|
consul-discovery = [ "garage_rpc/consul-discovery" ]
|
||||||
|
|
@ -103,8 +101,6 @@ metrics = [ "garage_api_admin/metrics", "opentelemetry-prometheus" ]
|
||||||
telemetry-otlp = [ "opentelemetry-otlp" ]
|
telemetry-otlp = [ "opentelemetry-otlp" ]
|
||||||
# Logging to syslog
|
# Logging to syslog
|
||||||
syslog = [ "syslog-tracing" ]
|
syslog = [ "syslog-tracing" ]
|
||||||
# Logging to journald
|
|
||||||
journald = [ "tracing-journald" ]
|
|
||||||
|
|
||||||
# NOTE: bundled-libs and system-libs should be treat as mutually exclusive;
|
# NOTE: bundled-libs and system-libs should be treat as mutually exclusive;
|
||||||
# exactly one of them should be enabled.
|
# exactly one of them should be enabled.
|
||||||
|
|
|
||||||
|
|
@ -101,7 +101,6 @@ impl AdminRpcHandler {
|
||||||
let mut obj_dels = 0;
|
let mut obj_dels = 0;
|
||||||
let mut mpu_dels = 0;
|
let mut mpu_dels = 0;
|
||||||
let mut ver_dels = 0;
|
let mut ver_dels = 0;
|
||||||
let mut br_dels = 0;
|
|
||||||
|
|
||||||
for hash in blocks {
|
for hash in blocks {
|
||||||
let hash = hex::decode(hash).ok_or_bad_request("invalid hash")?;
|
let hash = hex::decode(hash).ok_or_bad_request("invalid hash")?;
|
||||||
|
|
@ -132,19 +131,12 @@ impl AdminRpcHandler {
|
||||||
ver_dels += 1;
|
ver_dels += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !br.deleted.get() {
|
|
||||||
let mut br = br;
|
|
||||||
br.deleted.set();
|
|
||||||
self.garage.block_ref_table.insert(&br).await?;
|
|
||||||
br_dels += 1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(AdminRpc::Ok(format!(
|
Ok(AdminRpc::Ok(format!(
|
||||||
"Purged {} blocks: marked {} block refs, {} versions, {} objects and {} multipart uploads as deleted",
|
"Purged {} blocks, {} versions, {} objects, {} multipart uploads",
|
||||||
blocks.len(),
|
blocks.len(),
|
||||||
br_dels,
|
|
||||||
ver_dels,
|
ver_dels,
|
||||||
obj_dels,
|
obj_dels,
|
||||||
mpu_dels,
|
mpu_dels,
|
||||||
|
|
|
||||||
|
|
@ -126,7 +126,7 @@ impl AdminRpcHandler {
|
||||||
|
|
||||||
#[allow(clippy::ptr_arg)]
|
#[allow(clippy::ptr_arg)]
|
||||||
async fn handle_create_bucket(&self, name: &String) -> Result<AdminRpc, Error> {
|
async fn handle_create_bucket(&self, name: &String) -> Result<AdminRpc, Error> {
|
||||||
if !is_valid_bucket_name(name, self.garage.config.allow_punycode) {
|
if !is_valid_bucket_name(name) {
|
||||||
return Err(Error::BadRequest(format!(
|
return Err(Error::BadRequest(format!(
|
||||||
"{}: {}",
|
"{}: {}",
|
||||||
name, INVALID_BUCKET_NAME_MESSAGE
|
name, INVALID_BUCKET_NAME_MESSAGE
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,8 @@ use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use format_table::format_table_to_string;
|
use format_table::format_table_to_string;
|
||||||
|
|
||||||
|
use garage_net::endpoint::RpcInFlightLimiter;
|
||||||
|
|
||||||
use garage_util::background::BackgroundRunner;
|
use garage_util::background::BackgroundRunner;
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
use garage_util::error::Error as GarageError;
|
use garage_util::error::Error as GarageError;
|
||||||
|
|
@ -118,6 +120,7 @@ impl AdminRpcHandler {
|
||||||
&node,
|
&node,
|
||||||
AdminRpc::LaunchRepair(opt_to_send.clone()),
|
AdminRpc::LaunchRepair(opt_to_send.clone()),
|
||||||
PRIO_NORMAL,
|
PRIO_NORMAL,
|
||||||
|
RpcInFlightLimiter::NoLimit,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
if !matches!(resp, Ok(Ok(_))) {
|
if !matches!(resp, Ok(Ok(_))) {
|
||||||
|
|
@ -164,7 +167,12 @@ impl AdminRpcHandler {
|
||||||
let node_id = (*node).into();
|
let node_id = (*node).into();
|
||||||
match self
|
match self
|
||||||
.endpoint
|
.endpoint
|
||||||
.call(&node_id, AdminRpc::Stats(opt), PRIO_NORMAL)
|
.call(
|
||||||
|
&node_id,
|
||||||
|
AdminRpc::Stats(opt),
|
||||||
|
PRIO_NORMAL,
|
||||||
|
RpcInFlightLimiter::NoLimit,
|
||||||
|
)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
Ok(Ok(AdminRpc::Ok(s))) => writeln!(&mut ret, "{}", s).unwrap(),
|
Ok(Ok(AdminRpc::Ok(s))) => writeln!(&mut ret, "{}", s).unwrap(),
|
||||||
|
|
@ -219,7 +227,7 @@ impl AdminRpcHandler {
|
||||||
|
|
||||||
// Gather block manager statistics
|
// Gather block manager statistics
|
||||||
writeln!(&mut ret, "\nBlock manager stats:").unwrap();
|
writeln!(&mut ret, "\nBlock manager stats:").unwrap();
|
||||||
let rc_len = self.garage.block_manager.rc_approximate_len()?.to_string();
|
let rc_len = self.garage.block_manager.rc_len()?.to_string();
|
||||||
|
|
||||||
writeln!(
|
writeln!(
|
||||||
&mut ret,
|
&mut ret,
|
||||||
|
|
@ -230,13 +238,13 @@ impl AdminRpcHandler {
|
||||||
writeln!(
|
writeln!(
|
||||||
&mut ret,
|
&mut ret,
|
||||||
" resync queue length: {}",
|
" resync queue length: {}",
|
||||||
self.garage.block_manager.resync.queue_approximate_len()?
|
self.garage.block_manager.resync.queue_len()?
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
writeln!(
|
writeln!(
|
||||||
&mut ret,
|
&mut ret,
|
||||||
" blocks with resync errors: {}",
|
" blocks with resync errors: {}",
|
||||||
self.garage.block_manager.resync.errors_approximate_len()?
|
self.garage.block_manager.resync.errors_len()?
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
|
@ -346,21 +354,16 @@ impl AdminRpcHandler {
|
||||||
F: TableSchema + 'static,
|
F: TableSchema + 'static,
|
||||||
R: TableReplication + 'static,
|
R: TableReplication + 'static,
|
||||||
{
|
{
|
||||||
let data_len = t
|
let data_len = t.data.store.len().map_err(GarageError::from)?.to_string();
|
||||||
.data
|
let mkl_len = t.merkle_updater.merkle_tree_len()?.to_string();
|
||||||
.store
|
|
||||||
.approximate_len()
|
|
||||||
.map_err(GarageError::from)?
|
|
||||||
.to_string();
|
|
||||||
let mkl_len = t.merkle_updater.merkle_tree_approximate_len()?.to_string();
|
|
||||||
|
|
||||||
Ok(format!(
|
Ok(format!(
|
||||||
" {}\t{}\t{}\t{}\t{}",
|
" {}\t{}\t{}\t{}\t{}",
|
||||||
F::TABLE_NAME,
|
F::TABLE_NAME,
|
||||||
data_len,
|
data_len,
|
||||||
mkl_len,
|
mkl_len,
|
||||||
t.merkle_updater.todo_approximate_len()?,
|
t.merkle_updater.todo_len()?,
|
||||||
t.data.gc_todo_approximate_len()?
|
t.data.gc_todo_len()?
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -412,6 +415,7 @@ impl AdminRpcHandler {
|
||||||
variable: variable.clone(),
|
variable: variable.clone(),
|
||||||
}),
|
}),
|
||||||
PRIO_NORMAL,
|
PRIO_NORMAL,
|
||||||
|
RpcInFlightLimiter::NoLimit,
|
||||||
)
|
)
|
||||||
.await??
|
.await??
|
||||||
{
|
{
|
||||||
|
|
@ -461,6 +465,7 @@ impl AdminRpcHandler {
|
||||||
value: value.to_string(),
|
value: value.to_string(),
|
||||||
}),
|
}),
|
||||||
PRIO_NORMAL,
|
PRIO_NORMAL,
|
||||||
|
RpcInFlightLimiter::NoLimit,
|
||||||
)
|
)
|
||||||
.await??
|
.await??
|
||||||
{
|
{
|
||||||
|
|
@ -493,6 +498,7 @@ impl AdminRpcHandler {
|
||||||
&to,
|
&to,
|
||||||
AdminRpc::MetaOperation(MetaOperation::Snapshot { all: false }),
|
AdminRpc::MetaOperation(MetaOperation::Snapshot { all: false }),
|
||||||
PRIO_NORMAL,
|
PRIO_NORMAL,
|
||||||
|
RpcInFlightLimiter::NoLimit,
|
||||||
)
|
)
|
||||||
.await?
|
.await?
|
||||||
}))
|
}))
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ use std::collections::{HashMap, HashSet};
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use format_table::format_table;
|
use format_table::format_table;
|
||||||
|
use garage_net::endpoint::RpcInFlightLimiter;
|
||||||
use garage_util::error::*;
|
use garage_util::error::*;
|
||||||
|
|
||||||
use garage_rpc::layout::*;
|
use garage_rpc::layout::*;
|
||||||
|
|
@ -200,7 +201,12 @@ pub async fn cmd_connect(
|
||||||
args: ConnectNodeOpt,
|
args: ConnectNodeOpt,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
match rpc_cli
|
match rpc_cli
|
||||||
.call(&rpc_host, SystemRpc::Connect(args.node), PRIO_NORMAL)
|
.call(
|
||||||
|
&rpc_host,
|
||||||
|
SystemRpc::Connect(args.node),
|
||||||
|
PRIO_NORMAL,
|
||||||
|
RpcInFlightLimiter::NoLimit,
|
||||||
|
)
|
||||||
.await??
|
.await??
|
||||||
{
|
{
|
||||||
SystemRpc::Ok => {
|
SystemRpc::Ok => {
|
||||||
|
|
@ -216,7 +222,10 @@ pub async fn cmd_admin(
|
||||||
rpc_host: NodeID,
|
rpc_host: NodeID,
|
||||||
args: AdminRpc,
|
args: AdminRpc,
|
||||||
) -> Result<(), HelperError> {
|
) -> Result<(), HelperError> {
|
||||||
match rpc_cli.call(&rpc_host, args, PRIO_NORMAL).await?? {
|
match rpc_cli
|
||||||
|
.call(&rpc_host, args, PRIO_NORMAL, RpcInFlightLimiter::NoLimit)
|
||||||
|
.await??
|
||||||
|
{
|
||||||
AdminRpc::Ok(msg) => {
|
AdminRpc::Ok(msg) => {
|
||||||
println!("{}", msg);
|
println!("{}", msg);
|
||||||
}
|
}
|
||||||
|
|
@ -271,7 +280,12 @@ pub async fn fetch_status(
|
||||||
rpc_host: NodeID,
|
rpc_host: NodeID,
|
||||||
) -> Result<Vec<KnownNodeInfo>, Error> {
|
) -> Result<Vec<KnownNodeInfo>, Error> {
|
||||||
match rpc_cli
|
match rpc_cli
|
||||||
.call(&rpc_host, SystemRpc::GetKnownNodes, PRIO_NORMAL)
|
.call(
|
||||||
|
&rpc_host,
|
||||||
|
SystemRpc::GetKnownNodes,
|
||||||
|
PRIO_NORMAL,
|
||||||
|
RpcInFlightLimiter::NoLimit,
|
||||||
|
)
|
||||||
.await??
|
.await??
|
||||||
{
|
{
|
||||||
SystemRpc::ReturnKnownNodes(nodes) => Ok(nodes),
|
SystemRpc::ReturnKnownNodes(nodes) => Ok(nodes),
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
use bytesize::ByteSize;
|
use bytesize::ByteSize;
|
||||||
|
|
||||||
use format_table::format_table;
|
use format_table::format_table;
|
||||||
|
use garage_net::endpoint::RpcInFlightLimiter;
|
||||||
use garage_util::crdt::Crdt;
|
use garage_util::crdt::Crdt;
|
||||||
use garage_util::error::*;
|
use garage_util::error::*;
|
||||||
|
|
||||||
|
|
@ -45,7 +46,12 @@ pub async fn cmd_assign_role(
|
||||||
args: AssignRoleOpt,
|
args: AssignRoleOpt,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
let status = match rpc_cli
|
let status = match rpc_cli
|
||||||
.call(&rpc_host, SystemRpc::GetKnownNodes, PRIO_NORMAL)
|
.call(
|
||||||
|
&rpc_host,
|
||||||
|
SystemRpc::GetKnownNodes,
|
||||||
|
PRIO_NORMAL,
|
||||||
|
RpcInFlightLimiter::NoLimit,
|
||||||
|
)
|
||||||
.await??
|
.await??
|
||||||
{
|
{
|
||||||
SystemRpc::ReturnKnownNodes(nodes) => nodes,
|
SystemRpc::ReturnKnownNodes(nodes) => nodes,
|
||||||
|
|
@ -475,7 +481,12 @@ pub async fn fetch_layout(
|
||||||
rpc_host: NodeID,
|
rpc_host: NodeID,
|
||||||
) -> Result<LayoutHistory, Error> {
|
) -> Result<LayoutHistory, Error> {
|
||||||
match rpc_cli
|
match rpc_cli
|
||||||
.call(&rpc_host, SystemRpc::PullClusterLayout, PRIO_NORMAL)
|
.call(
|
||||||
|
&rpc_host,
|
||||||
|
SystemRpc::PullClusterLayout,
|
||||||
|
PRIO_NORMAL,
|
||||||
|
RpcInFlightLimiter::NoLimit,
|
||||||
|
)
|
||||||
.await??
|
.await??
|
||||||
{
|
{
|
||||||
SystemRpc::AdvertiseClusterLayout(t) => Ok(t),
|
SystemRpc::AdvertiseClusterLayout(t) => Ok(t),
|
||||||
|
|
@ -493,6 +504,7 @@ pub async fn send_layout(
|
||||||
&rpc_host,
|
&rpc_host,
|
||||||
SystemRpc::AdvertiseClusterLayout(layout),
|
SystemRpc::AdvertiseClusterLayout(layout),
|
||||||
PRIO_NORMAL,
|
PRIO_NORMAL,
|
||||||
|
RpcInFlightLimiter::NoLimit,
|
||||||
)
|
)
|
||||||
.await??;
|
.await??;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
|
||||||
|
|
@ -466,10 +466,6 @@ pub enum RepairWhat {
|
||||||
/// Repair (resync/rebalance) the set of stored blocks in the cluster
|
/// Repair (resync/rebalance) the set of stored blocks in the cluster
|
||||||
#[structopt(name = "blocks", version = garage_version())]
|
#[structopt(name = "blocks", version = garage_version())]
|
||||||
Blocks,
|
Blocks,
|
||||||
/// Clear the block resync queue. The list of blocks in errored state
|
|
||||||
/// is cleared as well. You MUST run `garage repair blocks` after invoking this.
|
|
||||||
#[structopt(name = "clear-resync-queue", version = garage_version())]
|
|
||||||
ClearResyncQueue,
|
|
||||||
/// Repropagate object deletions to the version table
|
/// Repropagate object deletions to the version table
|
||||||
#[structopt(name = "versions", version = garage_version())]
|
#[structopt(name = "versions", version = garage_version())]
|
||||||
Versions,
|
Versions,
|
||||||
|
|
@ -482,9 +478,6 @@ pub enum RepairWhat {
|
||||||
/// Recalculate block reference counters
|
/// Recalculate block reference counters
|
||||||
#[structopt(name = "block-rc", version = garage_version())]
|
#[structopt(name = "block-rc", version = garage_version())]
|
||||||
BlockRc,
|
BlockRc,
|
||||||
/// Fix inconsistency in bucket aliases (WARNING: EXPERIMENTAL)
|
|
||||||
#[structopt(name = "aliases", version = garage_version())]
|
|
||||||
Aliases,
|
|
||||||
/// Verify integrity of all blocks on disc
|
/// Verify integrity of all blocks on disc
|
||||||
#[structopt(name = "scrub", version = garage_version())]
|
#[structopt(name = "scrub", version = garage_version())]
|
||||||
Scrub {
|
Scrub {
|
||||||
|
|
|
||||||
|
|
@ -208,43 +208,6 @@ fn init_logging(opt: &Opt) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if std::env::var("GARAGE_LOG_TO_JOURNALD")
|
|
||||||
.map(|x| x == "1" || x == "true")
|
|
||||||
.unwrap_or(false)
|
|
||||||
{
|
|
||||||
#[cfg(feature = "journald")]
|
|
||||||
{
|
|
||||||
use tracing_journald::{Priority, PriorityMappings};
|
|
||||||
use tracing_subscriber::layer::SubscriberExt;
|
|
||||||
use tracing_subscriber::util::SubscriberInitExt;
|
|
||||||
|
|
||||||
let registry = tracing_subscriber::registry()
|
|
||||||
.with(tracing_subscriber::fmt::layer().with_writer(std::io::sink))
|
|
||||||
.with(env_filter);
|
|
||||||
match tracing_journald::layer() {
|
|
||||||
Ok(layer) => {
|
|
||||||
registry
|
|
||||||
.with(layer.with_priority_mappings(PriorityMappings {
|
|
||||||
info: Priority::Informational,
|
|
||||||
debug: Priority::Debug,
|
|
||||||
..PriorityMappings::new()
|
|
||||||
}))
|
|
||||||
.init();
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
eprintln!("Couldn't connect to journald: {}.", e);
|
|
||||||
std::process::exit(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#[cfg(not(feature = "journald"))]
|
|
||||||
{
|
|
||||||
eprintln!("Journald support is not enabled in this build.");
|
|
||||||
std::process::exit(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
tracing_subscriber::fmt()
|
tracing_subscriber::fmt()
|
||||||
.with_writer(std::io::stderr)
|
.with_writer(std::io::stderr)
|
||||||
.with_env_filter(env_filter)
|
.with_env_filter(env_filter)
|
||||||
|
|
@ -281,7 +244,7 @@ async fn cli_command(opt: Opt) -> Result<(), Error> {
|
||||||
// Generate a temporary keypair for our RPC client
|
// Generate a temporary keypair for our RPC client
|
||||||
let (_pk, sk) = sodiumoxide::crypto::sign::ed25519::gen_keypair();
|
let (_pk, sk) = sodiumoxide::crypto::sign::ed25519::gen_keypair();
|
||||||
|
|
||||||
let netapp = NetApp::new(GARAGE_VERSION_TAG, network_key, sk, None);
|
let netapp = NetApp::new(GARAGE_VERSION_TAG, network_key, sk, None, None);
|
||||||
|
|
||||||
// Find and parse the address of the target host
|
// Find and parse the address of the target host
|
||||||
let (id, addr, is_default_addr) = if let Some(h) = opt.rpc_host {
|
let (id, addr, is_default_addr) = if let Some(h) = opt.rpc_host {
|
||||||
|
|
|
||||||
|
|
@ -88,15 +88,6 @@ pub async fn launch_online_repair(
|
||||||
garage.block_manager.clone(),
|
garage.block_manager.clone(),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
RepairWhat::Aliases => {
|
|
||||||
info!("Repairing bucket aliases (foreground)");
|
|
||||||
garage.locked_helper().await.repair_aliases().await?;
|
|
||||||
}
|
|
||||||
RepairWhat::ClearResyncQueue => {
|
|
||||||
let garage = garage.clone();
|
|
||||||
tokio::task::spawn_blocking(move || garage.block_manager.resync.clear_resync_queue())
|
|
||||||
.await??
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -183,21 +183,10 @@ fn watch_shutdown_signal() -> watch::Receiver<bool> {
|
||||||
let mut sigterm =
|
let mut sigterm =
|
||||||
signal(SignalKind::terminate()).expect("Failed to install SIGTERM handler");
|
signal(SignalKind::terminate()).expect("Failed to install SIGTERM handler");
|
||||||
let mut sighup = signal(SignalKind::hangup()).expect("Failed to install SIGHUP handler");
|
let mut sighup = signal(SignalKind::hangup()).expect("Failed to install SIGHUP handler");
|
||||||
loop {
|
tokio::select! {
|
||||||
tokio::select! {
|
_ = sigint.recv() => info!("Received SIGINT, shutting down."),
|
||||||
_ = sigint.recv() => {
|
_ = sigterm.recv() => info!("Received SIGTERM, shutting down."),
|
||||||
info!("Received SIGINT, shutting down.");
|
_ = sighup.recv() => info!("Received SIGHUP, shutting down."),
|
||||||
break
|
|
||||||
}
|
|
||||||
_ = sigterm.recv() => {
|
|
||||||
info!("Received SIGTERM, shutting down.");
|
|
||||||
break
|
|
||||||
}
|
|
||||||
_ = sighup.recv() => {
|
|
||||||
info!("Received SIGHUP, reload not supported.");
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
send_cancel.send(true).unwrap();
|
send_cancel.send(true).unwrap();
|
||||||
});
|
});
|
||||||
|
|
|
||||||
|
|
@ -63,8 +63,6 @@ rpc_bind_addr = "127.0.0.1:{rpc_port}"
|
||||||
rpc_public_addr = "127.0.0.1:{rpc_port}"
|
rpc_public_addr = "127.0.0.1:{rpc_port}"
|
||||||
rpc_secret = "{secret}"
|
rpc_secret = "{secret}"
|
||||||
|
|
||||||
allow_punycode = true
|
|
||||||
|
|
||||||
[s3_api]
|
[s3_api]
|
||||||
s3_region = "{region}"
|
s3_region = "{region}"
|
||||||
api_bind_addr = "127.0.0.1:{s3_port}"
|
api_bind_addr = "127.0.0.1:{s3_port}"
|
||||||
|
|
|
||||||
|
|
@ -198,7 +198,6 @@ async fn test_precondition() {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
let older_date = DateTime::from_secs_f64(last_modified.as_secs_f64() - 10.0);
|
let older_date = DateTime::from_secs_f64(last_modified.as_secs_f64() - 10.0);
|
||||||
let same_date = DateTime::from_secs_f64(last_modified.as_secs_f64());
|
|
||||||
let newer_date = DateTime::from_secs_f64(last_modified.as_secs_f64() + 10.0);
|
let newer_date = DateTime::from_secs_f64(last_modified.as_secs_f64() + 10.0);
|
||||||
{
|
{
|
||||||
let err = ctx
|
let err = ctx
|
||||||
|
|
@ -213,18 +212,6 @@ async fn test_precondition() {
|
||||||
matches!(err, Err(SdkError::ServiceError(se)) if se.raw().status().as_u16() == 304)
|
matches!(err, Err(SdkError::ServiceError(se)) if se.raw().status().as_u16() == 304)
|
||||||
);
|
);
|
||||||
|
|
||||||
let err = ctx
|
|
||||||
.client
|
|
||||||
.get_object()
|
|
||||||
.bucket(&bucket)
|
|
||||||
.key(STD_KEY)
|
|
||||||
.if_modified_since(same_date)
|
|
||||||
.send()
|
|
||||||
.await;
|
|
||||||
assert!(
|
|
||||||
matches!(err, Err(SdkError::ServiceError(se)) if se.raw().status().as_u16() == 304)
|
|
||||||
);
|
|
||||||
|
|
||||||
let o = ctx
|
let o = ctx
|
||||||
.client
|
.client
|
||||||
.get_object()
|
.get_object()
|
||||||
|
|
@ -249,17 +236,6 @@ async fn test_precondition() {
|
||||||
matches!(err, Err(SdkError::ServiceError(se)) if se.raw().status().as_u16() == 412)
|
matches!(err, Err(SdkError::ServiceError(se)) if se.raw().status().as_u16() == 412)
|
||||||
);
|
);
|
||||||
|
|
||||||
let o = ctx
|
|
||||||
.client
|
|
||||||
.get_object()
|
|
||||||
.bucket(&bucket)
|
|
||||||
.key(STD_KEY)
|
|
||||||
.if_unmodified_since(same_date)
|
|
||||||
.send()
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
assert_eq!(o.e_tag.as_ref().unwrap().as_str(), etag);
|
|
||||||
|
|
||||||
let o = ctx
|
let o = ctx
|
||||||
.client
|
.client
|
||||||
.get_object()
|
.get_object()
|
||||||
|
|
|
||||||
|
|
@ -533,118 +533,3 @@ async fn test_website_check_domain() {
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn test_website_puny() {
|
|
||||||
const BCKT_NAME: &str = "xn--pda.eu";
|
|
||||||
let ctx = common::context();
|
|
||||||
let bucket = ctx.create_bucket(BCKT_NAME);
|
|
||||||
|
|
||||||
let data = ByteStream::from_static(BODY);
|
|
||||||
|
|
||||||
ctx.client
|
|
||||||
.put_object()
|
|
||||||
.bucket(&bucket)
|
|
||||||
.key("index.html")
|
|
||||||
.body(data)
|
|
||||||
.send()
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let client = Client::builder(TokioExecutor::new()).build_http();
|
|
||||||
|
|
||||||
let req = |suffix| {
|
|
||||||
Request::builder()
|
|
||||||
.method("GET")
|
|
||||||
.uri(format!("http://127.0.0.1:{}/", ctx.garage.web_port))
|
|
||||||
.header("Host", format!("{}{}", BCKT_NAME, suffix))
|
|
||||||
.body(Body::new(Bytes::new()))
|
|
||||||
.unwrap()
|
|
||||||
};
|
|
||||||
|
|
||||||
ctx.garage
|
|
||||||
.command()
|
|
||||||
.args(["bucket", "website", "--allow", BCKT_NAME])
|
|
||||||
.quiet()
|
|
||||||
.expect_success_status("Could not allow website on bucket");
|
|
||||||
|
|
||||||
let mut resp = client.request(req("")).await.unwrap();
|
|
||||||
assert_eq!(resp.status(), StatusCode::OK);
|
|
||||||
assert_eq!(
|
|
||||||
resp.into_body().collect().await.unwrap().to_bytes(),
|
|
||||||
BODY.as_ref()
|
|
||||||
);
|
|
||||||
|
|
||||||
resp = client.request(req(".web.garage")).await.unwrap();
|
|
||||||
assert_eq!(resp.status(), StatusCode::OK);
|
|
||||||
assert_eq!(
|
|
||||||
resp.into_body().collect().await.unwrap().to_bytes(),
|
|
||||||
BODY.as_ref()
|
|
||||||
);
|
|
||||||
|
|
||||||
for bname in [
|
|
||||||
BCKT_NAME.to_string(),
|
|
||||||
format!("{BCKT_NAME}.web.garage"),
|
|
||||||
format!("{BCKT_NAME}.s3.garage"),
|
|
||||||
] {
|
|
||||||
let admin_req = || {
|
|
||||||
Request::builder()
|
|
||||||
.method("GET")
|
|
||||||
.uri(format!(
|
|
||||||
"http://127.0.0.1:{0}/check?domain={1}",
|
|
||||||
ctx.garage.admin_port, bname
|
|
||||||
))
|
|
||||||
.body(Body::new(Bytes::new()))
|
|
||||||
.unwrap()
|
|
||||||
};
|
|
||||||
|
|
||||||
let admin_resp = client.request(admin_req()).await.unwrap();
|
|
||||||
assert_eq!(admin_resp.status(), StatusCode::OK);
|
|
||||||
assert_eq!(
|
|
||||||
admin_resp.into_body().collect().await.unwrap().to_bytes(),
|
|
||||||
format!("Domain '{bname}' is managed by Garage").as_bytes()
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn test_website_object_not_found() {
|
|
||||||
const BCKT_NAME: &str = "not-found";
|
|
||||||
let ctx = common::context();
|
|
||||||
let _bucket = ctx.create_bucket(BCKT_NAME);
|
|
||||||
|
|
||||||
let client = Client::builder(TokioExecutor::new()).build_http();
|
|
||||||
|
|
||||||
let req = |suffix| {
|
|
||||||
Request::builder()
|
|
||||||
.method("GET")
|
|
||||||
.uri(format!("http://127.0.0.1:{}/", ctx.garage.web_port))
|
|
||||||
.header("Host", format!("{}{}", BCKT_NAME, suffix))
|
|
||||||
.body(Body::new(Bytes::new()))
|
|
||||||
.unwrap()
|
|
||||||
};
|
|
||||||
|
|
||||||
ctx.garage
|
|
||||||
.command()
|
|
||||||
.args(["bucket", "website", "--allow", BCKT_NAME])
|
|
||||||
.quiet()
|
|
||||||
.expect_success_status("Could not allow website on bucket");
|
|
||||||
|
|
||||||
let resp = client.request(req("")).await.unwrap();
|
|
||||||
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
|
||||||
// the error we return by default are *not* xml
|
|
||||||
assert_eq!(
|
|
||||||
resp.headers().get(http::header::CONTENT_TYPE).unwrap(),
|
|
||||||
"text/html; charset=utf-8"
|
|
||||||
);
|
|
||||||
let result = String::from_utf8(
|
|
||||||
resp.into_body()
|
|
||||||
.collect()
|
|
||||||
.await
|
|
||||||
.unwrap()
|
|
||||||
.to_bytes()
|
|
||||||
.to_vec(),
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
assert!(result.contains("not found"));
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -72,16 +72,6 @@ impl K2vClient {
|
||||||
.enable_http2()
|
.enable_http2()
|
||||||
.build();
|
.build();
|
||||||
let client = HttpClient::builder(TokioExecutor::new()).build(connector);
|
let client = HttpClient::builder(TokioExecutor::new()).build(connector);
|
||||||
Self::new_with_client(config, client)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Create a new K2V client with an external client.
|
|
||||||
/// Useful for example if you plan on creating many clients but you want to mutualize the
|
|
||||||
/// underlying thread pools & co.
|
|
||||||
pub fn new_with_client(
|
|
||||||
config: K2vClientConfig,
|
|
||||||
client: HttpClient<HttpsConnector<HttpConnector>, Body>,
|
|
||||||
) -> Result<Self, Error> {
|
|
||||||
let user_agent: std::borrow::Cow<str> = match &config.user_agent {
|
let user_agent: std::borrow::Cow<str> = match &config.user_agent {
|
||||||
Some(ua) => ua.into(),
|
Some(ua) => ua.into(),
|
||||||
None => format!("k2v/{}", env!("CARGO_PKG_VERSION")).into(),
|
None => format!("k2v/{}", env!("CARGO_PKG_VERSION")).into(),
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "garage_model"
|
name = "garage_model"
|
||||||
version = "1.3.1"
|
version = "1.1.0"
|
||||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
|
|
@ -24,7 +24,7 @@ garage_net.workspace = true
|
||||||
async-trait.workspace = true
|
async-trait.workspace = true
|
||||||
blake2.workspace = true
|
blake2.workspace = true
|
||||||
chrono.workspace = true
|
chrono.workspace = true
|
||||||
thiserror.workspace = true
|
err-derive.workspace = true
|
||||||
hex.workspace = true
|
hex.workspace = true
|
||||||
http.workspace = true
|
http.workspace = true
|
||||||
base64.workspace = true
|
base64.workspace = true
|
||||||
|
|
@ -44,4 +44,3 @@ default = [ "lmdb", "sqlite" ]
|
||||||
k2v = [ "garage_util/k2v" ]
|
k2v = [ "garage_util/k2v" ]
|
||||||
lmdb = [ "garage_db/lmdb" ]
|
lmdb = [ "garage_db/lmdb" ]
|
||||||
sqlite = [ "garage_db/sqlite" ]
|
sqlite = [ "garage_db/sqlite" ]
|
||||||
fjall = [ "garage_db/fjall" ]
|
|
||||||
|
|
|
||||||
|
|
@ -22,10 +22,14 @@ mod v08 {
|
||||||
pub use v08::*;
|
pub use v08::*;
|
||||||
|
|
||||||
impl BucketAlias {
|
impl BucketAlias {
|
||||||
pub fn new(name: String, ts: u64, bucket_id: Option<Uuid>) -> Self {
|
pub fn new(name: String, ts: u64, bucket_id: Option<Uuid>) -> Option<Self> {
|
||||||
BucketAlias {
|
if !is_valid_bucket_name(&name) {
|
||||||
name,
|
None
|
||||||
state: crdt::Lww::raw(ts, bucket_id),
|
} else {
|
||||||
|
Some(BucketAlias {
|
||||||
|
name,
|
||||||
|
state: crdt::Lww::raw(ts, bucket_id),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -76,7 +80,7 @@ impl TableSchema for BucketAliasTable {
|
||||||
/// In the case of Garage, bucket names must not be hex-encoded
|
/// In the case of Garage, bucket names must not be hex-encoded
|
||||||
/// 32 byte string, which is excluded thanks to the
|
/// 32 byte string, which is excluded thanks to the
|
||||||
/// maximum length of 63 bytes given in the spec.
|
/// maximum length of 63 bytes given in the spec.
|
||||||
pub fn is_valid_bucket_name(n: &str, puny: bool) -> bool {
|
pub fn is_valid_bucket_name(n: &str) -> bool {
|
||||||
// Bucket names must be between 3 and 63 characters
|
// Bucket names must be between 3 and 63 characters
|
||||||
n.len() >= 3 && n.len() <= 63
|
n.len() >= 3 && n.len() <= 63
|
||||||
// Bucket names must be composed of lowercase letters, numbers,
|
// Bucket names must be composed of lowercase letters, numbers,
|
||||||
|
|
@ -88,9 +92,7 @@ pub fn is_valid_bucket_name(n: &str, puny: bool) -> bool {
|
||||||
// Bucket names must not be formatted as an IP address
|
// Bucket names must not be formatted as an IP address
|
||||||
&& n.parse::<std::net::IpAddr>().is_err()
|
&& n.parse::<std::net::IpAddr>().is_err()
|
||||||
// Bucket names must not start with "xn--"
|
// Bucket names must not start with "xn--"
|
||||||
&& (!n.starts_with("xn--") || puny)
|
&& !n.starts_with("xn--")
|
||||||
// We are a bit stricter, to properly restrict punycode in all labels
|
|
||||||
&& (!n.contains(".xn--") || puny)
|
|
||||||
// Bucket names must not end with "-s3alias"
|
// Bucket names must not end with "-s3alias"
|
||||||
&& !n.ends_with("-s3alias")
|
&& !n.ends_with("-s3alias")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -116,17 +116,21 @@ impl Garage {
|
||||||
info!("Opening database...");
|
info!("Opening database...");
|
||||||
let db_engine = db::Engine::from_str(&config.db_engine)
|
let db_engine = db::Engine::from_str(&config.db_engine)
|
||||||
.ok_or_message("Invalid `db_engine` value in configuration file")?;
|
.ok_or_message("Invalid `db_engine` value in configuration file")?;
|
||||||
let db_path = db_engine.db_path(&config.metadata_dir);
|
let mut db_path = config.metadata_dir.clone();
|
||||||
|
match db_engine {
|
||||||
|
db::Engine::Sqlite => {
|
||||||
|
db_path.push("db.sqlite");
|
||||||
|
}
|
||||||
|
db::Engine::Lmdb => {
|
||||||
|
db_path.push("db.lmdb");
|
||||||
|
}
|
||||||
|
}
|
||||||
let db_opt = db::OpenOpt {
|
let db_opt = db::OpenOpt {
|
||||||
fsync: config.metadata_fsync,
|
fsync: config.metadata_fsync,
|
||||||
lmdb_map_size: match config.lmdb_map_size {
|
lmdb_map_size: match config.lmdb_map_size {
|
||||||
v if v == usize::default() => None,
|
v if v == usize::default() => None,
|
||||||
v => Some(v),
|
v => Some(v),
|
||||||
},
|
},
|
||||||
fjall_block_cache_size: match config.fjall_block_cache_size {
|
|
||||||
v if v == usize::default() => None,
|
|
||||||
v => Some(v),
|
|
||||||
},
|
|
||||||
};
|
};
|
||||||
let db = db::open_db(&db_path, db_engine, &db_opt)
|
let db = db::open_db(&db_path, db_engine, &db_opt)
|
||||||
.ok_or_message("Unable to open metadata db")?;
|
.ok_or_message("Unable to open metadata db")?;
|
||||||
|
|
@ -171,7 +175,13 @@ impl Garage {
|
||||||
|
|
||||||
// ---- admin tables ----
|
// ---- admin tables ----
|
||||||
info!("Initialize bucket_table...");
|
info!("Initialize bucket_table...");
|
||||||
let bucket_table = Table::new(BucketTable, control_rep_param.clone(), system.clone(), &db);
|
let bucket_table = Table::new(
|
||||||
|
BucketTable,
|
||||||
|
control_rep_param.clone(),
|
||||||
|
system.clone(),
|
||||||
|
&db,
|
||||||
|
&config.experimental.merkle_backpressure,
|
||||||
|
);
|
||||||
|
|
||||||
info!("Initialize bucket_alias_table...");
|
info!("Initialize bucket_alias_table...");
|
||||||
let bucket_alias_table = Table::new(
|
let bucket_alias_table = Table::new(
|
||||||
|
|
@ -179,9 +189,16 @@ impl Garage {
|
||||||
control_rep_param.clone(),
|
control_rep_param.clone(),
|
||||||
system.clone(),
|
system.clone(),
|
||||||
&db,
|
&db,
|
||||||
|
&config.experimental.merkle_backpressure,
|
||||||
);
|
);
|
||||||
info!("Initialize key_table_table...");
|
info!("Initialize key_table_table...");
|
||||||
let key_table = Table::new(KeyTable, control_rep_param, system.clone(), &db);
|
let key_table = Table::new(
|
||||||
|
KeyTable,
|
||||||
|
control_rep_param,
|
||||||
|
system.clone(),
|
||||||
|
&db,
|
||||||
|
&config.experimental.merkle_backpressure,
|
||||||
|
);
|
||||||
|
|
||||||
// ---- S3 tables ----
|
// ---- S3 tables ----
|
||||||
info!("Initialize block_ref_table...");
|
info!("Initialize block_ref_table...");
|
||||||
|
|
@ -192,6 +209,7 @@ impl Garage {
|
||||||
meta_rep_param.clone(),
|
meta_rep_param.clone(),
|
||||||
system.clone(),
|
system.clone(),
|
||||||
&db,
|
&db,
|
||||||
|
&config.experimental.merkle_backpressure,
|
||||||
);
|
);
|
||||||
|
|
||||||
info!("Initialize version_table...");
|
info!("Initialize version_table...");
|
||||||
|
|
@ -202,10 +220,12 @@ impl Garage {
|
||||||
meta_rep_param.clone(),
|
meta_rep_param.clone(),
|
||||||
system.clone(),
|
system.clone(),
|
||||||
&db,
|
&db,
|
||||||
|
&config.experimental.merkle_backpressure,
|
||||||
);
|
);
|
||||||
|
|
||||||
info!("Initialize multipart upload counter table...");
|
info!("Initialize multipart upload counter table...");
|
||||||
let mpu_counter_table = IndexCounter::new(system.clone(), meta_rep_param.clone(), &db);
|
let mpu_counter_table =
|
||||||
|
IndexCounter::new(system.clone(), meta_rep_param.clone(), &db, &config);
|
||||||
|
|
||||||
info!("Initialize multipart upload table...");
|
info!("Initialize multipart upload table...");
|
||||||
let mpu_table = Table::new(
|
let mpu_table = Table::new(
|
||||||
|
|
@ -216,10 +236,12 @@ impl Garage {
|
||||||
meta_rep_param.clone(),
|
meta_rep_param.clone(),
|
||||||
system.clone(),
|
system.clone(),
|
||||||
&db,
|
&db,
|
||||||
|
&config.experimental.merkle_backpressure,
|
||||||
);
|
);
|
||||||
|
|
||||||
info!("Initialize object counter table...");
|
info!("Initialize object counter table...");
|
||||||
let object_counter_table = IndexCounter::new(system.clone(), meta_rep_param.clone(), &db);
|
let object_counter_table =
|
||||||
|
IndexCounter::new(system.clone(), meta_rep_param.clone(), &db, &config);
|
||||||
|
|
||||||
info!("Initialize object_table...");
|
info!("Initialize object_table...");
|
||||||
#[allow(clippy::redundant_clone)]
|
#[allow(clippy::redundant_clone)]
|
||||||
|
|
@ -232,6 +254,7 @@ impl Garage {
|
||||||
meta_rep_param.clone(),
|
meta_rep_param.clone(),
|
||||||
system.clone(),
|
system.clone(),
|
||||||
&db,
|
&db,
|
||||||
|
&config.experimental.merkle_backpressure,
|
||||||
);
|
);
|
||||||
|
|
||||||
info!("Load lifecycle worker state...");
|
info!("Load lifecycle worker state...");
|
||||||
|
|
@ -241,7 +264,7 @@ impl Garage {
|
||||||
|
|
||||||
// ---- K2V ----
|
// ---- K2V ----
|
||||||
#[cfg(feature = "k2v")]
|
#[cfg(feature = "k2v")]
|
||||||
let k2v = GarageK2V::new(system.clone(), &db, meta_rep_param);
|
let k2v = GarageK2V::new(system.clone(), &db, meta_rep_param, &config);
|
||||||
|
|
||||||
// ---- setup block refcount recalculation ----
|
// ---- setup block refcount recalculation ----
|
||||||
// this function can be used to fix inconsistencies in the RC table
|
// this function can be used to fix inconsistencies in the RC table
|
||||||
|
|
@ -315,15 +338,15 @@ impl Garage {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn bucket_helper(&self) -> helper::bucket::BucketHelper<'_> {
|
pub fn bucket_helper(&self) -> helper::bucket::BucketHelper {
|
||||||
helper::bucket::BucketHelper(self)
|
helper::bucket::BucketHelper(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn key_helper(&self) -> helper::key::KeyHelper<'_> {
|
pub fn key_helper(&self) -> helper::key::KeyHelper {
|
||||||
helper::key::KeyHelper(self)
|
helper::key::KeyHelper(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn locked_helper(&self) -> helper::locked::LockedHelper<'_> {
|
pub async fn locked_helper(&self) -> helper::locked::LockedHelper {
|
||||||
let lock = self.bucket_lock.lock().await;
|
let lock = self.bucket_lock.lock().await;
|
||||||
helper::locked::LockedHelper(self, Some(lock))
|
helper::locked::LockedHelper(self, Some(lock))
|
||||||
}
|
}
|
||||||
|
|
@ -331,9 +354,14 @@ impl Garage {
|
||||||
|
|
||||||
#[cfg(feature = "k2v")]
|
#[cfg(feature = "k2v")]
|
||||||
impl GarageK2V {
|
impl GarageK2V {
|
||||||
fn new(system: Arc<System>, db: &db::Db, meta_rep_param: TableShardedReplication) -> Self {
|
fn new(
|
||||||
|
system: Arc<System>,
|
||||||
|
db: &db::Db,
|
||||||
|
meta_rep_param: TableShardedReplication,
|
||||||
|
config: &Config,
|
||||||
|
) -> Self {
|
||||||
info!("Initialize K2V counter table...");
|
info!("Initialize K2V counter table...");
|
||||||
let counter_table = IndexCounter::new(system.clone(), meta_rep_param.clone(), db);
|
let counter_table = IndexCounter::new(system.clone(), meta_rep_param.clone(), db, config);
|
||||||
|
|
||||||
info!("Initialize K2V subscription manager...");
|
info!("Initialize K2V subscription manager...");
|
||||||
let subscriptions = Arc::new(SubscriptionManager::new());
|
let subscriptions = Arc::new(SubscriptionManager::new());
|
||||||
|
|
@ -347,6 +375,7 @@ impl GarageK2V {
|
||||||
meta_rep_param,
|
meta_rep_param,
|
||||||
system.clone(),
|
system.clone(),
|
||||||
db,
|
db,
|
||||||
|
&config.experimental.merkle_backpressure,
|
||||||
);
|
);
|
||||||
|
|
||||||
info!("Initialize K2V RPC handler...");
|
info!("Initialize K2V RPC handler...");
|
||||||
|
|
|
||||||
|
|
@ -1,24 +1,24 @@
|
||||||
|
use err_derive::Error;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use thiserror::Error;
|
|
||||||
|
|
||||||
use garage_util::error::Error as GarageError;
|
use garage_util::error::Error as GarageError;
|
||||||
|
|
||||||
#[derive(Debug, Error, Serialize, Deserialize)]
|
#[derive(Debug, Error, Serialize, Deserialize)]
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
#[error("Internal error: {0}")]
|
#[error(display = "Internal error: {}", _0)]
|
||||||
Internal(#[from] GarageError),
|
Internal(#[error(source)] GarageError),
|
||||||
|
|
||||||
#[error("Bad request: {0}")]
|
#[error(display = "Bad request: {}", _0)]
|
||||||
BadRequest(String),
|
BadRequest(String),
|
||||||
|
|
||||||
/// Bucket name is not valid according to AWS S3 specs
|
/// Bucket name is not valid according to AWS S3 specs
|
||||||
#[error("Invalid bucket name: {0}")]
|
#[error(display = "Invalid bucket name: {}", _0)]
|
||||||
InvalidBucketName(String),
|
InvalidBucketName(String),
|
||||||
|
|
||||||
#[error("Access key not found: {0}")]
|
#[error(display = "Access key not found: {}", _0)]
|
||||||
NoSuchAccessKey(String),
|
NoSuchAccessKey(String),
|
||||||
|
|
||||||
#[error("Bucket not found: {0}")]
|
#[error(display = "Bucket not found: {}", _0)]
|
||||||
NoSuchBucket(String),
|
NoSuchBucket(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,3 @@
|
||||||
use std::collections::{HashMap, HashSet};
|
|
||||||
|
|
||||||
use garage_db as db;
|
|
||||||
|
|
||||||
use garage_util::crdt::*;
|
use garage_util::crdt::*;
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
use garage_util::error::{Error as GarageError, OkOrMessage};
|
use garage_util::error::{Error as GarageError, OkOrMessage};
|
||||||
|
|
@ -51,10 +47,6 @@ impl<'a> LockedHelper<'a> {
|
||||||
KeyHelper(self.0)
|
KeyHelper(self.0)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ================================================
|
|
||||||
// global bucket aliases
|
|
||||||
// ================================================
|
|
||||||
|
|
||||||
/// Sets a new alias for a bucket in global namespace.
|
/// Sets a new alias for a bucket in global namespace.
|
||||||
/// This function fails if:
|
/// This function fails if:
|
||||||
/// - alias name is not valid according to S3 spec
|
/// - alias name is not valid according to S3 spec
|
||||||
|
|
@ -65,7 +57,7 @@ impl<'a> LockedHelper<'a> {
|
||||||
bucket_id: Uuid,
|
bucket_id: Uuid,
|
||||||
alias_name: &String,
|
alias_name: &String,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
if !is_valid_bucket_name(alias_name, self.0.config.allow_punycode) {
|
if !is_valid_bucket_name(alias_name) {
|
||||||
return Err(Error::InvalidBucketName(alias_name.to_string()));
|
return Err(Error::InvalidBucketName(alias_name.to_string()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -96,7 +88,8 @@ impl<'a> LockedHelper<'a> {
|
||||||
// writes are now done and all writes use timestamp alias_ts
|
// writes are now done and all writes use timestamp alias_ts
|
||||||
|
|
||||||
let alias = match alias {
|
let alias = match alias {
|
||||||
None => BucketAlias::new(alias_name.clone(), alias_ts, Some(bucket_id)),
|
None => BucketAlias::new(alias_name.clone(), alias_ts, Some(bucket_id))
|
||||||
|
.ok_or_else(|| Error::InvalidBucketName(alias_name.clone()))?,
|
||||||
Some(mut a) => {
|
Some(mut a) => {
|
||||||
a.state = Lww::raw(alias_ts, Some(bucket_id));
|
a.state = Lww::raw(alias_ts, Some(bucket_id));
|
||||||
a
|
a
|
||||||
|
|
@ -187,14 +180,13 @@ impl<'a> LockedHelper<'a> {
|
||||||
.ok_or_else(|| Error::NoSuchBucket(alias_name.to_string()))?;
|
.ok_or_else(|| Error::NoSuchBucket(alias_name.to_string()))?;
|
||||||
|
|
||||||
// Checks ok, remove alias
|
// Checks ok, remove alias
|
||||||
let alias_ts = increment_logical_clock_2(
|
let alias_ts = match bucket.state.as_option() {
|
||||||
alias.state.timestamp(),
|
Some(bucket_state) => increment_logical_clock_2(
|
||||||
bucket
|
alias.state.timestamp(),
|
||||||
.state
|
bucket_state.aliases.get_timestamp(alias_name),
|
||||||
.as_option()
|
),
|
||||||
.map(|p| p.aliases.get_timestamp(alias_name))
|
None => increment_logical_clock(alias.state.timestamp()),
|
||||||
.unwrap_or(0),
|
};
|
||||||
);
|
|
||||||
|
|
||||||
// ---- timestamp-ensured causality barrier ----
|
// ---- timestamp-ensured causality barrier ----
|
||||||
// writes are now done and all writes use timestamp alias_ts
|
// writes are now done and all writes use timestamp alias_ts
|
||||||
|
|
@ -212,10 +204,6 @@ impl<'a> LockedHelper<'a> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
// ================================================
|
|
||||||
// local bucket aliases
|
|
||||||
// ================================================
|
|
||||||
|
|
||||||
/// Sets a new alias for a bucket in the local namespace of a key.
|
/// Sets a new alias for a bucket in the local namespace of a key.
|
||||||
/// This function fails if:
|
/// This function fails if:
|
||||||
/// - alias name is not valid according to S3 spec
|
/// - alias name is not valid according to S3 spec
|
||||||
|
|
@ -228,12 +216,14 @@ impl<'a> LockedHelper<'a> {
|
||||||
key_id: &String,
|
key_id: &String,
|
||||||
alias_name: &String,
|
alias_name: &String,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
if !is_valid_bucket_name(alias_name, self.0.config.allow_punycode) {
|
let key_helper = KeyHelper(self.0);
|
||||||
|
|
||||||
|
if !is_valid_bucket_name(alias_name) {
|
||||||
return Err(Error::InvalidBucketName(alias_name.to_string()));
|
return Err(Error::InvalidBucketName(alias_name.to_string()));
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut bucket = self.bucket().get_existing_bucket(bucket_id).await?;
|
let mut bucket = self.bucket().get_existing_bucket(bucket_id).await?;
|
||||||
let mut key = self.key().get_existing_key(key_id).await?;
|
let mut key = key_helper.get_existing_key(key_id).await?;
|
||||||
|
|
||||||
let key_param = key.state.as_option_mut().unwrap();
|
let key_param = key.state.as_option_mut().unwrap();
|
||||||
|
|
||||||
|
|
@ -282,13 +272,23 @@ impl<'a> LockedHelper<'a> {
|
||||||
key_id: &String,
|
key_id: &String,
|
||||||
alias_name: &String,
|
alias_name: &String,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
let mut bucket = self.bucket().get_existing_bucket(bucket_id).await?;
|
let key_helper = KeyHelper(self.0);
|
||||||
let mut key = self.key().get_existing_key(key_id).await?;
|
|
||||||
|
let mut bucket = self.bucket().get_existing_bucket(bucket_id).await?;
|
||||||
|
let mut key = key_helper.get_existing_key(key_id).await?;
|
||||||
|
|
||||||
let key_p = key.state.as_option().unwrap();
|
|
||||||
let bucket_p = bucket.state.as_option_mut().unwrap();
|
let bucket_p = bucket.state.as_option_mut().unwrap();
|
||||||
|
|
||||||
if key_p.local_aliases.get(alias_name).cloned().flatten() != Some(bucket_id) {
|
if key
|
||||||
|
.state
|
||||||
|
.as_option()
|
||||||
|
.unwrap()
|
||||||
|
.local_aliases
|
||||||
|
.get(alias_name)
|
||||||
|
.cloned()
|
||||||
|
.flatten()
|
||||||
|
!= Some(bucket_id)
|
||||||
|
{
|
||||||
return Err(GarageError::Message(format!(
|
return Err(GarageError::Message(format!(
|
||||||
"Bucket {:?} does not have alias {} in namespace of key {}",
|
"Bucket {:?} does not have alias {} in namespace of key {}",
|
||||||
bucket_id, alias_name, key_id
|
bucket_id, alias_name, key_id
|
||||||
|
|
@ -305,17 +305,17 @@ impl<'a> LockedHelper<'a> {
|
||||||
.local_aliases
|
.local_aliases
|
||||||
.items()
|
.items()
|
||||||
.iter()
|
.iter()
|
||||||
.any(|((k, n), _, active)| (*k != key.key_id || n != alias_name) && *active);
|
.any(|((k, n), _, active)| *k == key.key_id && n == alias_name && *active);
|
||||||
|
|
||||||
if !has_other_global_aliases && !has_other_local_aliases {
|
if !has_other_global_aliases && !has_other_local_aliases {
|
||||||
return Err(Error::BadRequest(format!("Bucket {} doesn't have other aliases, please delete it instead of just unaliasing.", alias_name)));
|
return Err(Error::BadRequest(format!("Bucket {} doesn't have other aliases, please delete it instead of just unaliasing.", alias_name)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Checks ok, remove alias
|
// Checks ok, remove alias
|
||||||
|
let key_param = key.state.as_option_mut().unwrap();
|
||||||
let bucket_p_local_alias_key = (key.key_id.clone(), alias_name.clone());
|
let bucket_p_local_alias_key = (key.key_id.clone(), alias_name.clone());
|
||||||
|
|
||||||
let alias_ts = increment_logical_clock_2(
|
let alias_ts = increment_logical_clock_2(
|
||||||
key_p.local_aliases.get_timestamp(alias_name),
|
key_param.local_aliases.get_timestamp(alias_name),
|
||||||
bucket_p
|
bucket_p
|
||||||
.local_aliases
|
.local_aliases
|
||||||
.get_timestamp(&bucket_p_local_alias_key),
|
.get_timestamp(&bucket_p_local_alias_key),
|
||||||
|
|
@ -324,8 +324,7 @@ impl<'a> LockedHelper<'a> {
|
||||||
// ---- timestamp-ensured causality barrier ----
|
// ---- timestamp-ensured causality barrier ----
|
||||||
// writes are now done and all writes use timestamp alias_ts
|
// writes are now done and all writes use timestamp alias_ts
|
||||||
|
|
||||||
key.state.as_option_mut().unwrap().local_aliases =
|
key_param.local_aliases = LwwMap::raw_item(alias_name.clone(), alias_ts, None);
|
||||||
LwwMap::raw_item(alias_name.clone(), alias_ts, None);
|
|
||||||
self.0.key_table.insert(&key).await?;
|
self.0.key_table.insert(&key).await?;
|
||||||
|
|
||||||
bucket_p.local_aliases = LwwMap::raw_item(bucket_p_local_alias_key, alias_ts, false);
|
bucket_p.local_aliases = LwwMap::raw_item(bucket_p_local_alias_key, alias_ts, false);
|
||||||
|
|
@ -334,68 +333,21 @@ impl<'a> LockedHelper<'a> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Ensures a bucket does not have a certain local alias.
|
|
||||||
/// Contrarily to unset_local_bucket_alias, this does not
|
|
||||||
/// fail on any condition other than:
|
|
||||||
/// - bucket cannot be found (its fine if it is in deleted state)
|
|
||||||
/// - key cannot be found (its fine if alias in key points to nothing
|
|
||||||
/// or to another bucket)
|
|
||||||
pub async fn purge_local_bucket_alias(
|
|
||||||
&self,
|
|
||||||
bucket_id: Uuid,
|
|
||||||
key_id: &String,
|
|
||||||
alias_name: &String,
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
let mut bucket = self.bucket().get_internal_bucket(bucket_id).await?;
|
|
||||||
let mut key = self.key().get_internal_key(key_id).await?;
|
|
||||||
|
|
||||||
let bucket_p_local_alias_key = (key.key_id.clone(), alias_name.clone());
|
|
||||||
|
|
||||||
let alias_ts = increment_logical_clock_2(
|
|
||||||
key.state
|
|
||||||
.as_option()
|
|
||||||
.map(|p| p.local_aliases.get_timestamp(alias_name))
|
|
||||||
.unwrap_or(0),
|
|
||||||
bucket
|
|
||||||
.state
|
|
||||||
.as_option()
|
|
||||||
.map(|p| p.local_aliases.get_timestamp(&bucket_p_local_alias_key))
|
|
||||||
.unwrap_or(0),
|
|
||||||
);
|
|
||||||
|
|
||||||
// ---- timestamp-ensured causality barrier ----
|
|
||||||
// writes are now done and all writes use timestamp alias_ts
|
|
||||||
|
|
||||||
if let Some(kp) = key.state.as_option_mut() {
|
|
||||||
kp.local_aliases = LwwMap::raw_item(alias_name.clone(), alias_ts, None);
|
|
||||||
self.0.key_table.insert(&key).await?;
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(bp) = bucket.state.as_option_mut() {
|
|
||||||
bp.local_aliases = LwwMap::raw_item(bucket_p_local_alias_key, alias_ts, false);
|
|
||||||
self.0.bucket_table.insert(&bucket).await?;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
// ================================================
|
|
||||||
// permissions
|
|
||||||
// ================================================
|
|
||||||
|
|
||||||
/// Sets permissions for a key on a bucket.
|
/// Sets permissions for a key on a bucket.
|
||||||
/// This function fails if:
|
/// This function fails if:
|
||||||
/// - bucket or key cannot be found at all (its ok if they are in deleted state)
|
/// - bucket or key cannot be found at all (its ok if they are in deleted state)
|
||||||
/// - bucket or key is in deleted state and we are trying to set
|
/// - bucket or key is in deleted state and we are trying to set permissions other than "deny
|
||||||
/// permissions other than "deny all"
|
/// all"
|
||||||
pub async fn set_bucket_key_permissions(
|
pub async fn set_bucket_key_permissions(
|
||||||
&self,
|
&self,
|
||||||
bucket_id: Uuid,
|
bucket_id: Uuid,
|
||||||
key_id: &String,
|
key_id: &String,
|
||||||
mut perm: BucketKeyPerm,
|
mut perm: BucketKeyPerm,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
|
let key_helper = KeyHelper(self.0);
|
||||||
|
|
||||||
let mut bucket = self.bucket().get_internal_bucket(bucket_id).await?;
|
let mut bucket = self.bucket().get_internal_bucket(bucket_id).await?;
|
||||||
let mut key = self.key().get_internal_key(key_id).await?;
|
let mut key = key_helper.get_internal_key(key_id).await?;
|
||||||
|
|
||||||
if let Some(bstate) = bucket.state.as_option() {
|
if let Some(bstate) = bucket.state.as_option() {
|
||||||
if let Some(kp) = bstate.authorized_keys.get(key_id) {
|
if let Some(kp) = bstate.authorized_keys.get(key_id) {
|
||||||
|
|
@ -432,20 +384,21 @@ impl<'a> LockedHelper<'a> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
// ================================================
|
// ----
|
||||||
// keys
|
|
||||||
// ================================================
|
|
||||||
|
|
||||||
/// Deletes an API access key
|
/// Deletes an API access key
|
||||||
pub async fn delete_key(&self, key: &mut Key) -> Result<(), Error> {
|
pub async fn delete_key(&self, key: &mut Key) -> Result<(), Error> {
|
||||||
let state = key.state.as_option_mut().unwrap();
|
let state = key.state.as_option_mut().unwrap();
|
||||||
|
|
||||||
// --- done checking, now commit ---
|
// --- done checking, now commit ---
|
||||||
|
// (the step at unset_local_bucket_alias will fail if a bucket
|
||||||
|
// does not have another alias, the deletion will be
|
||||||
|
// interrupted in the middle if that happens)
|
||||||
|
|
||||||
// 1. Delete local aliases
|
// 1. Delete local aliases
|
||||||
for (alias, _, to) in state.local_aliases.items().iter() {
|
for (alias, _, to) in state.local_aliases.items().iter() {
|
||||||
if let Some(bucket_id) = to {
|
if let Some(bucket_id) = to {
|
||||||
self.purge_local_bucket_alias(*bucket_id, &key.key_id, alias)
|
self.unset_local_bucket_alias(*bucket_id, &key.key_id, alias)
|
||||||
.await?;
|
.await?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -462,193 +415,4 @@ impl<'a> LockedHelper<'a> {
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
// ================================================
|
|
||||||
// repair procedure
|
|
||||||
// ================================================
|
|
||||||
|
|
||||||
pub async fn repair_aliases(&self) -> Result<(), GarageError> {
|
|
||||||
self.0.db.transaction(|tx| {
|
|
||||||
info!("--- begin repair_aliases transaction ----");
|
|
||||||
|
|
||||||
// 1. List all non-deleted buckets, so that we can fix bad aliases
|
|
||||||
let mut all_buckets: HashSet<Uuid> = HashSet::new();
|
|
||||||
|
|
||||||
for item in tx.range::<&[u8], _>(&self.0.bucket_table.data.store, ..)? {
|
|
||||||
let bucket = self
|
|
||||||
.0
|
|
||||||
.bucket_table
|
|
||||||
.data
|
|
||||||
.decode_entry(&(item?.1))
|
|
||||||
.map_err(db::TxError::Abort)?;
|
|
||||||
if !bucket.is_deleted() {
|
|
||||||
all_buckets.insert(bucket.id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
info!("number of buckets: {}", all_buckets.len());
|
|
||||||
|
|
||||||
// 2. List all aliases declared in bucket_alias_table and key_table
|
|
||||||
// Take note of aliases that point to non-existing buckets
|
|
||||||
let mut global_aliases: HashMap<String, Uuid> = HashMap::new();
|
|
||||||
|
|
||||||
{
|
|
||||||
let mut delete_global = vec![];
|
|
||||||
for item in tx.range::<&[u8], _>(&self.0.bucket_alias_table.data.store, ..)? {
|
|
||||||
let mut alias = self
|
|
||||||
.0
|
|
||||||
.bucket_alias_table
|
|
||||||
.data
|
|
||||||
.decode_entry(&(item?.1))
|
|
||||||
.map_err(db::TxError::Abort)?;
|
|
||||||
if let Some(id) = alias.state.get() {
|
|
||||||
if all_buckets.contains(id) {
|
|
||||||
// keep aliases
|
|
||||||
global_aliases.insert(alias.name().to_string(), *id);
|
|
||||||
} else {
|
|
||||||
// delete alias
|
|
||||||
warn!(
|
|
||||||
"global alias: remove {} -> {:?} (bucket is deleted)",
|
|
||||||
alias.name(),
|
|
||||||
id
|
|
||||||
);
|
|
||||||
alias.state.update(None);
|
|
||||||
delete_global.push(alias);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
info!("number of global aliases: {}", global_aliases.len());
|
|
||||||
|
|
||||||
info!("global alias table: {} entries fixed", delete_global.len());
|
|
||||||
for ga in delete_global {
|
|
||||||
debug!("Enqueue update to global alias table: {:?}", ga);
|
|
||||||
self.0.bucket_alias_table.queue_insert(tx, &ga)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut local_aliases: HashMap<(String, String), Uuid> = HashMap::new();
|
|
||||||
|
|
||||||
{
|
|
||||||
let mut delete_local = vec![];
|
|
||||||
|
|
||||||
for item in tx.range::<&[u8], _>(&self.0.key_table.data.store, ..)? {
|
|
||||||
let mut key = self
|
|
||||||
.0
|
|
||||||
.key_table
|
|
||||||
.data
|
|
||||||
.decode_entry(&(item?.1))
|
|
||||||
.map_err(db::TxError::Abort)?;
|
|
||||||
let Some(p) = key.state.as_option_mut() else {
|
|
||||||
continue;
|
|
||||||
};
|
|
||||||
let mut has_changes = false;
|
|
||||||
for (name, _, to) in p.local_aliases.items().to_vec() {
|
|
||||||
if let Some(id) = to {
|
|
||||||
if all_buckets.contains(&id) {
|
|
||||||
local_aliases.insert((key.key_id.clone(), name), id);
|
|
||||||
} else {
|
|
||||||
warn!(
|
|
||||||
"local alias: remove ({}, {}) -> {:?} (bucket is deleted)",
|
|
||||||
key.key_id, name, id
|
|
||||||
);
|
|
||||||
p.local_aliases.update_in_place(name, None);
|
|
||||||
has_changes = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if has_changes {
|
|
||||||
delete_local.push(key);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
info!("number of local aliases: {}", local_aliases.len());
|
|
||||||
|
|
||||||
info!("key table: {} entries fixed", delete_local.len());
|
|
||||||
for la in delete_local {
|
|
||||||
debug!("Enqueue update to key table: {:?}", la);
|
|
||||||
self.0.key_table.queue_insert(tx, &la)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 4. Reverse the alias maps to determine the aliases per-bucket
|
|
||||||
let mut bucket_global: HashMap<Uuid, Vec<String>> = HashMap::new();
|
|
||||||
let mut bucket_local: HashMap<Uuid, Vec<(String, String)>> = HashMap::new();
|
|
||||||
|
|
||||||
for (name, bucket) in global_aliases {
|
|
||||||
bucket_global.entry(bucket).or_default().push(name);
|
|
||||||
}
|
|
||||||
for ((key, name), bucket) in local_aliases {
|
|
||||||
bucket_local.entry(bucket).or_default().push((key, name));
|
|
||||||
}
|
|
||||||
|
|
||||||
// 5. Fix the bucket table to ensure consistency
|
|
||||||
let mut bucket_updates = vec![];
|
|
||||||
|
|
||||||
for item in tx.range::<&[u8], _>(&self.0.bucket_table.data.store, ..)? {
|
|
||||||
let bucket = self
|
|
||||||
.0
|
|
||||||
.bucket_table
|
|
||||||
.data
|
|
||||||
.decode_entry(&(item?.1))
|
|
||||||
.map_err(db::TxError::Abort)?;
|
|
||||||
let mut bucket2 = bucket.clone();
|
|
||||||
let Some(param) = bucket2.state.as_option_mut() else {
|
|
||||||
continue;
|
|
||||||
};
|
|
||||||
|
|
||||||
// fix global aliases
|
|
||||||
{
|
|
||||||
let ga = bucket_global.remove(&bucket.id).unwrap_or_default();
|
|
||||||
for (name, _, active) in param.aliases.items().to_vec() {
|
|
||||||
if active && !ga.contains(&name) {
|
|
||||||
warn!("bucket {:?}: remove global alias {}", bucket.id, name);
|
|
||||||
param.aliases.update_in_place(name, false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for name in ga {
|
|
||||||
if param.aliases.get(&name).copied() != Some(true) {
|
|
||||||
warn!("bucket {:?}: add global alias {}", bucket.id, name);
|
|
||||||
param.aliases.update_in_place(name, true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// fix local aliases
|
|
||||||
{
|
|
||||||
let la = bucket_local.remove(&bucket.id).unwrap_or_default();
|
|
||||||
for (pair, _, active) in param.local_aliases.items().to_vec() {
|
|
||||||
if active && !la.contains(&pair) {
|
|
||||||
warn!("bucket {:?}: remove local alias {:?}", bucket.id, pair);
|
|
||||||
param.local_aliases.update_in_place(pair, false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for pair in la {
|
|
||||||
if param.local_aliases.get(&pair).copied() != Some(true) {
|
|
||||||
warn!("bucket {:?}: add local alias {:?}", bucket.id, pair);
|
|
||||||
param.local_aliases.update_in_place(pair, true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if bucket2 != bucket {
|
|
||||||
bucket_updates.push(bucket2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
info!("bucket table: {} entries fixed", bucket_updates.len());
|
|
||||||
for b in bucket_updates {
|
|
||||||
debug!("Enqueue update to bucket table: {:?}", b);
|
|
||||||
self.0.bucket_table.queue_insert(tx, &b)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
info!("--- end repair_aliases transaction ----");
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
})?;
|
|
||||||
|
|
||||||
info!("repair_aliases is done");
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ use garage_db as db;
|
||||||
use garage_rpc::layout::LayoutHelper;
|
use garage_rpc::layout::LayoutHelper;
|
||||||
use garage_rpc::system::System;
|
use garage_rpc::system::System;
|
||||||
use garage_util::background::BackgroundRunner;
|
use garage_util::background::BackgroundRunner;
|
||||||
|
use garage_util::config::Config;
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
use garage_util::error::*;
|
use garage_util::error::*;
|
||||||
use garage_util::migrate::Migrate;
|
use garage_util::migrate::Migrate;
|
||||||
|
|
@ -173,6 +174,7 @@ impl<T: CountedItem> IndexCounter<T> {
|
||||||
system: Arc<System>,
|
system: Arc<System>,
|
||||||
replication: TableShardedReplication,
|
replication: TableShardedReplication,
|
||||||
db: &db::Db,
|
db: &db::Db,
|
||||||
|
config: &Config,
|
||||||
) -> Arc<Self> {
|
) -> Arc<Self> {
|
||||||
Arc::new(Self {
|
Arc::new(Self {
|
||||||
this_node: system.id,
|
this_node: system.id,
|
||||||
|
|
@ -186,6 +188,7 @@ impl<T: CountedItem> IndexCounter<T> {
|
||||||
replication,
|
replication,
|
||||||
system,
|
system,
|
||||||
db,
|
db,
|
||||||
|
&config.experimental.merkle_backpressure,
|
||||||
),
|
),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -121,13 +121,13 @@ impl Worker for LifecycleWorker {
|
||||||
mpu_aborted,
|
mpu_aborted,
|
||||||
..
|
..
|
||||||
} => {
|
} => {
|
||||||
let n_objects = self.garage.object_table.data.store.approximate_len().ok();
|
let n_objects = self.garage.object_table.data.store.len().ok();
|
||||||
let progress = match n_objects {
|
let progress = match n_objects {
|
||||||
Some(total) if total > 0 => format!(
|
None => "...".to_string(),
|
||||||
|
Some(total) => format!(
|
||||||
"~{:.2}%",
|
"~{:.2}%",
|
||||||
100. * std::cmp::min(*counter, total) as f32 / total as f32
|
100. * std::cmp::min(*counter, total) as f32 / total as f32
|
||||||
),
|
),
|
||||||
_ => "...".to_string(),
|
|
||||||
};
|
};
|
||||||
WorkerStatus {
|
WorkerStatus {
|
||||||
progress: Some(progress),
|
progress: Some(progress),
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "garage_net"
|
name = "garage_net"
|
||||||
version = "1.3.1"
|
version = "1.1.0"
|
||||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
|
|
@ -30,7 +30,7 @@ rand.workspace = true
|
||||||
|
|
||||||
log.workspace = true
|
log.workspace = true
|
||||||
arc-swap.workspace = true
|
arc-swap.workspace = true
|
||||||
thiserror.workspace = true
|
err-derive.workspace = true
|
||||||
bytes.workspace = true
|
bytes.workspace = true
|
||||||
cfg-if.workspace = true
|
cfg-if.workspace = true
|
||||||
|
|
||||||
|
|
@ -39,6 +39,7 @@ kuska-handshake.workspace = true
|
||||||
|
|
||||||
opentelemetry = { workspace = true, optional = true }
|
opentelemetry = { workspace = true, optional = true }
|
||||||
opentelemetry-contrib = { workspace = true, optional = true }
|
opentelemetry-contrib = { workspace = true, optional = true }
|
||||||
|
tracing.workspace = true
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
pretty_env_logger.workspace = true
|
pretty_env_logger.workspace = true
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ use std::pin::Pin;
|
||||||
use std::sync::atomic::{self, AtomicU32};
|
use std::sync::atomic::{self, AtomicU32};
|
||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
use std::task::Poll;
|
use std::task::Poll;
|
||||||
|
use tracing::*;
|
||||||
|
|
||||||
use arc_swap::ArcSwapOption;
|
use arc_swap::ArcSwapOption;
|
||||||
use bytes::Bytes;
|
use bytes::Bytes;
|
||||||
|
|
@ -14,7 +15,7 @@ use futures::Stream;
|
||||||
use kuska_handshake::async_std::{handshake_client, BoxStream};
|
use kuska_handshake::async_std::{handshake_client, BoxStream};
|
||||||
use tokio::net::TcpStream;
|
use tokio::net::TcpStream;
|
||||||
use tokio::select;
|
use tokio::select;
|
||||||
use tokio::sync::{mpsc, oneshot, watch};
|
use tokio::sync::{mpsc, oneshot, watch, Semaphore};
|
||||||
use tokio_util::compat::*;
|
use tokio_util::compat::*;
|
||||||
|
|
||||||
#[cfg(feature = "telemetry")]
|
#[cfg(feature = "telemetry")]
|
||||||
|
|
@ -25,6 +26,7 @@ use opentelemetry::{
|
||||||
#[cfg(feature = "telemetry")]
|
#[cfg(feature = "telemetry")]
|
||||||
use opentelemetry_contrib::trace::propagator::binary::*;
|
use opentelemetry_contrib::trace::propagator::binary::*;
|
||||||
|
|
||||||
|
use crate::endpoint::RpcInFlightLimiter;
|
||||||
use crate::error::*;
|
use crate::error::*;
|
||||||
use crate::message::*;
|
use crate::message::*;
|
||||||
use crate::netapp::*;
|
use crate::netapp::*;
|
||||||
|
|
@ -41,6 +43,7 @@ pub(crate) struct ClientConn {
|
||||||
|
|
||||||
next_query_number: AtomicU32,
|
next_query_number: AtomicU32,
|
||||||
inflight: Mutex<HashMap<RequestID, oneshot::Sender<ByteStream>>>,
|
inflight: Mutex<HashMap<RequestID, oneshot::Sender<ByteStream>>>,
|
||||||
|
rpc_table_write_inflight_limiter: Option<Semaphore>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ClientConn {
|
impl ClientConn {
|
||||||
|
|
@ -98,8 +101,14 @@ impl ClientConn {
|
||||||
next_query_number: AtomicU32::from(RequestID::default()),
|
next_query_number: AtomicU32::from(RequestID::default()),
|
||||||
query_send: ArcSwapOption::new(Some(Arc::new(query_send))),
|
query_send: ArcSwapOption::new(Some(Arc::new(query_send))),
|
||||||
inflight: Mutex::new(HashMap::new()),
|
inflight: Mutex::new(HashMap::new()),
|
||||||
|
rpc_table_write_inflight_limiter: netapp.max_in_flight_table_write.map(Semaphore::new),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
info!(
|
||||||
|
"Created conn with table write limit set to {}",
|
||||||
|
netapp.max_in_flight_table_write.unwrap_or(0)
|
||||||
|
);
|
||||||
|
|
||||||
netapp.connected_as_client(peer_id, conn.clone());
|
netapp.connected_as_client(peer_id, conn.clone());
|
||||||
|
|
||||||
let debug_name = format!("CLI {}", hex::encode(&peer_id[..8]));
|
let debug_name = format!("CLI {}", hex::encode(&peer_id[..8]));
|
||||||
|
|
@ -144,10 +153,21 @@ impl ClientConn {
|
||||||
req: Req<T>,
|
req: Req<T>,
|
||||||
path: &str,
|
path: &str,
|
||||||
prio: RequestPriority,
|
prio: RequestPriority,
|
||||||
|
limiter: RpcInFlightLimiter,
|
||||||
) -> Result<Resp<T>, Error>
|
) -> Result<Resp<T>, Error>
|
||||||
where
|
where
|
||||||
T: Message,
|
T: Message,
|
||||||
{
|
{
|
||||||
|
let _permit = match (limiter, &self.rpc_table_write_inflight_limiter) {
|
||||||
|
(RpcInFlightLimiter::TableWrite, Some(sem)) => {
|
||||||
|
info!(
|
||||||
|
"Available RPC table write slots: {}",
|
||||||
|
sem.available_permits()
|
||||||
|
);
|
||||||
|
Some(sem.acquire().await.unwrap())
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
let query_send = self.query_send.load_full().ok_or(Error::ConnectionClosed)?;
|
let query_send = self.query_send.load_full().ok_or(Error::ConnectionClosed)?;
|
||||||
|
|
||||||
let id = self
|
let id = self
|
||||||
|
|
@ -212,6 +232,7 @@ impl ClientConn {
|
||||||
let stream = Box::pin(canceller.for_stream(stream));
|
let stream = Box::pin(canceller.for_stream(stream));
|
||||||
|
|
||||||
let resp_enc = RespEnc::decode(stream).await?;
|
let resp_enc = RespEnc::decode(stream).await?;
|
||||||
|
drop(_permit);
|
||||||
debug!("client: got response to request {} (path {})", id, path);
|
debug!("client: got response to request {} (path {})", id, path);
|
||||||
Resp::from_enc(resp_enc)
|
Resp::from_enc(resp_enc)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -57,6 +57,13 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Copy, Clone, Default)]
|
||||||
|
pub enum RpcInFlightLimiter {
|
||||||
|
#[default]
|
||||||
|
NoLimit,
|
||||||
|
TableWrite,
|
||||||
|
}
|
||||||
|
|
||||||
// ----
|
// ----
|
||||||
|
|
||||||
/// This struct represents an endpoint for message of type `M`.
|
/// This struct represents an endpoint for message of type `M`.
|
||||||
|
|
@ -114,6 +121,7 @@ where
|
||||||
target: &NodeID,
|
target: &NodeID,
|
||||||
req: T,
|
req: T,
|
||||||
prio: RequestPriority,
|
prio: RequestPriority,
|
||||||
|
limiter: RpcInFlightLimiter,
|
||||||
) -> Result<Resp<M>, Error>
|
) -> Result<Resp<M>, Error>
|
||||||
where
|
where
|
||||||
T: IntoReq<M>,
|
T: IntoReq<M>,
|
||||||
|
|
@ -136,7 +144,10 @@ where
|
||||||
"Not connected: {}",
|
"Not connected: {}",
|
||||||
hex::encode(&target[..8])
|
hex::encode(&target[..8])
|
||||||
))),
|
))),
|
||||||
Some(c) => c.call(req.into_req()?, self.path.as_str(), prio).await,
|
Some(c) => {
|
||||||
|
c.call(req.into_req()?, self.path.as_str(), prio, limiter)
|
||||||
|
.await
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -149,8 +160,12 @@ where
|
||||||
target: &NodeID,
|
target: &NodeID,
|
||||||
req: M,
|
req: M,
|
||||||
prio: RequestPriority,
|
prio: RequestPriority,
|
||||||
|
limiter: RpcInFlightLimiter,
|
||||||
) -> Result<<M as Message>::Response, Error> {
|
) -> Result<<M as Message>::Response, Error> {
|
||||||
Ok(self.call_streaming(target, req, prio).await?.into_msg())
|
Ok(self
|
||||||
|
.call_streaming(target, req, prio, limiter)
|
||||||
|
.await?
|
||||||
|
.into_msg())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -159,7 +174,7 @@ where
|
||||||
pub(crate) type DynEndpoint = Box<dyn GenericEndpoint + Send + Sync>;
|
pub(crate) type DynEndpoint = Box<dyn GenericEndpoint + Send + Sync>;
|
||||||
|
|
||||||
pub(crate) trait GenericEndpoint {
|
pub(crate) trait GenericEndpoint {
|
||||||
fn handle(&self, req_enc: ReqEnc, from: NodeID) -> BoxFuture<'_, Result<RespEnc, Error>>;
|
fn handle(&self, req_enc: ReqEnc, from: NodeID) -> BoxFuture<Result<RespEnc, Error>>;
|
||||||
fn drop_handler(&self);
|
fn drop_handler(&self);
|
||||||
fn clone_endpoint(&self) -> DynEndpoint;
|
fn clone_endpoint(&self) -> DynEndpoint;
|
||||||
}
|
}
|
||||||
|
|
@ -175,7 +190,7 @@ where
|
||||||
M: Message,
|
M: Message,
|
||||||
H: StreamingEndpointHandler<M> + 'static,
|
H: StreamingEndpointHandler<M> + 'static,
|
||||||
{
|
{
|
||||||
fn handle(&self, req_enc: ReqEnc, from: NodeID) -> BoxFuture<'_, Result<RespEnc, Error>> {
|
fn handle(&self, req_enc: ReqEnc, from: NodeID) -> BoxFuture<Result<RespEnc, Error>> {
|
||||||
async move {
|
async move {
|
||||||
match self.0.handler.load_full() {
|
match self.0.handler.load_full() {
|
||||||
None => Err(Error::NoHandler),
|
None => Err(Error::NoHandler),
|
||||||
|
|
|
||||||
|
|
@ -1,49 +1,49 @@
|
||||||
use std::io;
|
use std::io;
|
||||||
|
|
||||||
|
use err_derive::Error;
|
||||||
use log::error;
|
use log::error;
|
||||||
use thiserror::Error;
|
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
#[error("IO error: {0}")]
|
#[error(display = "IO error: {}", _0)]
|
||||||
Io(#[from] io::Error),
|
Io(#[error(source)] io::Error),
|
||||||
|
|
||||||
#[error("Messagepack encode error: {0}")]
|
#[error(display = "Messagepack encode error: {}", _0)]
|
||||||
RMPEncode(#[from] rmp_serde::encode::Error),
|
RMPEncode(#[error(source)] rmp_serde::encode::Error),
|
||||||
#[error("Messagepack decode error: {0}")]
|
#[error(display = "Messagepack decode error: {}", _0)]
|
||||||
RMPDecode(#[from] rmp_serde::decode::Error),
|
RMPDecode(#[error(source)] rmp_serde::decode::Error),
|
||||||
|
|
||||||
#[error("Tokio join error: {0}")]
|
#[error(display = "Tokio join error: {}", _0)]
|
||||||
TokioJoin(#[from] tokio::task::JoinError),
|
TokioJoin(#[error(source)] tokio::task::JoinError),
|
||||||
|
|
||||||
#[error("oneshot receive error: {0}")]
|
#[error(display = "oneshot receive error: {}", _0)]
|
||||||
OneshotRecv(#[from] tokio::sync::oneshot::error::RecvError),
|
OneshotRecv(#[error(source)] tokio::sync::oneshot::error::RecvError),
|
||||||
|
|
||||||
#[error("Handshake error: {0}")]
|
#[error(display = "Handshake error: {}", _0)]
|
||||||
Handshake(#[from] kuska_handshake::async_std::Error),
|
Handshake(#[error(source)] kuska_handshake::async_std::Error),
|
||||||
|
|
||||||
#[error("UTF8 error: {0}")]
|
#[error(display = "UTF8 error: {}", _0)]
|
||||||
UTF8(#[from] std::string::FromUtf8Error),
|
UTF8(#[error(source)] std::string::FromUtf8Error),
|
||||||
|
|
||||||
#[error("Framing protocol error")]
|
#[error(display = "Framing protocol error")]
|
||||||
Framing,
|
Framing,
|
||||||
|
|
||||||
#[error("Remote error ({0:?}): {1}")]
|
#[error(display = "Remote error ({:?}): {}", _0, _1)]
|
||||||
Remote(io::ErrorKind, String),
|
Remote(io::ErrorKind, String),
|
||||||
|
|
||||||
#[error("Request ID collision")]
|
#[error(display = "Request ID collision")]
|
||||||
IdCollision,
|
IdCollision,
|
||||||
|
|
||||||
#[error("{0}")]
|
#[error(display = "{}", _0)]
|
||||||
Message(String),
|
Message(String),
|
||||||
|
|
||||||
#[error("No handler / shutting down")]
|
#[error(display = "No handler / shutting down")]
|
||||||
NoHandler,
|
NoHandler,
|
||||||
|
|
||||||
#[error("Connection closed")]
|
#[error(display = "Connection closed")]
|
||||||
ConnectionClosed,
|
ConnectionClosed,
|
||||||
|
|
||||||
#[error("Version mismatch: {0}")]
|
#[error(display = "Version mismatch: {}", _0)]
|
||||||
VersionMismatch(String),
|
VersionMismatch(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -74,6 +74,8 @@ pub struct NetApp {
|
||||||
pub id: NodeID,
|
pub id: NodeID,
|
||||||
/// Private key associated with our peer ID
|
/// Private key associated with our peer ID
|
||||||
pub privkey: ed25519::SecretKey,
|
pub privkey: ed25519::SecretKey,
|
||||||
|
/// Config related to netapp
|
||||||
|
pub(crate) max_in_flight_table_write: Option<usize>,
|
||||||
|
|
||||||
pub(crate) server_conns: RwLock<HashMap<NodeID, Arc<ServerConn>>>,
|
pub(crate) server_conns: RwLock<HashMap<NodeID, Arc<ServerConn>>>,
|
||||||
pub(crate) client_conns: RwLock<HashMap<NodeID, Arc<ClientConn>>>,
|
pub(crate) client_conns: RwLock<HashMap<NodeID, Arc<ClientConn>>>,
|
||||||
|
|
@ -101,6 +103,7 @@ impl NetApp {
|
||||||
netid: auth::Key,
|
netid: auth::Key,
|
||||||
privkey: ed25519::SecretKey,
|
privkey: ed25519::SecretKey,
|
||||||
bind_outgoing_to: Option<IpAddr>,
|
bind_outgoing_to: Option<IpAddr>,
|
||||||
|
max_in_flight_table_write: Option<usize>,
|
||||||
) -> Arc<Self> {
|
) -> Arc<Self> {
|
||||||
let mut version_tag = [0u8; 16];
|
let mut version_tag = [0u8; 16];
|
||||||
version_tag[0..8].copy_from_slice(&u64::to_be_bytes(NETAPP_VERSION_TAG)[..]);
|
version_tag[0..8].copy_from_slice(&u64::to_be_bytes(NETAPP_VERSION_TAG)[..]);
|
||||||
|
|
@ -114,6 +117,7 @@ impl NetApp {
|
||||||
netid,
|
netid,
|
||||||
id,
|
id,
|
||||||
privkey,
|
privkey,
|
||||||
|
max_in_flight_table_write,
|
||||||
server_conns: RwLock::new(HashMap::new()),
|
server_conns: RwLock::new(HashMap::new()),
|
||||||
client_conns: RwLock::new(HashMap::new()),
|
client_conns: RwLock::new(HashMap::new()),
|
||||||
endpoints: RwLock::new(HashMap::new()),
|
endpoints: RwLock::new(HashMap::new()),
|
||||||
|
|
@ -427,6 +431,7 @@ impl NetApp {
|
||||||
server_port,
|
server_port,
|
||||||
},
|
},
|
||||||
PRIO_NORMAL,
|
PRIO_NORMAL,
|
||||||
|
RpcInFlightLimiter::NoLimit,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.map(|_| ())
|
.map(|_| ())
|
||||||
|
|
|
||||||
|
|
@ -406,7 +406,7 @@ impl PeeringManager {
|
||||||
ping_time
|
ping_time
|
||||||
);
|
);
|
||||||
let ping_response = select! {
|
let ping_response = select! {
|
||||||
r = self.ping_endpoint.call(&id, ping_msg, PRIO_HIGH) => r,
|
r = self.ping_endpoint.call(&id, ping_msg, PRIO_HIGH, RpcInFlightLimiter::NoLimit) => r,
|
||||||
_ = tokio::time::sleep(ping_timeout) => Err(Error::Message("Ping timeout".into())),
|
_ = tokio::time::sleep(ping_timeout) => Err(Error::Message("Ping timeout".into())),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -458,7 +458,12 @@ impl PeeringManager {
|
||||||
let pex_message = PeerListMessage { list: peer_list };
|
let pex_message = PeerListMessage { list: peer_list };
|
||||||
match self
|
match self
|
||||||
.peer_list_endpoint
|
.peer_list_endpoint
|
||||||
.call(id, pex_message, PRIO_BACKGROUND)
|
.call(
|
||||||
|
id,
|
||||||
|
pex_message,
|
||||||
|
PRIO_BACKGROUND,
|
||||||
|
RpcInFlightLimiter::NoLimit,
|
||||||
|
)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
Err(e) => warn!("Error doing peer exchange: {}", e),
|
Err(e) => warn!("Error doing peer exchange: {}", e),
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "garage_rpc"
|
name = "garage_rpc"
|
||||||
version = "1.3.1"
|
version = "1.1.0"
|
||||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
|
|
@ -33,7 +33,7 @@ async-trait.workspace = true
|
||||||
serde.workspace = true
|
serde.workspace = true
|
||||||
serde_bytes.workspace = true
|
serde_bytes.workspace = true
|
||||||
serde_json.workspace = true
|
serde_json.workspace = true
|
||||||
thiserror = { workspace = true, optional = true }
|
err-derive = { workspace = true, optional = true }
|
||||||
|
|
||||||
# newer version requires rust edition 2021
|
# newer version requires rust edition 2021
|
||||||
kube = { workspace = true, optional = true }
|
kube = { workspace = true, optional = true }
|
||||||
|
|
@ -49,5 +49,5 @@ opentelemetry.workspace = true
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
kubernetes-discovery = [ "kube", "k8s-openapi", "schemars" ]
|
kubernetes-discovery = [ "kube", "k8s-openapi", "schemars" ]
|
||||||
consul-discovery = [ "reqwest", "thiserror" ]
|
consul-discovery = [ "reqwest", "err-derive" ]
|
||||||
system-libs = [ "sodiumoxide/use-pkg-config" ]
|
system-libs = [ "sodiumoxide/use-pkg-config" ]
|
||||||
|
|
|
||||||
|
|
@ -3,8 +3,8 @@ use std::fs::File;
|
||||||
use std::io::Read;
|
use std::io::Read;
|
||||||
use std::net::{IpAddr, SocketAddr};
|
use std::net::{IpAddr, SocketAddr};
|
||||||
|
|
||||||
|
use err_derive::Error;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use thiserror::Error;
|
|
||||||
|
|
||||||
use garage_net::NodeID;
|
use garage_net::NodeID;
|
||||||
|
|
||||||
|
|
@ -219,12 +219,12 @@ impl ConsulDiscovery {
|
||||||
/// Regroup all Consul discovery errors
|
/// Regroup all Consul discovery errors
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum ConsulError {
|
pub enum ConsulError {
|
||||||
#[error("IO error: {0}")]
|
#[error(display = "IO error: {}", _0)]
|
||||||
Io(#[from] std::io::Error),
|
Io(#[error(source)] std::io::Error),
|
||||||
#[error("HTTP error: {0}")]
|
#[error(display = "HTTP error: {}", _0)]
|
||||||
Reqwest(#[from] reqwest::Error),
|
Reqwest(#[error(source)] reqwest::Error),
|
||||||
#[error("Invalid Consul TLS configuration")]
|
#[error(display = "Invalid Consul TLS configuration")]
|
||||||
InvalidTLSConfig,
|
InvalidTLSConfig,
|
||||||
#[error("Token error: {0}")]
|
#[error(display = "Token error: {}", _0)]
|
||||||
Token(#[from] reqwest::header::InvalidHeaderValue),
|
Token(#[error(source)] reqwest::header::InvalidHeaderValue),
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -229,11 +229,13 @@ impl LayoutManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Save cluster layout data to disk
|
/// Save cluster layout data to disk
|
||||||
async fn save_cluster_layout(&self) {
|
async fn save_cluster_layout(&self) -> Result<(), Error> {
|
||||||
let layout = self.layout.read().unwrap().inner().clone();
|
let layout = self.layout.read().unwrap().inner().clone();
|
||||||
if let Err(e) = self.persist_cluster_layout.save_async(&layout).await {
|
self.persist_cluster_layout
|
||||||
error!("Failed to save cluster_layout: {}", e);
|
.save_async(&layout)
|
||||||
}
|
.await
|
||||||
|
.expect("Cannot save current cluster layout");
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn broadcast_update(self: &Arc<Self>, rpc: SystemRpc) {
|
fn broadcast_update(self: &Arc<Self>, rpc: SystemRpc) {
|
||||||
|
|
@ -311,7 +313,7 @@ impl LayoutManager {
|
||||||
|
|
||||||
self.change_notify.notify_waiters();
|
self.change_notify.notify_waiters();
|
||||||
self.broadcast_update(SystemRpc::AdvertiseClusterLayout(new_layout));
|
self.broadcast_update(SystemRpc::AdvertiseClusterLayout(new_layout));
|
||||||
self.save_cluster_layout().await;
|
self.save_cluster_layout().await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(SystemRpc::Ok)
|
Ok(SystemRpc::Ok)
|
||||||
|
|
@ -326,7 +328,7 @@ impl LayoutManager {
|
||||||
if let Some(new_trackers) = self.merge_layout_trackers(trackers) {
|
if let Some(new_trackers) = self.merge_layout_trackers(trackers) {
|
||||||
self.change_notify.notify_waiters();
|
self.change_notify.notify_waiters();
|
||||||
self.broadcast_update(SystemRpc::AdvertiseClusterLayoutTrackers(new_trackers));
|
self.broadcast_update(SystemRpc::AdvertiseClusterLayoutTrackers(new_trackers));
|
||||||
self.save_cluster_layout().await;
|
self.save_cluster_layout().await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(SystemRpc::Ok)
|
Ok(SystemRpc::Ok)
|
||||||
|
|
|
||||||
|
|
@ -507,7 +507,7 @@ impl LayoutVersion {
|
||||||
g.compute_maximal_flow()?;
|
g.compute_maximal_flow()?;
|
||||||
if g.get_flow_value()? < (NB_PARTITIONS * self.replication_factor) as i64 {
|
if g.get_flow_value()? < (NB_PARTITIONS * self.replication_factor) as i64 {
|
||||||
return Err(Error::Message(
|
return Err(Error::Message(
|
||||||
"The storage capacity of the cluster is too small. It is \
|
"The storage capacity of he cluster is to small. It is \
|
||||||
impossible to store partitions of size 1."
|
impossible to store partitions of size 1."
|
||||||
.into(),
|
.into(),
|
||||||
));
|
));
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ use std::time::Duration;
|
||||||
use futures::future::join_all;
|
use futures::future::join_all;
|
||||||
use futures::stream::futures_unordered::FuturesUnordered;
|
use futures::stream::futures_unordered::FuturesUnordered;
|
||||||
use futures::stream::StreamExt;
|
use futures::stream::StreamExt;
|
||||||
|
use garage_net::endpoint::RpcInFlightLimiter;
|
||||||
use tokio::select;
|
use tokio::select;
|
||||||
|
|
||||||
use opentelemetry::KeyValue;
|
use opentelemetry::KeyValue;
|
||||||
|
|
@ -44,6 +45,8 @@ pub struct RequestStrategy<T> {
|
||||||
rs_timeout: Timeout,
|
rs_timeout: Timeout,
|
||||||
/// Data to drop when everything completes
|
/// Data to drop when everything completes
|
||||||
rs_drop_on_complete: T,
|
rs_drop_on_complete: T,
|
||||||
|
/// RPC In Flight Limiter
|
||||||
|
rs_inflight_limiter: RpcInFlightLimiter,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
|
|
@ -61,6 +64,7 @@ impl Clone for RequestStrategy<()> {
|
||||||
rs_priority: self.rs_priority,
|
rs_priority: self.rs_priority,
|
||||||
rs_timeout: self.rs_timeout,
|
rs_timeout: self.rs_timeout,
|
||||||
rs_drop_on_complete: (),
|
rs_drop_on_complete: (),
|
||||||
|
rs_inflight_limiter: self.rs_inflight_limiter,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -74,6 +78,7 @@ impl RequestStrategy<()> {
|
||||||
rs_priority: prio,
|
rs_priority: prio,
|
||||||
rs_timeout: Timeout::Default,
|
rs_timeout: Timeout::Default,
|
||||||
rs_drop_on_complete: (),
|
rs_drop_on_complete: (),
|
||||||
|
rs_inflight_limiter: RpcInFlightLimiter::NoLimit,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/// Add an item to be dropped on completion
|
/// Add an item to be dropped on completion
|
||||||
|
|
@ -84,6 +89,7 @@ impl RequestStrategy<()> {
|
||||||
rs_priority: self.rs_priority,
|
rs_priority: self.rs_priority,
|
||||||
rs_timeout: self.rs_timeout,
|
rs_timeout: self.rs_timeout,
|
||||||
rs_drop_on_complete: drop_on_complete,
|
rs_drop_on_complete: drop_on_complete,
|
||||||
|
rs_inflight_limiter: RpcInFlightLimiter::NoLimit,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -109,6 +115,10 @@ impl<T> RequestStrategy<T> {
|
||||||
self.rs_timeout = Timeout::Custom(timeout);
|
self.rs_timeout = Timeout::Custom(timeout);
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
pub fn with_write_limiter(mut self) -> Self {
|
||||||
|
self.rs_inflight_limiter = RpcInFlightLimiter::TableWrite;
|
||||||
|
self
|
||||||
|
}
|
||||||
/// Extract drop_on_complete item
|
/// Extract drop_on_complete item
|
||||||
fn extract_drop_on_complete(self) -> (RequestStrategy<()>, T) {
|
fn extract_drop_on_complete(self) -> (RequestStrategy<()>, T) {
|
||||||
(
|
(
|
||||||
|
|
@ -118,6 +128,7 @@ impl<T> RequestStrategy<T> {
|
||||||
rs_priority: self.rs_priority,
|
rs_priority: self.rs_priority,
|
||||||
rs_timeout: self.rs_timeout,
|
rs_timeout: self.rs_timeout,
|
||||||
rs_drop_on_complete: (),
|
rs_drop_on_complete: (),
|
||||||
|
rs_inflight_limiter: self.rs_inflight_limiter,
|
||||||
},
|
},
|
||||||
self.rs_drop_on_complete,
|
self.rs_drop_on_complete,
|
||||||
)
|
)
|
||||||
|
|
@ -185,7 +196,7 @@ impl RpcHelper {
|
||||||
|
|
||||||
let node_id = to.into();
|
let node_id = to.into();
|
||||||
let rpc_call = endpoint
|
let rpc_call = endpoint
|
||||||
.call_streaming(&node_id, msg, strat.rs_priority)
|
.call_streaming(&node_id, msg, strat.rs_priority, strat.rs_inflight_limiter)
|
||||||
.with_context(Context::current_with_span(span))
|
.with_context(Context::current_with_span(span))
|
||||||
.record_duration(&self.0.metrics.rpc_duration, &metric_tags);
|
.record_duration(&self.0.metrics.rpc_duration, &metric_tags);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ use garage_net::{NetApp, NetworkKey, NodeID, NodeKey};
|
||||||
|
|
||||||
#[cfg(feature = "kubernetes-discovery")]
|
#[cfg(feature = "kubernetes-discovery")]
|
||||||
use garage_util::config::KubernetesDiscoveryConfig;
|
use garage_util::config::KubernetesDiscoveryConfig;
|
||||||
use garage_util::config::{Config, DataDirEnum};
|
use garage_util::config::{Config, DataDirEnum, RpcInFlightLimiterEnum};
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
use garage_util::error::*;
|
use garage_util::error::*;
|
||||||
use garage_util::persister::Persister;
|
use garage_util::persister::Persister;
|
||||||
|
|
@ -256,7 +256,17 @@ impl System {
|
||||||
let bind_outgoing_to = Some(config)
|
let bind_outgoing_to = Some(config)
|
||||||
.filter(|x| x.rpc_bind_outgoing)
|
.filter(|x| x.rpc_bind_outgoing)
|
||||||
.map(|x| x.rpc_bind_addr.ip());
|
.map(|x| x.rpc_bind_addr.ip());
|
||||||
let netapp = NetApp::new(GARAGE_VERSION_TAG, network_key, node_key, bind_outgoing_to);
|
let maybe_max_table_write = match &config.experimental.rpc_in_flight_limiters {
|
||||||
|
RpcInFlightLimiterEnum::None => None,
|
||||||
|
RpcInFlightLimiterEnum::FixedSize(v) => Some(v.max_table_write),
|
||||||
|
};
|
||||||
|
let netapp = NetApp::new(
|
||||||
|
GARAGE_VERSION_TAG,
|
||||||
|
network_key,
|
||||||
|
node_key,
|
||||||
|
bind_outgoing_to,
|
||||||
|
maybe_max_table_write,
|
||||||
|
);
|
||||||
let system_endpoint = netapp.endpoint(SYSTEM_RPC_PATH.into());
|
let system_endpoint = netapp.endpoint(SYSTEM_RPC_PATH.into());
|
||||||
|
|
||||||
// ---- setup netapp public listener and full mesh peering strategy ----
|
// ---- setup netapp public listener and full mesh peering strategy ----
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "garage_table"
|
name = "garage_table"
|
||||||
version = "1.3.1"
|
version = "1.1.0"
|
||||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
|
|
|
||||||
|
|
@ -3,10 +3,12 @@ use std::convert::TryInto;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use serde_bytes::ByteBuf;
|
use serde_bytes::ByteBuf;
|
||||||
use tokio::sync::Notify;
|
use tokio::sync::SemaphorePermit;
|
||||||
|
use tokio::sync::{Notify, Semaphore};
|
||||||
|
|
||||||
use garage_db as db;
|
use garage_db as db;
|
||||||
|
|
||||||
|
use garage_util::config::MerkleBackpressureEnum;
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
use garage_util::error::*;
|
use garage_util::error::*;
|
||||||
use garage_util::migrate::Migrate;
|
use garage_util::migrate::Migrate;
|
||||||
|
|
@ -20,6 +22,67 @@ use crate::replication::*;
|
||||||
use crate::schema::*;
|
use crate::schema::*;
|
||||||
use crate::util::*;
|
use crate::util::*;
|
||||||
|
|
||||||
|
pub(crate) struct MerkleTodo {
|
||||||
|
merkle_todo: db::Tree,
|
||||||
|
merkle_todo_notify: Notify,
|
||||||
|
merkle_todo_bounded_queue: Option<Arc<Semaphore>>,
|
||||||
|
}
|
||||||
|
impl Clone for MerkleTodo {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
Self {
|
||||||
|
merkle_todo: self.merkle_todo.clone(),
|
||||||
|
merkle_todo_notify: Notify::new(),
|
||||||
|
merkle_todo_bounded_queue: self.merkle_todo_bounded_queue.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl MerkleTodo {
|
||||||
|
fn new<F: TableSchema>(db: &db::Db, config: &MerkleBackpressureEnum) -> Self {
|
||||||
|
let merkle_todo = db
|
||||||
|
.open_tree(format!("{}:merkle_todo", F::TABLE_NAME))
|
||||||
|
.expect("Unable to open DB Merkle TODO tree");
|
||||||
|
|
||||||
|
let merkle_todo_bounded_queue = match config {
|
||||||
|
MerkleBackpressureEnum::None => None,
|
||||||
|
MerkleBackpressureEnum::FixedQueue(p) => {
|
||||||
|
Some(Arc::new(Semaphore::new(p.max_queue_size)))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Self {
|
||||||
|
merkle_todo,
|
||||||
|
merkle_todo_notify: Notify::new(),
|
||||||
|
merkle_todo_bounded_queue,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn len(&self) -> Result<usize, db::Error> {
|
||||||
|
self.merkle_todo.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn with_db<F: FnOnce(&db::Tree, SemaphorePermit)>(&self, f: F) {
|
||||||
|
let bounded = self
|
||||||
|
.merkle_todo_bounded_queue
|
||||||
|
.clone()
|
||||||
|
.unwrap_or(Arc::new(Semaphore::new(1)));
|
||||||
|
let permit = bounded.acquire().await.unwrap();
|
||||||
|
f(&self.merkle_todo, permit);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn appended(&self, permit: SemaphorePermit) {
|
||||||
|
permit.forget();
|
||||||
|
self.merkle_todo_notify.notify_one();
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn processed(&self) {
|
||||||
|
let bounded = self
|
||||||
|
.merkle_todo_bounded_queue
|
||||||
|
.clone()
|
||||||
|
.unwrap_or(Arc::new(Semaphore::new(1)));
|
||||||
|
bounded.add_permits(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct TableData<F: TableSchema, R: TableReplication> {
|
pub struct TableData<F: TableSchema, R: TableReplication> {
|
||||||
system: Arc<System>,
|
system: Arc<System>,
|
||||||
|
|
||||||
|
|
@ -29,8 +92,7 @@ pub struct TableData<F: TableSchema, R: TableReplication> {
|
||||||
pub store: db::Tree,
|
pub store: db::Tree,
|
||||||
|
|
||||||
pub(crate) merkle_tree: db::Tree,
|
pub(crate) merkle_tree: db::Tree,
|
||||||
pub(crate) merkle_todo: db::Tree,
|
pub(crate) merkle_todo: MerkleTodo,
|
||||||
pub(crate) merkle_todo_notify: Notify,
|
|
||||||
|
|
||||||
pub(crate) insert_queue: db::Tree,
|
pub(crate) insert_queue: db::Tree,
|
||||||
pub(crate) insert_queue_notify: Arc<Notify>,
|
pub(crate) insert_queue_notify: Arc<Notify>,
|
||||||
|
|
@ -38,10 +100,18 @@ pub struct TableData<F: TableSchema, R: TableReplication> {
|
||||||
pub(crate) gc_todo: db::Tree,
|
pub(crate) gc_todo: db::Tree,
|
||||||
|
|
||||||
pub(crate) metrics: TableMetrics,
|
pub(crate) metrics: TableMetrics,
|
||||||
|
|
||||||
|
pub(crate) config: MerkleBackpressureEnum,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<F: TableSchema, R: TableReplication> TableData<F, R> {
|
impl<F: TableSchema, R: TableReplication> TableData<F, R> {
|
||||||
pub fn new(system: Arc<System>, instance: F, replication: R, db: &db::Db) -> Arc<Self> {
|
pub fn new(
|
||||||
|
system: Arc<System>,
|
||||||
|
instance: F,
|
||||||
|
replication: R,
|
||||||
|
db: &db::Db,
|
||||||
|
config: &MerkleBackpressureEnum,
|
||||||
|
) -> Arc<Self> {
|
||||||
let store = db
|
let store = db
|
||||||
.open_tree(format!("{}:table", F::TABLE_NAME))
|
.open_tree(format!("{}:table", F::TABLE_NAME))
|
||||||
.expect("Unable to open DB tree");
|
.expect("Unable to open DB tree");
|
||||||
|
|
@ -49,9 +119,8 @@ impl<F: TableSchema, R: TableReplication> TableData<F, R> {
|
||||||
let merkle_tree = db
|
let merkle_tree = db
|
||||||
.open_tree(format!("{}:merkle_tree", F::TABLE_NAME))
|
.open_tree(format!("{}:merkle_tree", F::TABLE_NAME))
|
||||||
.expect("Unable to open DB Merkle tree tree");
|
.expect("Unable to open DB Merkle tree tree");
|
||||||
let merkle_todo = db
|
|
||||||
.open_tree(format!("{}:merkle_todo", F::TABLE_NAME))
|
let merkle_todo = MerkleTodo::new::<F>(db, config);
|
||||||
.expect("Unable to open DB Merkle TODO tree");
|
|
||||||
|
|
||||||
let insert_queue = db
|
let insert_queue = db
|
||||||
.open_tree(format!("{}:insert_queue", F::TABLE_NAME))
|
.open_tree(format!("{}:insert_queue", F::TABLE_NAME))
|
||||||
|
|
@ -76,11 +145,11 @@ impl<F: TableSchema, R: TableReplication> TableData<F, R> {
|
||||||
store,
|
store,
|
||||||
merkle_tree,
|
merkle_tree,
|
||||||
merkle_todo,
|
merkle_todo,
|
||||||
merkle_todo_notify: Notify::new(),
|
|
||||||
insert_queue,
|
insert_queue,
|
||||||
insert_queue_notify: Arc::new(Notify::new()),
|
insert_queue_notify: Arc::new(Notify::new()),
|
||||||
gc_todo,
|
gc_todo,
|
||||||
metrics,
|
metrics,
|
||||||
|
config: config.clone(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -167,6 +236,8 @@ impl<F: TableSchema, R: TableReplication> TableData<F, R> {
|
||||||
// - When an entry is modified or deleted, add it to the merkle updater's todo list.
|
// - When an entry is modified or deleted, add it to the merkle updater's todo list.
|
||||||
// This has to be done atomically with the modification for the merkle updater
|
// This has to be done atomically with the modification for the merkle updater
|
||||||
// to maintain consistency. The merkle updater must then be notified with todo_notify.
|
// to maintain consistency. The merkle updater must then be notified with todo_notify.
|
||||||
|
// Also to avoid overloading the merkle updater, you need to sleep a given amount of
|
||||||
|
// time to enable backpressure (ie. slow down clients).
|
||||||
// - When an entry is updated to be a tombstone, add it to the gc_todo tree
|
// - When an entry is updated to be a tombstone, add it to the gc_todo tree
|
||||||
|
|
||||||
pub(crate) fn update_many<T: Borrow<ByteBuf>>(&self, entries: &[T]) -> Result<(), Error> {
|
pub(crate) fn update_many<T: Borrow<ByteBuf>>(&self, entries: &[T]) -> Result<(), Error> {
|
||||||
|
|
@ -201,6 +272,7 @@ impl<F: TableSchema, R: TableReplication> TableData<F, R> {
|
||||||
) -> Result<Option<F::E>, Error> {
|
) -> Result<Option<F::E>, Error> {
|
||||||
let tree_key = self.tree_key(partition_key, sort_key);
|
let tree_key = self.tree_key(partition_key, sort_key);
|
||||||
|
|
||||||
|
// transaction begins
|
||||||
let changed = self.store.db().transaction(|tx| {
|
let changed = self.store.db().transaction(|tx| {
|
||||||
let (old_entry, old_bytes, new_entry) = match tx.get(&self.store, &tree_key)? {
|
let (old_entry, old_bytes, new_entry) = match tx.get(&self.store, &tree_key)? {
|
||||||
Some(old_bytes) => {
|
Some(old_bytes) => {
|
||||||
|
|
@ -238,31 +310,44 @@ impl<F: TableSchema, R: TableReplication> TableData<F, R> {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
}
|
}
|
||||||
})?;
|
})?;
|
||||||
|
// transaction ends
|
||||||
|
|
||||||
if let Some((new_entry, new_bytes_hash)) = changed {
|
// early return if nothing changed
|
||||||
self.metrics.internal_update_counter.add(1);
|
let (new_entry, new_bytes_hash) = match changed {
|
||||||
|
Some((e, b)) => (e, b),
|
||||||
let is_tombstone = new_entry.is_tombstone();
|
None => {
|
||||||
self.merkle_todo_notify.notify_one();
|
let maybe_bound = self.merkle_todo_bounded_queue.clone();
|
||||||
if is_tombstone {
|
if let Some(b) = &maybe_bound {
|
||||||
// We are only responsible for GC'ing this item if we are the
|
b.add_permits(1);
|
||||||
// "leader" of the partition, i.e. the first node in the
|
|
||||||
// set of nodes that replicates this partition.
|
|
||||||
// This avoids GC loops and does not change the termination properties
|
|
||||||
// of the GC algorithm, as in all cases GC is suspended if
|
|
||||||
// any node of the partition is unavailable.
|
|
||||||
let pk_hash = Hash::try_from(&tree_key[..32]).unwrap();
|
|
||||||
// TODO: this probably breaks when the layout changes
|
|
||||||
let nodes = self.replication.storage_nodes(&pk_hash);
|
|
||||||
if nodes.first() == Some(&self.system.id) {
|
|
||||||
GcTodoEntry::new(tree_key, new_bytes_hash).save(&self.gc_todo)?;
|
|
||||||
}
|
}
|
||||||
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
Ok(Some(new_entry))
|
// Handle GC in case of tombstone
|
||||||
} else {
|
let is_tombstone = new_entry.is_tombstone();
|
||||||
Ok(None)
|
if is_tombstone {
|
||||||
|
// We are only responsible for GC'ing this item if we are the
|
||||||
|
// "leader" of the partition, i.e. the first node in the
|
||||||
|
// set of nodes that replicates this partition.
|
||||||
|
// This avoids GC loops and does not change the termination properties
|
||||||
|
// of the GC algorithm, as in all cases GC is suspended if
|
||||||
|
// any node of the partition is unavailable.
|
||||||
|
let pk_hash = Hash::try_from(&tree_key[..32]).unwrap();
|
||||||
|
// TODO: this probably breaks when the layout changes
|
||||||
|
let nodes = self.replication.storage_nodes(&pk_hash);
|
||||||
|
if nodes.first() == Some(&self.system.id) {
|
||||||
|
GcTodoEntry::new(tree_key, new_bytes_hash).save(&self.gc_todo)?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Collect metrics
|
||||||
|
self.metrics.internal_update_counter.add(1);
|
||||||
|
|
||||||
|
// Synchronize with the Merkle Worker
|
||||||
|
self.merkle_todo_notify.notify_one(); // Wake-up it
|
||||||
|
|
||||||
|
Ok(Some(new_entry))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn delete_if_equal(self: &Arc<Self>, k: &[u8], v: &[u8]) -> Result<bool, Error> {
|
pub(crate) fn delete_if_equal(self: &Arc<Self>, k: &[u8], v: &[u8]) -> Result<bool, Error> {
|
||||||
|
|
@ -282,10 +367,16 @@ impl<F: TableSchema, R: TableReplication> TableData<F, R> {
|
||||||
_ => Ok(false),
|
_ => Ok(false),
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
if removed {
|
if !removed {
|
||||||
self.metrics.internal_delete_counter.add(1);
|
let maybe_bound = self.merkle_todo_bounded_queue.clone();
|
||||||
self.merkle_todo_notify.notify_one();
|
if let Some(b) = &maybe_bound {
|
||||||
|
b.add_permits(1);
|
||||||
|
}
|
||||||
|
return Ok(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
self.metrics.internal_delete_counter.add(1);
|
||||||
|
self.merkle_todo_notify.notify_one();
|
||||||
Ok(removed)
|
Ok(removed)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -310,11 +401,18 @@ impl<F: TableSchema, R: TableReplication> TableData<F, R> {
|
||||||
_ => Ok(false),
|
_ => Ok(false),
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
if removed {
|
if !removed {
|
||||||
self.metrics.internal_delete_counter.add(1);
|
let maybe_bound = self.merkle_todo_bounded_queue.clone();
|
||||||
self.merkle_todo_notify.notify_one();
|
if let Some(b) = &maybe_bound {
|
||||||
|
b.add_permits(1);
|
||||||
|
}
|
||||||
|
return Ok(false);
|
||||||
}
|
}
|
||||||
Ok(removed)
|
|
||||||
|
self.metrics.internal_delete_counter.add(1);
|
||||||
|
self.merkle_todo_notify.notify_one();
|
||||||
|
|
||||||
|
Ok(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---- Insert queue functions ----
|
// ---- Insert queue functions ----
|
||||||
|
|
@ -367,7 +465,7 @@ impl<F: TableSchema, R: TableReplication> TableData<F, R> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn gc_todo_approximate_len(&self) -> Result<usize, Error> {
|
pub fn gc_todo_len(&self) -> Result<usize, Error> {
|
||||||
Ok(self.gc_todo.approximate_len()?)
|
Ok(self.gc_todo.len()?)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -262,7 +262,8 @@ impl<F: TableSchema, R: TableReplication> TableGc<F, R> {
|
||||||
// GC has been successful for all of these entries.
|
// GC has been successful for all of these entries.
|
||||||
// We now remove them all from our local table and from the GC todo list.
|
// We now remove them all from our local table and from the GC todo list.
|
||||||
for item in items {
|
for item in items {
|
||||||
self.data
|
let _is_removed = self
|
||||||
|
.data
|
||||||
.delete_if_equal_hash(&item.key[..], item.value_hash)
|
.delete_if_equal_hash(&item.key[..], item.value_hash)
|
||||||
.err_context("GC: local delete tombstones")?;
|
.err_context("GC: local delete tombstones")?;
|
||||||
item.remove_if_equal(&self.data.gc_todo)
|
item.remove_if_equal(&self.data.gc_todo)
|
||||||
|
|
@ -275,14 +276,21 @@ impl<F: TableSchema, R: TableReplication> TableGc<F, R> {
|
||||||
|
|
||||||
impl<F: TableSchema, R: TableReplication> EndpointHandler<GcRpc> for TableGc<F, R> {
|
impl<F: TableSchema, R: TableReplication> EndpointHandler<GcRpc> for TableGc<F, R> {
|
||||||
async fn handle(self: &Arc<Self>, message: &GcRpc, _from: NodeID) -> Result<GcRpc, Error> {
|
async fn handle(self: &Arc<Self>, message: &GcRpc, _from: NodeID) -> Result<GcRpc, Error> {
|
||||||
|
let maybe_bounded = self.data.merkle_todo_bounded_queue.clone();
|
||||||
match message {
|
match message {
|
||||||
GcRpc::Update(items) => {
|
GcRpc::Update(items) => {
|
||||||
|
if let Some(b) = maybe_bounded {
|
||||||
|
b.acquire_many(items.len() as u32).await.unwrap().forget();
|
||||||
|
}
|
||||||
self.data.update_many(items)?;
|
self.data.update_many(items)?;
|
||||||
Ok(GcRpc::Ok)
|
Ok(GcRpc::Ok)
|
||||||
}
|
}
|
||||||
GcRpc::DeleteIfEqualHash(items) => {
|
GcRpc::DeleteIfEqualHash(items) => {
|
||||||
|
if let Some(b) = maybe_bounded {
|
||||||
|
b.acquire_many(items.len() as u32).await.unwrap().forget();
|
||||||
|
}
|
||||||
for (key, vhash) in items.iter() {
|
for (key, vhash) in items.iter() {
|
||||||
self.data.delete_if_equal_hash(&key[..], *vhash)?;
|
let _is_removed = self.data.delete_if_equal_hash(&key[..], *vhash)?;
|
||||||
}
|
}
|
||||||
Ok(GcRpc::Ok)
|
Ok(GcRpc::Ok)
|
||||||
}
|
}
|
||||||
|
|
@ -313,7 +321,7 @@ impl<F: TableSchema, R: TableReplication> Worker for GcWorker<F, R> {
|
||||||
|
|
||||||
fn status(&self) -> WorkerStatus {
|
fn status(&self) -> WorkerStatus {
|
||||||
WorkerStatus {
|
WorkerStatus {
|
||||||
queue_length: Some(self.gc.data.gc_todo_approximate_len().unwrap_or(0) as u64),
|
queue_length: Some(self.gc.data.gc_todo_len().unwrap_or(0) as u64),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -329,7 +337,6 @@ impl<F: TableSchema, R: TableReplication> Worker for GcWorker<F, R> {
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn wait_for_work(&mut self) -> WorkerState {
|
async fn wait_for_work(&mut self) -> WorkerState {
|
||||||
tokio::time::sleep(self.wait_delay).await;
|
|
||||||
WorkerState::Busy
|
WorkerState::Busy
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ use tokio::sync::watch;
|
||||||
use garage_db as db;
|
use garage_db as db;
|
||||||
|
|
||||||
use garage_util::background::*;
|
use garage_util::background::*;
|
||||||
|
use garage_util::config::MerkleBackpressureEnum;
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
use garage_util::encode::{nonversioned_decode, nonversioned_encode};
|
use garage_util::encode::{nonversioned_decode, nonversioned_encode};
|
||||||
use garage_util::error::Error;
|
use garage_util::error::Error;
|
||||||
|
|
@ -70,6 +71,15 @@ impl<F: TableSchema, R: TableReplication> MerkleUpdater<F, R> {
|
||||||
pub(crate) fn new(data: Arc<TableData<F, R>>) -> Arc<Self> {
|
pub(crate) fn new(data: Arc<TableData<F, R>>) -> Arc<Self> {
|
||||||
let empty_node_hash = blake2sum(&nonversioned_encode(&MerkleNode::Empty).unwrap()[..]);
|
let empty_node_hash = blake2sum(&nonversioned_encode(&MerkleNode::Empty).unwrap()[..]);
|
||||||
|
|
||||||
|
// @FIXME: move in worker
|
||||||
|
match &data.config {
|
||||||
|
MerkleBackpressureEnum::None => info!("Merkle Backpressure is not activated"),
|
||||||
|
MerkleBackpressureEnum::FixedQueue(v) => info!(
|
||||||
|
"Merkle backpressure with a fixed queue size (qlen={}) is activated.",
|
||||||
|
v.max_queue_size
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
Arc::new(Self {
|
Arc::new(Self {
|
||||||
data,
|
data,
|
||||||
empty_node_hash,
|
empty_node_hash,
|
||||||
|
|
@ -125,6 +135,11 @@ impl<F: TableSchema, R: TableReplication> MerkleUpdater<F, R> {
|
||||||
k
|
k
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let maybe_bound = self.data.merkle_todo_bounded_queue.clone();
|
||||||
|
if let Some(b) = &maybe_bound {
|
||||||
|
b.add_permits(1);
|
||||||
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -287,12 +302,12 @@ impl<F: TableSchema, R: TableReplication> MerkleUpdater<F, R> {
|
||||||
MerkleNode::decode_opt(&ent)
|
MerkleNode::decode_opt(&ent)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn merkle_tree_approximate_len(&self) -> Result<usize, Error> {
|
pub fn merkle_tree_len(&self) -> Result<usize, Error> {
|
||||||
Ok(self.data.merkle_tree.approximate_len()?)
|
Ok(self.data.merkle_tree.len()?)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn todo_approximate_len(&self) -> Result<usize, Error> {
|
pub fn todo_len(&self) -> Result<usize, Error> {
|
||||||
Ok(self.data.merkle_todo.approximate_len()?)
|
Ok(self.data.merkle_todo.len()?)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -306,7 +321,7 @@ impl<F: TableSchema, R: TableReplication> Worker for MerkleWorker<F, R> {
|
||||||
|
|
||||||
fn status(&self) -> WorkerStatus {
|
fn status(&self) -> WorkerStatus {
|
||||||
WorkerStatus {
|
WorkerStatus {
|
||||||
queue_length: Some(self.0.todo_approximate_len().unwrap_or(0) as u64),
|
queue_length: Some(self.0.todo_len().unwrap_or(0) as u64),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,16 @@
|
||||||
use opentelemetry::{global, metrics::*, KeyValue};
|
use opentelemetry::{global, metrics::*, KeyValue};
|
||||||
|
use std::convert::TryInto;
|
||||||
|
|
||||||
use garage_db as db;
|
use garage_db as db;
|
||||||
|
|
||||||
|
use crate::data::MerkleTodo;
|
||||||
|
|
||||||
/// TableMetrics reference all counter used for metrics
|
/// TableMetrics reference all counter used for metrics
|
||||||
pub struct TableMetrics {
|
pub struct TableMetrics {
|
||||||
pub(crate) _table_size: ValueObserver<u64>,
|
pub(crate) _table_size: ValueObserver<u64>,
|
||||||
pub(crate) _merkle_tree_size: ValueObserver<u64>,
|
pub(crate) _merkle_tree_size: ValueObserver<u64>,
|
||||||
pub(crate) _merkle_todo_len: ValueObserver<u64>,
|
pub(crate) _merkle_todo_len: ValueObserver<u64>,
|
||||||
|
pub(crate) _merkle_todo_bounded_queue_free: ValueObserver<u64>,
|
||||||
pub(crate) _gc_todo_len: ValueObserver<u64>,
|
pub(crate) _gc_todo_len: ValueObserver<u64>,
|
||||||
|
|
||||||
pub(crate) get_request_counter: BoundCounter<u64>,
|
pub(crate) get_request_counter: BoundCounter<u64>,
|
||||||
|
|
@ -25,7 +29,7 @@ impl TableMetrics {
|
||||||
table_name: &'static str,
|
table_name: &'static str,
|
||||||
store: db::Tree,
|
store: db::Tree,
|
||||||
merkle_tree: db::Tree,
|
merkle_tree: db::Tree,
|
||||||
merkle_todo: db::Tree,
|
merkle_todo: MerkleTodo,
|
||||||
gc_todo: db::Tree,
|
gc_todo: db::Tree,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let meter = global::meter(table_name);
|
let meter = global::meter(table_name);
|
||||||
|
|
@ -34,7 +38,7 @@ impl TableMetrics {
|
||||||
.u64_value_observer(
|
.u64_value_observer(
|
||||||
"table.size",
|
"table.size",
|
||||||
move |observer| {
|
move |observer| {
|
||||||
if let Ok(value) = store.approximate_len() {
|
if let Ok(value) = store.len() {
|
||||||
observer.observe(
|
observer.observe(
|
||||||
value as u64,
|
value as u64,
|
||||||
&[KeyValue::new("table_name", table_name)],
|
&[KeyValue::new("table_name", table_name)],
|
||||||
|
|
@ -48,7 +52,7 @@ impl TableMetrics {
|
||||||
.u64_value_observer(
|
.u64_value_observer(
|
||||||
"table.merkle_tree_size",
|
"table.merkle_tree_size",
|
||||||
move |observer| {
|
move |observer| {
|
||||||
if let Ok(value) = merkle_tree.approximate_len() {
|
if let Ok(value) = merkle_tree.len() {
|
||||||
observer.observe(
|
observer.observe(
|
||||||
value as u64,
|
value as u64,
|
||||||
&[KeyValue::new("table_name", table_name)],
|
&[KeyValue::new("table_name", table_name)],
|
||||||
|
|
@ -62,7 +66,7 @@ impl TableMetrics {
|
||||||
.u64_value_observer(
|
.u64_value_observer(
|
||||||
"table.merkle_updater_todo_queue_length",
|
"table.merkle_updater_todo_queue_length",
|
||||||
move |observer| {
|
move |observer| {
|
||||||
if let Ok(v) = merkle_todo.approximate_len() {
|
if let Ok(v) = merkle_todo.len() {
|
||||||
observer.observe(
|
observer.observe(
|
||||||
v as u64,
|
v as u64,
|
||||||
&[KeyValue::new("table_name", table_name)],
|
&[KeyValue::new("table_name", table_name)],
|
||||||
|
|
@ -72,11 +76,25 @@ impl TableMetrics {
|
||||||
)
|
)
|
||||||
.with_description("Merkle tree updater TODO queue length")
|
.with_description("Merkle tree updater TODO queue length")
|
||||||
.init(),
|
.init(),
|
||||||
|
_merkle_todo_bounded_queue_free: meter
|
||||||
|
.u64_value_observer(
|
||||||
|
"table.merkle_todo_bounded_queue_free",
|
||||||
|
move |observer| {
|
||||||
|
let maybe_bounded = merkle_todo_bounded_queue.clone();
|
||||||
|
let free: u64 = match &maybe_bounded {
|
||||||
|
Some(v) => v.available_permits().try_into().unwrap(),
|
||||||
|
None => 0,
|
||||||
|
};
|
||||||
|
observer.observe(free, &[KeyValue::new("table_name", table_name)])
|
||||||
|
}
|
||||||
|
)
|
||||||
|
.with_description("Merkle TODO queue free slots")
|
||||||
|
.init(),
|
||||||
_gc_todo_len: meter
|
_gc_todo_len: meter
|
||||||
.u64_value_observer(
|
.u64_value_observer(
|
||||||
"table.gc_todo_queue_length",
|
"table.gc_todo_queue_length",
|
||||||
move |observer| {
|
move |observer| {
|
||||||
if let Ok(value) = gc_todo.approximate_len() {
|
if let Ok(value) = gc_todo.len() {
|
||||||
observer.observe(
|
observer.observe(
|
||||||
value as u64,
|
value as u64,
|
||||||
&[KeyValue::new("table_name", table_name)],
|
&[KeyValue::new("table_name", table_name)],
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,7 @@ impl<F: TableSchema, R: TableReplication> Worker for InsertQueueWorker<F, R> {
|
||||||
|
|
||||||
fn status(&self) -> WorkerStatus {
|
fn status(&self) -> WorkerStatus {
|
||||||
WorkerStatus {
|
WorkerStatus {
|
||||||
queue_length: Some(self.0.data.insert_queue.approximate_len().unwrap_or(0) as u64),
|
queue_length: Some(self.0.data.insert_queue.len().unwrap_or(0) as u64),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue