Compare commits

..

9 commits

Author SHA1 Message Date
Alex Auvolat
b6b18427a5 use optimization level 3 and thin LTO for release builds (#1405)
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1405
Co-authored-by: Alex Auvolat <lx@deuxfleurs.fr>
Co-committed-by: Alex Auvolat <lx@deuxfleurs.fr>
2026-04-16 08:47:02 +00:00
Gauthier Zirnhelt
9987166b2b Fix the LifecycleWorker being uncooperative (#1396)
## Summary

This PR ensures that the `LifecycleWorker` yields at least once to the Tokio scheduler in between each batch of 100 objects.

## Problem being solved

I'm administrating a Garage cluster which has been experiencing timeouts on all endpoints while the lifecycle worker is running at midnight UTC: `Ping timeout` error messages and even requests eventually failing due to `Could not reach quorum ...`.

I have found that this happens while the lifecycle worker is working on a big bucket (containing millions of objects) with a lifecycle rule that applies to very few objects.
The `process_object()` function does not hit any `await`:
- `last_bucket` is always the same, so the `bucket_table` is not read asynchronously
- no transaction is made on the `object_table` because my lifecycle rule (almost) never applies to any object

The first commit in this PR adds an executable which reproduces the problem that I've been experiencing in a self-contained way: the lifecycle worker starves the Tokio scheduler so much that no other task is able to run (or very rarely).
To run it : `cargo run -p garage_model --bin lifecycle-starvation-test`.
This commit can be dropped post-review, as it's only useful to demonstrate the starvation.

The error messages completely stopped after adding the extra yield to the nodes of my cluster.
The duration of the lifecycle worker task does not appear to have changed at all from what I can see (looking at the timestamps produced either by the self-contained binary or by each of my nodes with the `Lifecycle worker finished` message).

## Note

Another potential fix would have been to force the `WorkerProcessor` to yield before re-enqueuing a busy task, but this would have affected all Garage workers even though it's only the `LifecycleWorker` being uncooperative.

Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1396
Reviewed-by: Alex <lx@deuxfleurs.fr>
Co-authored-by: Gauthier Zirnhelt <gauthier.zirnhelt@insimo.fr>
Co-committed-by: Gauthier Zirnhelt <gauthier.zirnhelt@insimo.fr>
2026-04-15 09:56:24 +00:00
trinity-1686a
b72b090a09 fix silent write errors (#1358)
fix #1355

some write errors are not reported when calling write_all. That's notably the case of ENOSPC on small buffers (1MiB).
on ext4, the error is caught when calling flush(). This is hopefully the case on most local filesystems, though afaik this assumption doesn't hold for NFS

Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1358
Co-authored-by: trinity-1686a <trinity@deuxfleurs.fr>
Co-committed-by: trinity-1686a <trinity@deuxfleurs.fr>
2026-02-21 07:21:24 +00:00
Armael
8551aefed4 Fix: correctly parse CORS website configuration with no rules (#1320)
When sending a website config with an empty list of CORS rules, garage currently incorrectly refuses it with error message "Invalid XML: missing field `CORSRule`".
This fixes the issue by following the documentation of quick-xml related to serde field parameters for this specific scenario: https://docs.rs/quick-xml/latest/quick_xml/de/#sequences-xsall-and-xssequence-xml-schema-types .

(I've based this PR on main-v1 because we want it for deuxfleurs' deployment.)

Co-authored-by: Armaël Guéneau <armael.gueneau@ens-lyon.org>
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1320
Co-authored-by: Armael <armael@noreply.localhost>
Co-committed-by: Armael <armael@noreply.localhost>
2026-02-07 13:11:20 +00:00
Alex Auvolat
47bf5d9fb0 bump version to v1.3.1 2026-01-24 13:01:27 +01:00
Alex Auvolat
5df37dae5e update cargo dependencies in main-v1 (#1299)
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1299
Co-authored-by: Alex Auvolat <lx@deuxfleurs.fr>
Co-committed-by: Alex Auvolat <lx@deuxfleurs.fr>
2026-01-24 11:59:01 +00:00
Alex
44af0bdab3 Merge pull request 'Backport #1283 and #1290 to main-v1' (#1297) from backports-v1 into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1297
2026-01-24 11:34:28 +00:00
rmoff
a7d6620e18 Fix typo in error message 2026-01-24 12:21:45 +01:00
Joe Anderson
8eb12755e4 Allow bucket to be missing from presigned post params 2026-01-24 12:21:25 +01:00
333 changed files with 8874 additions and 41392 deletions

View file

@ -2,14 +2,13 @@ labels:
nix: "enabled" nix: "enabled"
when: when:
- event: event:
- tag - push
- pull_request - tag
- deployment - pull_request
- cron - deployment
- manual - cron
- event: push - manual
branch: main-*
steps: steps:
- name: check formatting - name: check formatting
@ -17,16 +16,6 @@ steps:
commands: commands:
- nix-build -j4 --attr flakePackages.fmt - nix-build -j4 --attr flakePackages.fmt
- name: check typos
image: nixpkgs/nix:nixos-24.05
commands:
- nix-shell --attr ci --run typos
- name: check lints with clippy
image: nixpkgs/nix:nixos-24.05
commands:
- nix-build -j4 --attr flakePackages.clippy
- name: build - name: build
image: nixpkgs/nix:nixos-24.05 image: nixpkgs/nix:nixos-24.05
commands: commands:

View file

@ -38,15 +38,7 @@ steps:
- matrix: - matrix:
ARCH: i386 ARCH: i386
- name: upgrade tests from v1.0.0 - name: upgrade tests
image: nixpkgs/nix:nixos-24.05
commands:
- nix-shell --attr ci --run "./script/test-upgrade.sh v1.0.0 x86_64-unknown-linux-musl" || (cat /tmp/garage.log; false)
when:
- matrix:
ARCH: amd64
- name: upgrade tests from v0.8.4
image: nixpkgs/nix:nixos-24.05 image: nixpkgs/nix:nixos-24.05
commands: commands:
- nix-shell --attr ci --run "./script/test-upgrade.sh v0.8.4 x86_64-unknown-linux-musl" || (cat /tmp/garage.log; false) - nix-shell --attr ci --run "./script/test-upgrade.sh v0.8.4 x86_64-unknown-linux-musl" || (cat /tmp/garage.log; false)

View file

@ -1,231 +0,0 @@
# Contributing to Garage
## Policy on AI
To ensure the quality of the codebase and documentation, the use of AI,
including LLMs and coding agents, is strictly restricted in the following way:
- AI **must not** be used to write documentation
- **Do not** use AI to write bug reports, commit descriptions and pull request
messages
- **Do not** use AI agents to make contributions to Garage, all contributions
must be led by a human that knows what they are doing at all times
- AI **may** be used for some tedious code generation tasks, limited to very
mechanical translations from API docs or boilerplate writing. The code
generated must be so simple as to make it clear that it cannot be covered by
copyright.
You are free to make use of AI privately to explore the codebase and solve
conceptual problems, but please refrain from copying the output from an LLM
anywhere in your code or on the issue tracker, or from letting an agent edit
the codebase directly.
## Asking questions
Read the documentation before asking questions.
Do not use the issue tracker to ask questions about Garage.
Questions asked on the issue tracker will be closed.
Ask questions on the Matrix channel `#garage:deuxfleurs.fr` so that any
community member can see your question and help you out.
If you need in-depth support from the Garage developers specifically, write to
`garagehq@deuxfleurs.fr`. Even if you do so, we do not commit to giving you an
answer.
## Reporting bugs
When writing a bug report, use this checklist:
- For bugs that can be reproduced:
- confirm that you are using the latest version of Garage and that the bug still exists in this version
- set the log level to debug using the `RUST_LOG=garage=debug` environment variable and reproduce the bug to get more verbose logs
- Check whether there is already an open issue in the bug tracker. If so, your bug report is still valuable but please add it as a comment to the existing issue instead of opening a new one.
- Collect as much information as possible:
- logs of the Garage daemon at the time the issue happened, including logs that show what was happening before the issue occurred
- the output of `garage status`
- the output of `garage stats -a`
- the output of `garage layout history`
- Write a detailed bug report, including:
- a description of your cluster (number of nodes, hardware, operating system, networking, etc)
- a detailed description of what you did that led to the issue, including any code or command line that invoked a Garage API
- what you were expecting
- what actually happened, and how that's different from what you expected
- the information collected previously
- if possible, simple steps to help the developers reproduce the issue locally
Bug reports that are imprecise or otherwise unactionable will be closed.
## Suggesting new features
Garage can be improved in many ways, but just suggesting a new feature does not mean we will implement it.
Feature requests that may lead to an actual implementation are feature requests that:
- are precise and actionable, i.e. include a precise description of the expected behavior and any necessary architectural details required for the implementation
- are motivated by actual need from a variety of users
Moreover, a certain number of features are defined as out-of-scope for Garage, including but not limited to:
- extensions to the S3 API that are not present on AWS
- features that require the implementation of a consensus algorithm
- more generally, features that are incompatible with the architecture of Garage and its goal of staying simple
Only feature requests in one of the following category may stay open in the issue tracker:
- features that the Garage team wants to work on
- features that are being actively worked on by an external contributor who is clearly identified
- features that are easy to implement and could be an easy task for a new contributor that wants to get to know the codebase
All other feature requests will be closed after a few months of inactivity, so as to keep the number of open issues to a manageable level.
Feature requests that are clearly out of scope will be closed directly.
## Improving the documentation
An easy way to contribute to Garage which also adds a lot of value is to
improve the documentation. Make sure to write in clear technical English, and
write unambiguously. Documentation contributions are very appreciated if they
are well-written.
## For developers
We welcome code contributions to Garage that adhere to our standards for quality:
- Changes should be reviewed from a functional perspective to ensure that they work well with the existing codebase and do not introduce bugs or subtle issues.
- You must have tested your contribution to make sure that it does what it says. The amount of testing required is proportional to the complexity of the change introduced.
- Any new feature must be properly documented following existing practices (see below).
- Unit tests should be included when relevant.
- Contributions should pass basic lints for syntactic quality (`cargo fmt`, `cargo clippy`, `typos`).
- Contributions should pass our CI test suite.
- No user-facing breaking changes may be introduced between major releases.
- No internal data model change may be introduced between major releases, to
ensure that Garage daemons with different minor/patch versions numbers can
work together in a cluster. For major releases, a proper migration path
should be implemented and tested thoroughly.
Please follow up on your work when changes are requested, to avoid stale PRs.
Do not take it personally if a Garage developer pushes directly to your branch
to modify your contribution, as this might be necessary to get it merged
faster.
### Properly documenting your contribution
#### Configuration options
New configuration options should be documented in
`doc/book/reference-manual/configuration.md`. The documentation for a
configuration option should be exhaustive. For instance, for choice options all
choices should be listed explicitly with a precise description of their
meaning.
In terms of syntax, all configuration options should appear in three places:
- in the example at the top, with an example value
- in the index of all configuration options which is sorted by alphabetical order
- in its dedicated subsection with full reference text
#### CLI commands and command flags
CLI commands are self-documented using the doc comments in the codebase.
Make sure to write clear and precise comments for all options you are adding.
#### S3 features
If you implement new S3 features, make sure to update the compatibility matrix in `doc/book/reference-manual/s3-compatibility.md`.
#### Admin API
The admin API has an OpenAPI specification that is automatically generated
using Utoipa, from a description of each endpoint that is given in
`src/api/admin/openapi.rs` and a description of data structure schemas in
`src/api/admin/api.rs`. The code in `openapi.rs` is only used to generate the
OpenAPI specification document and not for the actual implementation in Garage,
whereas structures defined in `api.rs` are also used for the implementation of
API calls. Make sure to write good doc comments for all of these items so that
the OpenAPI specification will be precise and accurate.
An up-to-date version of the OpenAPI specification document should be kept in
the repository in `doc/api/garage-admin-v2.json`. When you are making changes
to the admin API, update this document with the following command:
```
cargo run -- admin-api-schema > doc/api/garage-admin-v2.json
```
## Garage team organization
Alex (handle `lx`) is the lead developer and is responsible for ensuring the
correctness of Garage and stability between version upgrades.
The other maintainers are Trinity (handle `trinity-1686a`), Quentin (handle `quentin`) and Maximilien (handle `halfa`).
Maximilien is responsible for coordinating effort on the Kubernetes integration / Helm chart.
## Pull request merging criteria
The following PRs should only be merged after review and approval from Alex:
- PRs that introduce architectural changes, such as changes in the data model
or change in the coordination protocols between nodes
- PRs that introduce changes on the format of data structures used for
persistent disk storage and internal cluster communication (RPC)
- PRs that are suspected of introducing some kind of breakage or unexpected
behavior due to their complexity
PRs that introduce breaking change for users but don't fall in one of the
previous category should be discussed between maintainers to evaluate the
impact on users when upgrading. Alex's approval is not required to merge them
as long as they are clearly identified as breaking in the PR title, and are
properly merged in the branch for the next major version and not in the current
main branch.
All other PRs can be merged by any maintainer on their own, once they are
confident that the quality standards defined in this document are respected
before merging.
## Merging strategy
When merging PRs, maintainers should ensure that a Git commit is created by
Forgejo that records the PR number, its title and its text in the commit
message. If a PR is fixing an issue, make sure that the issue number is
included in the PR title as well. This is to ensure that when releasing a new
version of Garage, the changelog in the release notes can be properly
constructed by reading the Git log since the last release.
We also want to keep the history "almost linear" to facilitate the use of `git
bisect` if it ever were necessary. This leaves the following two merging
strategies:
- For PRs that consist of many commits that should stay independent, the
"rebase and create merge commit" strategy should be used. The merge commit is
created automatically by Forgejo and saves the PR's number, title and text in
the commit message.
- For PRs that consist of only one commit, or a small number of commits that can
be merged, the "create squash commit" strategy should be used. This way a
single commit will be created by Forgejo which also saves the PR's number,
title and text in the commit message.
When cherry-picking commits from one branch to the other, a simple fast-forward
merging strategy can be used if the commit message already references a PR
number.

2298
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -16,7 +16,6 @@ members = [
"src/garage", "src/garage",
"src/k2v-client", "src/k2v-client",
"src/format-table", "src/format-table",
"fuzz",
] ]
default-members = ["src/garage"] default-members = ["src/garage"]
@ -25,173 +24,130 @@ default-members = ["src/garage"]
# Internal Garage crates # Internal Garage crates
format_table = { version = "0.1.1", path = "src/format-table" } format_table = { version = "0.1.1", path = "src/format-table" }
garage_api_common = { version = "2.3.0", path = "src/api/common" } garage_api_common = { version = "1.3.1", path = "src/api/common" }
garage_api_admin = { version = "2.3.0", path = "src/api/admin" } garage_api_admin = { version = "1.3.1", path = "src/api/admin" }
garage_api_s3 = { version = "2.3.0", path = "src/api/s3" } garage_api_s3 = { version = "1.3.1", path = "src/api/s3" }
garage_api_k2v = { version = "2.3.0", path = "src/api/k2v" } garage_api_k2v = { version = "1.3.1", path = "src/api/k2v" }
garage_block = { version = "2.3.0", path = "src/block" } garage_block = { version = "1.3.1", path = "src/block" }
garage_db = { version = "2.3.0", path = "src/db", default-features = false } garage_db = { version = "1.3.1", path = "src/db", default-features = false }
garage_model = { version = "2.3.0", path = "src/model", default-features = false } garage_model = { version = "1.3.1", path = "src/model", default-features = false }
garage_net = { version = "2.3.0", path = "src/net" } garage_net = { version = "1.3.1", path = "src/net" }
garage_rpc = { version = "2.3.0", path = "src/rpc" } garage_rpc = { version = "1.3.1", path = "src/rpc" }
garage_table = { version = "2.3.0", path = "src/table" } garage_table = { version = "1.3.1", path = "src/table" }
garage_util = { version = "2.3.0", path = "src/util" } garage_util = { version = "1.3.1", path = "src/util" }
garage_web = { version = "2.3.0", path = "src/web" } garage_web = { version = "1.3.1", path = "src/web" }
k2v-client = { version = "0.0.4", path = "src/k2v-client" } k2v-client = { version = "0.0.4", path = "src/k2v-client" }
# External crates from crates.io # External crates from crates.io
arc-swap = "1.8" arc-swap = "1.0"
arbitrary = { version = "1.4.2"}
argon2 = "0.5" argon2 = "0.5"
async-trait = "0.1" async-trait = "0.1.7"
backtrace = "0.3" backtrace = "0.3"
base64 = "0.22" base64 = "0.21"
blake2 = "0.10" blake2 = "0.10"
bytes = "1.11" bytes = "1.0"
bytesize = "2.3" bytesize = "1.1"
cfg-if = "1.0" cfg-if = "1.0"
chrono = { version = "0.4", features = ["serde"] } chrono = "0.4"
crc-fast = "1.9" crc32fast = "1.4"
crc32c = "0.6"
crypto-common = "0.1" crypto-common = "0.1"
gethostname = "1.1" gethostname = "0.4"
git-version = "0.3" git-version = "0.3.4"
hex = "0.4" hex = "0.4"
hexdump = "0.1" hexdump = "0.1"
hmac = "0.12" hmac = "0.12"
itertools = "0.14" itertools = "0.12"
ipnet = "2.11" ipnet = "2.9.0"
lazy_static = "1.5" lazy_static = "1.4"
libfuzzer-sys = "0.4"
md-5 = "0.10" md-5 = "0.10"
mktemp = "0.5" mktemp = "0.5"
nix = { version = "0.31", default-features = false, features = ["fs"] } nix = { version = "0.29", default-features = false, features = ["fs"] }
nom = "8.0" nom = "7.1"
parking_lot = "0.12" parking_lot = "0.12"
parse_duration = "2.1" parse_duration = "2.1"
paste = "1.0" pin-project = "1.0.12"
pin-project = "1.1" pnet_datalink = "0.34"
pnet_datalink = "0.35" rand = "0.8"
rand = "0.9"
sha1 = "0.10" sha1 = "0.10"
sha2 = "0.10" sha2 = "0.10"
timeago = { version = "0.5", default-features = false } timeago = { version = "0.4", default-features = false }
xxhash-rust = { version = "0.8", default-features = false, features = ["xxh3"] } xxhash-rust = { version = "0.8", default-features = false, features = ["xxh3"] }
aes-gcm = { version = "0.10", features = ["aes", "stream"] } aes-gcm = { version = "0.10", features = ["aes", "stream"] }
sodiumoxide = { version = "0.2.5-0", package = "kuska-sodiumoxide" } sodiumoxide = { version = "0.2.5-0", package = "kuska-sodiumoxide" }
kuska-handshake = { version = "0.2.0", features = ["default", "async_std"] } kuska-handshake = { version = "0.2.0", features = ["default", "async_std"] }
clap = { version = "4.5", features = ["derive", "env"] } clap = { version = "4.1", features = ["derive", "env"] }
pretty_env_logger = "0.5" pretty_env_logger = "0.5"
structopt = { version = "0.3", default-features = false } structopt = { version = "0.3", default-features = false }
syslog-tracing = "0.3" syslog-tracing = "0.3"
tracing = "0.1" tracing = "0.1"
tracing-journald = "0.3" tracing-journald = "0.3.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] } tracing-subscriber = { version = "0.3", features = ["env-filter"] }
heed = { version = "0.22", default-features = false, features = [] } heed = { version = "0.11", default-features = false, features = ["lmdb"] }
rusqlite = { version = "0.38", features = ["fallible_uint"] } rusqlite = "0.37"
r2d2 = "0.8" r2d2 = "0.8"
r2d2_sqlite = "0.32" r2d2_sqlite = "0.31"
fjall = "2.11" fjall = "2.4"
async-compression = { version = "0.4", features = ["tokio", "zstd"] } async-compression = { version = "0.4", features = ["tokio", "zstd"] }
zstd = { version = "0.13", default-features = false } zstd = { version = "0.13", default-features = false }
quick-xml = { version = "0.39", features = ["serialize"] } quick-xml = { version = "0.26", features = [ "serialize" ] }
rmp-serde = "1.3" rmp-serde = "1.1.2"
serde = { version = "1.0", default-features = false, features = ["derive", "rc"] } serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
serde_bytes = "0.11" serde_bytes = "0.11"
serde_json = "1.0" serde_json = "1.0"
toml = { version = "0.9", default-features = false, features = ["parse", "serde"] } toml = { version = "0.8", default-features = false, features = ["parse"] }
utoipa = { version = "5.4", features = ["chrono"] }
# newer version requires rust edition 2021 # newer version requires rust edition 2021
k8s-openapi = { version = "0.27", features = ["v1_35"] } k8s-openapi = { version = "0.21", features = ["v1_24"] }
kube = { version = "3.0", default-features = false, features = [ kube = { version = "0.88", default-features = false, features = ["runtime", "derive", "client", "rustls-tls"] }
"runtime", schemars = "0.8"
"derive", reqwest = { version = "0.11", default-features = false, features = ["rustls-tls-manual-roots", "json"] }
"client",
"rustls-tls",
] }
schemars = "1.2"
reqwest = { version = "0.13", default-features = false, features = [
"rustls-no-provider",
"json",
] }
form_urlencoded = "1.2" form_urlencoded = "1.0.0"
http = "1.4" http = "1.0"
httpdate = "1.0" httpdate = "1.0"
http-range = "0.1" http-range = "0.1"
http-body-util = "0.1" http-body-util = "0.1"
hyper = { version = "1.8", default-features = false } hyper = { version = "1.0", default-features = false }
hyper-util = { version = "0.1", features = ["full"] } hyper-util = { version = "0.1", features = [ "full" ] }
multer = "3.1" multer = "3.0"
percent-encoding = "2.3" percent-encoding = "2.2"
roxmltree = "0.21" roxmltree = "0.19"
url = "2.5" url = "2.3"
futures = "0.3" futures = "0.3"
futures-util = "0.3" futures-util = "0.3"
tokio = { version = "1.49", default-features = false, features = [ tokio = { version = "1.0", default-features = false, features = ["net", "rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
"rt",
"rt-multi-thread",
"io-util",
"net",
"time",
"macros",
"sync",
"signal",
"fs",
] }
tokio-util = { version = "0.7", features = ["compat", "io"] } tokio-util = { version = "0.7", features = ["compat", "io"] }
tokio-stream = { version = "0.1", features = ["net"] } tokio-stream = { version = "0.1", features = ["net"] }
socket2 = { version = "0.6", features = ["all"] }
opentelemetry = { version = "0.17", features = ["rt-tokio", "metrics", "trace"] } opentelemetry = { version = "0.17", features = [ "rt-tokio", "metrics", "trace" ] }
opentelemetry-prometheus = "0.10" opentelemetry-prometheus = "0.10"
opentelemetry-otlp = "0.10" opentelemetry-otlp = "0.10"
opentelemetry-contrib = "0.9" opentelemetry-contrib = "0.9"
prometheus = "0.13" prometheus = "0.13"
# used by the k2v-client crate only # used by the k2v-client crate only
aws-sigv4 = { version = "1.3", default-features = false } aws-sigv4 = { version = "1.1", default-features = false }
hyper-rustls = { version = "0.27", default-features = false, features = [ hyper-rustls = { version = "0.26", default-features = false, features = ["http1", "http2", "ring", "rustls-native-certs"] }
"http1",
"http2",
"ring",
"rustls-native-certs",
] }
log = "0.4" log = "0.4"
thiserror = "2.0" thiserror = "2.0"
# ---- used only as build / dev dependencies ---- # ---- used only as build / dev dependencies ----
assert-json-diff = "2.0" assert-json-diff = "2.0"
rustc_version = "0.4" rustc_version = "0.4.0"
static_init = "1.0" static_init = "1.0"
aws-smithy-runtime = { version = "1.9", default-features = false, features = [ aws-smithy-runtime = { version = "1.8", default-features = false, features = ["tls-rustls"] }
"tls-rustls", aws-sdk-config = { version = "1.62", default-features = false }
] } aws-sdk-s3 = { version = "1.79", default-features = false, features = ["rt-tokio"] }
aws-sdk-config = { version = "1.99", default-features = false }
aws-sdk-s3 = { version = "1.121", default-features = false, features = [
"rt-tokio",
] }
[profile.release] [profile.release]
lto = "thin" lto = "thin"
codegen-units = 16 codegen-units = 16
opt-level = 3 opt-level = 3
strip = "debuginfo" strip = "debuginfo"
[workspace.lints.clippy]
# pedantic lints configuration
doc_markdown = "warn"
format_collect = "warn"
manual_midpoint = "warn"
semicolon_if_nothing_returned = "warn"
unnecessary_semicolon = "warn"
unnecessary_wraps = "warn"
# nursery lints configuration
# or_fun_call = "warn" # enable it to help detect non trivial code used in `_or` method

View file

@ -1,14 +0,0 @@
# Security Reporting
If you wish to report responsibly a security vulnerability about Garage, we ask that you follow the following process.
Please report each security vulnerabilities by filling out the following template:
- PROJECT: A URL to the code repository containing the vulnerable version - be reminded that the source of truth is at https://git.deuxfleurs.fr/deuxfleurs/garage
- PUBLIC: Please let us know if this vulnerability has been made or discussed publicly already, and if so, please let us know where.
- DESCRIPTION: Please provide precise description of the security vulnerability you have found with as much information as you are able and willing to provide.
Please send the above info, along with any other information you feel is pertinent by emailing the core team at: garagehq@deuxfleurs.fr
The Garage Core Team will let you know within a few weeks whether or not your report has been accepted or rejected.
We ask that you please keep the report confidential until we have either responded or made a public announcement.

View file

@ -1,7 +1,7 @@
<!DOCTYPE html> <!DOCTYPE html>
<html> <html>
<head> <head>
<title>Garage administration API v0</title> <title>Garage Adminstration API v0</title>
<!-- needed for adaptive design --> <!-- needed for adaptive design -->
<meta charset="utf-8"/> <meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1"> <meta name="viewport" content="width=device-width, initial-scale=1">

View file

@ -3,10 +3,10 @@ info:
version: v0.8.0 version: v0.8.0
title: Garage Administration API v0+garage-v0.8.0 title: Garage Administration API v0+garage-v0.8.0
description: | description: |
Administrate your Garage cluster programmatically, including status, layout, keys, buckets, and maintenance tasks. Administrate your Garage cluster programatically, including status, layout, keys, buckets, and maintainance tasks.
*Disclaimer: The API is not stable yet, hence its v0 tag. The API can change at any time, and changes can include breaking backward compatibility. Read the changelog and upgrade your scripts before upgrading. Additionally, this specification is very early stage and can contain bugs, especially on error return codes/types that are not tested yet. Do not expect a well finished and polished product!* *Disclaimer: The API is not stable yet, hence its v0 tag. The API can change at any time, and changes can include breaking backward compatibility. Read the changelog and upgrade your scripts before upgrading. Additionnaly, this specification is very early stage and can contain bugs, especially on error return codes/types that are not tested yet. Do not expect a well finished and polished product!*
paths: paths:
/status: /status:
get: get:
tags: tags:

View file

@ -1,7 +1,7 @@
<!DOCTYPE html> <!DOCTYPE html>
<html> <html>
<head> <head>
<title>Garage administration API v1</title> <title>Garage Adminstration API v0</title>
<!-- needed for adaptive design --> <!-- needed for adaptive design -->
<meta charset="utf-8"/> <meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1"> <meta name="viewport" content="width=device-width, initial-scale=1">

View file

@ -3,10 +3,10 @@ info:
version: v0.9.0 version: v0.9.0
title: Garage Administration API v0+garage-v0.9.0 title: Garage Administration API v0+garage-v0.9.0
description: | description: |
Administrate your Garage cluster programmatically, including status, layout, keys, buckets, and maintenance tasks. Administrate your Garage cluster programatically, including status, layout, keys, buckets, and maintainance tasks.
*Disclaimer: The API is not stable yet, hence its v0 tag. The API can change at any time, and changes can include breaking backward compatibility. Read the changelog and upgrade your scripts before upgrading. Additionally, this specification is very early stage and can contain bugs, especially on error return codes/types that are not tested yet. Do not expect a well finished and polished product!* *Disclaimer: The API is not stable yet, hence its v0 tag. The API can change at any time, and changes can include breaking backward compatibility. Read the changelog and upgrade your scripts before upgrading. Additionnaly, this specification is very early stage and can contain bugs, especially on error return codes/types that are not tested yet. Do not expect a well finished and polished product!*
paths: paths:
/health: /health:
get: get:
tags: tags:
@ -440,7 +440,7 @@ paths:
- "false" - "false"
example: "true" example: "true"
required: false required: false
description: "Whether or not the secret key should be returned in the response" description: "Wether or not the secret key should be returned in the response"
responses: responses:
'500': '500':
description: "The server can not handle your request. Check your connectivity with the rest of the cluster." description: "The server can not handle your request. Check your connectivity with the rest of the cluster."

View file

@ -1,24 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>Garage administration API v2</title>
<!-- needed for adaptive design -->
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href="./css/redoc.css" rel="stylesheet">
<!--
Redoc doesn't change outer page styles
-->
<style>
body {
margin: 0;
padding: 0;
}
</style>
</head>
<body>
<redoc spec-url='./garage-admin-v2.json'></redoc>
<script src="./redoc.standalone.js"> </script>
</body>
</html>

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long

View file

@ -51,4 +51,4 @@ We are currently building this SDK for [Python](@/documentation/build/python.md#
More information: More information:
- [In the reference manual](@/documentation/reference-manual/admin-api.md) - [In the reference manual](@/documentation/reference-manual/admin-api.md)
- [Full specification](https://garagehq.deuxfleurs.fr/api/garage-admin-v0.html) - [Full specification](https://garagehq.deuxfleurs.fr/api/garage-admin-v0.html)

View file

@ -5,13 +5,13 @@ weight = 99
## S3 ## S3
If you are developing a new application, you may want to use Garage to store your user's media. If you are developing a new application, you may want to use Garage to store your user's media.
The S3 API that Garage uses is a standard REST API, so as long as you can make HTTP requests, The S3 API that Garage uses is a standard REST API, so as long as you can make HTTP requests,
you can query it. You can check the [S3 REST API Reference](https://docs.aws.amazon.com/AmazonS3/latest/API/API_Operations_Amazon_Simple_Storage_Service.html) from Amazon to learn more. you can query it. You can check the [S3 REST API Reference](https://docs.aws.amazon.com/AmazonS3/latest/API/API_Operations_Amazon_Simple_Storage_Service.html) from Amazon to learn more.
Developing your own wrapper around the REST API is time consuming and complicated. Developing your own wrapper around the REST API is time consuming and complicated.
Instead, there are some libraries already available. Instead, there are some libraries already available.
Some of them are maintained by Amazon, some by Minio, others by the community. Some of them are maintained by Amazon, some by Minio, others by the community.

View file

@ -23,7 +23,7 @@ To configure S3-compatible software to interact with Garage,
you will need the following parameters: you will need the following parameters:
- An **API endpoint**: this corresponds to the HTTP or HTTPS address - An **API endpoint**: this corresponds to the HTTP or HTTPS address
used to contact the Garage server. When running Garage locally this will usually used to contact the Garage server. When running Garage locally this will usually
be `http://127.0.0.1:3900`. In a real-world setting, you would usually have a reverse-proxy be `http://127.0.0.1:3900`. In a real-world setting, you would usually have a reverse-proxy
that adds TLS support and makes your Garage server available under a public hostname that adds TLS support and makes your Garage server available under a public hostname
such as `https://garage.example.com`. such as `https://garage.example.com`.

View file

@ -12,9 +12,8 @@ In this section, we cover the following web applications:
| [Mastodon](#mastodon) | ✅ | Natively supported | | [Mastodon](#mastodon) | ✅ | Natively supported |
| [Matrix](#matrix) | ✅ | Tested with `synapse-s3-storage-provider` | | [Matrix](#matrix) | ✅ | Tested with `synapse-s3-storage-provider` |
| [ejabberd](#ejabberd) | ✅ | `mod_s3_upload` | | [ejabberd](#ejabberd) | ✅ | `mod_s3_upload` |
| [Ente](#ente) | ✅ | Natively supported | | [Pixelfed](#pixelfed) | ✅ | Natively supported |
| [Pixelfed](#pixelfed) | ❓ | Natively supported | | [Pleroma](#pleroma) | ❓ | Not yet tested |
| [Pleroma](#pleroma) | ✅ | Natively supported |
| [Lemmy](#lemmy) | ✅ | Supported with pict-rs | | [Lemmy](#lemmy) | ✅ | Supported with pict-rs |
| [Funkwhale](#funkwhale) | ❓ | Not yet tested | | [Funkwhale](#funkwhale) | ❓ | Not yet tested |
| [Misskey](#misskey) | ❓ | Not yet tested | | [Misskey](#misskey) | ❓ | Not yet tested |
@ -54,7 +53,7 @@ garage bucket allow nextcloud --read --write --key nextcloud-key
Now edit your Nextcloud configuration file to enable object storage. Now edit your Nextcloud configuration file to enable object storage.
On my installation, the config. file is located at the following path: `/var/www/nextcloud/config/config.php`. On my installation, the config. file is located at the following path: `/var/www/nextcloud/config/config.php`.
We will add a new root key to the `$CONFIG` dictionary named `objectstore`: We will add a new root key to the `$CONFIG` dictionary named `objectstore`:
```php ```php
<?php <?php
@ -413,7 +412,7 @@ mc mirror --newer-than "3h" ./public/system/ garage/mastodon-data
## Matrix ## Matrix
Matrix is a chat communication protocol. Its main stable server implementation, [Synapse](https://matrix-org.github.io/synapse/latest/), provides a module to store media on a S3 backend. Additionally, a server independent media store supporting S3 has been developed by the community, it has been made possible thanks to how the matrix API has been designed and will work with implementations like Conduit, Dendrite, etc. Matrix is a chat communication protocol. Its main stable server implementation, [Synapse](https://matrix-org.github.io/synapse/latest/), provides a module to store media on a S3 backend. Additionally, a server independent media store supporting S3 has been developed by the community, it has been made possible thanks to how the matrix API has been designed and will work with implementations like Conduit, Dendrite, etc.
### synapse-s3-storage-provider (synapse only) ### synapse-s3-storage-provider (synapse only)
@ -450,7 +449,7 @@ media_storage_providers:
Note that uploaded media will also be stored locally and this behavior can not be deactivated, it is even required for Note that uploaded media will also be stored locally and this behavior can not be deactivated, it is even required for
some operations like resizing images. some operations like resizing images.
In fact, your local filesystem is considered as a cache but without any automated way to garbage collect it. In fact, your local filesystem is considered as a cache but without any automated way to garbage collect it.
We can build our garbage collector with `s3_media_upload`, a tool provided with the module. We can build our garbage collector with `s3_media_upload`, a tool provided with the module.
If you installed the module with the command provided before, you should be able to bring it in your path: If you installed the module with the command provided before, you should be able to bring it in your path:
@ -568,186 +567,13 @@ The module can then be configured with:
Other configuration options can be found in the Other configuration options can be found in the
[configuration YAML file](https://github.com/processone/ejabberd-contrib/blob/master/mod_s3_upload/conf/mod_s3_upload.yml). [configuration YAML file](https://github.com/processone/ejabberd-contrib/blob/master/mod_s3_upload/conf/mod_s3_upload.yml).
## Ente
Ente is an alternative for Google Photos and Apple Photos. It [can be selfhosted](https://help.ente.io/self-hosting/) and is working fine with Garage as of May 2024.
As a first step we need to create a bucket and a key for Ente:
```bash
garage bucket create ente
garage key create ente-key
# For the CORS setup to work, the key needs to be --owner as well, at least temporarily.
garage bucket allow ente --read --write --owner --key ente-key
```
We also need to setup some CORS rules to allow the Ente frontend to access the bucket:
```bash
export CORS='{"CORSRules":[{"AllowedHeaders":["*"],"AllowedMethods":["GET", "PUT", "POST", "DELETE"],"AllowedOrigins":["*"], "ExposeHeaders":["ETag"]}]}'
aws s3api put-bucket-cors --bucket ente --cors-configuration $CORS
```
Now we need to configure ente-server to use our bucket. This is explained [in the Ente S3 documentation](https://help.ente.io/self-hosting/guides/external-s3).
Prepare a configuration file for ente's backend as `museum.yaml`:
```yaml
credentials-file: /credentials.yaml
apps:
public-albums: https://albums.example.tld # If you want to use the share album feature
internal:
hardcoded-ott:
local-domain-suffix: "@example.com" # Your domain
local-domain-value: 123456 # Custom One-Time Password since we are not sending mail by default
key:
# WARNING -- You MUST CHANGE the values below
# Someone has made an image that can do it for you : https://github.com/EdyTheCow/ente-selfhost/blob/main/images/ente-server-tools/Dockerfile
# Simply build it yourself or run docker run --rm ghcr.io/edythecow/ente-server-tools go run tools/gen-random-keys/main.go
encryption: yvmG/RnzKrbCb9L3mgsmoxXr9H7i2Z4qlbT0mL3ln4w= # CHANGE THIS VALUE
hash: KXYiG07wC7GIgvCSdg+WmyWdXDAn6XKYJtp/wkEU7x573+byBRAYtpTP0wwvi8i/4l37uicX1dVTUzwH3sLZyw== # CHANGE THIS VALUE
jwt:
secret: i2DecQmfGreG6q1vBj5tCokhlN41gcfS2cjOs9Po-u8= # CHANGE THIS VALUE
```
The full configuration file can be found [here](https://github.com/ente-io/ente/blob/main/server/configurations/local.yaml)
Then prepare a credentials file as `credentials.yaml`
```yaml
db:
host: postgres
port: 5432
name: <ente_db_name>
user: <pguser>
password: <pgpass>
s3:
# Override the primary and secondary hot storage. The commented out values
# are the defaults.
#
hot_storage:
primary: b2-eu-cen
# secondary: wasabi-eu-central-2-v3
# If true, enable some workarounds to allow us to use a local minio instance
# for object storage.
#
# 1. Disable SSL.
# 2. Use "path" style S3 URLs (see `use_path_style_urls` below).
# 3. Directly download the file during replication instead of going via the
# Cloudflare worker.
# 4. Do not specify storage classes when uploading objects (since minio does
# not support them, specifically it doesn't support GLACIER).
are_local_buckets: true
# To use "path" style S3 URLs instead of DNS-based bucket access
# default to true if you set "are_local_buckets: true"
# use_path_style_urls: true
b2-eu-cen: # Don't change this key, it is hardcoded
key: <keyID>
secret: <keySecret>
endpoint: garage:3900 # publicly accessible endpoint of your garage instance
region: garage
bucket: <yourbucketName>
use_path_style: true
# you can specify secondary locations, names are hardcoded as well
# wasabi-eu-central-2-v3:
# scw-eu-fr-v3:
# and you can also specify a bucket to be used for embeddings, preview etc..
# default to the first bucket
# derived-storage: wasabi-eu-central-2-derived
```
Finally you can run it with Docker :
```bash
docker run -d --name ente-server --restart unless-stopped -v /path/to/museum.yaml:/museum.yaml -v /path/to/credentials.yaml:/credentials.yaml -p 8080:8080 ghcr.io/ente-io/ente-server
```
For more information on deployment you can check the [ente documentation](https://help.ente.io/self-hosting/)
## Pixelfed ## Pixelfed
[Pixelfed Technical Documentation > Configuration](https://docs.pixelfed.org/technical-documentation/env.html#filesystem) [Pixelfed Technical Documentation > Configuration](https://docs.pixelfed.org/technical-documentation/env.html#filesystem)
## Pleroma ## Pleroma
### Creating your bucket [Pleroma Documentation > Pleroma.Uploaders.S3](https://docs-develop.pleroma.social/backend/configuration/cheatsheet/#pleromauploaderss3)
This is the usual Garage setup:
```bash
garage key new --name pleroma-key
garage bucket create pleroma
garage bucket allow pleroma --read --write --owner --key pleroma-key
```
We also need to expose these buckets publicly to serve their content to users:
```bash
garage bucket website --allow pleroma
```
Note the Key ID and Secret Key.
### Configure Pleroma
Update your Pleroma configuration like that in `/etc/pleroma/config.exs`.
```
config :pleroma, Pleroma.Upload,
uploader: Pleroma.Uploaders.S3,
base_url: "https://pleroma.garage.example.tld"
config :ex_aws, :s3,
access_key_id: "GW...",
secret_access_key: "XXX",
region: "garage",
host: "api.garage.example.tld"
```
And restart Pleroma.
You can found more information in [Pleroma Documentation > Pleroma.Uploaders.S3](https://docs-develop.pleroma.social/backend/configuration/cheatsheet/#pleromauploaderss3)
### Migrating your data
Pleroma have an internal migration tool that can encounter some fatal error
```
** (EXIT from #PID<0.98.0>) an exception was raised:
** (File.Error) could not stream "/var/lib/pleroma/uploads/09/f8": illegal operation on a directory
(elixir 1.17.3) lib/file/stream.ex:100: anonymous fn/3 in Enumerable.File.Stream.reduce/3
(elixir 1.17.3) lib/stream.ex:1675: anonymous fn/5 in Stream.resource/3
(elixir 1.17.3) lib/stream.ex:1891: Enumerable.Stream.do_each/4
(elixir 1.17.3) lib/task/supervised.ex:370: Task.Supervised.stream_reduce/7
(elixir 1.17.3) lib/enum.ex:4423: Enum.map/2
(ex_aws_s3 2.5.8) lib/ex_aws/s3/upload.ex:141: ExAws.Operation.ExAws.S3.Upload.perform/2
(pleroma 2.10.0) lib/pleroma/uploaders/s3.ex:60: Pleroma.Uploaders.S3.put_file/1
(pleroma 2.10.0) lib/pleroma/uploaders/uploader.ex:49: Pleroma.Uploaders.Uploader.put_file/2
```
So, use [your best tool](https://garagehq.deuxfleurs.fr/documentation/connect/cli/) to sync `/var/lib/pleroma/uploads/` in your S3.
Then, to avoid some non existent problem (just in case of), run this command
```bash
while true
do
rm -vr $(./bin/pleroma_ctl uploads migrate_local S3 2>&1 | grep "could not stream" | awk -F '"' '{print $2}')
sleep 5
done
```
If you have many files, stop this command sometime and the command bellow (interactive) to delete local
file after upload. Then restart the loop.
```bash
./bin/pleroma_ctl uploads migrate_local S3 --delete
```
And *voilà*
## Lemmy ## Lemmy

View file

@ -207,13 +207,3 @@ $ plakar at @garageS3 ls
``` ```
More information in Plakar documentation: https://www.plakar.io/docs/main/quickstart/ More information in Plakar documentation: https://www.plakar.io/docs/main/quickstart/
## Synology HyperBackup
HyperBackup can be configured to upload backups to garage using a custom S3 destination. However, the HyperBackup client hardcodes the `us-east-1` region that is a critical input to the v4 signature process. If garage is not set to `us-east-1`, HyperBackup will recognize available buckets, but fail during the final setup stage.
In garage.toml:
```toml
[s3_api]
s3_region = "us-east-1"
```

View file

@ -41,7 +41,7 @@ Some commands:
# list buckets # list buckets
mc ls garage/ mc ls garage/
# list objects in a bucket # list objects in a bucket
mc ls garage/my_files mc ls garage/my_files
# copy from your filesystem to garage # copy from your filesystem to garage
@ -149,15 +149,6 @@ rclone help
This will tremendously accelerate operations such as `rclone sync` or `rclone ncdu` by reducing the number This will tremendously accelerate operations such as `rclone sync` or `rclone ncdu` by reducing the number
of ListObjects calls that are made. of ListObjects calls that are made.
**Garage behind Cloudflare proxy:** when running Garage behind Cloudflare proxy, you might see `Response: error 403 Forbidden, Forbidden: Invalid signature` error in your garage logs or `AccessDenied: Forbidden: Invalid signature` error in rclone logs. Try adding `--s3-sign-accept-encoding=false` flag to your rclone command and see if the issue is resolved.
```bash
# this throws an error
rclone lsd garage:
# this should work
rclone lsd --s3-sign-accept-encoding=false garage:
```
## `s3cmd` ## `s3cmd`
@ -218,7 +209,7 @@ Within Cyberduck, a
available within the `Preferences -> Profiles` section. This can enabled and available within the `Preferences -> Profiles` section. This can enabled and
then connections to Garage may be configured. then connections to Garage may be configured.
### Instructions for the CLI ### Instructions for the CLI
To configure duck (Cyberduck's CLI tool), start by creating its folder hierarchy: To configure duck (Cyberduck's CLI tool), start by creating its folder hierarchy:
@ -323,3 +314,4 @@ ls
``` ```
And through the web interface at http://[::1]:8080/web/client And through the web interface at http://[::1]:8080/web/client

View file

@ -201,9 +201,11 @@ on the binary cache, the client will download the result from the cache instead
### Channels ### Channels
Channels additionally serve Nix definitions, ie. a `.nix` file referencing Channels additionally serve Nix definitions, ie. a `.nix` file referencing
all the derivations you want to serve. all the derivations you want to serve.
## Gitlab ## Gitlab
*External link:* [Gitlab Documentation > Object storage](https://docs.gitlab.com/ee/administration/object_storage.html) *External link:* [Gitlab Documentation > Object storage](https://docs.gitlab.com/ee/administration/object_storage.html)

View file

@ -8,12 +8,12 @@ have published Ansible roles. We list them and compare them below.
## Comparison of Ansible roles ## Comparison of Ansible roles
| Feature | [ansible-role-garage](#zorun-ansible-role-garage) | [garage-docker-ansible-deploy](#moan0s-garage-docker-ansible-deploy) | [eddster2309 ansible-role-garage](#eddster2309-ansible-role-garage) | | Feature | [ansible-role-garage](#zorun-ansible-role-garage) | [garage-docker-ansible-deploy](#moan0s-garage-docker-ansible-deploy) | [eddster ansible-role-garage](#eddster-ansible-role-garage) |
|------------------------------------|---------------------------------------------|---------------------------------------------------------------|---------------------------------| |------------------------------------|---------------------------------------------|---------------------------------------------------------------|---------------------------------|
| **Runtime** | Systemd | Docker | Systemd | | **Runtime** | Systemd | Docker | Systemd |
| **Target OS** | Any Linux | Any Linux | Any Linux | | **Target OS** | Any Linux | Any Linux | Any Linux |
| **Architecture** | amd64, arm64, i686 | amd64, arm64 | arm64, arm, 386, amd64 | | **Architecture** | amd64, arm64, i686 | amd64, arm64 | arm64, arm, 386, amd64 |
| **Additional software** | None | Traefik | Nginx and Keepalived (optional) | | **Additional software** | None | Traefik | Nginx and Keepalived (optional) |
| **Automatic node connection** | ❌ | ✅ | ✅ | | **Automatic node connection** | ❌ | ✅ | ✅ |
| **Layout management** | ❌ | ✅ | ✅ | | **Layout management** | ❌ | ✅ | ✅ |
| **Manage buckets & keys** | ❌ | ✅ (basic) | ✅ | | **Manage buckets & keys** | ❌ | ✅ (basic) | ✅ |

View file

@ -29,10 +29,6 @@ it's stable).
Garage is available in the official repositories under [extra](https://archlinux.org/packages/extra/x86_64/garage). Garage is available in the official repositories under [extra](https://archlinux.org/packages/extra/x86_64/garage).
```bash
pacman -S garage
```
## FreeBSD ## FreeBSD
```bash ```bash
@ -44,9 +40,3 @@ pkg install garage
```bash ```bash
nix-shell -p garage nix-shell -p garage
``` ```
## conda-forge
```bash
pixi global install garage
```

View file

@ -33,7 +33,7 @@ by adding encryption at different levels.
We would be very curious to know your needs and thoughts about ideas such as We would be very curious to know your needs and thoughts about ideas such as
encryption practices and things like key management, as we want Garage to be a encryption practices and things like key management, as we want Garage to be a
serious base platform for the development of secure, encrypted applications. serious base platform for the development of secure, encrypted applications.
Do not hesitate to come talk to us if you have any thoughts or questions on the Do not hesitate to come talk to us if you have any thoughts or questions on the
subject. subject.
@ -59,7 +59,7 @@ For standard S3 API requests, Garage does not encrypt data at rest by itself.
For the most generic at rest encryption of data, we recommend setting up your For the most generic at rest encryption of data, we recommend setting up your
storage partitions on encrypted LUKS devices. storage partitions on encrypted LUKS devices.
If you are developing your own client software that makes use of S3 storage, If you are developing your own client software that makes use of S3 storage,
we recommend implementing data encryption directly on the client side and never we recommend implementing data encryption directly on the client side and never
transmitting plaintext data to Garage. This makes it easy to use an external transmitting plaintext data to Garage. This makes it easy to use an external
untrusted storage provider if necessary. untrusted storage provider if necessary.
@ -108,14 +108,14 @@ Protects against the following threats:
- Stolen HDD - Stolen HDD
Crucially, does not protect against malicious sysadmins or remote attackers that Crucially, does not protect against malicious sysadmins or remote attackers that
might gain access to your servers. might gain access to your servers.
Methods include full-disk encryption with tools such as LUKS. Methods include full-disk encryption with tools such as LUKS.
## Encrypting data on the client side ## Encrypting data on the client side
Protects against the following threats: Protects against the following threats:
- A honest-but-curious administrator - A honest-but-curious administrator
- A malicious administrator that tries to corrupt your data - A malicious administrator that tries to corrupt your data

View file

@ -9,7 +9,7 @@ There are three methods to expose buckets as website:
1. using the PutBucketWebsite S3 API call, which is allowed for access keys that have the owner permission bit set 1. using the PutBucketWebsite S3 API call, which is allowed for access keys that have the owner permission bit set
2. from the Garage CLI, by an administrator of the cluster 2. from the Garage CLI, by an administrator of the cluster
3. using the Garage administration API 3. using the Garage administration API

View file

@ -20,12 +20,12 @@ sudo apt-get update
sudo apt-get install build-essential sudo apt-get install build-essential
``` ```
## Building from source from the Forgejo repository ## Building from source from the Gitea repository
The primary location for Garage's source code is the The primary location for Garage's source code is the
[Forgejo repository](https://git.deuxfleurs.fr/Deuxfleurs/garage), [Gitea repository](https://git.deuxfleurs.fr/Deuxfleurs/garage),
which contains all of the released versions as well as the code which contains all of the released versions as well as the code
for the development of the next version. for the development of the next version.
Clone the repository and enter it as follows: Clone the repository and enter it as follows:
@ -41,7 +41,7 @@ git tag # List available tags
git checkout v0.8.0 # Change v0.8.0 with the version you wish to build git checkout v0.8.0 # Change v0.8.0 with the version you wish to build
``` ```
Otherwise you will be building a development build from the `main` branch Otherwise you will be building a development build from the `main` branch
that includes all of the changes to be released in the next version. that includes all of the changes to be released in the next version.
Be careful that such a build might be unstable or contain bugs, Be careful that such a build might be unstable or contain bugs,
and could be incompatible with nodes that run stable versions of Garage. and could be incompatible with nodes that run stable versions of Garage.
@ -85,14 +85,11 @@ The following feature flags are available in v0.8.0:
| Feature flag | Enabled | Description | | Feature flag | Enabled | Description |
| ------------ | ------- | ----------- | | ------------ | ------- | ----------- |
| `bundled-libs` | *by default* | Use bundled version of sqlite3, zstd, lmdb and libsodium | | `bundled-libs` | *by default* | Use bundled version of sqlite3, zstd, lmdb and libsodium |
| `consul-discovery` | optional | Enable automatic registration and discovery<br>of cluster nodes through the Consul API | | `system-libs` | optional | Use system version of sqlite3, zstd, lmdb and libsodium<br>if available (exclusive with `bundled-libs`, build using<br>`cargo build --no-default-features --features system-libs`) |
| `fjall` | experimental | Enable using Fjall to store Garage's metadata |
| `journald` | optional | Enable logging to systemd-journald with<br>`GARAGE_LOG_TO_JOURNALD=true` environment variable set |
| `k2v` | optional | Enable the experimental K2V API (if used, all nodes on your<br>Garage cluster must have it enabled as well) | | `k2v` | optional | Enable the experimental K2V API (if used, all nodes on your<br>Garage cluster must have it enabled as well) |
| `kubernetes-discovery` | optional | Enable automatic registration and discovery<br>of cluster nodes through the Kubernetes API | | `kubernetes-discovery` | optional | Enable automatic registration and discovery<br>of cluster nodes through the Kubernetes API |
| `lmdb` | *by default* | Enable using LMDB to store Garage's metadata |
| `metrics` | *by default* | Enable collection of metrics in Prometheus format on the admin API | | `metrics` | *by default* | Enable collection of metrics in Prometheus format on the admin API |
| `sqlite` | *by default* | Enable using Sqlite3 to store Garage's metadata |
| `syslog` | optional | Enable logging to Syslog with<br>`GARAGE_LOG_TO_SYSLOG=true` environment variable set |
| `system-libs` | optional | Use system version of sqlite3, zstd, lmdb and libsodium<br>if available (exclusive with `bundled-libs`, build using<br>`cargo build --no-default-features --features system-libs`) |
| `telemetry-otlp` | optional | Enable collection of execution traces using OpenTelemetry | | `telemetry-otlp` | optional | Enable collection of execution traces using OpenTelemetry |
| `syslog` | optional | Enable logging to Syslog |
| `lmdb` | *by default* | Enable using LMDB to store Garage's metadata |
| `sqlite` | *by default* | Enable using Sqlite3 to store Garage's metadata |

View file

@ -26,7 +26,7 @@ Or deploy with custom values:
helm install --create-namespace --namespace garage garage ./garage -f values.override.yaml helm install --create-namespace --namespace garage garage ./garage -f values.override.yaml
``` ```
If you want to manage the CustomResourceDefinition used by garage for its `kubernetes_discovery` outside of the helm chart, add `garage.kubernetesSkipCrd: true` to your custom values and use the kustomization before deploying the helm chart: If you want to manage the CustomResourceDefinition used by garage for its `kubernetes_discovery` outside of the helm chart, add `garage.kubernetesSkipCrd: true` to your custom values and use the kustomization before deploying the helm chart:
```bash ```bash
kubectl apply -k ../k8s/crd kubectl apply -k ../k8s/crd
@ -47,12 +47,12 @@ All possible configuration values can be found with:
helm show values ./garage helm show values ./garage
``` ```
This is an example `values.override.yaml` for deploying in a microk8s cluster with a https s3 api ingress route: This is an example `values.override.yaml` for deploying in a microk8s cluster with a https s3 api ingress route:
```yaml ```yaml
garage: garage:
# Use only 2 replicas per object # Use only 2 replicas per object
replicationFactor: 2 replicationMode: "2"
# Start 4 instances (StatefulSets) of garage # Start 4 instances (StatefulSets) of garage
deployment: deployment:

View file

@ -96,14 +96,14 @@ to store 2 TB of data in total.
## Get a Docker image ## Get a Docker image
Our docker image is currently named `dxflrs/garage` and is stored on the [Docker Hub](https://hub.docker.com/r/dxflrs/garage/tags?page=1&ordering=last_updated). Our docker image is currently named `dxflrs/garage` and is stored on the [Docker Hub](https://hub.docker.com/r/dxflrs/garage/tags?page=1&ordering=last_updated).
We encourage you to use a fixed tag (eg. `v2.3.0`) and not the `latest` tag. We encourage you to use a fixed tag (eg. `v1.3.0`) and not the `latest` tag.
For this example, we will use the latest published version at the time of the writing which is `v2.3.0` but it's up to you For this example, we will use the latest published version at the time of the writing which is `v1.3.0` but it's up to you
to check [the most recent versions on the Docker Hub](https://hub.docker.com/r/dxflrs/garage/tags?page=1&ordering=last_updated). to check [the most recent versions on the Docker Hub](https://hub.docker.com/r/dxflrs/garage/tags?page=1&ordering=last_updated).
For example: For example:
``` ```
docker pull dxflrs/garage:v2.3.0 sudo docker pull dxflrs/garage:v1.3.0
``` ```
## Deploying and configuring Garage ## Deploying and configuring Garage
@ -171,7 +171,7 @@ docker run \
-v /etc/garage.toml:/etc/garage.toml \ -v /etc/garage.toml:/etc/garage.toml \
-v /var/lib/garage/meta:/var/lib/garage/meta \ -v /var/lib/garage/meta:/var/lib/garage/meta \
-v /var/lib/garage/data:/var/lib/garage/data \ -v /var/lib/garage/data:/var/lib/garage/data \
dxflrs/garage:v2.3.0 dxflrs/garage:v1.3.0
``` ```
With this command line, Garage should be started automatically at each boot. With this command line, Garage should be started automatically at each boot.
@ -185,7 +185,7 @@ If you want to use `docker-compose`, you may use the following `docker-compose.y
version: "3" version: "3"
services: services:
garage: garage:
image: dxflrs/garage:v2.3.0 image: dxflrs/garage:v1.3.0
network_mode: "host" network_mode: "host"
restart: unless-stopped restart: unless-stopped
volumes: volumes:

View file

@ -7,7 +7,7 @@ The main reason to add a reverse proxy in front of Garage is to provide TLS to y
In production you will likely need your certificates signed by a certificate authority. In production you will likely need your certificates signed by a certificate authority.
The most automated way is to use a provider supporting the [ACME protocol](https://datatracker.ietf.org/doc/html/rfc8555) The most automated way is to use a provider supporting the [ACME protocol](https://datatracker.ietf.org/doc/html/rfc8555)
such as [Let's Encrypt](https://letsencrypt.org/) or [ZeroSSL](https://zerossl.com/). such as [Let's Encrypt](https://letsencrypt.org/), [ZeroSSL](https://zerossl.com/) or [Buypass Go SSL](https://www.buypass.com/ssl/products/acme).
If you are only testing Garage, you can generate a self-signed certificate to follow the documentation: If you are only testing Garage, you can generate a self-signed certificate to follow the documentation:
@ -97,7 +97,7 @@ server {
location / { location / {
proxy_pass http://s3_backend; proxy_pass http://s3_backend;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header Host $http_host; proxy_set_header Host $host;
# Disable buffering to a temporary file. # Disable buffering to a temporary file.
proxy_max_temp_file_size 0; proxy_max_temp_file_size 0;
} }
@ -142,74 +142,7 @@ server {
## Apache httpd ## Apache httpd
The [Apache HTTP Server](https://httpd.apache.org/) @TODO
is a general purpose web server that includes
[reverse proxy](https://httpd.apache.org/docs/2.4/mod/mod_proxy.html)
capabilities.
### Exposing the S3 endpoints
Create a new [virtual host](https://httpd.apache.org/docs/2.4/vhosts/),
obtain a certificate using
[certbot](https://eff-certbot.readthedocs.io/en/stable/using.html#apache),
and add the
[`ProxyPass`](https://httpd.apache.org/docs/2.4/mod/mod_proxy.html#proxypass)
and
[`ProxyPreserveHost`](https://httpd.apache.org/docs/2.4/mod/mod_proxy.html#proxypreservehost)
options:
```apache
<VirtualHost *:443>
ServerName garage.example.com
SSLCertificateFile /etc/letsencrypt/live/garage.example.com/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/garage.example.com/privkey.pem
Include /etc/letsencrypt/options-ssl-apache.conf
Header always set Strict-Transport-Security "max-age=31536000"
Header always add Content-Security-Policy upgrade-insecure-requests
ProxyPass "/" "http://localhost:3900/" nocanon
ProxyPreserveHost on
</VirtualHost>
```
The `nocanon` keyword is important for
[presigned URLs](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-presigned-url.html);
otherwise,
> `mod_proxy` will canonicalise ProxyPassed URLs.
> But this may be incompatible with some backends,
> particularly those that make use of `PATH_INFO`.
> The optional `nocanon` keyword suppresses this
> and passes the URL path "raw" to the backend.
### Exposing the web endpoint
Adding static websites backed by Garage works very similarly,
with the only difference being the port selected in the `ProxyPass` directive.
```apache
ProxyPass "/" "http://localhost:3902/" nocanon
```
### Using Unix sockets
Apache can also proxy via Unix sockets instead of TCP ports,
if Garage is so configured.
`garage.toml`:
```toml
[s3_api]
api_bind_addr = "/run/garage/s3_api.socket"
```
Apache config:
```apache
ProxyPass "/" "unix:/run/garage/s3_api.socket|http://localhost/" nocanon
```
## Traefik v2 ## Traefik v2
@ -339,7 +272,7 @@ Add the following configuration section [to compress response](https://doc.traef
### Add caching response ### Add caching response
Traefik's caching middleware is only available on [enterprise version](https://doc.traefik.io/traefik-enterprise/middlewares/http-cache/), however the freely-available [Souin plugin](https://github.com/darkweak/souin#tr%C3%A6fik-container) can also do the job. (section to be completed) Traefik's caching middleware is only available on [entreprise version](https://doc.traefik.io/traefik-enterprise/middlewares/http-cache/), however the freely-available [Souin plugin](https://github.com/darkweak/souin#tr%C3%A6fik-container) can also do the job. (section to be completed)
### Complete example ### Complete example

View file

@ -38,7 +38,7 @@ WantedBy=multi-user.target
id is dynamically allocated by systemd (set with `DynamicUser=true`). It cannot id is dynamically allocated by systemd (set with `DynamicUser=true`). It cannot
access (read or write) home folders (`/home`, `/root` and `/run/user`), the access (read or write) home folders (`/home`, `/root` and `/run/user`), the
rest of the filesystem can only be read but not written, only the path seen as rest of the filesystem can only be read but not written, only the path seen as
`/var/lib/garage` is writable as seen by the service. Additionally, the process `/var/lib/garage` is writable as seen by the service. Additionnaly, the process
can not gain new privileges over time. can not gain new privileges over time.
For this to work correctly, your `garage.toml` must be set with For this to work correctly, your `garage.toml` must be set with

View file

@ -10,7 +10,7 @@ perspective. It will allow you to understand if Garage is a good fit for
you, how to better use it, how to contribute to it, what can Garage could you, how to better use it, how to contribute to it, what can Garage could
and could not do, etc. and could not do, etc.
- **[Goals and use cases](@/documentation/design/goals.md):** This page explains why Garage was conceived and what practical use cases it targets. - **[Goals and use cases](@/documentation/design/goals.md):** This page explains why Garage was concieved and what practical use cases it targets.
- **[Related work](@/documentation/design/related-work.md):** This pages presents the theoretical background on which Garage is built, and describes other software storage solutions and why they didn't work for us. - **[Related work](@/documentation/design/related-work.md):** This pages presents the theoretical background on which Garage is built, and describes other software storage solutions and why they didn't work for us.
@ -31,3 +31,5 @@ We love to talk and hear about Garage, that's why we keep a log here:
- [(en, 2021-04-28) Distributed object storage is centralised](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/commit/b1f60579a13d3c5eba7f74b1775c84639ea9b51a/doc/talks/2021-04-28_spirals-team/talk.pdf) - [(en, 2021-04-28) Distributed object storage is centralised](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/commit/b1f60579a13d3c5eba7f74b1775c84639ea9b51a/doc/talks/2021-04-28_spirals-team/talk.pdf)
- [(fr, 2020-12-02) Garage : jouer dans la cour des grands quand on est un hébergeur associatif](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/commit/b1f60579a13d3c5eba7f74b1775c84639ea9b51a/doc/talks/2020-12-02_wide-team/talk.pdf) - [(fr, 2020-12-02) Garage : jouer dans la cour des grands quand on est un hébergeur associatif](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/commit/b1f60579a13d3c5eba7f74b1775c84639ea9b51a/doc/talks/2020-12-02_wide-team/talk.pdf)

View file

@ -15,14 +15,14 @@ The more a user request will require intra-cluster requests to complete, the mor
This is especially true for sequential requests: requests that must wait the result of another request to be sent. This is especially true for sequential requests: requests that must wait the result of another request to be sent.
We designed Garage without consensus algorithms (eg. Paxos or Raft) to minimize the number of sequential and parallel requests. We designed Garage without consensus algorithms (eg. Paxos or Raft) to minimize the number of sequential and parallel requests.
This series of benchmarks quantifies the impact of this design choice. This serie of benchmarks quantifies the impact of this design choice.
### On a simple simulated network ### On a simple simulated network
We start with a controlled environment, all the instances are running on the same (powerful enough) machine. We start with a controlled environment, all the instances are running on the same (powerful enough) machine.
To control the network latency, we simulate the network with [mknet](https://git.deuxfleurs.fr/trinity-1686a/mknet) (a tool we developed, based on `tc` and the linux network stack). To control the network latency, we simulate the network with [mknet](https://git.deuxfleurs.fr/trinity-1686a/mknet) (a tool we developped, based on `tc` and the linux network stack).
To measure S3 endpoints latency, we use our own tool [s3lat](https://git.deuxfleurs.fr/quentin/s3lat/) to observe only the intra-cluster latency and not some contention on the nodes (CPU, RAM, disk I/O, network bandwidth, etc.). To mesure S3 endpoints latency, we use our own tool [s3lat](https://git.deuxfleurs.fr/quentin/s3lat/) to observe only the intra-cluster latency and not some contention on the nodes (CPU, RAM, disk I/O, network bandwidth, etc.).
Compared to other benchmark tools, S3Lat sends only one (small) request at the same time and measures its latency. Compared to other benchmark tools, S3Lat sends only one (small) request at the same time and measures its latency.
We selected 5 standard endpoints that are often in the critical path: ListBuckets, ListObjects, GetObject, PutObject and RemoveObject. We selected 5 standard endpoints that are often in the critical path: ListBuckets, ListObjects, GetObject, PutObject and RemoveObject.
@ -32,7 +32,7 @@ In this first benchmark, we consider 5 instances that are located in a different
Compared to garage, minio latency drastically increases on 3 endpoints: GetObject, PutObject, RemoveObject. Compared to garage, minio latency drastically increases on 3 endpoints: GetObject, PutObject, RemoveObject.
We suppose that these requests on minio make transactions over Raft, involving 4 sequential requests: 1) sending the message to the leader, 2) having the leader dispatch it to the other nodes, 3) waiting for the confirmation of followers and finally 4) committing it. With our current configuration, one Raft transaction will take around 400 ms. GetObject seems to correlate to 1 transaction while PutObject and RemoveObject seems to correlate to 2 or 3. Reviewing minio code would be required to confirm this hypothesis. We suppose that these requests on minio make transactions over Raft, involving 4 sequential requests: 1) sending the message to the leader, 2) having the leader dispatch it to the other nodes, 3) waiting for the confirmation of followers and finally 4) commiting it. With our current configuration, one Raft transaction will take around 400 ms. GetObject seems to correlate to 1 transaction while PutObject and RemoveObject seems to correlate to 2 or 3. Reviewing minio code would be required to confirm this hypothesis.
Conversely, garage uses an architecture similar to DynamoDB and never require global cluster coordination to answer a request. Conversely, garage uses an architecture similar to DynamoDB and never require global cluster coordination to answer a request.
Instead, garage can always contact the right node in charge of the requested data, and can answer in as low as one request in the case of GetObject and PutObject. We also observed that Garage latency, while often lower to minio, is more dispersed: garage is still in beta and has not received any performance optimization yet. Instead, garage can always contact the right node in charge of the requested data, and can answer in as low as one request in the case of GetObject and PutObject. We also observed that Garage latency, while often lower to minio, is more dispersed: garage is still in beta and has not received any performance optimization yet.
@ -50,7 +50,7 @@ We plot a similar graph as before:
This new graph is very similar to the one before, neither minio or garage seems to benefit from this new topology, but they also do not suffer from it. This new graph is very similar to the one before, neither minio or garage seems to benefit from this new topology, but they also do not suffer from it.
Considering garage, this is expected: nodes in the same DC are put in the same zone, and then data are spread on different zones for data resiliency and availability. Considering garage, this is expected: nodes in the same DC are put in the same zone, and then data are spread on different zones for data resiliency and availaibility.
Then, in the default mode, requesting data requires to query at least 2 zones to be sure that we have the most up to date information. Then, in the default mode, requesting data requires to query at least 2 zones to be sure that we have the most up to date information.
These requests will involve at least one inter-DC communication. These requests will involve at least one inter-DC communication.
In other words, we prioritize data availability and synchronization over raw performances. In other words, we prioritize data availability and synchronization over raw performances.

View file

@ -59,13 +59,11 @@ Garage themselves for the following tasks:
- Hosting of their homepage, [privacyguides.org](https://www.privacyguides.org/), and various other static sites - Hosting of their homepage, [privacyguides.org](https://www.privacyguides.org/), and various other static sites
- As a PowerDNS authoritative zone backend through [Lightning Stream](https://doc.powerdns.com/lightningstream/latest/index.html) and [LMDB](https://doc.powerdns.com/authoritative/backends/lmdb.html) - As a Mastodon object storage backend for [mstdn.party](https://mstdn.party/) and [mstdn.plus](https://mstdn.plus/)
- As a Mastodon media storage backend for [mstdn.party](https://mstdn.party/) and [mstdn.plus](https://mstdn.plus/)
- As a PeerTube storage backend for [neat.tube](https://neat.tube/) - As a PeerTube storage backend for [neat.tube](https://neat.tube/)
- As a [Matrix media backend](https://github.com/matrix-org/synapse-s3-storage-provider) - As a [Matrix media backend](https://github.com/matrix-org/synapse-s3-storage-provider)
Triplebit's Garage cluster is a multi-site cluster currently composed of Triplebit's Garage cluster is a multi-site cluster currently composed of
15 storage nodes in 3 physical locations. 10 nodes in 3 physical locations.

View file

@ -94,7 +94,7 @@ delete a tombstone, the following condition has to be met:
- All nodes responsible for storing this entry are aware of the existence of - All nodes responsible for storing this entry are aware of the existence of
the tombstone, i.e. they cannot hold another version of the entry that is the tombstone, i.e. they cannot hold another version of the entry that is
superseded by the tombstone. This ensures that deleting the tombstone is superseeded by the tombstone. This ensures that deleting the tombstone is
safe and that no deleted value will come back in the system. safe and that no deleted value will come back in the system.
Garage uses atomic database operations (such as compare-and-swap and Garage uses atomic database operations (such as compare-and-swap and
@ -141,3 +141,4 @@ rebalance of data, this would have led to the disk utilization to explode
during the rebalancing, only to shrink again after 24 hours. The 10-minute during the rebalancing, only to shrink again after 24 hours. The 10-minute
delay is a compromise that gives good security while not having this problem of delay is a compromise that gives good security while not having this problem of
disk space explosion on rebalance. disk space explosion on rebalance.

View file

@ -37,7 +37,7 @@ However, Amazon S3 source code is not open but alternatives were proposed.
We identified Minio, Pithos, Swift and Ceph. We identified Minio, Pithos, Swift and Ceph.
Minio/Ceph enforces a total order, so properties similar to a (relaxed) filesystem. Minio/Ceph enforces a total order, so properties similar to a (relaxed) filesystem.
Swift and Pithos are probably the most similar to AWS S3 with their consistent hashing ring. Swift and Pithos are probably the most similar to AWS S3 with their consistent hashing ring.
However Pithos is not maintained anymore. More precisely the company that published Pithos version 1 has developed a second version 2 but has not open sourced it. However Pithos is not maintained anymore. More precisely the company that published Pithos version 1 has developped a second version 2 but has not open sourced it.
Some tests conducted by the [ACIDES project](https://acides.org/) have shown that Openstack Swift consumes way more resources (CPU+RAM) that we can afford. Furthermore, people developing Swift have not designed their software for geo-distribution. Some tests conducted by the [ACIDES project](https://acides.org/) have shown that Openstack Swift consumes way more resources (CPU+RAM) that we can afford. Furthermore, people developing Swift have not designed their software for geo-distribution.
There were many attempts in research too. I am only thinking to [LBFS](https://pdos.csail.mit.edu/papers/lbfs:sosp01/lbfs.pdf) that was used as a basis for Seafile. But none of them have been effectively implemented yet. There were many attempts in research too. I am only thinking to [LBFS](https://pdos.csail.mit.edu/papers/lbfs:sosp01/lbfs.pdf) that was used as a basis for Seafile. But none of them have been effectively implemented yet.
@ -63,7 +63,7 @@ Due to its industry oriented design, Ceph is also far from being *Simple* to ope
In a certain way, Ceph and MinIO are closer together than they are from Garage or OpenStack Swift. In a certain way, Ceph and MinIO are closer together than they are from Garage or OpenStack Swift.
**[Pithos](https://github.com/exoscale/pithos):** **[Pithos](https://github.com/exoscale/pithos):**
Pithos has been abandoned and should probably not used yet, in the following we explain why we did not pick their design. Pithos has been abandonned and should probably not used yet, in the following we explain why we did not pick their design.
Pithos was relying as a S3 proxy in front of Cassandra (and was working with Scylla DB too). Pithos was relying as a S3 proxy in front of Cassandra (and was working with Scylla DB too).
From its designers' mouth, storing data in Cassandra has shown its limitations justifying the project abandonment. From its designers' mouth, storing data in Cassandra has shown its limitations justifying the project abandonment.
They built a closed-source version 2 that does not store blobs in the database (only metadata) but did not communicate further on it. They built a closed-source version 2 that does not store blobs in the database (only metadata) but did not communicate further on it.

View file

@ -82,6 +82,12 @@ nix-build \
*The result is located in `result/bin`. You can pass arguments to cross compile: check `.woodpecker/release.yml` for examples.* *The result is located in `result/bin`. You can pass arguments to cross compile: check `.woodpecker/release.yml` for examples.*
If you modify a `Cargo.toml` or regenerate any `Cargo.lock`, you must run `cargo2nix`:
```
cargo2nix -f
```
Many tools like rclone, `mc` (minio-client), or `aws` (awscliv2) will be available in your environment and will be useful to test Garage. Many tools like rclone, `mc` (minio-client), or `aws` (awscliv2) will be available in your environment and will be useful to test Garage.
**This is the recommended method.** **This is the recommended method.**
@ -118,6 +124,23 @@ cargo fmt # format the project, run it before any commit!
cargo clippy # run the linter, run it before any commit! cargo clippy # run the linter, run it before any commit!
``` ```
This is specific to our project, but you will need one last tool, `cargo2nix`.
To install it, run:
```bash
cargo install --git https://github.com/superboum/cargo2nix --branch main cargo2nix
```
You must use it every time you modify a `Cargo.toml` or regenerate a `Cargo.lock` file as follows:
```bash
cargo build # Rebuild Cargo.lock if needed
cargo2nix -f
```
It will output a `Cargo.nix` file which is a specific `Cargo.lock` file dedicated to Nix that is required by our CI
which means you must include it in your commits.
Later, to use our scripts and integration tests, you might need additional tools. Later, to use our scripts and integration tests, you might need additional tools.
These tools are listed at the end of the `shell.nix` package in the `nativeBuildInputs` part. These tools are listed at the end of the `shell.nix` package in the `nativeBuildInputs` part.
It is up to you to find a way to install the ones you need on your computer. It is up to you to find a way to install the ones you need on your computer.

View file

@ -3,6 +3,15 @@ title = "Miscellaneous notes"
weight = 20 weight = 20
+++ +++
## Quirks about cargo2nix/rust in Nix
If you use submodules in your crate (like `crdt` and `replication` in `garage_table`), you must list them in `default.nix`
The Windows target does not work. It might be solvable through [overrides](https://github.com/cargo2nix/cargo2nix/blob/master/overlay/overrides.nix). Indeed, we pass `x86_64-pc-windows-gnu` but mingw needs `x86_64-w64-mingw32`.
We have a simple [PR on cargo2nix](https://github.com/cargo2nix/cargo2nix/pull/201) that fixes critical bugs but the project does not seem very active currently. We must use [my patched version of cargo2nix](https://github.com/superboum/cargo2nix) to enable i686 and armv6l compilation. We might need to contribute to cargo2nix in the future.
## Nix ## Nix
Nix has no armv7 + musl toolchains but armv7l is backward compatible with armv6l. Nix has no armv7 + musl toolchains but armv7l is backward compatible with armv6l.

View file

@ -23,7 +23,7 @@ This logic is defined in `nix/build_index.nix`.
For each commit, we first pass the code to a formatter (rustfmt) and a linter (clippy). For each commit, we first pass the code to a formatter (rustfmt) and a linter (clippy).
Then we try to build it in debug mode and run both unit tests and our integration tests. Then we try to build it in debug mode and run both unit tests and our integration tests.
Additionally, when releasing, our integration tests are run on the release build for amd64 and i686. Additionnaly, when releasing, our integration tests are run on the release build for amd64 and i686.
## Generated Artifacts ## Generated Artifacts
@ -32,7 +32,7 @@ We generate the following binary artifacts for now:
- **os**: linux - **os**: linux
- **format**: static binary, docker container - **format**: static binary, docker container
Additionally we also build two web pages and one JSON document: Additionnaly we also build two web pages and one JSON document:
- the documentation (this website) - the documentation (this website)
- [the release page](https://garagehq.deuxfleurs.fr/_releases.html) - [the release page](https://garagehq.deuxfleurs.fr/_releases.html)
- [the release list in JSON format](https://garagehq.deuxfleurs.fr/_releases.json) - [the release list in JSON format](https://garagehq.deuxfleurs.fr/_releases.json)
@ -67,7 +67,7 @@ nix copy --to 's3://nix?endpoint=garage.deuxfleurs.fr&region=garage&secret-key=/
The previous command will only send the built package and not its dependencies. The previous command will only send the built package and not its dependencies.
In the case of our CI pipeline, we want to cache all intermediate build steps In the case of our CI pipeline, we want to cache all intermediate build steps
as well. This can be done using this quite involved command (here as an example as well. This can be done using this quite involved command (here as an example
for the `pkgs.amd64.release` package): for the `pkgs.amd64.relase` package):
```bash ```bash
nix copy -j8 \ nix copy -j8 \
@ -174,3 +174,5 @@ drone sign --save Deuxfleurs/garage
``` ```
Looking at the file, you will see that most of the commands are `nix-shell` and `nix-build` commands with various parameters. Looking at the file, you will see that most of the commands are `nix-shell` and `nix-build` commands with various parameters.

View file

@ -42,7 +42,7 @@ You may pause an ongoing scrub using `garage repair scrub pause`, but note that
the scrub will resume automatically 24 hours later as Garage will not let your the scrub will resume automatically 24 hours later as Garage will not let your
cluster run without a regular scrub. If the scrub procedure is too intensive cluster run without a regular scrub. If the scrub procedure is too intensive
for your servers and is slowing down your workload, the recommended solution for your servers and is slowing down your workload, the recommended solution
is to increase the "scrub tranquility" using `garage worker set scrub-tranquility`. is to increase the "scrub tranquility" using `garage repair scrub set-tranquility`.
A higher tranquility value will make Garage take longer pauses between two block A higher tranquility value will make Garage take longer pauses between two block
verifications. Of course, scrubbing the entire data store will also take longer. verifications. Of course, scrubbing the entire data store will also take longer.

View file

@ -242,7 +242,7 @@ dc3 Tags Partitions Capacity Usable capacity
TOTAL 256 (256 unique) 2.0 GB 1000.0 MB (50.0%) TOTAL 256 (256 unique) 2.0 GB 1000.0 MB (50.0%)
``` ```
As we can see, the node that was moved to `dc3` (node4) is only used at 25% (approximately), As we can see, the node that was moved to `dc3` (node4) is only used at 25% (approximatively),
whereas the node that was already in `dc3` (node3) is used at 75%. whereas the node that was already in `dc3` (node3) is used at 75%.
This can be explained by the following: This can be explained by the following:
@ -260,7 +260,7 @@ This can be explained by the following:
data can be removed to be moved to node1. data can be removed to be moved to node1.
- Garage will move data in equal proportions from all possible sources, in this - Garage will move data in equal proportions from all possible sources, in this
case it means that it will transfer 25% of the entire data set from node3 to case it means that it will tranfer 25% of the entire data set from node3 to
node1 and another 25% from node4 to node1. node1 and another 25% from node4 to node1.
This explains why node3 ends with 75% utilization (100% from before minus 25% This explains why node3 ends with 75% utilization (100% from before minus 25%

View file

@ -40,7 +40,7 @@ First of all, Garage divides the set of all possible block hashes
in a fixed number of slices (currently 1024), and assigns in a fixed number of slices (currently 1024), and assigns
to each slice a primary storage location among the specified data directories. to each slice a primary storage location among the specified data directories.
The number of slices having their primary location in each data directory The number of slices having their primary location in each data directory
is proportional to the capacity specified in the config file. is proportionnal to the capacity specified in the config file.
When Garage receives a block to write, it will always write it in the primary When Garage receives a block to write, it will always write it in the primary
directory of the slice that contains its hash. directory of the slice that contains its hash.

View file

@ -161,7 +161,4 @@ your recovery options are as follows:
- **Option 3: restoring a filesystem-level snapshot.** If you are using ZFS or - **Option 3: restoring a filesystem-level snapshot.** If you are using ZFS or
BTRFS to snapshot your metadata partition, refer to their specific BTRFS to snapshot your metadata partition, refer to their specific
documentation on rolling back or copying files from an old snapshot. documentation on rolling back or copying files from an old snapshot.
Note that, depending on the properties of the filesystem and of the DB engine,
if these snapshots were taken during a write operation to the database, they may
also be corrupted and thus unfit for recovery.

View file

@ -56,7 +56,7 @@ From a high level perspective, a major upgrade looks like this:
10. Enable API access (reverse step 1) 10. Enable API access (reverse step 1)
11. Monitor your cluster while load comes back, check that all your applications are happy with this new version 11. Monitor your cluster while load comes back, check that all your applications are happy with this new version
### Major upgrades with minimal downtime ### Major upgarades with minimal downtime
There is only one operation that has to be coordinated cluster-wide: the switch of one version of the internal RPC protocol to the next. There is only one operation that has to be coordinated cluster-wide: the switch of one version of the internal RPC protocol to the next.
This means that an upgrade with very limited downtime can simply be performed from one major version to the next by restarting all nodes This means that an upgrade with very limited downtime can simply be performed from one major version to the next by restarting all nodes

View file

@ -43,10 +43,12 @@ or if you want a build customized for your system,
you can [build Garage from source](@/documentation/cookbook/from-source.md). you can [build Garage from source](@/documentation/cookbook/from-source.md).
If none of these option work for you, you can also run Garage in a Docker If none of these option work for you, you can also run Garage in a Docker
container. For simplicity, a minimal command to launch Garage using Docker is container. When using Docker, the commands used in this guide will not work
provided in this quick start guide. We recommend reading the tutorial on anymore. We recommend reading the tutorial on [configuring a
[configuring a multi-node cluster](@/documentation/cookbook/real-world.md) to multi-node cluster](@/documentation/cookbook/real-world.md) to learn about
learn about the full Docker workflow for Garage. using Garage as a Docker container. For simplicity, a minimal command to launch
Garage using Docker is provided in this quick start guide as well.
## Configuring and starting Garage ## Configuring and starting Garage
@ -80,6 +82,9 @@ bind_addr = "[::]:3902"
root_domain = ".web.garage.localhost" root_domain = ".web.garage.localhost"
index = "index.html" index = "index.html"
[k2v_api]
api_bind_addr = "[::]:3904"
[admin] [admin]
api_bind_addr = "[::]:3903" api_bind_addr = "[::]:3903"
admin_token = "$(openssl rand -base64 32)" admin_token = "$(openssl rand -base64 32)"
@ -90,13 +95,10 @@ EOF
See the [Configuration file format](https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/) See the [Configuration file format](https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/)
for complete options and values. for complete options and values.
By default, Garage looks for its configuration file in **`/etc/garage.toml`.** Now that your configuration file has been created, you may save it to the directory of your choice.
Since we have written our configuration file in the working directory, we will have to set By default, Garage looks for **`/etc/garage.toml`.**
the following environment variable: You can also store it somewhere else, but you will have to specify `-c path/to/garage.toml`
at each invocation of the `garage` binary (for example: `garage -c ./garage.toml server`, `garage -c ./garage.toml status`).
```bash
export GARAGE_CONFIG_FILE=$(pwd)/garage.toml
```
As you can see, the `rpc_secret` is a 32 bytes hexadecimal string. As you can see, the `rpc_secret` is a 32 bytes hexadecimal string.
You can regenerate it with `openssl rand -hex 32`. You can regenerate it with `openssl rand -hex 32`.
@ -109,36 +111,15 @@ Garage server will not be persistent. Change these to locations on your local di
your data to be persisted properly. your data to be persisted properly.
### Configuring initial access credentials
Since `v2.3.0`, Garage can automatically create a default access key and a default storage bucket,
based on values provided in environment variables.
To use this feature, export the following environment variables:
```bash
export GARAGE_DEFAULT_ACCESS_KEY="GK$(openssl rand -hex 16)"
export GARAGE_DEFAULT_SECRET_KEY="$(openssl rand -hex 32)"
export GARAGE_DEFAULT_BUCKET="default-bucket"
```
The example above creates a random access key ID and associated secret key.
You can also provide an access key ID and secret key of your own.
### Launching the Garage server ### Launching the Garage server
Use the following command to launch the Garage server: Use the following command to launch the Garage server:
```bash ```
garage server --single-node --default-bucket garage -c path/to/garage.toml server
``` ```
The `--single-node` flag instructs Garage to automatically configure a single-node cluster without data replication. If you have placed the `garage.toml` file in `/etc` (its default location), you can simply run `garage server`.
The `--default-bucket` flag instructs Garage to create a default access key and a default bucket using the environment variables we defined above.
Both flags are optional and can be omitted, in which case you will have to follow manual configuration steps described below.
**For older versions of Garage (before v2.3.0):** automatic configuration using `--single-node` and `--default-bucket` is not available,
you must follow the manual configuration steps.
Alternatively, if you cannot or do not wish to run the Garage binary directly, Alternatively, if you cannot or do not wish to run the Garage binary directly,
you may use Docker to run Garage in a container using the following command: you may use Docker to run Garage in a container using the following command:
@ -146,58 +127,21 @@ you may use Docker to run Garage in a container using the following command:
```bash ```bash
docker run \ docker run \
-d \ -d \
--name garage-container \ --name garaged \
-p 3900:3900 -p 3901:3901 -p 3902:3902 -p 3903:3903 \ -p 3900:3900 -p 3901:3901 -p 3902:3902 -p 3903:3903 \
-v $(pwd)/garage.toml:/etc/garage.toml \ -v /path/to/garage.toml:/etc/garage.toml \
-e GARAGE_DEFAULT_ACCESS_KEY \ -v /path/to/garage/meta:/var/lib/garage/meta \
-e GARAGE_DEFAULT_SECRET_KEY \ -v /path/to/garage/data:/var/lib/garage/data \
-e GARAGE_DEFAULT_BUCKET \ dxflrs/garage:v1.3.0
dxflrs/garage:v2.3.0
/garage server --single-node --default-bucket
``` ```
Note that this command will NOT create persistent volumes for Garage's data, so Under Linux, you can substitute `--network host` for `-p 3900:3900 -p 3901:3901 -p 3902:3902 -p 3903:3903`
your cluster will be wiped if the container terminates. To persist Garage's
data, you must manually add volumes for the `data` and `metadata` directories
and configure their correct paths in your `garage.toml` files (see [configuring
a multi-node cluster](@/documentation/cookbook/real-world.md)).
Under Linux, you can substitute `--network host` for `-p 3900:3900 -p 3901:3901 -p 3902:3902 -p 3903:3903`. #### Troubleshooting
### Checking that Garage runs correctly
The `garage` utility is also used as a CLI tool to administrate your Garage
deployment. It needs read access to your configuration file and to the metadata directory
to obtain connection parameters to contact the local Garage node.
Use the following command to show the status of your cluster:
```
garage status
```
If you are running Garage in a Docker container, you can use the following command instead:
```bash
docker exec garage-container /garage status
```
This should show something like this:
```
==== HEALTHY NODES ====
ID Hostname Address Tags Zone Capacity DataAvail Version
563e1ac825ee3323 linuxbox 127.0.0.1:3901 [default] dc1 19.9 GiB 19.5 GiB (97.6%) v2.3.0
```
### Troubleshooting
Ensure your configuration file, `metadata_dir` and `data_dir` are readable by the user running the `garage` server or Docker. Ensure your configuration file, `metadata_dir` and `data_dir` are readable by the user running the `garage` server or Docker.
When running the `garage` CLI, ensure that the path to your configuration file is correctly specified (see below), You can tune Garage's verbosity by setting the `RUST_LOG=` environment variable. \
and that it can read it and read from your metadata directory.
You can tune Garage's verbosity by setting the `RUST_LOG=` environment variable.
Available log levels are (from less verbose to more verbose): `error`, `warn`, `info` *(default)*, `debug` and `trace`. Available log levels are (from less verbose to more verbose): `error`, `warn`, `info` *(default)*, `debug` and `trace`.
```bash ```bash
@ -210,135 +154,36 @@ Log level `info` is the default value and is recommended for most use cases.
Log level `debug` can help you check why your S3 API calls are not working. Log level `debug` can help you check why your S3 API calls are not working.
### Checking that Garage runs correctly
## Uploading and downloading from Garage The `garage` utility is also used as a CLI tool to configure your Garage deployment.
It uses values from the TOML configuration file to find the Garage daemon running on the
local node, therefore if your configuration file is not at `/etc/garage.toml` you will
again have to specify `-c path/to/garage.toml` at each invocation.
This section will show how to download and upload files on Garage using a third-party tool named `awscli`. If you are running Garage in a Docker container, you can set `alias garage="docker exec -ti <container name> /garage"`
to use the Garage binary inside your container.
If the `garage` CLI is able to correctly detect the parameters of your local Garage node,
### Install and configure `awscli` the following command should be enough to show the status of your cluster:
If you have python on your system, you can install it with:
```bash
python -m pip install --user awscli
```
Now that `awscli` is installed, you must configure it to talk to your Garage
instance using the credentials defined above. Here is a simple way to create
a configuration file in `~/.awsrc` using a single command that will save the
secrets from your environment:
```bash
cat > ~/.awsrc <<EOF
export AWS_ENDPOINT_URL='http://localhost:3900'
export AWS_DEFAULT_REGION='garage'
export AWS_ACCESS_KEY_ID='$GARAGE_DEFAULT_ACCESS_KEY'
export AWS_SECRET_ACCESS_KEY='$GARAGE_DEFAULT_SECRET_KEY'
aws --version
EOF
``` ```
garage status
Note that you need to have at least `awscli` `>=1.29.0` or `>=2.13.0`, otherwise you
need to specify `--endpoint-url` explicitly on each `awscli` invocation.
Now, each time you want to use `awscli` on this target, run:
```bash
source ~/.awsrc
``` ```
*You can create multiple files with different names if you This should show something like this:
have multiple Garage clusters or different keys.
Switching from one cluster to another is as simple as
sourcing the right file.*
### Example usage of `awscli`
```bash
# list buckets
aws s3 ls
# list objects of a bucket
aws s3 ls s3://default-bucket
# copy from your filesystem to garage
aws s3 cp /proc/cpuinfo s3://default-bucket/cpuinfo.txt
# copy from garage to your filesystem
aws s3 cp s3://default-bucket/cpuinfo.txt /tmp/cpuinfo.txt
```
Note that you can use `awscli` for more advanced operations like
creating a bucket, pre-signing a request or managing your website.
[Read the full documentation to know more](https://awscli.amazonaws.com/v2/documentation/api/latest/reference/s3/index.html).
Some features are however not implemented like ACL or policy.
Check [our S3 compatibility list](@/documentation/reference-manual/s3-compatibility.md).
### Other tools for interacting with Garage
The following tools can also be used to send and receive files from/to Garage:
- [minio-client](@/documentation/connect/cli.md#minio-client)
- [s3cmd](@/documentation/connect/cli.md#s3cmd)
- [rclone](@/documentation/connect/cli.md#rclone)
- [Cyberduck](@/documentation/connect/cli.md#cyberduck)
- [WinSCP](@/documentation/connect/cli.md#winscp)
An exhaustive list is maintained in the ["Integrations" > "Browsing tools" section](@/documentation/connect/_index.md).
## Manual configuration
This section provides instructions that are equivalent to using the
`--single-node` and `--default-bucket` flags for automatic configuration. If
you are using an older version of Garage (before v2.3.0), you must follow
these instructions as automatic configuration is not available.
We will have to run quite a few `garage` administration commands to get started.
If you ever get lost, don't forget that the `help` command and the `--help` flags can help you anywhere,
the CLI tool is self-documented! Two examples:
```
garage help
garage bucket allow --help
```
### Configuring the `garage` CLI
Remember that the `garage` CLI needs to know the path of your `garage.toml` configuration file.
If it is not in the default location of `/etc/garage.toml`, you can specify it either:
- by setting the `GARAGE_CONFIG_FILE` environment variable;
- by adding the `-c` flag to each `garage` command, for example: `garage -c ./garage.toml status`.
If you are running Garage in a Docker container, you can set the following alias
to provide a fake `garage` command that uses the Garage binary inside your container:
```bash
alias garage="docker exec -ti <container name> /garage"
```
You can test that your `garage` CLI is configured correctly by running a basic command such as `garage status`.
### Creating a cluster layout
When you first start a cluster without automatic configuration, the output of `garage status` will look as follows:
``` ```
==== HEALTHY NODES ==== ==== HEALTHY NODES ====
ID Hostname Address Tags Zone Capacity DataAvail Version ID Hostname Address Tag Zone Capacity
563e1ac825ee3323 linuxbox 127.0.0.1:3901 NO ROLE ASSIGNED v2.3.0 563e1ac825ee3323 linuxbox 127.0.0.1:3901 NO ROLE ASSIGNED
``` ```
Creating a cluster layout for a Garage deployment means informing Garage of the ## Creating a cluster layout
disk space available on each node of the cluster using the `-c` flag, as well
as the name of the zone (e.g. datacenter) each machine is located in using the Creating a cluster layout for a Garage deployment means informing Garage
`-z` flag. of the disk space available on each node of the cluster, `-c`,
as well as the name of the zone (e.g. datacenter), `-z`, each machine is located in.
For our test deployment, we have only one node with zone named `dc1` and a For our test deployment, we have only one node with zone named `dc1` and a
capacity of `1G`, though the capacity is ignored for a single node deployment capacity of `1G`, though the capacity is ignored for a single node deployment
@ -359,29 +204,38 @@ garage layout apply --version 1
``` ```
### Creating buckets and keys ## Creating buckets and keys
In this section, we will suppose that we want to create a bucket named `nextcloud-bucket`
that will be accessed through a key named `nextcloud-app-key`.
Don't forget that `help` command and `--help` subcommands can help you anywhere,
the CLI tool is self-documented! Two examples:
```
garage help
garage bucket allow --help
```
### Create a bucket
Let's take an example where we want to deploy NextCloud using Garage as the Let's take an example where we want to deploy NextCloud using Garage as the
main data storage. We will suppose that we want to create a bucket named main data storage.
`nextcloud-bucket` that will be accessed through a key named
`nextcloud-app-key`.
#### Create a bucket First, create a bucket with the following command:
First, create the bucket with the following command:
``` ```
garage bucket create nextcloud-bucket garage bucket create nextcloud-bucket
``` ```
Check that the bucket was created properly: Check that everything went well:
``` ```
garage bucket list garage bucket list
garage bucket info nextcloud-bucket garage bucket info nextcloud-bucket
``` ```
#### Create an API key ### Create an API key
The `nextcloud-bucket` bucket now exists on the Garage server, The `nextcloud-bucket` bucket now exists on the Garage server,
however it cannot be accessed until we add an API key with the proper access rights. however it cannot be accessed until we add an API key with the proper access rights.
@ -404,14 +258,14 @@ Secret key: 7d37d093435a41f2aab8f13c19ba067d9776c90215f56614adad6ece597dbb34
Authorized buckets: Authorized buckets:
``` ```
Check that the key was created properly: Check that everything works as intended:
``` ```
garage key list garage key list
garage key info nextcloud-app-key garage key info nextcloud-app-key
``` ```
#### Allow a key to access a bucket ### Allow a key to access a bucket
Now that we have a bucket and a key, we need to give permissions to the key on the bucket: Now that we have a bucket and a key, we need to give permissions to the key on the bucket:
@ -430,5 +284,78 @@ You can check at any time the allowed keys on your bucket with:
garage bucket info nextcloud-bucket garage bucket info nextcloud-bucket
``` ```
You should now be able to read and write objects to the bucket using the
credentials created above. ## Uploading and downloading from Garage
To download and upload files on garage, we can use a third-party tool named `awscli`.
### Install and configure `awscli`
If you have python on your system, you can install it with:
```bash
python -m pip install --user awscli
```
Now that `awscli` is installed, you must configure it to talk to your Garage instance,
with your key. There are multiple ways to do that, the simplest one is to create a file
named `~/.awsrc` with this content:
```bash
export AWS_ACCESS_KEY_ID=xxxx # put your Key ID here
export AWS_SECRET_ACCESS_KEY=xxxx # put your Secret key here
export AWS_DEFAULT_REGION='garage'
export AWS_ENDPOINT_URL='http://localhost:3900'
aws --version
```
Note you need to have at least `awscli` `>=1.29.0` or `>=2.13.0`, otherwise you
need to specify `--endpoint-url` explicitly on each `awscli` invocation.
Now, each time you want to use `awscli` on this target, run:
```bash
source ~/.awsrc
```
*You can create multiple files with different names if you
have multiple Garage clusters or different keys.
Switching from one cluster to another is as simple as
sourcing the right file.*
### Example usage of `awscli`
```bash
# list buckets
aws s3 ls
# list objects of a bucket
aws s3 ls s3://nextcloud-bucket
# copy from your filesystem to garage
aws s3 cp /proc/cpuinfo s3://nextcloud-bucket/cpuinfo.txt
# copy from garage to your filesystem
aws s3 cp s3://nextcloud-bucket/cpuinfo.txt /tmp/cpuinfo.txt
```
Note that you can use `awscli` for more advanced operations like
creating a bucket, pre-signing a request or managing your website.
[Read the full documentation to know more](https://awscli.amazonaws.com/v2/documentation/api/latest/reference/s3/index.html).
Some features are however not implemented like ACL or policy.
Check [our s3 compatibility list](@/documentation/reference-manual/s3-compatibility.md).
### Other tools for interacting with Garage
The following tools can also be used to send and receive files from/to Garage:
- [minio-client](@/documentation/connect/cli.md#minio-client)
- [s3cmd](@/documentation/connect/cli.md#s3cmd)
- [rclone](@/documentation/connect/cli.md#rclone)
- [Cyberduck](@/documentation/connect/cli.md#cyberduck)
- [WinSCP](@/documentation/connect/cli.md#winscp)
An exhaustive list is maintained in the ["Integrations" > "Browsing tools" section](@/documentation/connect/_index.md).

View file

@ -6,167 +6,41 @@ weight = 40
The Garage administration API is accessible through a dedicated server whose The Garage administration API is accessible through a dedicated server whose
listen address is specified in the `[admin]` section of the configuration listen address is specified in the `[admin]` section of the configuration
file (see [configuration file file (see [configuration file
reference](@/documentation/reference-manual/configuration.md)). reference](@/documentation/reference-manual/configuration.md))
The current version of the admin API is v2. No breaking changes to the Garage **WARNING.** At this point, there is no commitment to the stability of the APIs described in this document.
administration API will be published outside of a major release. We will bump the version numbers prefixed to each API endpoint each time the syntax
or semantics change, meaning that code that relies on these endpoint will break
when changes are introduced.
Versions:
- Before Garage 0.7.2 - no admin API
- Garage 0.7.2 - admin APIv0
- Garage 0.9.0 - admin APIv1, deprecate admin APIv0
History of previous versions:
- Before Garage v0.7.2 - no admin API
- Garage v0.7.2 - admin API v0
- Garage v0.9.0 - admin API v1, deprecate admin API v0
- Garage v2.0.0 - admin API v2, deprecate admin API v1
## Access control ## Access control
### Using an API token The admin API uses two different tokens for access control, that are specified in the config file's `[admin]` section:
Administration API tokens are used as simple HTTP bearer tokens. In - `metrics_token`: the token for accessing the Metrics endpoint (if this token
other words, to authenticate access to an admin API endpoint, add the following is not set in the config file, the Metrics endpoint can be accessed without
HTTP header to your request: access control);
- `admin_token`: the token for accessing all of the other administration
endpoints (if this token is not set in the config file, access to these
endpoints is disabled entirely).
These tokens are used as simple HTTP bearer tokens. In other words, to
authenticate access to an admin API endpoint, add the following HTTP header
to your request:
``` ```
Authorization: Bearer <token> Authorization: Bearer <token>
``` ```
### User-defined API tokens ## Administration API endpoints
Cluster administrators may dynamically define administration tokens using the CLI commands under `garage admin-token`.
Such tokens may be limited in scope, meaning that they may enable access to only a subset of API calls.
They may also have an expiration date to limit their use in time.
Here is an example to create an administration token that is valid for 30 days
and gives access to only a subset of API calls, allowing it to create buckets
and access keys and give keys permissions on buckets:
```bash
$ garage admin-token create --expires-in 30d \
--scope ListBuckets,GetBucketInfo,ListKeys,GetKeyInfo,CreateBucket,CreateKey,AllowBucketKey,DenyBucketKey \
my-token
This is your secret bearer token, it will not be shown again by Garage:
8ed1830b10a276ff57061950.kOSIpxWK9zSGbTO9Xadpv3YndSFWma0_snXcYHaORXk
==== ADMINISTRATION TOKEN INFORMATION ====
Token ID: 8ed1830b10a276ff57061950
Token name: my-token
Created: 2025-06-15 15:12:44.160 +02:00
Validity: valid
Expiration: 2025-07-15 15:12:44.117 +02:00
Scope: ListBuckets
GetBucketInfo
ListKeys
GetKeyInfo
CreateBucket
CreateKey
AllowBucketKey
DenyBucketKey
```
When running this command, your token will be shown only once and **will never
be shown again by Garage**, so make sure to save it directly. The token is
hashed internally, and is identified by its prefix (32 hex digits followed by a
dot) which is saved in clear.
When running `garage admin-token list`, you might see something like this:
```
ID Created Name Expiration Scope
- - metrics_token (from daemon configuration) never Metrics
8ed1830b10a276ff57061950 2025-06-15 my-token 2025-07-15 15:12:44.117 +02:00 ListBuckets, ... (8)
```
### Master API tokens
The admin API can also use two different master tokens for access control,
specified in the config file's `[admin]` section:
- `metrics_token`: the token for accessing the Metrics endpoint. If this token
is not set in the config file, the Metrics endpoint can be accessed without
access control.
- `admin_token`: the token for accessing all of the other administration
endpoints. If this token is not set in the config file, access to these
endpoints is only possible with a user-defined admin token.
With the introduction of multiple user-defined admin tokens, the use of master
API tokens is now discouraged.
## Using the admin API
All of the admin API endpoints are described in the OpenAPI specification:
- APIv2 - [HTML spec](https://garagehq.deuxfleurs.fr/api/garage-admin-v2.html) - [OpenAPI JSON](https://garagehq.deuxfleurs.fr/api/garage-admin-v2.json)
- APIv1 (deprecated) - [HTML spec](https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html) - [OpenAPI YAML](https://garagehq.deuxfleurs.fr/api/garage-admin-v1.yml)
- APIv0 (deprecated) - [HTML spec](https://garagehq.deuxfleurs.fr/api/garage-admin-v0.html) - [OpenAPI YAML](https://garagehq.deuxfleurs.fr/api/garage-admin-v0.yml)
Making a request to the API from the command line can be as simple as running:
```bash
curl -H 'Authorization: Bearer s3cr3t' http://localhost:3903/v2/GetClusterStatus | jq
```
For more advanced use cases, we recommend using an SDK.
[Go to the "Build your own app" section to know how to use our SDKs](@/documentation/build/_index.md)
### Making API calls from the `garage` CLI
Since v2.0.0, the `garage` binary provides a subcommand `garage json-api` that
allows you to invoke the API without making an HTTP request. This can be
useful for scripting Garage deployments.
`garage json-api` proxies API calls through Garage's internal RPC protocol,
therefore it does not require any form of authentication: RPC connection
parameters are discovered automatically to contact the locally-running Garage
instance (as when running any other `garage` CLI command).
For simple calls that take no parameters, usage is as follows:
```
$ garage json-api GetClusterHealth
{
"connectedNodes": 3,
"knownNodes": 3,
"partitions": 256,
"partitionsAllOk": 256,
"partitionsQuorum": 256,
"status": "healthy",
"storageNodes": 3,
"storageNodesOk": 3
}
```
If you need to specify a JSON body for your call, you can add it directly after
the name of the function you are calling:
```
$ garage json-api CreateAdminToken '{"name": "test"}'
```
Or you can feed it through stdin by adding a `-` as the last command parameter:
```
$ garage json-api CreateAdminToken -
{"name": "test"}
<EOF>
```
For admin API calls that would have taken query parameters in their HTTP version, these parameters can be passed in the JSON body object:
```
$ garage json-api GetAdminTokenInfo '{"id":"b0e6e0ace2c0b2aca4cdb2de"}'
```
For admin API calls that take both query parameters and a JSON body, combine them in the following fashion:
```
$ garage json-api UpdateAdminToken '{"id":"b0e6e0ace2c0b2aca4cdb2de", "body":{"name":"not a test"}}'
```
## Special administration API endpoints
### Metrics `GET /metrics` ### Metrics `GET /metrics`
@ -209,7 +83,7 @@ content-length: 102
date: Tue, 08 Aug 2023 07:22:38 GMT date: Tue, 08 Aug 2023 07:22:38 GMT
Garage is fully operational Garage is fully operational
Consult the full health check API endpoint at /v2/GetClusterHealth for more details Consult the full health check API endpoint at /v0/health for more details
``` ```
### On-demand TLS `GET /check` ### On-demand TLS `GET /check`
@ -252,7 +126,23 @@ $ curl -so /dev/null -w "%{http_code}" http://localhost:3903/check?domain=exampl
200 200
``` ```
**References:** **References:**
- [Using On-Demand TLS](https://caddyserver.com/docs/automatic-https#using-on-demand-tls) - [Using On-Demand TLS](https://caddyserver.com/docs/automatic-https#using-on-demand-tls)
- [Add option for a backend check to approve use of on-demand TLS](https://github.com/caddyserver/caddy/pull/1939) - [Add option for a backend check to approve use of on-demand TLS](https://github.com/caddyserver/caddy/pull/1939)
- [Serving tens of thousands of domains over HTTPS with Caddy](https://caddy.community/t/serving-tens-of-thousands-of-domains-over-https-with-caddy/11179) - [Serving tens of thousands of domains over HTTPS with Caddy](https://caddy.community/t/serving-tens-of-thousands-of-domains-over-https-with-caddy/11179)
### Cluster operations
These endpoints have a dedicated OpenAPI spec.
- APIv1 - [HTML spec](https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html) - [OpenAPI YAML](https://garagehq.deuxfleurs.fr/api/garage-admin-v1.yml)
- APIv0 (deprecated) - [HTML spec](https://garagehq.deuxfleurs.fr/api/garage-admin-v0.html) - [OpenAPI YAML](https://garagehq.deuxfleurs.fr/api/garage-admin-v0.yml)
Requesting the API from the command line can be as simple as running:
```bash
curl -H 'Authorization: Bearer s3cr3t' http://localhost:3903/v0/status | jq
```
For more advanced use cases, we recommend using an SDK. For more advanced use cases, we recommend using an SDK.
[Go to the "Build your own app" section to know how to use our SDKs](@/documentation/build/_index.md)

View file

@ -51,21 +51,17 @@ allow_punycode = false
[consul_discovery] [consul_discovery]
api = "catalog" api = "catalog"
consul_http_addr = "https://127.0.0.1:8500" consul_http_addr = "http://127.0.0.1:8500"
tls_skip_verify = false
service_name = "garage-daemon" service_name = "garage-daemon"
ca_cert = "/etc/consul/consul-ca.crt" ca_cert = "/etc/consul/consul-ca.crt"
# for `agent` API mode, unset client_cert and client_key:
client_cert = "/etc/consul/consul-client.crt" client_cert = "/etc/consul/consul-client.crt"
client_key = "/etc/consul/consul-key.crt" client_key = "/etc/consul/consul-key.crt"
# for `agent` API mode, unset client_cert and client_key, and optionally enable `token`
# optionally enable `token` for authentication:
# token = "abcdef-01234-56789" # token = "abcdef-01234-56789"
tls_skip_verify = false
tags = [ "dns-enabled" ] tags = [ "dns-enabled" ]
meta = { dns-acl = "allow trusted" } meta = { dns-acl = "allow trusted" }
datacenters = ["dc1", "dc2", "dc3"]
[kubernetes_discovery] [kubernetes_discovery]
namespace = "garage" namespace = "garage"
@ -86,7 +82,6 @@ add_host_to_metrics = true
[admin] [admin]
api_bind_addr = "0.0.0.0:3903" api_bind_addr = "0.0.0.0:3903"
metrics_token = "BCAdFjoa9G0KJR0WXnHHm7fs1ZAbfpI8iIZ+Z/a2NgI=" metrics_token = "BCAdFjoa9G0KJR0WXnHHm7fs1ZAbfpI8iIZ+Z/a2NgI="
metrics_require_token = true
admin_token = "UkLeGWEvHnXBqnueR3ISEMWpOnm40jH2tM2HnnL/0F4=" admin_token = "UkLeGWEvHnXBqnueR3ISEMWpOnm40jH2tM2HnnL/0F4="
trace_sink = "http://localhost:4317" trace_sink = "http://localhost:4317"
``` ```
@ -102,9 +97,9 @@ The following gives details about each available configuration option.
Top-level configuration options, in alphabetical order: Top-level configuration options, in alphabetical order:
[`allow_punycode`](#allow_punycode), [`allow_punycode`](#allow_punycode),
[`allow_world_readable_secrets`](#allow_world_readable_secrets), [`allow_world_readable_secrets`](#allow_world_readable_secrets),
[`block_max_concurrent_reads`](#block_max_concurrent_reads), [`block_max_concurrent_reads`](`block_max_concurrent_reads),
[`block_max_concurrent_writes_per_request`](#block_max_concurrent_writes_per_request),
[`block_ram_buffer_max`](#block_ram_buffer_max), [`block_ram_buffer_max`](#block_ram_buffer_max),
[`block_max_concurrent_writes_per_request`](#block_max_concurrent_writes_per_request),
[`block_size`](#block_size), [`block_size`](#block_size),
[`bootstrap_peers`](#bootstrap_peers), [`bootstrap_peers`](#bootstrap_peers),
[`compression_level`](#compression_level), [`compression_level`](#compression_level),
@ -132,14 +127,12 @@ The `[consul_discovery]` section:
[`client_cert`](#consul_client_cert_and_key), [`client_cert`](#consul_client_cert_and_key),
[`client_key`](#consul_client_cert_and_key), [`client_key`](#consul_client_cert_and_key),
[`consul_http_addr`](#consul_http_addr), [`consul_http_addr`](#consul_http_addr),
[`datacenters`](#consul_datacenters)
[`meta`](#consul_tags_and_meta), [`meta`](#consul_tags_and_meta),
[`service_name`](#consul_service_name), [`service_name`](#consul_service_name),
[`tags`](#consul_tags_and_meta), [`tags`](#consul_tags_and_meta),
[`tls_skip_verify`](#consul_tls_skip_verify), [`tls_skip_verify`](#consul_tls_skip_verify),
[`token`](#consul_token). [`token`](#consul_token).
The `[kubernetes_discovery]` section: The `[kubernetes_discovery]` section:
[`namespace`](#kube_namespace), [`namespace`](#kube_namespace),
[`service_name`](#kube_service_name), [`service_name`](#kube_service_name),
@ -157,7 +150,6 @@ The `[s3_web]` section:
The `[admin]` section: The `[admin]` section:
[`api_bind_addr`](#admin_api_bind_addr), [`api_bind_addr`](#admin_api_bind_addr),
[`metrics_require_token`](#admin_metrics_require_token),
[`metrics_token`/`metrics_token_file`](#admin_metrics_token), [`metrics_token`/`metrics_token_file`](#admin_metrics_token),
[`admin_token`/`admin_token_file`](#admin_token), [`admin_token`/`admin_token_file`](#admin_token),
[`trace_sink`](#admin_trace_sink), [`trace_sink`](#admin_trace_sink),
@ -344,7 +336,7 @@ Since `v0.8.0`, Garage can use alternative storage backends as follows:
| --------- | ----------------- | ------------- | | --------- | ----------------- | ------------- |
| [LMDB](https://www.symas.com/lmdb) (since `v0.8.0`, default since `v0.9.0`) | `"lmdb"` | `<metadata_dir>/db.lmdb/` | | [LMDB](https://www.symas.com/lmdb) (since `v0.8.0`, default since `v0.9.0`) | `"lmdb"` | `<metadata_dir>/db.lmdb/` |
| [Sqlite](https://sqlite.org) (since `v0.8.0`) | `"sqlite"` | `<metadata_dir>/db.sqlite` | | [Sqlite](https://sqlite.org) (since `v0.8.0`) | `"sqlite"` | `<metadata_dir>/db.sqlite` |
| [Fjall](https://github.com/fjall-rs/fjall) (**experimental support** since `v1.3.0`/`v2.1.0`) | `"fjall"` | `<metadata_dir>/db.fjall/` | | [Fjall](https://github.com/fjall-rs/fjall) (**experimental support** since `v1.3.0`) | `"fjall"` | `<metadata_dir>/db.fjall/` |
| [Sled](https://sled.rs) (old default, removed since `v1.0`) | `"sled"` | `<metadata_dir>/db/` | | [Sled](https://sled.rs) (old default, removed since `v1.0`) | `"sled"` | `<metadata_dir>/db/` |
Sled was supported until Garage v0.9.x, and was removed in Garage v1.0. Sled was supported until Garage v0.9.x, and was removed in Garage v1.0.
@ -353,16 +345,8 @@ old Sled metadata databases to another engine.
Performance characteristics of the different DB engines are as follows: Performance characteristics of the different DB engines are as follows:
- **LMDB:** the recommended database engine for high-performance distributed clusters - LMDB: the recommended database engine for high-performance distributed clusters.
with `replication_factor` ≥ 2. LMDB works very well, but is known to have the following limitations:
LMDB works well, but is known to have the following limitations:
- LMDB is prone to database corruption after an unclean shutdown (e.g. a process kill
or a power outage). It is recommended to configure
[`metadata_auto_snapshot_interval`](#metadata_auto_snapshot_interval) to be
able to easily recover from this situation. With `replication_factor` ≥ 2,
metadata can also be reconstructed from remote nodes upon corruption
(see [Recovering from failures](@/documentation/operations/recovering.md#corrupted_meta)).
- The data format of LMDB is not portable between architectures, so for - The data format of LMDB is not portable between architectures, so for
instance the Garage database of an x86-64 node cannot be moved to an ARM64 instance the Garage database of an x86-64 node cannot be moved to an ARM64
@ -372,21 +356,30 @@ Performance characteristics of the different DB engines are as follows:
node to very small database sizes due to how LMDB works; it is therefore node to very small database sizes due to how LMDB works; it is therefore
not recommended. not recommended.
- Several users have reported corrupted LMDB database files after an unclean
shutdown (e.g. a power outage). This situation can generally be recovered
from if your cluster is geo-replicated (by rebuilding your metadata db from
other nodes), or if you have saved regular snapshots at the filesystem
level.
- Keys in LMDB are limited to 511 bytes. This limit translates to limits on - Keys in LMDB are limited to 511 bytes. This limit translates to limits on
object keys in S3 and sort keys in K2V that are limited to 479 bytes. object keys in S3 and sort keys in K2V that are limited to 479 bytes.
- **Sqlite:** Garage supports Sqlite as an alternative storage backend for - Sqlite: Garage supports Sqlite as an alternative storage backend for
metadata, which does not have the issues listed above for LMDB. Sqlite is metadata, which does not have the issues listed above for LMDB.
slower than LMDB, so it is not the best choice for high-performance storage On versions 0.8.x and earlier, Sqlite should be avoided due to abysmal
clusters. performance, which was fixed with the addition of `metadata_fsync`.
Sqlite is still probably slower than LMDB due to the way we use it,
so it is not the best choice for high-performance storage clusters,
but it should work fine in many cases.
- **Fjall:** a storage engine based on LSM trees, which theoretically allow for - Fjall: a storage engine based on LSM trees, which theoretically allow for
higher write throughput than other storage engines that are based on B-trees. higher write throughput than other storage engines that are based on B-trees.
Using Fjall could potentially improve Garage's performance significantly in Using Fjall could potentially improve Garage's performance significantly in
write-heavy workloads. **Support for Fjall is experimental at this point**, write-heavy workloads. **Support for Fjall is experimental at this point**,
we have added it to Garage for evaluation purposes only. **Use it only with we have added it to Garage for evaluation purposes only. **Do not use it for
test data, and report any issues to our bug tracker. Do not use it for production-critical workloads.**
production workloads.**
It is possible to convert Garage's metadata directory from one format to another It is possible to convert Garage's metadata directory from one format to another
using the `garage convert-db` command, which should be used as follows: using the `garage convert-db` command, which should be used as follows:
@ -397,7 +390,7 @@ garage convert-db -a <input db engine> -i <input db path> \
``` ```
Make sure to specify the full database path as presented in the table above Make sure to specify the full database path as presented in the table above
(third column), and not just the path to the metadata directory. (third column), and not just the path to the metadata directory.
#### `metadata_fsync` {#metadata_fsync} #### `metadata_fsync` {#metadata_fsync}
@ -439,14 +432,13 @@ This might reduce the risk that a data block is lost in rare
situations such as simultaneous node losing power, situations such as simultaneous node losing power,
at the cost of a moderate drop in write performance. at the cost of a moderate drop in write performance.
Similarly to `metadata_fsync`, this is likely not necessary Similarly to `metadata_fsync`, this is likely not necessary
if geographical replication is used. if geographical replication is used.
#### `metadata_auto_snapshot_interval` (since `v0.9.4`) {#metadata_auto_snapshot_interval} #### `metadata_auto_snapshot_interval` (since `v0.9.4`) {#metadata_auto_snapshot_interval}
If this value is set, Garage will automatically take a snapshot of the metadata If this value is set, Garage will automatically take a snapshot of the metadata
DB file at a regular interval and save it in the metadata directory, DB file at a regular interval and save it in the metadata directory.
or in [`metadata_snapshots_dir`](#metadata_snapshots_dir) if it is set.
This parameter can take any duration string that can be parsed by This parameter can take any duration string that can be parsed by
the [`parse_duration`](https://docs.rs/parse_duration/latest/parse_duration/#syntax) crate. the [`parse_duration`](https://docs.rs/parse_duration/latest/parse_duration/#syntax) crate.
@ -455,19 +447,14 @@ corrupted, for instance after an unclean shutdown. See [this
page](@/documentation/operations/recovering.md#corrupted_meta) for details. page](@/documentation/operations/recovering.md#corrupted_meta) for details.
Garage keeps only the two most recent snapshots of the metadata DB and deletes Garage keeps only the two most recent snapshots of the metadata DB and deletes
older ones automatically. older ones automatically.
You can also create metadata snapshots manually at any point using the
`garage meta snapshot` command.
Using snapshots created by Garage is the best option to make snapshots of your
node's metadata for potential recovery, as they are guaranteed to be clean and
consistent, contrarily to filesystem-level snapshots that may be taken while
some writes are in-flight and thus might be corrupted.
Note that taking a metadata snapshot is a relatively intensive operation as the Note that taking a metadata snapshot is a relatively intensive operation as the
entire data file is copied. A snapshot being taken might have performance entire data file is copied. A snapshot being taken might have performance
impacts on the Garage node while it is running. If the cluster is under heavy impacts on the Garage node while it is running. If the cluster is under heavy
write load when a snapshot operation is running, this might also cause the write load when a snapshot operation is running, this might also cause the
database file to grow in size significantly as pages cannot be recycled easily. database file to grow in size significantly as pages cannot be recycled easily.
For this reason, it might be better to use filesystem-level snapshots instead
if possible.
#### `disable_scrub` {#disable_scrub} #### `disable_scrub` {#disable_scrub}
@ -555,19 +542,19 @@ awaits for one of the `block_max_concurrent_reads` slots to be available
slot, it reads the entire block file to RAM and frees the slot as soon as the slot, it reads the entire block file to RAM and frees the slot as soon as the
block file is finished reading. Only after the slot is released will the block file is finished reading. Only after the slot is released will the
block's data start being transferred over the network. If the request fails to block's data start being transferred over the network. If the request fails to
acquire a reading slot within 15 seconds, it fails with a timeout error. acquire a reading slot wihtin 15 seconds, it fails with a timeout error.
Timeout events can be monitored through the `block_read_semaphore_timeouts` Timeout events can be monitored through the `block_read_semaphore_timeouts`
metric in Prometheus: a non-zero number of such events indicates an I/O metric in Prometheus: a non-zero number of such events indicates an I/O
bottleneck on HDD read speed. bottleneck on HDD read speed.
#### `block_max_concurrent_writes_per_request` (since `v1.3.1` / `v2.2.0`) {#block_max_concurrent_writes_per_request} #### `block_max_concurrent_writes_per_request` (since `v2.1.0`) {#block_max_concurrent_writes_per_request}
This parameter is designed to adapt to the concurrent write performance of This parameter is designed to adapt to the concurrent write performance of
different storage media. Maximum number of parallel block writes per put request. different storage media.Maximum number of parallel block writes per put request
Higher values may improve throughput but increase memory usage. Higher values improve throughput but increase memory usage.
Default value: 3. Recommended values: 10-30 for NVMe, 3-10 for spinning HDD. Default: 3, Recommended: 10-30 for NVMe, 3-10 for HDD
#### `lmdb_map_size` {#lmdb_map_size} #### `lmdb_map_size` {#lmdb_map_size}
@ -618,11 +605,11 @@ storing the secret as the `GARAGE_RPC_SECRET_FILE` environment variable.
#### `rpc_bind_addr` {#rpc_bind_addr} #### `rpc_bind_addr` {#rpc_bind_addr}
The address and port on which to bind for inter-cluster communications The address and port on which to bind for inter-cluster communcations
(referred to as RPC for remote procedure calls). (reffered to as RPC for remote procedure calls).
The port specified here should be the same one that other nodes will use to contact The port specified here should be the same one that other nodes will use to contact
the node, even in the case of a NAT: the NAT should be configured to forward the external the node, even in the case of a NAT: the NAT should be configured to forward the external
port number to the same internal port number. This means that if you have several nodes running port number to the same internal port nubmer. This means that if you have several nodes running
behind a NAT, they should each use a different RPC port number. behind a NAT, they should each use a different RPC port number.
#### `rpc_bind_outgoing` (since `v0.9.2`) {#rpc_bind_outgoing} #### `rpc_bind_outgoing` (since `v0.9.2`) {#rpc_bind_outgoing}
@ -741,18 +728,6 @@ node_prefix "" {
} }
``` ```
#### `datacenters` {#consul_datacenters}
Optional list of datacenters that allow garage to do service discovery when Consul is configured in WAN federation.
Example: `datacenters = ["dc1", "dc2", "dc3"]`
In a WAN configuration, by default the Consul services API only responds with
local LAN services. When a list of datacenters is specified using this option,
Garage will query the consul server API by datacenter directly, allowing for
Garage to discover nodes across the Consul WAN.
#### `tags` and `meta` {#consul_tags_and_meta} #### `tags` and `meta` {#consul_tags_and_meta}
Additional list of tags and map of service meta to add during service registration. Additional list of tags and map of service meta to add during service registration.
@ -785,14 +760,14 @@ manually.
#### `api_bind_addr` {#s3_api_bind_addr} #### `api_bind_addr` {#s3_api_bind_addr}
The IP and port on which to bind for accepting S3 API calls. The IP and port on which to bind for accepting S3 API calls.
This endpoint does not support TLS: a reverse proxy should be used to provide it. This endpoint does not suport TLS: a reverse proxy should be used to provide it.
Alternatively, since `v0.8.5`, a path can be used to create a unix socket with 0222 mode. Alternatively, since `v0.8.5`, a path can be used to create a unix socket with 0222 mode.
#### `s3_region` {#s3_region} #### `s3_region` {#s3_region}
Garage will accept S3 API calls that are targeted to the S3 region defined here. Garage will accept S3 API calls that are targetted to the S3 region defined here.
API calls targeted to other regions will fail with a AuthorizationHeaderMalformed error API calls targetted to other regions will fail with a AuthorizationHeaderMalformed error
message that redirects the client to the correct region. message that redirects the client to the correct region.
#### `root_domain` {#s3_root_domain} #### `root_domain` {#s3_root_domain}
@ -800,7 +775,7 @@ message that redirects the client to the correct region.
The optional suffix to access bucket using vhost-style in addition to path-style request. The optional suffix to access bucket using vhost-style in addition to path-style request.
Note path-style requests are always enabled, whether or not vhost-style is configured. Note path-style requests are always enabled, whether or not vhost-style is configured.
Configuring vhost-style S3 requires a wildcard DNS entry, and possibly a wildcard TLS certificate, Configuring vhost-style S3 requires a wildcard DNS entry, and possibly a wildcard TLS certificate,
but might be required by software not supporting path-style requests. but might be required by softwares not supporting path-style requests.
If `root_domain` is `s3.garage.eu`, a bucket called `my-bucket` can be interacted with If `root_domain` is `s3.garage.eu`, a bucket called `my-bucket` can be interacted with
using the hostname `my-bucket.s3.garage.eu`. using the hostname `my-bucket.s3.garage.eu`.
@ -816,7 +791,7 @@ behaviour of this module.
The IP and port on which to bind for accepting HTTP requests to buckets configured The IP and port on which to bind for accepting HTTP requests to buckets configured
for website access. for website access.
This endpoint does not support TLS: a reverse proxy should be used to provide it. This endpoint does not suport TLS: a reverse proxy should be used to provide it.
Alternatively, since `v0.8.5`, a path can be used to create a unix socket with 0222 mode. Alternatively, since `v0.8.5`, a path can be used to create a unix socket with 0222 mode.
@ -849,34 +824,10 @@ See [administration API reference](@/documentation/reference-manual/admin-api.md
Alternatively, since `v0.8.5`, a path can be used to create a unix socket. Note that for security reasons, Alternatively, since `v0.8.5`, a path can be used to create a unix socket. Note that for security reasons,
the socket will have 0220 mode. Make sure to set user and group permissions accordingly. the socket will have 0220 mode. Make sure to set user and group permissions accordingly.
#### `admin_token`, `admin_token_file` or `GARAGE_ADMIN_TOKEN`, `GARAGE_ADMIN_TOKEN_FILE` (env) {#admin_token}
The token for accessing all administration functions on the admin endpoint,
with the exception of the metrics endpoint (see `metrics_token`).
You can use any random string for this value. We recommend generating a random
token with `openssl rand -base64 32`.
For Garage version earlier than `v2.0`, if this token is not set,
access to these endpoints is disabled entirely.
Since Garage `v2.0`, additional admin API tokens can be defined dynamically
in your Garage cluster using administration commands. This new admin token system
is more flexible since it allows admin tokens to have an expiration date,
and to have a scope restricted to certain admin API functions. If `admin_token`
is set, it behaves as an admin token without expiration and with full scope.
Otherwise, only admin API tokens defined dynamically can be used.
`admin_token` was introduced in Garage `v0.7.2`.
`admin_token_file` and the `GARAGE_ADMIN_TOKEN` environment variable are supported since Garage `v0.8.2`.
`GARAGE_ADMIN_TOKEN_FILE` is supported since `v0.8.5` / `v0.9.1`.
#### `metrics_token`, `metrics_token_file` or `GARAGE_METRICS_TOKEN`, `GARAGE_METRICS_TOKEN_FILE` (env) {#admin_metrics_token} #### `metrics_token`, `metrics_token_file` or `GARAGE_METRICS_TOKEN`, `GARAGE_METRICS_TOKEN_FILE` (env) {#admin_metrics_token}
The token for accessing the Prometheus metrics endpoint (`/metrics`). The token for accessing the Metrics endpoint. If this token is not set, the
If this token is not set, and unless `metrics_require_token` is set to `true`, Metrics endpoint can be accessed without access control.
the metrics endpoint can be accessed without access control.
You can use any random string for this value. We recommend generating a random token with `openssl rand -base64 32`. You can use any random string for this value. We recommend generating a random token with `openssl rand -base64 32`.
@ -885,12 +836,17 @@ You can use any random string for this value. We recommend generating a random t
`GARAGE_METRICS_TOKEN_FILE` is supported since `v0.8.5` / `v0.9.1`. `GARAGE_METRICS_TOKEN_FILE` is supported since `v0.8.5` / `v0.9.1`.
#### `metrics_require_token` (since `v2.0.0`) {#admin_metrics_require_token} #### `admin_token`, `admin_token_file` or `GARAGE_ADMIN_TOKEN`, `GARAGE_ADMIN_TOKEN_FILE` (env) {#admin_token}
If this is set to `true`, accessing the metrics endpoint will always require The token for accessing all of the other administration endpoints. If this
an access token. Valid tokens include the `metrics_token` if it is set, token is not set, access to these endpoints is disabled entirely.
and admin API token defined dynamically in Garage which have
the `Metrics` endpoint in their scope. You can use any random string for this value. We recommend generating a random token with `openssl rand -base64 32`.
`admin_token` was introduced in Garage `v0.7.2`.
`admin_token_file` and the `GARAGE_ADMIN_TOKEN` environment variable are supported since Garage `v0.8.2`.
`GARAGE_ADMIN_TOKEN_FILE` is supported since `v0.8.5` / `v0.9.1`.
#### `trace_sink` {#admin_trace_sink} #### `trace_sink` {#admin_trace_sink}

View file

@ -46,7 +46,7 @@ to select the replication mode best suited to your use case (hint: in most cases
### Compression and deduplication ### Compression and deduplication
All data stored in Garage is deduplicated, and optionally compressed using All data stored in Garage is deduplicated, and optionnally compressed using
Zstd. Objects uploaded to Garage are chunked in blocks of constant sizes (see Zstd. Objects uploaded to Garage are chunked in blocks of constant sizes (see
[`block_size`](@/documentation/reference-manual/configuration.md#block_size)), [`block_size`](@/documentation/reference-manual/configuration.md#block_size)),
and the hashes of individual blocks are used to dispatch them to storage nodes and the hashes of individual blocks are used to dispatch them to storage nodes
@ -84,13 +84,13 @@ exposing the same content under different domain names.
Garage also supports bucket aliases which are local to a single user: Garage also supports bucket aliases which are local to a single user:
this allows different users to have different buckets with the same name, thus avoiding naming collisions. this allows different users to have different buckets with the same name, thus avoiding naming collisions.
This can be helpful for instance if you want to write an application that creates per-user buckets with always the same name. This can be helpfull for instance if you want to write an application that creates per-user buckets with always the same name.
This feature is totally invisible to S3 clients and does not break compatibility with AWS. This feature is totally invisible to S3 clients and does not break compatibility with AWS.
### Cluster administration API ### Cluster administration API
Garage provides a fully-fledged REST API to administer your cluster programmatically. Garage provides a fully-fledged REST API to administer your cluster programatically.
Functionality included in the admin API includes: setting up and monitoring Functionality included in the admin API includes: setting up and monitoring
cluster nodes, managing access credentials, and managing storage buckets and bucket aliases. cluster nodes, managing access credentials, and managing storage buckets and bucket aliases.
A full reference of the administration API is available [here](@/documentation/reference-manual/admin-api.md). A full reference of the administration API is available [here](@/documentation/reference-manual/admin-api.md).
@ -100,7 +100,7 @@ A full reference of the administration API is available [here](@/documentation/r
Garage makes some internal metrics available in the Prometheus data format, Garage makes some internal metrics available in the Prometheus data format,
which allows you to build interactive dashboards to visualize the load and internal state of your storage cluster. which allows you to build interactive dashboards to visualize the load and internal state of your storage cluster.
For developers and performance-savvy administrators, For developpers and performance-savvy administrators,
Garage also supports exporting traces of what it does internally in OpenTelemetry format. Garage also supports exporting traces of what it does internally in OpenTelemetry format.
This allows to monitor the time spent at various steps of the processing of requests, This allows to monitor the time spent at various steps of the processing of requests,
in order to detect potential performance bottlenecks. in order to detect potential performance bottlenecks.
@ -129,5 +129,5 @@ related to objects stored in an S3 bucket.
In the context of our research project, [Aérogramme](https://aerogramme.deuxfleurs.fr), In the context of our research project, [Aérogramme](https://aerogramme.deuxfleurs.fr),
K2V is used to provide metadata and log storage for operations on encrypted e-mail storage. K2V is used to provide metadata and log storage for operations on encrypted e-mail storage.
Learn more on the specification of K2V [here](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/commit/f8be15c37db857e177d543de7be863692628d567/doc/drafts/k2v-spec.md) Learn more on the specification of K2V [here](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/k2v/doc/drafts/k2v-spec.md)
and on how to enable it in Garage [here](@/documentation/reference-manual/k2v.md). and on how to enable it in Garage [here](@/documentation/reference-manual/k2v.md).

View file

@ -16,10 +16,10 @@ the `k2v` feature flag enabled can be obtained from our download page under
with `-k2v` (example: `v0.7.2-k2v`). with `-k2v` (example: `v0.7.2-k2v`).
The specification of the K2V API can be found The specification of the K2V API can be found
[here](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/commit/f8be15c37db857e177d543de7be863692628d567/doc/drafts/k2v-spec.md). [here](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/main/doc/drafts/k2v-spec.md).
This document also includes a high-level overview of K2V's design. This document also includes a high-level overview of K2V's design.
The K2V API uses AWSv4 signatures for authentication, same as the S3 API. The K2V API uses AWSv4 signatures for authentification, same as the S3 API.
The AWS region used for signature calculation is always the same as the one The AWS region used for signature calculation is always the same as the one
defined for the S3 API in the config file. defined for the S3 API in the config file.
@ -55,3 +55,4 @@ cargo build --features cli --bin k2v-cli
The CLI utility is self-documented, run `k2v-cli --help` to learn how to use The CLI utility is self-documented, run `k2v-cli --help` to learn how to use
it. There is also a short README.md in the `src/k2v-client` folder with some it. There is also a short README.md in the `src/k2v-client` folder with some
instructions. instructions.

View file

@ -1,188 +0,0 @@
+++
title = "Known issues"
weight = 80
+++
Issues in each section are roughly sorted by order of decreasing impact, based on actual reports from users.
## Architectural limitations
Issues that are caused by design decisions of Garage internals, and that can't
be fixed without major architectural changes in the codebase.
### Metadata performance issues with many objects
**Related issues:**
- [#851 - Performances collapse with 10 millions pictures in a bucket](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/851)
- [#1222 - Cluster Setup Write Performance Degraded After Writing 10 Million Object (200-300Kb per object)](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/1222)
### Very big objects cause performance degradation
For each object, there is a single metadata entry called a `Version` that
contains a list of all of the data blocks in the object. For very big objects,
this entry can contain thousands of block references. During the uploading of
an object, this metadata entry needs to be read, deserialized, reserialized and
written for each individual data block uploaded. This means that the
complexity of an upload is `O(n²)` in the number of blocks needed.
This manifests by excessive metadata I/O and CPU usage, and uploads eventually stalling.
**Mitigation:** Increase the `block_size` configuration parameter to reduce the
number of blocks. Make sure multipart uploads use chunks that are at least
`block_size` in size, and that are an exact multiple of `block_size` to avoid
the creation of smaller blocks.
**Long-term solution:** An architectural change in the metadata system would be
required to store block lists in many independent metadata entries instead of
one single big entry per object.
**Related issues:**
- [#662 - Large Files fail to upload](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/662)
- [#1366 - High CPU usage and performance degradation during long multipart uploads](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/1366)
### No conditional writes / locking / WORM support (`if-none-match`, ...)
This is structurally impossible to implement in Garage due to the lack of a consensus algorithm,
which is one of Garage's core design choices which we cannot reconsider.
A semi-working, *unsafe* implementation of WORM and object locking could be
implemented, with the following constraint: only after the completion of the
first write (in case of WORM) or the setting of a lock (for object lock) can we
guarantee that the object cannot be overwritten. In the case where an overwrite
request arrives at the same time as the initial request to write or to lock
the object, we cannot implement a safe and consistent way to reject it. This
means that many practical use-cases for `if-none-match` cannot be supported
(e.g. using it to implement mutual exclusion between concurrent writers).
**Related issues:**
- [#1052 - Support conditional writes](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/1052)
- [#1127 - Feature Request: WORM (Write Once Read Many) / Object Lock Support](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/1127)
### `CreateBucket` race condition
Also due to the lack of a consensus algorithm, there is no mutual exclusion
between concurrent `CreateBucket` requests using the same bucket name.
**Related issues:**
- [#649 - Race condition in CreateBucket](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/649)
### Metadata and data have the same replication factor
There is a single `replication_factor` in the configuration file that applies both to data blocks and metadata entries.
This makes clusters with `replication_factor = 1` particularly vulnerable in cases of metadata corruption (see below), as there
is a single copy of the metadata for each object even in multi-node clusters.
**Mitigation:** Do not use `replication_factor = 1`.
**Long-term solution:** We want to allow scenarios such as replicating the
metadata on 2, 3 or more nodes and the data on only 1 or 2 nodes (for example),
so that the metadata can benefit from better redundancy without increasing the
storage costs for the entire dataset. This will require some important changes
in the codebase.
**Related issues:**
- [#720 - Separate replication modes for metadata/data](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/720)
### Node count limitation
Garage will have issues in clusters with too many nodes, it will not be able to
spread data uniformly among nodes and some nodes will fill up faster than
others. This starts to manifest when the number of nodes is bigger than `10 ×
replication_factor`. This is due to the fact that Garage uses only 256
partitions internally.
**Mitigation:** Build clusters with fewer, bigger nodes.
**Potential solution:** This can be fixed by increasing the number of
partitions in Garage. The code paths exist, there is [a `const`
somewhere](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/commit/6fd9bba0cb55062cb1725ab961b7fa8acb9dcc61/src/rpc/layout/mod.rs#L35)
that theoretically allows to increase the number of partitions up to `2^16`,
but this has not been tested so there might be bugs.
### Buckets are not sharded
For each bucket, the first metadata layer that contains an index of all objects
is not sharded. This index, which includes the names and all metadata (size,
headers, ...) for each object, is stored on `$replication_factor` nodes.
For instance with `replication_factor = 3`, a given bucket will use only 3
specific nodes for this index (chosen at random when the bucket is created) to
store this index. In multi-zone deployments, these nodes will be spread in
different zones. Each bucket uses a different set of 3 random nodes for its
index.
As a consequence, very large buckets might cause uneven load distribution
within a cluster. If all of the requests on a cluster are for objects in a
single bucket, then the `$replication_factor` nodes that store the index will
become a hotspot in the cluster, with more intensive metadata access patterns.
There is no way of choosing which nodes will have this role.
Currently, we have no report of this being an issue in practice.
**Mitigation:** This impacts in particular clusters that are used for a single
purpose with a single bucket. This can be solved by dividing your dataset among
many buckets, using a client-side sharding strategy that you will have to
design. Use at least as many buckets as you have nodes on your cluster.
## Bugs
Known bugs that are complex to diagnose and fix, and therefore have not been
fixed yet.
### LMDB metadata corruption
Many users have reported situations where the LMDB metadata db becomes
corrupted, sometimes after a forced shutdown of Garage or in case of power
loss. A corrupted database file is generally not recoverable.
**Mitigation:** Use a `replication_factor` of at least 2. Configure automatic
snapshotting using `metadata_auto_snapshot_interval` so that in case of
corruption you can rollback to a working database.
Note that taking filesystem-level snapshots of your `metadata_dir`, although it
is much faster and less I/O intensive than Garage's built-in snapshotting, does
not ensure that the snapshot will be consistent. If the snapshot is taken
during a metadata write, the snapshot itself might be corrupted and thus not
usable as a rollback point. Therefore, prefer using
`metadata_auto_snapshot_interval` in all cases.
### Layout updates might require manual intervention
In case of disconnected nodes, when changing the cluster layout to remove these
nodes and add other nodes instead, Garage might not be able to properly evict
the old nodes from the system. This is a built-in security measure to avoid any
inconsistent cluster states.
This manifests by several cluster layout versions staying active even after a
full resync. You can diagnose this situation with `garage layout history`,
which will give you instructions to fix it.
### Tag assignment
In the `garage layout assign` command, the `-t` argument has to be repeated
multiple times to set multiple tags on a node. Writing multiple tags separated
by commas will result in a single string.
## General footguns
Choices made by the developers that users must be aware of if they don't want
to run into potential issues.
### Resync tranquility is conservative by default
By default, the worker parameters `resync-tranquility` and `resync-worker-count` are set to very conservative values, to avoid overloading nodes with I/O when data needs to be resynchronized between nodes.
This can cause issues where the resync queue grows faster than it can be cleared, which in turn causes performance issues in the rest of Garage.
This situation is indicated by a big resync queue with few resync errors (the queue is not caused by a disconnected/malfunctioning node).
To fix it, increase the number of resync workers and reduce the resync tranquility. For instance, if you want to resync as fast as possible:
```
garage worker set -a resync-worker-count 8
garage worker set -a resync-tranquility 0
```

View file

@ -45,7 +45,7 @@ we suppose that OpenIO supports presigned URLs.
All endpoints that are missing on Garage will return a 501 Not Implemented. All endpoints that are missing on Garage will return a 501 Not Implemented.
Some `x-amz-` headers are not implemented. Some `x-amz-` headers are not implemented.
### Core endpoints ### Core endoints
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) | | Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
|------------------------------|----------------------------------|-----------------|---------------|---------|-----| |------------------------------|----------------------------------|-----------------|---------------|---------|-----|
@ -135,12 +135,12 @@ If you need this feature, please [share your use case in our dedicated issue](ht
**PutBucketLifecycleConfiguration:** The only actions supported are **PutBucketLifecycleConfiguration:** The only actions supported are
`AbortIncompleteMultipartUpload` and `Expiration` (without the `AbortIncompleteMultipartUpload` and `Expiration` (without the
`ExpiredObjectDeleteMarker` field). All other operations are dependent on `ExpiredObjectDeleteMarker` field). All other operations are dependent on
either bucket versioning or storage classes which Garage currently does not either bucket versionning or storage classes which Garage currently does not
implement. The deprecated `Prefix` member directly in the `Rule` implement. The deprecated `Prefix` member directly in the `Rule`
structure/XML tag is not supported, specified prefixes must be inside the structure/XML tag is not supported, specified prefixes must be inside the
`Filter` structure/XML tag. `Filter` structure/XML tag.
**GetBucketVersioning:** Stub implementation which always returns "versioning not enabled", since Garage does not yet support bucket versioning. **GetBucketVersioning:** Stub implementation which always returns "versionning not enabled", since Garage does not yet support bucket versionning.
### Replication endpoints ### Replication endpoints
@ -155,7 +155,7 @@ Please open an issue if you have a use case for replication.
*Note: Ceph documentation briefly says that Ceph supports *Note: Ceph documentation briefly says that Ceph supports
[replication through the S3 API](https://docs.ceph.com/en/latest/radosgw/multisite-sync-policy/#s3-replication-api) [replication through the S3 API](https://docs.ceph.com/en/latest/radosgw/multisite-sync-policy/#s3-replication-api)
but with some limitations. but with some limitations.
Additionally, replication endpoints are not documented in the S3 compatibility page so I don't know what kind of support we can expect.* Additionaly, replication endpoints are not documented in the S3 compatibility page so I don't know what kind of support we can expect.*
### Locking objects ### Locking objects
@ -197,7 +197,7 @@ Please open an issue if you have a use case.
### Vendor specific endpoints ### Vendor specific endpoints
<details><summary>Display Amazon specific endpoints</summary> <details><summary>Display Amazon specifc endpoints</summary>
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) | | Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
@ -234,3 +234,4 @@ Please open an issue if you have a use case.
| [SelectObjectContent](https://docs.aws.amazon.com/AmazonS3/latest/API/API_SelectObjectContent.html) | ❌ Missing | ❌| ❌| ❌| ❌| | [SelectObjectContent](https://docs.aws.amazon.com/AmazonS3/latest/API/API_SelectObjectContent.html) | ❌ Missing | ❌| ❌| ❌| ❌|
</details> </details>

View file

@ -3,7 +3,7 @@ title = "S3 compatibility target"
weight = 5 weight = 5
+++ +++
If there is a specific S3 functionality you have a need for, feel free to open If there is a specific S3 functionnality you have a need for, feel free to open
a PR to put the corresponding endpoints higher in the list. Please explain a PR to put the corresponding endpoints higher in the list. Please explain
your motivations for doing so in the PR message. your motivations for doing so in the PR message.

View file

@ -68,7 +68,7 @@ Workflow for DELETE:
1. Check write permission (LDAP) 1. Check write permission (LDAP)
2. Get current version (or versions) in object table 2. Get current version (or versions) in object table
3. Do the deletion of those versions NOT IN A BACKGROUND JOB THIS TIME 3. Do the deletion of those versions NOT IN A BACKGROUND JOB THIS TIME
4. Return success to the user if we were able to delete blocks from the blocks table and entries from the object table 4. Return succes to the user if we were able to delete blocks from the blocks table and entries from the object table
To delete a version: To delete a version:
@ -92,7 +92,7 @@ Known issue: if someone is reading from a version that we want to delete and the
- file path = /meta/(first 3 hex digits of hash)/(rest of hash) - file path = /meta/(first 3 hex digits of hash)/(rest of hash)
- map block hash -> set of version UUIDs where it is referenced - map block hash -> set of version UUIDs where it is referenced
Useful metadata: Usefull metadata:
- list of versions that reference this block in the Casandra table, so that we can do GC by checking in Cassandra that the lines still exist - list of versions that reference this block in the Casandra table, so that we can do GC by checking in Cassandra that the lines still exist
- list of other nodes that we know have acknowledged a write of this block, useful in the rebalancing algorithm - list of other nodes that we know have acknowledged a write of this block, useful in the rebalancing algorithm

View file

@ -49,12 +49,12 @@ The ring construction that selects `n_token` random positions for each nodes giv
is not well-balanced: the space between the tokens varies a lot, and some partitions are thus bigger than others. is not well-balanced: the space between the tokens varies a lot, and some partitions are thus bigger than others.
This problem was demonstrated in the original Dynamo DB paper. This problem was demonstrated in the original Dynamo DB paper.
To solve this, we want to apply a better second method for partitioning our dataset: To solve this, we want to apply a better second method for partitionning our dataset:
1. fix an initially large number of partitions (say 1024) with evenly-spaced delimiters, 1. fix an initially large number of partitions (say 1024) with evenly-spaced delimiters,
2. attribute each partition randomly to a node, with a probability 2. attribute each partition randomly to a node, with a probability
proportional to its capacity (which `n_tokens` represented in the first proportionnal to its capacity (which `n_tokens` represented in the first
method) method)
For now we continue using the multi-DC ring walking described above. For now we continue using the multi-DC ring walking described above.
@ -66,7 +66,7 @@ I have studied two ways to do the attribution of partitions to nodes, in a way t
MagLev provided significantly better balancing, as it guarantees that the exact MagLev provided significantly better balancing, as it guarantees that the exact
same number of partitions is attributed to all nodes that have the same same number of partitions is attributed to all nodes that have the same
capacity (and that this number is proportional to the node's capacity, except capacity (and that this number is proportionnal to the node's capacity, except
for large values), however in both cases: for large values), however in both cases:
- the distribution is still bad, because we use the naive multi-DC ring walking - the distribution is still bad, because we use the naive multi-DC ring walking

View file

@ -1,6 +1,6 @@
+++ +++
title = "Migrating from 0.3 to 0.4" title = "Migrating from 0.3 to 0.4"
weight = 80 weight = 20
+++ +++
**Migrating from 0.3 to 0.4 is unsupported. This document is only intended to **Migrating from 0.3 to 0.4 is unsupported. This document is only intended to

View file

@ -1,6 +1,6 @@
+++ +++
title = "Migrating from 0.5 to 0.6" title = "Migrating from 0.5 to 0.6"
weight = 75 weight = 15
+++ +++
**This guide explains how to migrate to 0.6 if you have an existing 0.5 cluster. **This guide explains how to migrate to 0.6 if you have an existing 0.5 cluster.

View file

@ -1,6 +1,6 @@
+++ +++
title = "Migrating from 0.6 to 0.7" title = "Migrating from 0.6 to 0.7"
weight = 74 weight = 14
+++ +++
**This guide explains how to migrate to 0.7 if you have an existing 0.6 cluster. **This guide explains how to migrate to 0.7 if you have an existing 0.6 cluster.
We don't recommend trying to migrate to 0.7 directly from 0.5 or older.** We don't recommend trying to migrate to 0.7 directly from 0.5 or older.**
@ -19,7 +19,7 @@ The migration steps are as follows:
2. Disable API and web access. Garage does not support disabling 2. Disable API and web access. Garage does not support disabling
these endpoints but you can change the port number or stop your reverse these endpoints but you can change the port number or stop your reverse
proxy for instance. proxy for instance.
3. Check once again that your cluster is healthy. Run again `garage repair --all-nodes --yes tables` which is quick. 3. Check once again that your cluster is healty. Run again `garage repair --all-nodes --yes tables` which is quick.
Also check your queues are empty, run `garage stats` to query them. Also check your queues are empty, run `garage stats` to query them.
4. Turn off Garage v0.6 4. Turn off Garage v0.6
5. Backup the metadata folder of all your nodes: `cd /var/lib/garage ; tar -acf meta-v0.6.tar.zst meta/` 5. Backup the metadata folder of all your nodes: `cd /var/lib/garage ; tar -acf meta-v0.6.tar.zst meta/`

View file

@ -1,6 +1,6 @@
+++ +++
title = "Migrating from 0.7 to 0.8" title = "Migrating from 0.7 to 0.8"
weight = 73 weight = 13
+++ +++
**This guide explains how to migrate to 0.8 if you have an existing 0.7 cluster. **This guide explains how to migrate to 0.8 if you have an existing 0.7 cluster.

View file

@ -1,6 +1,6 @@
+++ +++
title = "Migrating from 0.8 to 0.9" title = "Migrating from 0.8 to 0.9"
weight = 72 weight = 12
+++ +++
**This guide explains how to migrate to 0.9 if you have an existing 0.8 cluster. **This guide explains how to migrate to 0.9 if you have an existing 0.8 cluster.

View file

@ -1,6 +1,6 @@
+++ +++
title = "Migrating from 0.9 to 1.0" title = "Migrating from 0.9 to 1.0"
weight = 71 weight = 11
+++ +++
**This guide explains how to migrate to 1.0 if you have an existing 0.9 cluster. **This guide explains how to migrate to 1.0 if you have an existing 0.9 cluster.

View file

@ -1,70 +0,0 @@
+++
title = "Migrating from 1.0 to 2.0"
weight = 70
+++
**This guide explains how to migrate to v2.x if you have an existing v1.x.x cluster.
We don't recommend trying to migrate to v2.x directly from v0.9.x or older.**
This migration procedure has been tested on several clusters without issues.
However, it is still a *critical procedure* that might cause issues.
**Make sure to back up all your data before attempting it!**
You might also want to read our [general documentation on upgrading Garage](@/documentation/operations/upgrading.md).
## Changes introduced in v2.0
The following are **breaking changes** in Garage v2.0 that require your attention when migrating:
- The administration API has been completely reworked.
Some calls to the `/v1/` endpoints will still work but most will not.
New endpoints are prefixed by `/v2/`. **You will need to update all your code that makes use of the admin API.**
- `replication_mode` is no longer a supported configuration parameter,
please use `replication_factor` and `consistency_mode` instead.
## Migration procedure
The migration to Garage v2.0 can be done with almost no downtime,
by restarting all nodes at once in the new version.
The migration steps are as follows:
1. Do a `garage repair --all-nodes --yes tables`, check the logs and check that
all data seems to be synced correctly between nodes. If you have time, do
additional `garage repair` procedures (`blocks`, `versions`, `block_refs`,
etc.)
2. Ensure you have a snapshot of your Garage installation that you can restore
to in case the upgrade goes wrong, with one of the following options:
- You may use the `garage meta snapshot --all` command
to make a backup snapshot of the metadata directories of your nodes
for backup purposes. Once this command has completed, copy the following
files and directories from the `metadata_dir` of all your nodes
to somewhere safe: `snapshots`, `cluster_layout`, `data_layout`,
`node_key`, `node_key.pub`. (If you have set the `metadata_snapshots_dir`
to a different value in your config file, back up that directory instead.)
- If you are running a filesystem such as ZFS or BTRFS that support
snapshotting, you can create a filesystem-level snapshot of the `metadata_dir`
of all your nodes to be used as a restoration point if needed.
- You may also make a back-up manually: turn off each node
individually; back up its metadata folder (for instance, use the following
command if your metadata directory is `/var/lib/garage/meta`: `cd
/var/lib/garage ; tar -acf meta-v1.0.tar.zst meta/`); turn it back on
again. This will allow you to take a backup of all nodes without
impacting global cluster availability. You can do all nodes of a single
zone at once as this does not impact the availability of Garage.
3. Prepare your updated binaries and configuration files for Garage v2.0.
**Remember to update your configuration file to remove `replication_mode` and replace it by `replication_factor`.**
4. Shut down all v1.0 nodes simultaneously, and restart them all simultaneously
in v2.0. Use your favorite deployment tool (Ansible, Kubernetes, Nomad) to
achieve this as fast as possible. Garage v2.0 should be in a working state
as soon as enough nodes have started.
5. Monitor your cluster in the following hours to see if it works well under
your production load.

View file

@ -1,6 +1,6 @@
+++ +++
title = "Testing strategy" title = "Testing strategy"
weight = 100 weight = 30
+++ +++
@ -28,11 +28,11 @@ We should try to test in least invasive ways, i.e. minimize the impact of the te
- Not making `garage` a shared library (launch using `execve`, it's perfectly fine) - Not making `garage` a shared library (launch using `execve`, it's perfectly fine)
Instead, we should focus on building a clean outer interface for the `garage` binary, Instead, we should focus on building a clean outer interface for the `garage` binary,
for example loading configuration using environment variables instead of the configuration file if that's helpful for writing the tests. for example loading configuration using environnement variables instead of the configuration file if that's helpfull for writing the tests.
There are two reasons for this: There are two reasons for this:
- Keep the source code clean and focused - Keep the soure code clean and focused
- Test something that is as close as possible as the true garage that will actually be running - Test something that is as close as possible as the true garage that will actually be running
Reminder: rules of simplicity, concerning changes to Garage's source code. Reminder: rules of simplicity, concerning changes to Garage's source code.
@ -71,3 +71,5 @@ Interesting blog posts on the blog of the Sled database:
Misc: Misc:
- [mutagen](https://github.com/llogiq/mutagen) - mutation testing is a way to assert our test quality by mutating the code and see if the mutation makes the tests fail - [mutagen](https://github.com/llogiq/mutagen) - mutation testing is a way to assert our test quality by mutating the code and see if the mutation makes the tests fail
- [fuzzing](https://rust-fuzz.github.io/book/) - cargo supports fuzzing, it could be a way to test our software reliability in presence of garbage data. - [fuzzing](https://rust-fuzz.github.io/book/) - cargo supports fuzzing, it could be a way to test our software reliability in presence of garbage data.

View file

@ -13,12 +13,8 @@ We will bump the version numbers prefixed to each API endpoint each time the syn
or semantics change, meaning that code that relies on these endpoints will break or semantics change, meaning that code that relies on these endpoints will break
when changes are introduced. when changes are introduced.
The Garage administration API was introduced in version 0.7.2, and was The Garage administration API was introduced in version 0.7.2, this document
changed several times. does not apply to older versions of Garage.
**THIS DOCUMENT IS DEPRECATED.** We now have an OpenAPI spec which is automatically generated
from Garage's source code and is always up-to-date. See `doc/api/garage-admin-v2.html`.
Text in this document is no longer kept in sync with the admin API's actual behavior.
## Access control ## Access control
@ -56,28 +52,34 @@ Returns an HTTP status 200 if the node is ready to answer user's requests,
and an HTTP status 503 (Service Unavailable) if there are some partitions and an HTTP status 503 (Service Unavailable) if there are some partitions
for which a quorum of nodes is not available. for which a quorum of nodes is not available.
A simple textual message is also returned in a body with content-type `text/plain`. A simple textual message is also returned in a body with content-type `text/plain`.
See `/v2/GetClusterHealth` for an API that also returns JSON output. See `/v1/health` for an API that also returns JSON output.
### Other special endpoints
#### CheckDomain `GET /check?domain=<domain>`
Checks whether this Garage cluster serves a website for domain `<domain>`.
Returns HTTP 200 Ok if yes, or HTTP 4xx if no website is available for this domain.
### Cluster operations ### Cluster operations
#### GetClusterStatus `GET /v2/GetClusterStatus` #### GetClusterStatus `GET /v1/status`
Returns the cluster's current status in JSON, including: Returns the cluster's current status in JSON, including:
- ID of the node being queried and its version of the Garage daemon
- Live nodes - Live nodes
- Currently configured cluster layout - Currently configured cluster layout
- Staged changes to the cluster layout
Example response body: Example response body:
```json ```json
{ {
"node": "b10c110e4e854e5aa3f4637681befac755154b20059ec163254ddbfae86b09df",
"garageVersion": "v1.3.0",
"garageFeatures": [
"k2v",
"lmdb",
"sqlite",
"metrics",
"bundled-libs"
],
"rustVersion": "1.68.0",
"dbEngine": "LMDB (using Heed crate)",
"layoutVersion": 5, "layoutVersion": 5,
"nodes": [ "nodes": [
{ {
@ -167,7 +169,7 @@ Example response body:
} }
``` ```
#### GetClusterHealth `GET /v2/GetClusterHealth` #### GetClusterHealth `GET /v1/health`
Returns the cluster's current health in JSON format, with the following variables: Returns the cluster's current health in JSON format, with the following variables:
@ -176,7 +178,7 @@ Returns the cluster's current health in JSON format, with the following variable
- degraded: Garage node is not connected to all storage nodes, but a quorum of write nodes is available for all partitions - degraded: Garage node is not connected to all storage nodes, but a quorum of write nodes is available for all partitions
- unavailable: a quorum of write nodes is not available for some partitions - unavailable: a quorum of write nodes is not available for some partitions
- `knownNodes`: the number of nodes this Garage node has had a TCP connection to since the daemon started - `knownNodes`: the number of nodes this Garage node has had a TCP connection to since the daemon started
- `connectedNodes`: the number of nodes this Garage node currently has an open connection to - `connectedNodes`: the nubmer of nodes this Garage node currently has an open connection to
- `storageNodes`: the number of storage nodes currently registered in the cluster layout - `storageNodes`: the number of storage nodes currently registered in the cluster layout
- `storageNodesOk`: the number of storage nodes to which a connection is currently open - `storageNodesOk`: the number of storage nodes to which a connection is currently open
- `partitions`: the total number of partitions of the data (currently always 256) - `partitions`: the total number of partitions of the data (currently always 256)
@ -200,7 +202,7 @@ Example response body:
} }
``` ```
#### ConnectClusterNodes `POST /v2/ConnectClusterNodes` #### ConnectClusterNodes `POST /v1/connect`
Instructs this Garage node to connect to other Garage nodes at specified addresses. Instructs this Garage node to connect to other Garage nodes at specified addresses.
@ -230,7 +232,7 @@ Example response:
] ]
``` ```
#### GetClusterLayout `GET /v2/GetClusterLayout` #### GetClusterLayout `GET /v1/layout`
Returns the cluster's current layout in JSON, including: Returns the cluster's current layout in JSON, including:
@ -291,7 +293,7 @@ Example response body:
} }
``` ```
#### UpdateClusterLayout `POST /v2/UpdateClusterLayout` #### UpdateClusterLayout `POST /v1/layout`
Send modifications to the cluster layout. These modifications will Send modifications to the cluster layout. These modifications will
be included in the staged role changes, visible in subsequent calls be included in the staged role changes, visible in subsequent calls
@ -328,7 +330,7 @@ This returns the new cluster layout with the proposed staged changes,
as returned by GetClusterLayout. as returned by GetClusterLayout.
#### ApplyClusterLayout `POST /v2/ApplyClusterLayout` #### ApplyClusterLayout `POST /v1/layout/apply`
Applies to the cluster the layout changes currently registered as Applies to the cluster the layout changes currently registered as
staged layout changes. staged layout changes.
@ -348,11 +350,23 @@ existing layout in the cluster.
This returns the message describing all the calculations done to compute the new This returns the message describing all the calculations done to compute the new
layout, as well as the description of the layout as returned by GetClusterLayout. layout, as well as the description of the layout as returned by GetClusterLayout.
#### RevertClusterLayout `POST /v2/RevertClusterLayout` #### RevertClusterLayout `POST /v1/layout/revert`
Clears all of the staged layout changes. Clears all of the staged layout changes.
This requests contains an empty body. Request body format:
```json
{
"version": 13
}
```
Reverting the staged changes is done by incrementing the version number
and clearing the contents of the staged change list.
Similarly to the CLI, the body must include the incremented
version number, which MUST be 1 + the value of the currently
existing layout in the cluster.
This returns the new cluster layout with all changes reverted, This returns the new cluster layout with all changes reverted,
as returned by GetClusterLayout. as returned by GetClusterLayout.
@ -360,7 +374,7 @@ as returned by GetClusterLayout.
### Access key operations ### Access key operations
#### ListKeys `GET /v2/ListKeys` #### ListKeys `GET /v1/key`
Returns all API access keys in the cluster. Returns all API access keys in the cluster.
@ -379,8 +393,8 @@ Example response:
] ]
``` ```
#### GetKeyInfo `GET /v2/GetKeyInfo?id=<access key id>` #### GetKeyInfo `GET /v1/key?id=<acces key id>`
#### GetKeyInfo `GET /v2/GetKeyInfo?search=<pattern>` #### GetKeyInfo `GET /v1/key?search=<pattern>`
Returns information about the requested API access key. Returns information about the requested API access key.
@ -388,7 +402,7 @@ If `id` is set, the key is looked up using its exact identifier (faster).
If `search` is set, the key is looked up using its name or prefix If `search` is set, the key is looked up using its name or prefix
of identifier (slower, all keys are enumerated to do this). of identifier (slower, all keys are enumerated to do this).
Optionally, the query parameter `showSecretKey=true` can be set to reveal the Optionnally, the query parameter `showSecretKey=true` can be set to reveal the
associated secret access key. associated secret access key.
Example response: Example response:
@ -454,7 +468,7 @@ Example response:
} }
``` ```
#### CreateKey `POST /v2/CreateKey` #### CreateKey `POST /v1/key`
Creates a new API access key. Creates a new API access key.
@ -469,7 +483,7 @@ Request body format:
This returns the key info, including the created secret key, This returns the key info, including the created secret key,
in the same format as the result of GetKeyInfo. in the same format as the result of GetKeyInfo.
#### ImportKey `POST /v2/ImportKey` #### ImportKey `POST /v1/key/import`
Imports an existing API key. Imports an existing API key.
This will check that the imported key is in the valid format, i.e. This will check that the imported key is in the valid format, i.e.
@ -487,7 +501,7 @@ Request body format:
This returns the key info in the same format as the result of GetKeyInfo. This returns the key info in the same format as the result of GetKeyInfo.
#### UpdateKey `POST /v2/UpdateKey?id=<access key id>` #### UpdateKey `POST /v1/key?id=<acces key id>`
Updates information about the specified API access key. Updates information about the specified API access key.
@ -509,14 +523,14 @@ The possible flags in `allow` and `deny` are: `createBucket`.
This returns the key info in the same format as the result of GetKeyInfo. This returns the key info in the same format as the result of GetKeyInfo.
#### DeleteKey `POST /v2/DeleteKey?id=<access key id>` #### DeleteKey `DELETE /v1/key?id=<acces key id>`
Deletes an API access key. Deletes an API access key.
### Bucket operations ### Bucket operations
#### ListBuckets `GET /v2/ListBuckets` #### ListBuckets `GET /v1/bucket`
Returns all storage buckets in the cluster. Returns all storage buckets in the cluster.
@ -558,8 +572,8 @@ Example response:
] ]
``` ```
#### GetBucketInfo `GET /v2/GetBucketInfo?id=<bucket id>` #### GetBucketInfo `GET /v1/bucket?id=<bucket id>`
#### GetBucketInfo `GET /v2/GetBucketInfo?globalAlias=<alias>` #### GetBucketInfo `GET /v1/bucket?globalAlias=<alias>`
Returns information about the requested storage bucket. Returns information about the requested storage bucket.
@ -602,7 +616,7 @@ Example response:
} }
``` ```
#### CreateBucket `POST /v2/CreateBucket` #### CreateBucket `POST /v1/bucket`
Creates a new storage bucket. Creates a new storage bucket.
@ -642,7 +656,7 @@ or no alias at all.
Technically, you can also specify both `globalAlias` and `localAlias` and that would create Technically, you can also specify both `globalAlias` and `localAlias` and that would create
two aliases, but I don't see why you would want to do that. two aliases, but I don't see why you would want to do that.
#### UpdateBucket `POST /v2/UpdateBucket?id=<bucket id>` #### UpdateBucket `PUT /v1/bucket?id=<bucket id>`
Updates configuration of the given bucket. Updates configuration of the given bucket.
@ -674,38 +688,16 @@ In `quotas`: new values of `maxSize` and `maxObjects` must both be specified, or
to remove the quotas. An absent value will be considered the same as a `null`. It is not possible to remove the quotas. An absent value will be considered the same as a `null`. It is not possible
to change only one of the two quotas. to change only one of the two quotas.
#### DeleteBucket `POST /v2/DeleteBucket?id=<bucket id>` #### DeleteBucket `DELETE /v1/bucket?id=<bucket id>`
Deletes a storage bucket. A bucket cannot be deleted if it is not empty. Deletes a storage bucket. A bucket cannot be deleted if it is not empty.
Warning: this will delete all aliases associated with the bucket! Warning: this will delete all aliases associated with the bucket!
#### CleanupIncompleteUploads `POST /v2/CleanupIncompleteUploads`
Cleanup all incomplete uploads in a bucket that are older than a specified number
of seconds.
Request body format:
```json
{
"bucketId": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
"olderThanSecs": 3600
}
```
Response format
```json
{
"uploadsDeleted": 12
}
```
### Operations on permissions for keys on buckets ### Operations on permissions for keys on buckets
#### AllowBucketKey `POST /v2/AllowBucketKey` #### BucketAllowKey `POST /v1/bucket/allow`
Allows a key to do read/write/owner operations on a bucket. Allows a key to do read/write/owner operations on a bucket.
@ -726,7 +718,7 @@ Request body format:
Flags in `permissions` which have the value `true` will be activated. Flags in `permissions` which have the value `true` will be activated.
Other flags will remain unchanged. Other flags will remain unchanged.
#### DenyBucketKey `POST /v2/DenyBucketKey` #### BucketDenyKey `POST /v1/bucket/deny`
Denies a key from doing read/write/owner operations on a bucket. Denies a key from doing read/write/owner operations on a bucket.
@ -750,35 +742,19 @@ Other flags will remain unchanged.
### Operations on bucket aliases ### Operations on bucket aliases
#### AddBucketAlias `POST /v2/AddBucketAlias` #### GlobalAliasBucket `PUT /v1/bucket/alias/global?id=<bucket id>&alias=<global alias>`
Creates an alias for a bucket in the namespace of a specific access key. Empty body. Creates a global alias for a bucket.
To create a global alias, specify the `globalAlias` field.
To create a local alias, specify the `localAlias` and `accessKeyId` fields.
Request body format: #### GlobalUnaliasBucket `DELETE /v1/bucket/alias/global?id=<bucket id>&alias=<global alias>`
```json Removes a global alias for a bucket.
{
"bucketId": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
"globalAlias": "my-bucket"
}
```
or: #### LocalAliasBucket `PUT /v1/bucket/alias/local?id=<bucket id>&accessKeyId=<access key ID>&alias=<local alias>`
```json Empty body. Creates a local alias for a bucket in the namespace of a specific access key.
{
"bucketId": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"localAlias": "my-bucket"
}
```
#### RemoveBucketAlias `POST /v2/RemoveBucketAlias` #### LocalUnaliasBucket `DELETE /v1/bucket/alias/local?id=<bucket id>&accessKeyId<access key ID>&alias=<local alias>`
Removes an alias for a bucket in the namespace of a specific access key. Removes a local alias for a bucket in the namespace of a specific access key.
To remove a global alias, specify the `globalAlias` field.
To remove a local alias, specify the `localAlias` and `accessKeyId` fields.
Request body format: same as AddBucketAlias.

View file

@ -35,7 +35,7 @@ Triples in K2V are constituted of three fields:
partition key in which the client wants to read/delete lists of items partition key in which the client wants to read/delete lists of items
- a sort key (`sk`), an utf8 string that defines the index of the triplet inside its - a sort key (`sk`), an utf8 string that defines the index of the triplet inside its
partition; triplets are uniquely identified by their partition key + sort key partition; triplets are uniquely idendified by their partition key + sort key
- a value (`v`), an opaque binary blob associated to the partition key + sort key; - a value (`v`), an opaque binary blob associated to the partition key + sort key;
they are transmitted as binary when possible but in most case in the JSON API they are transmitted as binary when possible but in most case in the JSON API
@ -74,7 +74,7 @@ are obsoleted by the new write.
**Basic insertion.** To insert a new value `v4` with context `[(node1, t2), (node2, t3)]`, in a **Basic insertion.** To insert a new value `v4` with context `[(node1, t2), (node2, t3)]`, in a
simple case where there was no insertion in-between reading the value simple case where there was no insertion in-between reading the value
mentioned above and writing `v4`, and supposing that node2 receives the mentionned above and writing `v4`, and supposing that node2 receives the
InsertItem query: InsertItem query:
- `node2` generates a timestamp `t4` such that `t4 > t3`. - `node2` generates a timestamp `t4` such that `t4 > t3`.
@ -332,7 +332,7 @@ Inserts a single item. This request does not use JSON, the body is sent directly
To supersede previous values, the HTTP header `X-Garage-Causality-Token` should To supersede previous values, the HTTP header `X-Garage-Causality-Token` should
be set to the causality token returned by a previous read on this key. This be set to the causality token returned by a previous read on this key. This
header can be omitted for the first writes to the key. header can be ommitted for the first writes to the key.
Example query: Example query:
@ -397,7 +397,7 @@ smallest partition key that exists. It returns partition keys in increasing
order, or decreasing order if `reverse` is set to `true`, order, or decreasing order if `reverse` is set to `true`,
and stops when either of the following conditions is met: and stops when either of the following conditions is met:
1. if `end` is specified, the partition key `end` is reached or surpassed (if it 1. if `end` is specfied, the partition key `end` is reached or surpassed (if it
is reached exactly, it is not included in the result) is reached exactly, it is not included in the result)
2. if `limit` is specified, `limit` partition keys have been listed 2. if `limit` is specified, `limit` partition keys have been listed
@ -491,7 +491,7 @@ the triplet is inserted for the first time, the causality token should be set to
The value is expected to be a base64-encoded binary blob. The value `null` can The value is expected to be a base64-encoded binary blob. The value `null` can
also be used to delete the triplet while preserving causality information: this also be used to delete the triplet while preserving causality information: this
allows to know if a delete has happened concurrently with an insert, in which allows to know if a delete has happenned concurrently with an insert, in which
case both are preserved and returned on reads (see below). case both are preserved and returned on reads (see below).
Partition keys and sort keys are utf8 strings which are stored sorted by Partition keys and sort keys are utf8 strings which are stored sorted by
@ -540,7 +540,7 @@ JSON struct with the following fields:
For each of the searches, triplets are listed and returned separately. The For each of the searches, triplets are listed and returned separately. The
semantics of `prefix`, `start`, `end`, `limit` and `reverse` are the same as for ReadIndex. The semantics of `prefix`, `start`, `end`, `limit` and `reverse` are the same as for ReadIndex. The
additional parameter `singleItem` allows to get a single item, whose sort key additionnal parameter `singleItem` allows to get a single item, whose sort key
is the one given in `start`. Parameters `conflictsOnly` and `tombstones` is the one given in `start`. Parameters `conflictsOnly` and `tombstones`
control additional filters on the items that are returned. control additional filters on the items that are returned.

View file

@ -59,7 +59,7 @@ To link the effective storage capacity of the cluster to partition assignment, w
\end{equation} \end{equation}
This assumption is justified by the dispersion of the hashing function, when the number of partitions is small relative to the number of stored blocks. This assumption is justified by the dispersion of the hashing function, when the number of partitions is small relative to the number of stored blocks.
Every node $n$ will store some number $p_n$ of partitions (it is the number of partitions $p$ such that $n$ appears in the $\alpha_p$). Hence the partitions stored by $n$ (and hence all partitions by our assumption) have there size bounded by $c_n/p_n$. This remark leads us to define the optimal size that we will want to maximize: Every node $n$ wille store some number $p_n$ of partitions (it is the number of partitions $p$ such that $n$ appears in the $\alpha_p$). Hence the partitions stored by $n$ (and hence all partitions by our assumption) have there size bounded by $c_n/p_n$. This remark leads us to define the optimal size that we will want to maximize:
\begin{equation} \begin{equation}
\label{eq:optimal} \label{eq:optimal}

View file

@ -38,7 +38,7 @@ We would like to compute an assignment of nodes to partitions. We will impose so
\end{equation} \end{equation}
This assumption is justified by the dispersion of the hashing function, when the number of partitions is small relative to the number of stored large objects. This assumption is justified by the dispersion of the hashing function, when the number of partitions is small relative to the number of stored large objects.
Every node $n$ will store some number $k_n$ of partitions. Hence the partitions stored by $n$ (and hence all partitions by our assumption) have there size bounded by $c_n/k_n$. This remark leads us to define the optimal size that we will want to maximize: Every node $n$ wille store some number $k_n$ of partitions. Hence the partitions stored by $n$ (and hence all partitions by our assumption) have there size bounded by $c_n/k_n$. This remark leads us to define the optimal size that we will want to maximize:
\begin{equation} \begin{equation}
\label{eq:optimal} \label{eq:optimal}
@ -62,7 +62,7 @@ For now, in the following, we ask the following redundancy constraint:
\textbf{Mode 3:} every partition needs to be assignated to three nodes. We try to spread the three nodes over different zones as much as possible. \textbf{Mode 3:} every partition needs to be assignated to three nodes. We try to spread the three nodes over different zones as much as possible.
\textbf{Warning:} This is a working document written incrementally. The last version of the algorithm is the \textbf{parametric assignment} described in the next section. \textbf{Warning:} This is a working document written incrementaly. The last version of the algorithm is the \textbf{parametric assignment} described in the next section.
\section{Computation of a parametric assignment} \section{Computation of a parametric assignment}
@ -318,7 +318,7 @@ $$
$$ $$
which is the universal upper bound on $s^*$. Hence any optimal utilization $(n_v)$ can be modified to another optimal utilization such that $n_v\ge \hat{n}_v$ which is the universal upper bound on $s^*$. Hence any optimal utilization $(n_v)$ can be modified to another optimal utilization such that $n_v\ge \hat{n}_v$
Because $z_0$ cannot store more than $N$ partition occurrences, in any assignment, at least $2N$ partitions must be assignated to the zones $Z\setminus\{z_0\}$. Let $C_0 = C-c_{z_0}$. Suppose that there exists a zone $z_1\neq z_0$ such that $c_{z_1}/C_0 \ge 1/2$. Then, with the same argument as for $z_0$, we can define
$$\hat{n}_v = \left\lfloor\frac{c_v}{c_{z_1}}N\right\rfloor$$ $$\hat{n}_v = \left\lfloor\frac{c_v}{c_{z_1}}N\right\rfloor$$
for every $v\in z_1$. for every $v\in z_1$.
@ -351,7 +351,7 @@ Define $3N$ tokens $t_1,\ldots, t_{3N}\in V$ as follows:
Then for $1\le i \le N$, define the triplet $T_i$ to be Then for $1\le i \le N$, define the triplet $T_i$ to be
$(t_i, t_{i+N}, t_{i+2N})$. Since the same nodes of a zone appear contiguously, the three nodes of a triplet must belong to three distinct zones. $(t_i, t_{i+N}, t_{i+2N})$. Since the same nodes of a zone appear contiguously, the three nodes of a triplet must belong to three distinct zones.
However simple, this solution to go from an utilization to an assignment has the drawback of not spreading the triplets: a node will tend to be associated to the same two other nodes for many partitions. Hence, during data transfer, it will tend to use only two links, instead of spreading the bandwidth use over many other links to other nodes. To achieve this goal, we will reframe the search of an assignment as a flow problem, and in the flow algorithm, we will introduce randomness in the order of exploration. This will be sufficient to obtain a good dispersion of the triplets.
\begin{figure} \begin{figure}
\centering \centering
@ -436,7 +436,7 @@ T_3=(b,c,d').
$$ $$
One can check that in this case, it is impossible to minimize both the number of zone and node changes. One can check that in this case, it is impossible to minimize both the number of zone and node changes.
Because of the redundancy constraint, we cannot use a greedy algorithm to just replace nodes in the triplets to try to get the new utilization rate: this could lead to a blocking situation where there is still a hole to fill in a triplet but no available node satisfies the zone separation constraint. To circumvent this issue, we propose an algorithm based on finding cycles in a graph encoding of the assignment. As in section \ref{sec:opt_assign}, we can explore the neighbours in a random order in the graph algorithms, to spread the triplets distribution.
\subsubsection{Minimizing the zone discrepancy} \subsubsection{Minimizing the zone discrepancy}
@ -550,8 +550,8 @@ We give some considerations of worst case complexity for these algorithms. In th
Algorithm \ref{alg:util} can be implemented with complexity $O(\#V^2)$. The complexity of the function call at line \ref{lin:subutil} is $O(\#V)$. The difference between the sum of the subutilizations and $3N$ is at most the sum of the rounding errors when computing the $\hat{n}_v$. Hence it is bounded by $\#V$ and the loop at line \ref{lin:loopsub} is iterated at most $\#V$ times. Finding the minimizing $v$ at line \ref{lin:findmin} takes $O(\#V)$ operations (naively, we could also use a heap). Algorithm \ref{alg:util} can be implemented with complexity $O(\#V^2)$. The complexity of the function call at line \ref{lin:subutil} is $O(\#V)$. The difference between the sum of the subutilizations and $3N$ is at most the sum of the rounding errors when computing the $\hat{n}_v$. Hence it is bounded by $\#V$ and the loop at line \ref{lin:loopsub} is iterated at most $\#V$ times. Finding the minimizing $v$ at line \ref{lin:findmin} takes $O(\#V)$ operations (naively, we could also use a heap).
Algorithm \ref{alg:opt} can be implemented with complexity $O(N^3\times \#Z)$. The flow graph has $O(N+\#Z)$ vertices and $O(N\times \#Z)$ edges. Dinic's algorithm has complexity $O(\#\mathrm{Vertices}^2\#\mathrm{Edges})$ hence in our case it is $O(N^3\times \#Z)$. Algorithm \ref{alg:opt} can be implemented with complexity $O(N^3\times \#Z)$. The flow graph has $O(N+\#Z)$ vertices and $O(N\times \#Z)$ edges. Dinic's algorithm has complexity $O(\#\mathrm{Vertices}^2\#\mathrm{Edges})$ hence in our case it is $O(N^3\times \#Z)$.
Algorithm \ref{alg:mini} can be implemented with complexity $O(N^3\# Z)$ under \eqref{hyp:A} and $O(N^3 \#Z \#V)$ under \eqref{hyp:B}.
The graph $G_T$ has $O(N)$ vertices and $O(N\times \#Z)$ edges under assumption \eqref{hyp:A} and respectively $O(N\times \#Z)$ vertices and $O(N\times \#V)$ edges under assumption \eqref{hyp:B}. The loop at line \ref{lin:repeat} is iterated at most $N$ times since the distance between $T$ and $T'$ decreases at every iteration. Bellman-Ford algorithm has complexity $O(\#\mathrm{Vertices}\#\mathrm{Edges})$, which in our case amounts to $O(N^2\# Z)$ under \eqref{hyp:A} and $O(N^2 \#Z \#V)$ under \eqref{hyp:B}. The graph $G_T$ has $O(N)$ vertices and $O(N\times \#Z)$ edges under assumption \eqref{hyp:A} and respectively $O(N\times \#Z)$ vertices and $O(N\times \#V)$ edges under assumption \eqref{hyp:B}. The loop at line \ref{lin:repeat} is iterated at most $N$ times since the distance between $T$ and $T'$ decreases at every iteration. Bellman-Ford algorithm has complexity $O(\#\mathrm{Vertices}\#\mathrm{Edges})$, which in our case amounts to $O(N^2\# Z)$ under \eqref{hyp:A} and $O(N^2 \#Z \#V)$ under \eqref{hyp:B}.
\begin{algorithm} \begin{algorithm}
@ -637,7 +637,7 @@ We try to maximize $s^*$ defined in \eqref{eq:optimal}. So we can compute the op
\subsection{Computation of a candidate assignment} \subsection{Computation of a candidate assignment}
To compute a candidate assignment (that does not optimize zone spreading nor distance to a previous assignment yet), we can use the following flow problem.
Define the oriented weighted graph $(X,E)$. The set of vertices $X$ contains the source $\mathbf{s}$, the sink $\mathbf{t}$, vertices Define the oriented weighted graph $(X,E)$. The set of vertices $X$ contains the source $\mathbf{s}$, the sink $\mathbf{t}$, vertices
$\mathbf{x}_p, \mathbf{u}^+_p, \mathbf{u}^-_p$ for every partition $p$, vertices $\mathbf{y}_{p,z}$ for every partition $p$ and zone $z$, and vertices $\mathbf{z}_v$ for every node $v$. $\mathbf{x}_p, \mathbf{u}^+_p, \mathbf{u}^-_p$ for every partition $p$, vertices $\mathbf{y}_{p,z}$ for every partition $p$ and zone $z$, and vertices $\mathbf{z}_v$ for every node $v$.
@ -680,14 +680,14 @@ Given the flow $f$, let $G_f=(X',E_f)$ be the multi-graph where $X' = X\setminus
\end{itemize} \end{itemize}
To summarize, arcs are oriented left to right if they correspond to a presence of flow in $f$, and right to left if they correspond to an absence of flow. They are positively weighted if we want them to stay at their current state, and negatively if we want them to switch. Let us compute the weight of such graph. To summarize, arcs are oriented left to right if they correspond to a presence of flow in $f$, and right to left if they correspond to an absence of flow. They are positively weighted if we want them to stay at their current state, and negatively if we want them to switch. Let us compute the weight of such graph.
\begin{multline*}
w(G_f) = \sum_{e\in E_f} w(e_f) \\ w(G_f) = \sum_{e\in E_f} w(e_f) \\
= =
(\alpha - \beta -\gamma) N_1 + (\alpha +\beta - \gamma) N_2 + (\alpha+\beta+\gamma) N_3 (\alpha - \beta -\gamma) N_1 + (\alpha +\beta - \gamma) N_2 + (\alpha+\beta+\gamma) N_3
\\ + \\ +
\#V\times N - 4 \sum_p 3-\#(T_p\cap T'_p) \\ \#V\times N - 4 \sum_p 3-\#(T_p\cap T'_p) \\
=(\#V-12+\alpha-\beta-\gamma)\times N + 4Q_V + 2\beta N_2 + 2(\beta+\gamma) N_3 \\ =(\#V-12+\alpha-\beta-\gamma)\times N + 4Q_V + 2\beta N_2 + 2(\beta+\gamma) N_3 \\
\end{multline*}
As for the mode 3-strict, one can check that the difference of two such graphs corresponding to the same $(n_v)$ is always eulerian. Hence we can navigate in this class with the same greedy algorithm that discovers positive cycles and flips them. As for the mode 3-strict, one can check that the difference of two such graphs corresponding to the same $(n_v)$ is always eulerian. Hence we can navigate in this class with the same greedy algorithm that discovers positive cycles and flips them.

View file

@ -1,17 +0,0 @@
*
!*.txt
!*.md
!assets
!.gitignore
!*.svg
!*.png
!*.jpg
!*.tex
!Makefile
!.gitignore
!assets/*.drawio.pdf
!talk.pdf

View file

@ -1,19 +0,0 @@
ASSETS=../assets/lattice/lattice1.pdf_tex \
../assets/lattice/lattice2.pdf_tex \
../assets/lattice/lattice3.pdf_tex \
../assets/lattice/lattice4.pdf_tex \
../assets/lattice/lattice5.pdf_tex \
../assets/lattice/lattice6.pdf_tex \
../assets/lattice/lattice7.pdf_tex \
../assets/lattice/lattice8.pdf_tex \
../assets/logos/deuxfleurs.pdf \
../assets/timeline-22-24.pdf
talk.pdf: talk.tex $(ASSETS)
pdflatex talk.tex
%.pdf: %.svg
inkscape -D -z --file=$^ --export-pdf=$@
%.pdf_tex: %.svg
inkscape -D -z --file=$^ --export-pdf=$@ --export-latex

Binary file not shown.

View file

@ -1,702 +0,0 @@
\nonstopmode
\documentclass[aspectratio=169,xcolor={svgnames}]{beamer}
\usepackage[utf8]{inputenc}
% \usepackage[frenchb]{babel}
\usepackage{amsmath}
\usepackage{mathtools}
\usepackage{breqn}
\usepackage{multirow}
\usetheme{boxes}
\usepackage{graphicx}
\usepackage{import}
\usepackage{adjustbox}
\usepackage[absolute,overlay]{textpos}
%\useoutertheme[footline=authortitle,subsection=false]{miniframes}
%\useoutertheme[footline=authorinstitute,subsection=false]{miniframes}
\useoutertheme{infolines}
\setbeamertemplate{headline}{}
\beamertemplatenavigationsymbolsempty
\definecolor{TitleOrange}{RGB}{255,137,0}
\setbeamercolor{title}{fg=TitleOrange}
\setbeamercolor{frametitle}{fg=TitleOrange}
\definecolor{ListOrange}{RGB}{255,145,5}
\setbeamertemplate{itemize item}{\color{ListOrange}$\blacktriangleright$}
\definecolor{verygrey}{RGB}{70,70,70}
\setbeamercolor{normal text}{fg=verygrey}
\usepackage{tabu}
\usepackage{multicol}
\usepackage{vwcol}
\usepackage{stmaryrd}
\usepackage{graphicx}
\usepackage[normalem]{ulem}
\AtBeginSection[]{
\begin{frame}
\vfill
\centering
\begin{beamercolorbox}[sep=8pt,center,shadow=true,rounded=true]{title}
\usebeamerfont{title}\insertsectionhead\par%
\end{beamercolorbox}
\vfill
\end{frame}
}
\title{Garage, an S3 backend as reliable as possible}
\author{Garage Authors}
\date{JoSy S3, 2025-10-08}
\begin{document}
\begin{frame}
\centering
\includegraphics[width=.3\linewidth]{../../sticker/Garage.png}
\vspace{1em}
{\large\bf Garage, an S3 backend as reliable as possible}
\vspace{1em}
\url{https://garagehq.deuxfleurs.fr/}\\
\url{mailto:garagehq@deuxfleurs.fr}\\
\texttt{\#garage:deuxfleurs.fr} on Matrix
\end{frame}
\section{Meet Garage}
\begin{frame}
\frametitle{A non-profit initiative}
\begin{columns}[t]
\begin{column}{.2\textwidth}
\centering
\adjincludegraphics[width=.5\linewidth, valign=t]{../assets/logos/deuxfleurs.pdf}
\end{column}
\begin{column}{.8\textwidth}
\textbf{Part of a degrowth initiative}\\
Garage has been created at Deuxfleurs where we experiment running Internet services without datacenter on commodity and refurbished hardware.
\end{column}
\end{columns}
\vspace{2em}
\begin{columns}[t]
\begin{column}{.2\textwidth}
\centering
\adjincludegraphics[width=.5\linewidth, valign=t]{../assets/community.png}
\end{column}
\begin{column}{.8\textwidth}
\textbf{Developed by a community}\\
{\small Some recent contributors: Arthur C, Charles H, dongdigua, Etienne L, Jonah A, Julien K, Lapineige, MagicRR, Milas B, Niklas M, RockWolf, Schwitzd, trinity-1686a, Xavier S, babykart, Baptiste J, eddster2309, James O'C, Joker9944, Maximilien R, Renjaya RZ, Yureka...}
\end{column}
\end{columns}
\vspace{2em}
\begin{columns}[t]
\begin{column}{.2\textwidth}
\centering
\adjincludegraphics[width=.5\linewidth, valign=t]{../assets/logos/AGPLv3_Logo.png}
\end{column}
\begin{column}{.8\textwidth}
\textbf{Owned by nobody, open-core is impossible, zero VC money}\\
AGPL + no Contributor License Agreement = Garage ownership spreads among hundreds of contributors.
\end{column}
\end{columns}
\end{frame}
\begin{frame}
\frametitle{Getting support for Garage}
\begin{columns}[t]
\begin{column}{.2\textwidth}
\centering
\adjincludegraphics[width=.4\linewidth, valign=t]{../assets/alex.jpg}
\end{column}
\begin{column}{.4\textwidth}
\textbf{Alex Auvolat}\\
PhD; co-founder of Deuxfleurs\\
Garage maintainer, Freelance
\end{column}
\begin{column}{.3\textwidth}
\centering
\adjincludegraphics[width=.4\linewidth, valign=t]{../assets/support.png}
\end{column}
\begin{column}{.1\textwidth}
~
\end{column}
\end{columns}
\vspace{2em}
\begin{columns}[t]
\begin{column}{.2\textwidth}
\centering
\adjincludegraphics[width=.4\linewidth, valign=t]{../assets/quentin.jpg}
\end{column}
\begin{column}{.4\textwidth}
\textbf{Quentin Dufour}\\
PhD; co-founder of Deuxfleurs\\
Garage contributor, Freelance
\end{column}
\begin{column}{.4\textwidth}
For support requests, write at: \\
\url{garagehq@deuxfleurs.fr}
\end{column}
\end{columns}
\vspace{2em}
\begin{columns}[t]
\begin{column}{.2\textwidth}
\centering
\adjincludegraphics[width=.4\linewidth, valign=t]{../assets/armael.jpg}
\end{column}
\begin{column}{.4\textwidth}
\textbf{Armaël Guéneau}\\
PhD; member of Deuxfleurs\\
Garage contributor, Freelance
\end{column}
\begin{column}{.4\textwidth}
Eligible: email support, architecture design, specific feature development, etc.
\end{column}
\end{columns}
\end{frame}
\begin{frame}
\frametitle{Our initial goal}
\centering
\Large
Being a self-sovereign community to be free of our degrowth choice
$\big\downarrow$
As web citizens, datacenters are big black boxes. \\
We want to leave them to autonomously manage our servers.
$\big\downarrow$
We want reliable services without relying on dedicated hardware or places.
\end{frame}
\begin{frame}
\frametitle{Building a resilient system with cheap stuff}
\only<1,4-7>{
\begin{itemize}
\item \textcolor<5->{gray}{Commodity hardware (e.g. old desktop PCs)\\
\vspace{.5em}
\visible<4->{{\footnotesize (can die at any time)}}}
\vspace{1.5em}
\item<5-> \textcolor<7->{gray}{Regular Internet (e.g. FTTB, FTTH) and power grid connections\\
\vspace{.5em}
\visible<6->{{\footnotesize (can be unavailable randomly)}}}
\vspace{1.5em}
\item<7-> \textbf{Geographical redundancy} (multi-site replication)
\end{itemize}
}
\only<2>{
\begin{center}
\includegraphics[width=.8\linewidth]{../assets/neptune.jpg}
\end{center}
}
\only<3>{
\begin{center}
\includegraphics[width=.8\linewidth]{../assets/atuin.jpg}
\end{center}
}
\only<8>{
\begin{center}
\includegraphics[width=.8\linewidth]{../assets/inframap_jdll2023.pdf}
\end{center}
}
\end{frame}
\begin{frame}
\frametitle{Object storage: a crucial component}
\begin{center}
\includegraphics[height=6em]{../assets/logos/Amazon-S3.jpg}
\hspace{3em}
\visible<2->{\includegraphics[height=5em]{../assets/logos/minio.png}}
\hspace{3em}
\visible<3>{\includegraphics[height=6em]{../../logo/garage_hires_crop.png}}
\end{center}
\vspace{1em}
S3: a de-facto standard, many compatible applications
\vspace{1em}
\visible<2->{MinIO is self-hostable but not suited for geo-distributed deployments}
\vspace{1em}
\visible<3->{\textbf{Garage is a self-hosted drop-in replacement for the Amazon S3 object store}}
\end{frame}
\begin{frame}
\frametitle{CRDTs / weak consistency instead of consensus}
\underline{Internally, Garage uses only CRDTs} (conflict-free replicated data types)
\vspace{2em}
Why not Raft, Paxos, ...? Issues of consensus algorithms:
\vspace{1em}
\begin{itemize}
\item<2-> \textbf{Software complexity}
\vspace{1em}
\item<3-> \textbf{Performance issues:}
\vspace{.5em}
\begin{itemize}
\item<4-> The leader is a \textbf{bottleneck} for all requests\\
\vspace{.5em}
\item<5-> \textbf{Sensitive to higher latency} between nodes
\vspace{.5em}
\item<6-> \textbf{Takes time to reconverge} when disrupted (e.g. node going down)
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{The data model of object storage}
Object storage is basically a \textbf{key-value store}:
\vspace{.5em}
{\scriptsize
\begin{center}
\begin{tabular}{|l|p{7cm}|}
\hline
\textbf{Key: file path + name} & \textbf{Value: file data + metadata} \\
\hline
\hline
\texttt{index.html} &
\texttt{Content-Type: text/html; charset=utf-8} \newline
\texttt{Content-Length: 24929} \newline
\texttt{<binary blob>} \\
\hline
\texttt{img/logo.svg} &
\texttt{Content-Type: text/svg+xml} \newline
\texttt{Content-Length: 13429} \newline
\texttt{<binary blob>} \\
\hline
\texttt{download/index.html} &
\texttt{Content-Type: text/html; charset=utf-8} \newline
\texttt{Content-Length: 26563} \newline
\texttt{<binary blob>} \\
\hline
\end{tabular}
\end{center}
}
\vspace{1em}
\begin{itemize}
\item<2> Maps well to CRDT data types
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Performance gains in practice}
\begin{center}
\includegraphics[width=.8\linewidth]{../assets/perf/endpoint_latency_0.7_0.8_minio.png}
\end{center}
\end{frame}
% ======================================== OPERATING
% ======================================== OPERATING
% ======================================== OPERATING
\section{Production clusters}
\begin{frame}
\frametitle{Deployment kinds}
\includegraphics[width=.9\linewidth]{../assets/cluster_kind.png}
\vspace{1em}
\end{frame}
\begin{frame}
\frametitle{How big are they?}
\includegraphics[width=.9\linewidth]{../assets/cluster_size.png}
\vspace{1em}
\textit{"Petabyte storage setup for a video site. Nginx as CDN in-front using garage-s3-website feature. Each storage node has ~64TB storage with raid10, no replication within garage. 25gbit nic. haproxy to loadbalance across 5 nodes. mostly reads with very few writes."}
\vspace{1em}
\textit{"We currently manage 7 Garage nodes, 28TB total storage, 6M blocks for 3M objects and 4TB of object data. We have been running Garage in production for 2.5 years."}
\end{frame}
\begin{frame}
\frametitle{Operating Garage}
\begin{center}
\only<1-2>{
\includegraphics[width=.9\linewidth]{../assets/screenshots/garage_status_0.10.png}
\\\vspace{1em}
\visible<2>{\includegraphics[width=.9\linewidth]{../assets/screenshots/garage_status_unhealthy_0.10.png}}
}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Garage's architecture}
\begin{center}
\only<1>{\includegraphics[width=.45\linewidth]{../assets/garage.drawio.pdf}}%
\only<2>{\includegraphics[width=.6\linewidth]{../assets/garage_sync.drawio.pdf}}%
\end{center}
\end{frame}
\begin{frame}
\frametitle{Digging deeper}
\begin{center}
\only<1>{\includegraphics[width=.9\linewidth]{../assets/screenshots/garage_stats_0.10.png}}
\only<2>{\includegraphics[width=.5\linewidth]{../assets/screenshots/garage_worker_list_0.10.png}}
\only<3>{\includegraphics[width=.6\linewidth]{../assets/screenshots/garage_worker_param_0.10.png}}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Potential limitations and bottlenecks}
\begin{itemize}
\item Global:
\begin{itemize}
\item Max. $\sim$100 nodes per cluster (excluding gateways)
\end{itemize}
\vspace{1em}
\item Metadata:
\begin{itemize}
\item One big bucket = bottleneck, object list on 3 nodes only
\end{itemize}
\vspace{1em}
\item Block manager:
\begin{itemize}
\item Lots of small files on disk
\item Processing the resync queue can be slow
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Deployment advice for very large clusters}
\begin{itemize}
\item Metadata storage:
\begin{itemize}
\item ZFS mirror (x2) on fast NVMe
\item Use LMDB storage engine
\end{itemize}
\vspace{.5em}
\item Data block storage:
\begin{itemize}
\item Use Garage's native multi-HDD support
\item XFS on individual drives
\item Increase block size (1MB $\to$ 10MB, requires more RAM and good networking)
\item Tune \texttt{resync-tranquility} and \texttt{resync-worker-count} dynamically
\end{itemize}
\vspace{.5em}
\item Other :
\begin{itemize}
\item Split data over several buckets
\item Use less than 100 storage nodes
\item Use gateway nodes
\end{itemize}
\vspace{.5em}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Focus on Deuxfleurs}
Host institutional websites, partnership with a web agency.
Matrix media backend.
Plan to use it as an email backend for an internally developed email server.
\end{frame}
% ======================================== TIMELINE
% ======================================== TIMELINE
% ======================================== TIMELINE
\section{Recent developments}
% ====================== v0.7.0 ===============================
\begin{frame}
\begin{center}
\includegraphics[width=.8\linewidth]{../assets/tl.drawio.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{April 2022 - Garage v0.7.0}
Focus on \underline{observability and ecosystem integration}
\vspace{2em}
\begin{itemize}
\item \textbf{Monitoring:} metrics and traces, using OpenTelemetry
\vspace{1em}
\item Replication modes with 1 or 2 copies / weaker consistency
\vspace{1em}
\item Kubernetes integration for node discovery
\vspace{1em}
\item Admin API (v0.7.2)
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Metrics (Prometheus + Grafana)}
\begin{center}
\includegraphics[width=.9\linewidth]{../assets/screenshots/grafana_dashboard.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Traces (Jaeger)}
\begin{center}
\includegraphics[width=.8\linewidth]{../assets/screenshots/jaeger_listobjects.png}
\end{center}
\end{frame}
% ====================== v0.8.0 ===============================
\begin{frame}
\begin{center}
\includegraphics[width=.8\linewidth]{../assets/tl.drawio.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{November 2022 - Garage v0.8.0}
Focus on \underline{performance}
\vspace{2em}
\begin{itemize}
\item \textbf{Alternative metadata DB engines} (LMDB, Sqlite)
\vspace{1em}
\item \textbf{Performance improvements:} block streaming, various optimizations...
\vspace{1em}
\item Bucket quotas (max size, max \#objects)
\vspace{1em}
\item Quality of life improvements, observability, etc.
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{About metadata DB engines}
\textbf{Issues with Sled:}
\vspace{1em}
\begin{itemize}
\item Huge files on disk
\vspace{.5em}
\item Unpredictable performance, especially on HDD
\vspace{.5em}
\item API limitations
\vspace{.5em}
\item Not actively maintained
\end{itemize}
\vspace{2em}
\textbf{LMDB:} very stable, good performance, file size is reasonable\\
\textbf{Sqlite} also available as a second choice
\vspace{1em}
Sled will be removed in Garage v1.0
\end{frame}
\begin{frame}
\frametitle{DB engine performance comparison}
\begin{center}
\includegraphics[width=.6\linewidth]{../assets/perf/db_engine.png}
\end{center}
NB: Sqlite was slow due to synchronous mode, now configurable
\end{frame}
\begin{frame}
\frametitle{Block streaming}
\begin{center}
\only<1>{\includegraphics[width=.8\linewidth]{../assets/schema-streaming-1.png}}
\only<2>{\includegraphics[width=.8\linewidth]{../assets/schema-streaming-2.png}}
\end{center}
\end{frame}
\begin{frame}
\frametitle{TTFB benchmark}
\begin{center}
\includegraphics[width=.8\linewidth]{../assets/perf/ttfb.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Throughput benchmark}
\begin{center}
\includegraphics[width=.7\linewidth]{../assets/perf/io-0.7-0.8-minio.png}
\end{center}
\end{frame}
% ====================== v0.9.0 ===============================
\begin{frame}
\begin{center}
\includegraphics[width=.8\linewidth]{../assets/tl.drawio.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{October 2023 - Garage v0.9.0}
Focus on \underline{streamlining \& usability}
\vspace{2em}
\begin{itemize}
\item Support multiple HDDs per node
\vspace{1em}
\item S3 compatibility:
\vspace{1em}
\begin{itemize}
\item support basic lifecycle configurations
\vspace{.5em}
\item allow for multipart upload part retries
\end{itemize}
\vspace{1em}
\item LMDB by default, deprecation of Sled
\vspace{1em}
\item New layout computation algorithm
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Layout computation}
\begin{overprint}
\onslide<1>
\begin{center}
\includegraphics[width=\linewidth, trim=0 0 0 -4cm]{../assets/screenshots/garage_status_0.9_prod_zonehl.png}
\end{center}
\onslide<2>
\begin{center}
\includegraphics[width=.7\linewidth]{../assets/map.png}
\end{center}
\end{overprint}
\vspace{1em}
Garage stores replicas on different zones when possible
\end{frame}
\begin{frame}
\frametitle{What a "layout" is}
\textbf{A layout is a precomputed index table:}
\vspace{1em}
{\footnotesize
\begin{center}
\begin{tabular}{|l|l|l|l|}
\hline
\textbf{Partition} & \textbf{Node 1} & \textbf{Node 2} & \textbf{Node 3} \\
\hline
\hline
Partition 0 & df-ymk (bespin) & Abricot (scorpio) & Courgette (neptune) \\
\hline
Partition 1 & Ananas (scorpio) & Courgette (neptune) & df-ykl (bespin) \\
\hline
Partition 2 & df-ymf (bespin) & Celeri (neptune) & Abricot (scorpio) \\
\hline
\hspace{1em}$\vdots$ & \hspace{1em}$\vdots$ & \hspace{1em}$\vdots$ & \hspace{1em}$\vdots$ \\
\hline
Partition 255 & Concombre (neptune) & df-ykl (bespin) & Abricot (scorpio) \\
\hline
\end{tabular}
\end{center}
}
\vspace{2em}
\visible<2->{
The index table is built centrally using an optimal algorithm,\\
then propagated to all nodes
}
\vspace{1em}
\visible<3->{
\footnotesize
Oulamara, M., \& Auvolat, A. (2023). \emph{An algorithm for geo-distributed and redundant storage in Garage}.\\ arXiv preprint arXiv:2302.13798.
}
\end{frame}
% ====================== v1.0.0 ===============================
\begin{frame}
\begin{center}
\includegraphics[width=.8\linewidth]{../assets/tl.drawio.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{April 2024 - Garage v1.0.0}
Focus on \underline{consistency, security \& stability}
\vspace{2em}
\begin{itemize}
\item Fix consistency issues when reshuffling data (Jepsen testing)
\vspace{1em}
\item \textbf{Security audit} by Radically Open Security
\vspace{1em}
\item Misc. S3 features (SSE-C, checksums, ...) and compatibility fixes
\end{itemize}
\end{frame}
% ====================== v2.0.0 ===============================
\begin{frame}
\begin{center}
\includegraphics[width=.8\linewidth]{../assets/tl.drawio.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Garage v2.0.0}
Focus on \underline{}
\vspace{2em}
\begin{itemize}
\item TODO
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Currently funding...}
\textit{...}
\end{frame}
\begin{frame}
\frametitle{We run community surveys}
\begin{center}
\includegraphics[width=.6\linewidth]{../assets/survey_requested_features.png}
\end{center}
\end{frame}
% ======================================== END
% ======================================== END
% ======================================== END
\begin{frame}
\frametitle{Where to find us}
\begin{center}
\includegraphics[width=.25\linewidth]{../../logo/garage_hires.png}\\
\vspace{-1em}
\url{https://garagehq.deuxfleurs.fr/}\\
\url{mailto:garagehq@deuxfleurs.fr}\\
\texttt{\#garage:deuxfleurs.fr} on Matrix
\vspace{1.5em}
\includegraphics[width=.06\linewidth]{../assets/logos/rust_logo.png}
\includegraphics[width=.13\linewidth]{../assets/logos/AGPLv3_Logo.png}
\end{center}
\end{frame}
\end{document}
%% vim: set ts=4 sw=4 tw=0 noet spelllang=en :

View file

@ -1,18 +0,0 @@
*
!*.txt
!*.md
!assets
!.gitignore
!*.svg
!*.png
!*.jpg
!*.tex
!Makefile
!.gitignore
!assets/*.drawio.pdf
talk.{nav,out,snm,toc,aux,log}
!talk.pdf

View file

@ -1,3 +0,0 @@
talk.pdf: talk.tex
pdflatex talk.tex

Binary file not shown.

Before

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 297 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 240 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 82 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 129 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 233 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 145 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

View file

@ -1,330 +0,0 @@
%\nonstopmode
\documentclass[aspectratio=169]{beamer}
\usepackage[utf8]{inputenc}
% \usepackage[frenchb]{babel}
\usepackage{amsmath}
\usepackage{mathtools}
\usepackage{breqn}
\usepackage{multirow}
\usetheme{boxes}
\usepackage{graphicx}
%\useoutertheme[footline=authortitle,subsection=false]{miniframes}
\beamertemplatenavigationsymbolsempty
\definecolor{TitleOrange}{RGB}{255,137,0}
\setbeamercolor{title}{fg=TitleOrange}
\setbeamercolor{frametitle}{fg=TitleOrange}
\definecolor{ListOrange}{RGB}{255,145,5}
\setbeamertemplate{itemize item}{\color{ListOrange}$\blacktriangleright$}
\definecolor{verygrey}{RGB}{70,70,70}
\setbeamercolor{normal text}{fg=verygrey}
\usepackage{tabu}
\usepackage{multicol}
\usepackage{vwcol}
\usepackage{stmaryrd}
\usepackage{graphicx}
\usepackage[normalem]{ulem}
\title{Garage Object Storage: 2.0 update and best practices}
\subtitle{a new storage platform for self-hosted geo-distributed clusters}
\author{Maximilien Richer, Deuxfleurs}
\date{FOSDEM '26}
\begin{document}
\begin{frame}
\centering
\includegraphics[width=.3\linewidth]{../../sticker/Garage.pdf}
\vspace{1em}
{\large\bf Maximilien Richer, Deuxfleurs}
\vspace{1em}
\url{https://garagehq.deuxfleurs.fr/}
Matrix channel: \texttt{\#garage:deuxfleurs.fr}
\end{frame}
\begin{frame}
\frametitle{Our objective at Deuxfleurs}
\begin{center}
French association promoting digital sovereignty and privacy\\
through self-hosting and small-scale hosting \textbf{as an alternative to large cloud providers}
\end{center}
\vspace{2em}
\vspace{2em}
\begin{center}
\textbf{This requires \underline{resilience}}\\
{\footnotesize (we want good uptime/availability with low supervision)}
\end{center}
\end{frame}
\begin{frame}
\frametitle{But what is Garage, exactly?}
\textbf{Garage is a self-hosted drop-in replacement for the Amazon S3 object store}\\
\vspace{.5em}
that implements resilience through geographical redundancy on commodity hardware
\begin{center}
\includegraphics[width=.8\linewidth]{assets/garageuses.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{What makes Garage different?}
\textbf{Coordination-free:}
\vspace{2em}
\begin{itemize}
\item No Raft or Paxos
\vspace{1em}
\item Internal data types are CRDTs
\vspace{1em}
\item All nodes are equivalent (no master/leader/index node)
\end{itemize}
\vspace{2em}
$\to$ less sensitive to higher latencies between nodes
\end{frame}
\begin{frame}
\frametitle{What makes Garage different?}
\begin{center}
TODO update with latest garage and minio versions
\includegraphics[width=.9\linewidth]{assets/endpoint-latency-dc.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{What makes Garage different?}
\textbf{Consistency model:}
\vspace{2em}
\begin{itemize}
\item Not ACID (not required by S3 spec) / not linearizable
\vspace{1em}
\item \textbf{Read-after-write consistency}\\
{\footnotesize (stronger than eventual consistency)}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{What makes Garage different?}
\textbf{Location-aware:}
\vspace{2em}
\begin{center}
\includegraphics[width=\linewidth]{assets/location-aware.png}
\end{center}
\vspace{2em}
Garage replicates data on different zones when possible
\end{frame}
\begin{frame}
\frametitle{What makes Garage different?}
\begin{center}
\includegraphics[width=.8\linewidth]{assets/map.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{An ever-increasing compatibility list}
\begin{center}
\includegraphics[width=.7\linewidth]{assets/compatibility.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Version history and roadmap}
\begin{itemize}
\item v0.3: initial beta release (2021)
\item v0.7: first released version (2022)
\item v1.0: stable release (2024), will be deprecated in summer 2026 1y after v2.0 was released
\item v2.0: stable release (2025)
\begin{itemize}
\item new HTTP admin API
\item reworked replication configuration: \texttt{replication\_mode} changed to \texttt{replication\_factor} \& \texttt{consistency\_policy}
\end{itemize}
\item
\end{itemize}
\begin{center}
v3.0: TBA, may include versioning support, tags on buckets and objects, retention policies...
\end{center}
\end{frame}
\begin{frame}
\centering
{\large\bf Best practices for Garage deployments}
\end{frame}
\begin{frame}
\frametitle{Things you should know}
\begin{itemize}
\item no TLS support, use your own proxy
\item no anonymous access (use website endpoint)
\item you need to assign roles to nodes manually
\item the replication factor cannot be changed easily
\item the default region is \texttt{garage} and not \texttt{us-east-1}
\item only use the \texttt{degraded} consistency policy for data recovery!
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{What hardware should I use?}
\begin{itemize}
\item do NOT use network file storage (NFS, SMB, etc.) for \texttt{\/metadata}
\item get a \textbf{write-intensive flash disk} for the \texttt{\/metadata} folder
\item set \texttt{metadata} on a RAID1 if possible, with a COW filesystem (e.g. Btrfs or ZFS)
\item get large HDDs for the \texttt{\/data} folder
\item use XFS and garage multi-hdd mode for best performance
\item you can use a RAID for data but you'll leave a lot of performance on the table
\end{itemize}
\center\textit{Garage doesn't require a powerful CPU nor much RAM, but your performance will depend on your disks!}
\end{frame}
\begin{frame}
\frametitle{Picking a metadata engine}
All files-to-block mappings are stored in the metadata engine, including bucket and object metadata. Files below 3KB are stored directly in the metadata engine.
\vspace{1em}
\begin{itemize}
\item Sled: removed in 1.x, move to SQLite or LMDB
\item \textbf{SQLite}: safer, \textbf{recommended for small clusters and single-node}
\item LMDB: faster, recommended for large clusters with metadata redundancy
\begin{itemize}
\item Warning: limited to 480 bytes per key with LMDB (not an issue in practice)
\end{itemize}
\item Fjall: experimental but promising rust-native engine, test it and let us know!
\end{itemize}
\center{Metadata engine can be set node per node, and changed later with a migration tool}
\end{frame}
\begin{frame}
\frametitle{Single-node deployment}
\begin{itemize}
\item garage was initially designed for multi-node deployments
\item single-node deployments are possible, but you will lose resilience
\item \textbf{If you do please ensure you have backups} (especially for metadata)
\begin{itemize}
\item set up \texttt{metadata\_auto\_snapshot\_interval}
\end{itemize}
\item use SQLite to minimize data loss risks on powercuts
\item or use a UPS!
\end{itemize}
\vspace{1em}
Use \texttt{github.com/bikeshedder/garage-single-node} for an easy single-node setup!
\end{frame}
\begin{frame}
\frametitle{Multi-node deployment}
\begin{itemize}
\item try to have geo-distributed zones
\item multiple nodes per zone to add more capacity
\item at least 3 zones for best resilience
\item keep in mind your available network and IO bandwidth
\item \textbf{Rebalancing a cluster can take multiple weeks with large HDDs and slow network links}
\item monitor your nodes with Prometheus + Grafana
\end{itemize}
\center{Deuxfleurs has been running a 9TB (3TB usable) 8-nodes cluster (3+3+2) over retail fiber (10ms site-to-site latency) for close to 5 years now. We heard there are petabyte clusters out there!}
\end{frame}
\begin{frame}
\frametitle{Deploying and administering garage at scale}
\begin{itemize}
\item deploy with your favorite tool (eg. Ansible) and system manager (eg. systemd)
\item or use Docker, docker-compose, Kubernetes or Nomad
\item Kubernetes and Consul are supported for node-to-node discovery
\begin{itemize}
\item you'll still have to manage the layout manually!
\end{itemize}
\item use gateway nodes to optimize network usage
\item adjust \texttt{resync-tranquility} and \texttt{scrub-tranquility} to your resources
\end{itemize}
\center{Kubernetes storage controller: \texttt{github.com/bmarinov/garage-storage-controller}}
\end{frame}
\begin{frame}
\frametitle{Community UI available!}
\begin{center}
\includegraphics[width=0.9\linewidth]{assets/community-ui.png}\\
\vspace{-1em}
\url{https://github.com/khairul169/garage-webui}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Official Embedded UI coming later this year!}
\begin{center}
\includegraphics[width=0.9\linewidth]{assets/Garage Web Admin - Dashboard@2x.png}\\
\vspace{-1em}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Official Embedded UI coming this year!}
\begin{center}
\includegraphics[width=0.9\linewidth]{assets/Garage Web Admin - Bucket details page@2x.png}\\
\vspace{-1em}
\end{center}
\end{frame}
\begin{frame}
\frametitle{How to make sense of garage metrics?}
\begin{center}
\includegraphics[width=0.7\linewidth]{assets/garage-stats.png}\\
\vspace{-1em}
\end{center}
\end{frame}
\begin{frame}
\frametitle{What if things go wrong?}
\begin{itemize}
\item set logs to debug with \texttt{RUST\_LOG=garage\_api\_common=debug,garage\_api\_s3=debug,garage=debug}
\item auth issues: check your reverse proxy configuration
\item slow resync: check your network and disk IO usage, and \texttt{resync-tranquility} worker configuration
\item big LMDB database: stop garage and compact with \texttt{mdb\_copy -c}
\item ask us on matrix \texttt{\#garage:deuxfleurs.fr} or open an issue on git.deuxfleurs.fr!
\begin{itemize}
\item provide the output of \texttt{garage status}, \texttt{garage stats} and relevant metrics and logs
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Moving from Minio}
\begin{itemize}
\item list your buckets and your keys
\item create buckets and keys on the garage cluster
\begin{itemize}
\item you cannot import non-garage keys yet, patch to come soon!
\end{itemize}
\item loop over buckets, copy with rclone
\begin{itemize}
\item see doc \url{https://garagehq.deuxfleurs.fr/documentation/connect/cli/}
\end{itemize}
\item blog post coming soon!
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Demo time!}
\end{frame}
\begin{frame}
\frametitle{Get Garage now!}
\begin{center}
\includegraphics[width=.3\linewidth]{../../logo/garage_hires.png}\\
\vspace{-1em}
\url{https://garagehq.deuxfleurs.fr/}\\
Matrix channel: \texttt{\#garage:deuxfleurs.fr}
\vspace{2em}
\includegraphics[width=.09\linewidth]{assets/rust_logo.png}
\includegraphics[width=.2\linewidth]{assets/AGPLv3_Logo.png}
\end{center}
\end{frame}
\end{document}
%% vim: set ts=4 sw=4 tw=0 noet spelllang=fr :

View file

@ -1,6 +0,0 @@
Compile with:
```
typst compile talk.typ --root ../..
```

File diff suppressed because one or more lines are too long

View file

@ -1,374 +0,0 @@
#import "@preview/slydst:0.1.5": *
// some display rules
#set par(spacing: 2em)
#set list(spacing: 1em)
#show link: set text(font: "DejaVu Sans Mono", size: 9pt)
// some functions to customize styles
#let vhcenter(content) = [
#v(1fr)
#align(center)[#content]
#v(1fr)
]
#let imgcenter(..args) = vhcenter(image(..args))
#let mytable(..args) = {
show table.cell: set text(size: 9pt)
set table(stroke: 0.5pt + black)
grid(
columns: (1cm, 1fr, 1cm),
[], table(..args), []
)
}
// actual slides
#show: slides.with(
//title: "Garage",
authors: ("Alex Auvolat",),
date: "2026-06-03",
layout: "large",
//ratio: 16/9,
ratio: 4/3,
title-color: rgb("#ff9329"),
)
#title-slide[
#align(center)[
#image("../../sticker/Garage.png", width: 20%)
#v(1em)
*An introduction to Garage*\
Alex Auvolat, Deuxfleurs
#v(1em)
#link("https://garagehq.deuxfleurs.fr/")\
Matrix channel: `#garage:deuxfleurs.fr`
]
]
== A non-profit initiative
#grid(
columns: (2fr, 8fr),
[#v(2em)],[],
[
#image("../assets/logos/deuxfleurs.svg", width: 50%)
],
[
*Part of a degrowth initiative*\
Garage has been created at Deuxfleurs, where we experiment with running Internet services without a datacenter, on commodity and refurbished hardware.
],
[#v(2em)],[],
[
#image("../assets/community.png", width: 50%)
],
[
*Developed by a community*\
#text(size: 0.8em)[Some recent contributors: Arthur C, Charles H, dongdigua, Etienne L, Jonah A, Julien K, Lapineige, MagicRR, Milas B, Niklas M, RockWolf, Schwitzd, trinity-1686a, Xavier S, babykart, Baptiste J, eddster2309, James O'C, Joker9944, Maximilien R, Renjaya RZ, Yureka...]
],
[#v(3em)],[],
[
#image("../assets/logos/AGPLv3_Logo.png", width: 50%)
],
[
*Owned by nobody, open-core is impossible, zero VC money*\
AGPL + no Contributor License Agreement = Garage ownership spreads among hundreds of contributors.
]
)
== Our initial objective at Deuxfleurs
#v(4em)
#align(center)[
#text(weight: "bold")[
Promote self-hosting and small-scale hosting\
as an alternative to large cloud providers
]
]
#v(2em)
Why is it hard?
#v(2em)
#align(center)[
#underline[Resilience]\
#text(size: 0.8em)[we want good uptime/availability with low supervision]
]
== Our very low-tech infrastructure
//== Building a resilient system with cheap stuff
//
#v(4em)
#[
#set list(spacing: 2em)
- Commodity hardware (e.g. old desktop PCs)\
#text(size: 0.8em)[(can die at any time)]
- Regular Internet (e.g. FTTB, FTTH) and power grid connections\
#text(size: 0.8em)[(can be unavailable randomly)]
- *Geographical redundancy* (multi-site replication)
]
#pagebreak()
#imgcenter("../assets/neptune.jpg", width: 100%)
#pagebreak()
#imgcenter("../assets/atuin.jpg", width: 100%)
#pagebreak()
#imgcenter("../assets/inframap_jdll2023.pdf", width: 100%)
== Object storage: a crucial component
#vhcenter[
#grid(
columns: (3fr, 3fr, 3fr),
[#image("../assets/logos/Amazon-S3.jpg", height: 6em)],
[#image("../assets/logos/minio.png", height: 5em)],
[#image("../../logo/garage_hires_crop.png", height: 6em)]
)
]
S3: a de-facto standard, many compatible applications
MinIO: not suited for geo-distributed deployments, becoming closed source
*Garage is a self-hosted drop-in replacement for the Amazon S3 object store*
#v(2em)
== Principle 1: based on CRDTs
#v(1cm)
#underline[Internally, Garage uses only CRDTs] (conflict-free replicated data types)
Why not Raft, Paxos, ...? Issues of consensus algorithms:
- *Software complexity*
- *Performance issues:*
- The leader is a *bottleneck* for all requests
- *Sensitive to higher latency* between nodes
- *Takes time to reconverge* when disrupted (e.g. node going down)
== The data model of object storage
#[
#set list(spacing: 1em)
Object storage is basically a *key-value store*:
#mytable(
columns: (2fr, 5fr),
align: left,
[*Key: file path + name*], [*Value: file data + metadata*],
[`index.html`], text(size: 8pt)[
`Content-Type: text/html; charset=utf-8`\
`Content-Length: 24929`\
`<binary blob>`
],
[`img/logo.svg`], text(size: 8pt)[
`Content-Type: text/svg+xml`\
`Content-Length: 13429`\
`<binary blob>`
],
[`download/index.html`], text(size: 8pt)[
`Content-Type: text/html; charset=utf-8`\
`Content-Length: 26563`\
`<binary blob>`
]
)
*Consistency model:*
- Not ACID (not required by S3 spec) / not linearizable
- *Read-after-write consistency*\
#text(size: 0.8em)[(stronger than eventual consistency)]
]
== Principle 2: geo-distributed data model
#imgcenter("../assets/map.png", width: 90%)
Garage stores replicas on different zones when possible
== Zone-aware cluster configuration
#imgcenter("../assets/screenshots/garage_status_0.9_prod_zonehl.png", width: 100%)
== Performance evaluation
#imgcenter("../assets/perf/endpoint_latency_0.7_0.8_minio.png", width: 100%)
#pagebreak()
#imgcenter("../assets/perf/ttfb.png", width: 100%)
#pagebreak()
#imgcenter("../assets/perf/io-0.7-0.8-minio.png", width: 100%)
= Deploying Garage
== Garage in the wild
#imgcenter("../assets/cluster_kind.png", width: 100%)
== Size of known deployments
#imgcenter("../assets/cluster_size.png", width: 100%)
_"Petabyte storage setup for a video site. Nginx as CDN in-front using garage-s3-website feature. Each storage node has ~64TB storage with raid10, no replication within garage. 25gbit nic. haproxy to loadbalance across 5 nodes. mostly reads with very few writes."_
_"We currently manage 7 Garage nodes, 28TB total storage, 6M blocks for 3M objects and 4TB of object data. We have been running Garage in production for 2.5 years."_
== Setting up data and metadata storage
#vhcenter[
#mytable(
columns: (0.7fr, 1fr, 1fr),
inset: 0.8em,
align: center + horizon,
table.header[][*Metadata storage*][*Data storage*],
[*Content*],[access keys, buckets\ index of objects],[raw data blocks],
[*Size*],[\< 10\% of data\ rarely over 100GB],[replication × dataset size\ *no erasure-coding*],
[*Constraints*],[latency sensitive\ write-intensive under load],[big\ many files],
[*Ideal hardware*],[enterprise-grade SSD],[HDD],
[*Recommended redundancy*],[RAID1],[none, use disks directly\ *avoid RAID if possible*],
[*Recommended filesystem*],[ZFS, Btrfs],[XFS on individual disks],
[*Tunables in Garage*],[database engine\
automatic snapshots],[block size\ compression],
)
]
== Choosing a replication factor
#vhcenter[
#mytable(
columns: (0.7fr, 1fr, 1.3fr),
inset: 0.8em,
align: center + horizon,
table.header[*Replication factor*][*Pro*][*Cons*],
[*1*], [easy single-node setup\ full space efficiency], [no metadata redundancy\ *vulnerable to hardware crash or data corruption*\ no high-availability],
[*2*], [redundancy\ limited storage overhead], [limited high-availability\ (read-only when one node is unavailable)],
[*3*], [high-availability setup\ best data resilience], [big storage overhead],
[*4, 5, ...*], [possible if needed], [...],
)
#v(0.5cm)
*Important note:* metadata replication == data replication\
Choose well, this cannot be changed easily!
]
== Picking a metadata engine
#vhcenter[
All files-to-block mappings are stored in the metadata engine, including bucket and object metadata. Files below 3KB are stored directly in the metadata engine.
#v(0.5cm)
#mytable(
columns: (0.7fr, 1fr, 1.3fr),
inset: 0.8em,
align: center + horizon,
table.header[*Metadata engine*][*Characteristics*][*Use case*],
[*SQLite*],[safer],[single node deployment\ small clusters\ clusters with infrequent access],
[*LMDB*],[faster\ sometimes has inexplicable corruptions],[larger clusters with metadata redundancy],
[*Fjall*],[experimental\ best of both worlds?],[help us test it!],
)
#v(0.5cm)
Metadata engine can be set per node, and changed later with a migration tool
]
== Avoiding common issues as soon as possible
#vhcenter[
#mytable(
columns: (1fr, 1.4fr),
inset: 0.8em,
align: center + horizon,
table.header[*Risk*][*How to avoid*],
[*Metadata corruption*\ (esp. with LMDB)],[Configure automatic snapshots with\ `metadata_auto_snapshot_interval`\ Use replication factor 2 or 3],
[*Data not well balanced between nodes*],[Avoid clusters with too many nodes\ Target: \#nodes ≤ 10 × replication_factor],
[*Performance issues with many objects in one single bucket*],[Spread your data over multiple buckets],
[*Performance issues with big objects*],[Increase `block_size` configuration parameter\ Target: object size ≤ 1000 × `block_size`,\ `block_size` ≤ 100MB],
[*Performance issues with many small objects*],[Have enough RAM to fit the entire metadata DB],
)
]
== Other things to consider during set-up
#vhcenter[
#mytable(
columns: (1fr, 1.2fr),
inset: 0.8em,
align: center + horizon,
[*Tools for cluster deployment*],[Ansible + systemd\ NixOS\ Kubernetes or Nomad with Docker],
[*Initial cluster setup*],[Manual layout configuration\ Read the documentation!],
[*TLS support on public endpoints*],[Add an external reverse-proxy (Nginx, ...)],
[*S3 anonymous access*],[Not implemented, use website endpoint],
[*Monitoring*],[Prometheus + Grafana for Garage metrics\ External tool to monitor HDD health],
)
]
== Monitoring with Prometheus + Grafana
#imgcenter("../2026-01-31-fosdem/assets/garage-stats.png", width: 83%)
== Common issues and their solutions
#vhcenter[
#mytable(
columns: (1fr, 1.5fr),
inset: 0.8em,
align: center + horizon,
table.header[*Problem*][*Solution*],
table.cell(rowspan: 2)[*S3 access authorization issues*],[Correctly set the `region` parameter in your S3 client\ default = `garage`, not `us-east-1`],[Check your reverse proxy configuration],
[*Debugging other API issues*],[Set `RUST_LOG=garage=debug` to investigate],
[*Resync queue fills up*],[`garage worker set -a resync-worker-count 8`\ `garage worker set -a resync-tranquility 0`],
[*LMDB database too big*],[Stop garage and compact with `mdb_copy -c`],
[*Data recovery with dead/unavailable nodes*],[Consistency mode `degraded` allows to read data from an unhealthy cluster. *Do not use it for regular operation.*],
[*Other issues*],[Ask us on matrix `#garage:deuxfleurs.fr` or open an issue on `git.deuxfleurs.fr`\
Provide the output of `garage status`, `garage stats` and relevant metrics and logs],
)
]
== Future developments
#imgcenter("../assets/survey_requested_features.png", width: 80%)
#pagebreak()
#imgcenter("../2026-01-31-fosdem/assets/Garage Web Admin - Dashboard@2x.png", width: 100%)
#pagebreak()
#imgcenter("../2026-01-31-fosdem/assets/Garage Web Admin - Bucket details page@2x.png", width: 100%)
== Where to find us
#align(center)[
#v(1fr)
#image("../../logo/garage_hires.png", width: 25%)
#link("https://garagehq.deuxfleurs.fr/")\
#link("mailto:garagehq@deuxfleurs.fr")\
`#garage:deuxfleurs.fr` on Matrix
#v(1fr)
#grid(columns: (6%,3%,13%),
image("../assets/logos/rust_logo.png"),
[],
image("../assets/logos/AGPLv3_Logo.png"),
)
]

Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 123 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 183 KiB

15
flake.lock generated
View file

@ -12,17 +12,16 @@
"original": { "original": {
"owner": "ipetkov", "owner": "ipetkov",
"repo": "crane", "repo": "crane",
"rev": "6fe74265bbb6d016d663b1091f015e2976c4a527",
"type": "github" "type": "github"
} }
}, },
"flake-compat": { "flake-compat": {
"locked": { "locked": {
"lastModified": 1761640442, "lastModified": 1717312683,
"narHash": "sha256-AtrEP6Jmdvrqiv4x2xa5mrtaIp3OEe8uBYCDZDS+hu8=", "narHash": "sha256-FrlieJH50AuvagamEvWMIE6D2OAnERuDboFDYAED/dE=",
"owner": "nix-community", "owner": "nix-community",
"repo": "flake-compat", "repo": "flake-compat",
"rev": "4a56054d8ffc173222d09dad23adf4ba946c8884", "rev": "38fd3954cf65ce6faf3d0d45cd26059e059f07ea",
"type": "github" "type": "github"
}, },
"original": { "original": {
@ -81,17 +80,17 @@
] ]
}, },
"locked": { "locked": {
"lastModified": 1776914043, "lastModified": 1763952169,
"narHash": "sha256-qug5r56yW1qOsjSI99l3Jm15JNT9CvS2otkXNRNtrPI=", "narHash": "sha256-+PeDBD8P+NKauH+w7eO/QWCIp8Cx4mCfWnh9sJmy9CM=",
"owner": "oxalica", "owner": "oxalica",
"repo": "rust-overlay", "repo": "rust-overlay",
"rev": "2d35c4358d7de3a0e606a6e8b27925d981c01cc3", "rev": "ab726555a9a72e6dc80649809147823a813fa95b",
"type": "github" "type": "github"
}, },
"original": { "original": {
"owner": "oxalica", "owner": "oxalica",
"repo": "rust-overlay", "repo": "rust-overlay",
"rev": "2d35c4358d7de3a0e606a6e8b27925d981c01cc3", "rev": "ab726555a9a72e6dc80649809147823a813fa95b",
"type": "github" "type": "github"
} }
}, },

View file

@ -6,13 +6,12 @@
inputs.nixpkgs.url = inputs.nixpkgs.url =
"github:NixOS/nixpkgs/cfe2c7d5b5d3032862254e68c37a6576b633d632"; "github:NixOS/nixpkgs/cfe2c7d5b5d3032862254e68c37a6576b633d632";
# Rust overlay as of 2026-04-23 # Rust overlay as of 2025-11-24
inputs.rust-overlay.url = inputs.rust-overlay.url =
"github:oxalica/rust-overlay/2d35c4358d7de3a0e606a6e8b27925d981c01cc3"; "github:oxalica/rust-overlay/ab726555a9a72e6dc80649809147823a813fa95b";
inputs.rust-overlay.inputs.nixpkgs.follows = "nixpkgs"; inputs.rust-overlay.inputs.nixpkgs.follows = "nixpkgs";
# Crane as of 2025-01-24 inputs.crane.url = "github:ipetkov/crane";
inputs.crane.url = "github:ipetkov/crane/6fe74265bbb6d016d663b1091f015e2976c4a527";
inputs.flake-compat.url = "github:nix-community/flake-compat"; inputs.flake-compat.url = "github:nix-community/flake-compat";
inputs.flake-utils.url = "github:numtide/flake-utils"; inputs.flake-utils.url = "github:numtide/flake-utils";
@ -67,7 +66,7 @@
clippy = lints.garage-cargo-clippy; clippy = lints.garage-cargo-clippy;
}; };
# ---- development shell, for making native builds only ---- # ---- developpment shell, for making native builds only ----
devShells = devShells =
let let
targets = compile { targets = compile {
@ -90,17 +89,6 @@
cargo-outdated cargo-outdated
cargo-machete cargo-machete
nixpkgs-fmt nixpkgs-fmt
openssl
socat
killall
];
};
# dev shell for fuzzing
fuzz = pkgs.mkShell {
buildInputs = with pkgs; [
targets.toolchainNightly
cargo-fuzz
]; ];
}; };
}; };

4
fuzz/.gitignore vendored
View file

@ -1,4 +0,0 @@
target
corpus
artifacts
coverage

View file

@ -1,66 +0,0 @@
# Manifest for Garage's fuzzing harness crate, driven by cargo-fuzz
# (run with `cargo fuzz run <target_name>`).
[package]
name = "garage-fuzz"
version = "0.0.0"
publish = false
edition = "2018"
# Marks this crate as a cargo-fuzz project so `cargo fuzz` can discover it.
[package.metadata]
cargo-fuzz = true
[dependencies]
# `arbitrary` (with derive) generates structured inputs from raw fuzz bytes;
# `libfuzzer-sys` provides the `fuzz_target!` entry-point macro.
arbitrary = { workspace = true, features = ["derive"]}
libfuzzer-sys = { workspace = true }
garage_db.workspace = true
garage_table.workspace = true
garage_util.workspace = true
# Default features are disabled; only the "arbitrary" feature is required
# by the fuzz targets.
garage_model = { workspace = true, default-features = false, features = ["arbitrary"] }
# One [[bin]] per fuzz target; sources live in fuzz_targets/.
# test/doc/bench are disabled since these binaries only run under the
# libFuzzer harness.
[[bin]]
name = "version_crdt"
path = "fuzz_targets/version_crdt.rs"
test = false
doc = false
bench = false
[[bin]]
name = "mpu_crdt"
path = "fuzz_targets/mpu_crdt.rs"
test = false
doc = false
bench = false
[[bin]]
name = "bucket_crdt"
path = "fuzz_targets/bucket_crdt.rs"
test = false
doc = false
bench = false
[[bin]]
name = "block_ref_crdt"
path = "fuzz_targets/block_ref_crdt.rs"
test = false
doc = false
bench = false
[[bin]]
name = "admin_api_token_crdt"
path = "fuzz_targets/admin_api_token_crdt.rs"
test = false
doc = false
bench = false
[[bin]]
name = "key_crdt"
path = "fuzz_targets/key_crdt.rs"
test = false
doc = false
bench = false
[[bin]]
name = "bucket_alias_crdt"
path = "fuzz_targets/bucket_alias_crdt.rs"
test = false
doc = false
bench = false

View file

@ -1,11 +0,0 @@
# Fuzzing
## Setup
Install cargo fuzz: `cargo install cargo-fuzz`
## Launch
Run `cargo fuzz run <fuzz_target>` where `<fuzz_target>` is the name (without extension) of one of the `.rs` files in the `fuzz_targets` directory.
If you launch the command outside of the fuzz directory, you need to force the nightly toolchain with `cargo +nightly`.

View file

@ -1,37 +0,0 @@
#![no_main]
use garage_fuzz::check_crdt_laws;
use garage_model::admin_token_table::{AdminApiToken, AdminApiTokenParams, AdminApiTokenScope};
use garage_util::crdt;
use libfuzzer_sys::fuzz_target;
/// Fuzz-generated fields used to build one `AdminApiToken`:
/// (deleted flag, name, expiration, scope).
type Input = (
    bool,
    crdt::Lww<String>,
    crdt::Lww<Option<u64>>,
    crdt::Lww<AdminApiTokenScope>,
);

/// Assemble an `AdminApiToken` from fuzz input.
///
/// `prefix`, `created` and `token_hash` are pinned to fixed values so that
/// only the CRDT-relevant fields differ between generated tokens.
fn make(input: Input) -> AdminApiToken {
    let (deleted, name, expiration, scope) = input;
    // Build the params unconditionally; they are simply dropped when the
    // token is generated in the deleted state.
    let params = AdminApiTokenParams {
        created: 0,
        token_hash: String::new(),
        name,
        expiration,
        scope,
    };
    let state = if deleted {
        crdt::Deletable::Deleted
    } else {
        crdt::Deletable::present(params)
    };
    AdminApiToken {
        prefix: String::new(),
        state,
    }
}
// libFuzzer entry point: builds three arbitrary tokens and feeds them to
// `check_crdt_laws`, which checks the CRDT merge laws for this triple.
fuzz_target!(|inputs: (Input, Input, Input)| {
let (a, b, c) = inputs;
check_crdt_laws(make(a), make(b), make(c));
});

View file

@ -1,20 +0,0 @@
#![no_main]
use garage_fuzz::check_crdt_laws;
use garage_model::s3::block_ref_table::BlockRef;
use libfuzzer_sys::fuzz_target;
/// Construct a `BlockRef` whose block hash and version UUID are both pinned
/// to all-zero values, so that CRDT state can be compared across merge
/// results; only the `deleted` flag varies between fuzz cases.
fn make_block_ref(deleted: bool) -> BlockRef {
    // Single zero-filled array reused (it is Copy) for both identifiers.
    let zeroes = [0u8; 32];
    BlockRef {
        block: zeroes.into(),
        version: zeroes.into(),
        deleted: deleted.into(),
    }
}
// libFuzzer entry point: three arbitrary deleted flags are turned into
// BlockRefs and fed to `check_crdt_laws` to check the CRDT merge laws.
fuzz_target!(|inputs: (bool, bool, bool)| {
let (d1, d2, d3) = inputs;
check_crdt_laws(make_block_ref(d1), make_block_ref(d2), make_block_ref(d3));
});

Some files were not shown because too many files have changed in this diff Show more