Compare commits

...

128 commits

Author SHA1 Message Date
Alex Auvolat
b6b18427a5 use optimization level 3 and thin LTO for release builds (#1405)
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1405
Co-authored-by: Alex Auvolat <lx@deuxfleurs.fr>
Co-committed-by: Alex Auvolat <lx@deuxfleurs.fr>
2026-04-16 08:47:02 +00:00
Gauthier Zirnhelt
9987166b2b Fix the LifecycleWorker being uncooperative (#1396)
## Summary

This PR ensures that the `LifecycleWorker` yields at least once to the Tokio scheduler in between each batch of 100 objects.

## Problem being solved

I'm administrating a Garage cluster which has been experiencing timeouts on all endpoints while the lifecycle worker is running at midnight UTC: `Ping timeout` error messages and even requests eventually failing due to `Could not reach quorum ...`.

I have found that this happens while the lifecycle worker is working on a big bucket (containing millions of objects) with a lifecycle rule that applies to very few objects.
The `process_object()` function does not hit any `await`:
- `last_bucket` is always the same, so the `bucket_table` is not read asynchronously
- no transaction is made on the `object_table` because my lifecycle rule (almost) never applies to any object

The first commit in this PR adds an executable which reproduces the problem that I've been experiencing in a self-contained way: the lifecycle worker starves the Tokio scheduler so much that no other task is able to run (or very rarely).
To run it : `cargo run -p garage_model --bin lifecycle-starvation-test`.
This commit can be dropped post-review, as it's only useful to demonstrate the starvation.

The error messages completely stopped after adding the extra yield to the nodes of my cluster.
The duration of the lifecycle worker task does not appear to have changed at all from what I can see (looking at the timestamps produced either by the self-contained binary or by each of my nodes with the `Lifecycle worker finished` message).

## Note

Another potential fix would have been to force the `WorkerProcessor` to yield before re-enqueuing a busy task, but this would have affected all Garage workers even though it's only the `LifecycleWorker` being uncooperative.

Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1396
Reviewed-by: Alex <lx@deuxfleurs.fr>
Co-authored-by: Gauthier Zirnhelt <gauthier.zirnhelt@insimo.fr>
Co-committed-by: Gauthier Zirnhelt <gauthier.zirnhelt@insimo.fr>
2026-04-15 09:56:24 +00:00
trinity-1686a
b72b090a09 fix silent write errors (#1358)
fix #1355

some write errors are not reported when calling write_all. That's notably the case of ENOSPC on small buffers (1MiB).
on ext4, the error is caught when calling flush(). This is hopefully the case on most local filesystems, though afaik this assumption doesn't hold for NFS

Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1358
Co-authored-by: trinity-1686a <trinity@deuxfleurs.fr>
Co-committed-by: trinity-1686a <trinity@deuxfleurs.fr>
2026-02-21 07:21:24 +00:00
Armael
8551aefed4 Fix: correctly parse CORS website configuration with no rules (#1320)
When sending a website config with an empty list of CORS rules, garage currently incorrectly refuses it with error message "Invalid XML: missing field `CORSRule`".
This fixes the issue by following the documentation of quick-xml related to serde field parameters for this specific scenario:  https://docs.rs/quick-xml/latest/quick_xml/de/#sequences-xsall-and-xssequence-xml-schema-types .

(I've based this PR on main-v1 because we want it for deuxfleurs' deployment.)

Co-authored-by: Armaël Guéneau <armael.gueneau@ens-lyon.org>
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1320
Co-authored-by: Armael <armael@noreply.localhost>
Co-committed-by: Armael <armael@noreply.localhost>
2026-02-07 13:11:20 +00:00
Alex Auvolat
47bf5d9fb0 bump version to v1.3.1 2026-01-24 13:01:27 +01:00
Alex Auvolat
5df37dae5e update cargo dependencies in main-v1 (#1299)
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1299
Co-authored-by: Alex Auvolat <lx@deuxfleurs.fr>
Co-committed-by: Alex Auvolat <lx@deuxfleurs.fr>
2026-01-24 11:59:01 +00:00
Alex
44af0bdab3 Merge pull request 'Backport #1283 and #1290 to main-v1' (#1297) from backports-v1 into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1297
2026-01-24 11:34:28 +00:00
rmoff
a7d6620e18 Fix typo in error message 2026-01-24 12:21:45 +01:00
Joe Anderson
8eb12755e4 Allow bucket to be missing from presigned post params 2026-01-24 12:21:25 +01:00
maximilien
c685a2cbaf Merge pull request 'Update doc/book/cookbook/binary-packages.md' (#1269) from nmstoker/garage:main-v1 into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1269
2025-12-21 21:12:15 +00:00
maximilien
969f42a970 Merge pull request 'feat: add service annotations' (#1264) from deimosfr/garage:feat/add_helm_svc_annotations into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1264
Reviewed-by: maximilien <git@mricher.fr>
2025-12-21 21:00:00 +00:00
nmstoker
424d4f8d4d Update doc/book/cookbook/binary-packages.md
Correct the Arch Linux link as garage is now available in the official repos under extra, and no longer in AUR.
2025-12-20 13:16:38 +00:00
Pierre Mavro
bf5290036f
feat: add service annotations 2025-12-18 18:12:22 +01:00
Alex
4efc8bac07 Merge pull request 'Add the `block_max_concurrent_writes_per_request` parameter, which replaces `put_blocks_max_parallel`. This is to accommodate different storage media such as HDD and NVMe.' (#1251) from perrynzhou/garage:dev into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1251
Reviewed-by: Alex <lx@deuxfleurs.fr>
2025-12-17 10:05:49 +00:00
perrynzhou
f3dcc39903 Merge branch 'main-v1' into dev 2025-12-17 10:05:19 +00:00
maximilien
43e02920c2 Merge pull request 'docs: fix typo in doc/book/cookbook/kubernetes.md' (#1259) from simonpasquier/garage:fix-typo into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1259
Reviewed-by: maximilien <me@mricher.fr>
2025-12-17 07:09:59 +00:00
Simon Pasquier
dcc2fe4ac5
docs: fix typo in doc/book/cookbook/kubernetes.md 2025-12-16 10:16:44 +01:00
perrynzhou@gmail.com
e3a5ec6ef6 rename put_blocks_max_parallel to block_max_concurrent_writes_per_request and update configuration.md 2025-12-12 07:09:38 +08:00
perrynzhou@gmail.com
4d124e1c76 Add the `block_max_concurrent_writes_per_request` parameter, which replaces `put_blocks_max_parallel`. This is to accommodate different storage media such as HDD and NVMe. 2025-12-10 06:43:51 +08:00
Alex
d769a7be5d Merge pull request 'Update rust toolchain to 1.91.0' (#1233) from toolchain-update into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1233
2025-11-25 09:58:17 +00:00
Alex Auvolat
511cf0c6ec disable awscli checksumming in ci scripts
required because garage.deuxfleurs.fr is still running v1.x
2025-11-24 18:37:34 +01:00
Alex Auvolat
95693d45b2 run cargo fmt as a nix derivation 2025-11-24 18:09:53 +01:00
Alex Auvolat
ca296477f3 disable checksums in aws cli (todo: revert in main-v2) 2025-11-24 17:58:57 +01:00
Alex Auvolat
ca3b4a050d update nixos image used in woodpecker ci 2025-11-24 17:35:51 +01:00
Alex Auvolat
a057ab23ea Update rust toolchain 2025-11-24 11:09:46 +01:00
Alex
58bc65b9a8 Merge pull request 'migrate to thiserror, garage-v1' (#1218) from thiserror into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1218
Reviewed-by: Alex <lx@deuxfleurs.fr>
2025-11-12 08:05:32 +00:00
trinity-1686a
ac851d6dee fmt 2025-11-01 18:04:54 +01:00
trinity-1686a
eac2aa6fe4 Merge pull request 'fix: default config path changed for alpine binary' (#1204) from berndsen-io/garage:fix-alpine-docs into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1204
Reviewed-by: trinity-1686a <trinity@deuxfleurs.fr>
2025-11-01 16:43:32 +00:00
trinity-1686a
1e0201ada2 Merge pull request 'Update link to signature v2.' (#1211) from teo-tsirpanis/garage:sigv2-docs into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1211
Reviewed-by: trinity-1686a <trinity@deuxfleurs.fr>
2025-11-01 16:43:05 +00:00
trinity-1686a
82297371bf migrate to thiserror
it doesn't generate a bazillion warnings at compile time
2025-11-01 17:20:39 +01:00
teo-tsirpanis
174f4f01a8 Update link to signature v2. 2025-10-26 15:54:08 +00:00
fgberry
1aac7b4875 chore: spacing 2025-10-24 11:25:33 +02:00
fgberry
b43c58cbe5 fix: default config path changed for alpine binary 2025-10-24 11:22:32 +02:00
Alex
9481ac428e Merge pull request 'sigv4: don't enforce x-amz-content-sha256 to be in signed headers list (fix #770)' (#1195) from fix-770 into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1195
2025-10-14 09:34:35 +00:00
Alex Auvolat
1c29d04cc5 sigv4: don't enforce x-amz-content-sha256 to be in signed headers list (fix #770)
From the following page:
https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html

> In both cases, because the x-amz-content-sha256 header value is already
> part of your HashedPayload, you are not required to include the
> x-amz-content-sha256 header as a canonical header.
2025-10-14 11:18:25 +02:00
Alex
b48a8eaa1f Merge pull request 'properly handle precondition time equal to object time' (#1193) from precondition-ms into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1193
2025-10-10 19:41:06 +00:00
trinity-1686a
42fd8583bd properly handle precondition time equal to object time 2025-10-08 17:54:22 +02:00
Alex
236af3a958 Merge pull request 'Garage v1.3.0' (#1166) from rel-v1.3.0 into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1166
2025-09-14 21:26:21 +00:00
Alex Auvolat
4b1fdbef55 bump version to v1.3.0 2025-09-14 21:36:33 +02:00
Alex Auvolat
0f1b488be0 fix rust warnings 2025-09-14 21:25:37 +02:00
Alex
0bbf63ee0e Merge pull request 'update rusqlite and snapshot using VACUUM INTO' (#1164) from update-rusqlite into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1164
2025-09-14 18:28:01 +00:00
Alex
879d941d7b Merge pull request 'add garage repair clear-resync-queue (fix #1151)' (#1165) from clear-resync-queue into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1165
2025-09-14 17:50:41 +00:00
Alex Auvolat
d726cf0299 add garage repair clear-resync-queue (fix #1151) 2025-09-14 19:34:44 +02:00
Alex
0c7aeab6f8 Merge pull request 'garage_db: fix error handling logic (fix #1138)' (#1163) from fix-1138 into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1163
2025-09-14 17:26:08 +00:00
Alex Auvolat
5687fc0375 update rusqlite and snapshot using VACUUM INTO 2025-09-14 19:22:36 +02:00
Alex
97f1e9ab52 Merge pull request 'Add Plakar documentation (backup tools)' (#1119) from Lapineige/garage:Plakar_support into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1119
2025-09-14 16:08:36 +00:00
Lapineige
60b1d78b56 Add Plakar documentation 2025-09-14 18:07:49 +02:00
Alex Auvolat
4c895a7186 garage_db: fix error handling logic (fix #1138) 2025-09-14 18:03:31 +02:00
Alex
c3b5cbf212 Merge pull request 'fix panic when cluster_layout cannot be saved (fix #1150)' (#1158) from fix-1150 into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1158
2025-09-13 15:58:52 +00:00
Alex
57a467b5c0 Merge pull request 'Block manager: limit simultaneous block reads from disk' (#1157) from block-max-simultaneous-reads into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1157
2025-09-13 15:53:24 +00:00
Alex Auvolat
6cf6db5c61 fix panic when cluster_layout cannot be saved (fix #1150) 2025-09-13 17:49:25 +02:00
Alex Auvolat
d5a57e3e13 block: read_block: don't add not found blocks to resync queue 2025-09-13 17:38:23 +02:00
Alex Auvolat
5cf354acb4 block: maximum number of simultaneous reads 2025-09-13 17:38:06 +02:00
Alex
2b007ddea3 Merge pull request 'woodpecker: require the nix=enabled label' (#1152) from woodpecker-nix-flag into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1152
2025-09-04 09:10:10 +00:00
Alex Auvolat
c8599a8636 woodpecker: require the nix=enabled label 2025-09-04 11:06:46 +02:00
Alex
0b901bf291 Merge pull request 'garage_db: reduce frequency of sqlite snapshot progress log (fix #1129)' (#1146) from fix-1129 into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1146
2025-08-27 22:26:32 +00:00
Alex Auvolat
c8c20d6f47 garage_db: reduce frequency of sqlite snapshot progress log (fix #1129) 2025-08-28 00:07:35 +02:00
Alex
e5db610e4c Merge pull request 'K2V client: allow custom HTTP client' (#731) from k2v/shared_http_client into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/731
Reviewed-by: maximilien <me@mricher.fr>
2025-08-27 21:21:09 +00:00
Alex
65c6f8adea Merge pull request 'garage_db: refactor open function' (#1142) from factor-db-open into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1142
2025-08-27 21:10:59 +00:00
Alex Auvolat
54b9bf02a3 garage_db: refactor open function 2025-08-27 23:03:09 +02:00
Alex
469153233f Merge pull request 'garage_db: rename len to approximate_len as it is used for stats only' (#1141) from db-approximate-len into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1141
2025-08-27 20:44:50 +00:00
Alex Auvolat
90bba5889a garage_db: rename len to approximate_len as it is used for stats only 2025-08-27 21:23:45 +02:00
Alex
a64b567d43 Merge pull request 'Add experimental support for Fjall DB engine' (#906) from withings/garage:feat/fjall-db-engine into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/906
2025-08-27 19:09:40 +00:00
Alex Auvolat
6ea86db8cd document fjall db engine, remove flakey metadata_fsync implementation 2025-08-27 20:22:41 +02:00
Alex Auvolat
aa69c06f2b fix potential race condition and naming bug in fjall adapter 2025-08-27 20:22:38 +02:00
Alex Auvolat
a6c6c44310 nix: build and test fjall feature 2025-08-27 18:54:42 +02:00
Julien Kritter
96d7713915 Add support for an LSM-tree-based backend with Fjall 2025-08-27 18:54:34 +02:00
Alex
d64498c3d3 Merge pull request 'log access keys' (#1122) from 1686a/log-access-key into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1122
2025-08-27 16:18:16 +00:00
trinity-1686a
b340599e68 log access keys 2025-08-03 15:30:56 +02:00
Alex
5448012b27 Merge pull request 'Pixelfed_support' (#1118) from Lapineige/garage:Pixelfed_support into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1118
2025-08-02 15:03:57 +00:00
Alex
ce34d11a65 Merge pull request 'don't die on SIGHUP' (#1121) from 1686a/handle-sighup into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1121
2025-08-02 14:53:58 +00:00
Alex
8cb7623ebd Merge pull request 'handle ECONNABORTED' (#1120) from 1686a/handle-econnaborted into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1120
2025-08-02 14:53:45 +00:00
trinity-1686a
5469c95877 handle ECONNABORTED 2025-08-02 13:14:01 +02:00
trinity-1686a
f930c6f643 don't die on SIGHUP 2025-08-02 13:09:33 +02:00
Alex
afcb22bf16 Merge pull request 'Fix typo in peertube buckets names' (#1117) from Lapineige/garage:main into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1117
2025-08-02 08:27:01 +00:00
Lapineige
cc29a40d51 Actualiser doc/book/connect/apps/index.md 2025-08-01 21:35:15 +00:00
Lapineige
0f3f180c3e Merge branch 'main-v1' into main 2025-08-01 21:33:58 +00:00
Lapineige
70cf6004ae Fix typo in peertube buckets names 2025-08-01 21:32:59 +00:00
Alex
c7571ff89b Merge pull request 'Fix some unsoundness in lmdb adapter unsafe' (#1099) from krtab/garage:fix_some_ub into main-v1
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1099
2025-07-31 19:38:23 +00:00
Arthur Carcano
1b42919bf7 Fix some unsoundness in lmdb adapter unsafe 2025-07-25 23:33:51 +02:00
Alex
3f4ab3a4a3 Merge pull request 'Garage v1.2.0' (#1068) from rel-1.2.0 into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1068
2025-06-13 16:12:29 +00:00
Alex Auvolat
3a4afc04a9 cargo: update crossbeam-channel to avoid yanked version 2025-06-13 17:22:47 +02:00
Alex Auvolat
fbf03e9378 bump version to v1.2.0 2025-06-13 14:21:28 +02:00
Alex
9eb07d4c7b Merge pull request 'cli: mark block refs as deleted in garage block purge (fix #1055)' (#1067) from fix-1055 into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1067
2025-06-13 11:53:41 +00:00
Alex Auvolat
85ee4f5d8c cli: mark block refs as deleted in garage block purge 2025-06-13 13:52:02 +02:00
Alex
328072d122 Merge pull request 'put web error in a basic webpage' (#1064) from trinity-1686a/garage:1686a/non-xml-web-error into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1064
2025-06-12 06:06:38 +00:00
trinity-1686a
26bc807905 put web error in a basic webpage
before, it was a plain string, with an xml content type

this caused browsers to show very ugly and meaningless pages
2025-06-10 22:23:06 +02:00
Alex
a9f5f242b2 Merge pull request 'feat: add log to journald feature' (#1056) from ragazenta/garage:feat/tracing-journald into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1056
2025-06-10 18:38:23 +00:00
maximilien
ae98abca5c Merge pull request 'Add eddster2309/ansible-role-garage as deployment option' (#1057) from eddster2309/garage:main into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1057
Reviewed-by: maximilien <me@mricher.fr>
2025-06-08 11:56:31 +00:00
eddster2309
adfa44ad70 Add architecture support 2025-06-03 09:22:43 +00:00
eddster2309
47143b88ad Add eddster2309/ansible-role-garage as deployment option 2025-06-03 09:15:57 +00:00
Renjaya Raga Zenta
8843aa92fa
feat: add log to journald feature
The systemd-journald is used in most major Linux distros that use systemd.
This enables logging using the systemd-journald native protocol, instead
of just writing to stderr.
2025-06-02 11:55:27 +07:00
Alex
b601b3e46d Merge pull request 'documentation: Minor doc change to clarify why the capacity does not matter and how the zone name is used' (#1051) from ddxv/garage:docs-quick-start into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1051
2025-05-30 16:26:19 +00:00
Alex
a19d2f16e2 Merge pull request 'api: s3: implement get bucket acl' (#1045) from ragazenta/garage:feat/dummy-acl into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1045
2025-05-30 16:25:04 +00:00
trinity-1686a
fc8fc60f6d emit internal error when we detect race condition (#1053) (fix #1050)
i went with a `500`/`InternalError`/`Please try again.` because that is something i've seen AWS S3 report while developing other software, and i'm not convinced all clients would understand a 409 conflict properly (GET don't usually conflict)

Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1053
Co-authored-by: trinity-1686a <trinity@deuxfleurs.fr>
Co-committed-by: trinity-1686a <trinity@deuxfleurs.fr>
2025-05-30 16:24:12 +00:00
Alex
77079a1498 Merge pull request '[1.1.x] speed up UploadPartCopy' (#1047) from yuka/garage:uploadpartcopy-v1 into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1047
2025-05-30 16:22:35 +00:00
James O'Claire
2a4f729b57 Minor doc change to clarify why the capacity does not matter and how the zone name is used 2025-05-28 09:49:50 +08:00
Renjaya Raga Zenta
1b042e379e
api: s3: implement get bucket acl 2025-05-26 09:43:15 +07:00
Yureka
ffbce0f689 speed up UploadPartCopy
(cherry picked from commit db54bf96c7)
2025-05-23 20:36:32 +02:00
Alex
37e5621dde Merge pull request 'documentation updates' (#1046) from doc-updates into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1046
2025-05-23 15:05:19 +00:00
Alex Auvolat
6529ff379a documentation updates 2025-05-23 17:02:23 +02:00
Alex
a8d73682a4 Merge pull request 'more resilience to inconsistent alias states' (#989) from fix-bucket-aliases into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/989
2025-05-22 17:41:42 +00:00
Alex Auvolat
8654eb19bf implement repair procedure to fix inconsistent bucket aliases 2025-05-22 19:34:38 +02:00
maximilien
54ea412188 Merge pull request 'Add kubernetes CRD' (#994) from babykart/garage:k8s-crd into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/994
2025-05-22 17:15:56 +00:00
Alex Auvolat
2ade8c86f6 more resilience to inconsistent alias states 2025-05-22 19:12:05 +02:00
babykart
b15e2cbb6c Update Kubernetes cookbook
Signed-off-by: babykart <babykart@gmail.com>
2025-05-22 17:11:14 +00:00
babykart
0fd1b7342b Add Kubernetes CRD and the related kustomization
Signed-off-by: babykart <babykart@gmail.com>
2025-05-22 17:11:14 +00:00
Alex
be16bc7a05 Merge pull request 'Fix behavior of CopyObject wrt x-amz-website-redirect-location' (#1037) from Armael/garage:copy-website-redirect into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1037
2025-05-22 13:57:28 +00:00
Alex
bfaa1ca6b7 Merge pull request 'api: lifecycle: 404 if missing lifecycle config' (#1043) from ragazenta/garage:no-lifecycle-response into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1043
2025-05-22 13:56:52 +00:00
Alex
de8eeab4ad Merge pull request 'optionally support puny code (fix #273)' (#1042) from trinity-1686a/garage:1686a/punnycode into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1042
2025-05-22 12:49:10 +00:00
Renjaya Raga Zenta
ae3f7ee76c
api: lifecycle: 404 if missing lifecycle config 2025-05-22 19:33:54 +07:00
trinity-1686a
2dc3a6dbbe document allow_punycode configuration option 2025-05-22 14:08:06 +02:00
Armaël Guéneau
c6bc3f229b Fix behavior of CopyObject wrt x-amz-website-redirect-location 2025-05-22 14:03:11 +02:00
trinity-1686a
bba9202f31 add test for punycode 2025-05-19 20:36:03 +02:00
trinity-1686a
a605a80806 support punnycode in api/web endpoint 2025-05-19 18:11:55 +02:00
trinity-1686a
539af12d21 allow punnycode in bucket name 2025-05-19 18:07:04 +02:00
Alex
a2a9e3cec4 Merge pull request 'doc: Add systemd example to increase file descriptors limit' (#1023) from baptiste/garage:systemd_openfiles into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1023
2025-05-09 10:34:07 +00:00
Baptiste Jonglez
14274bc13c doc: Add systemd example to increase file descriptors limit 2025-05-08 10:27:53 +02:00
Alex
bf4691d98a Merge pull request 'Fix #1007: hint that region can be changed depending on cluster config' (#1015) from garage-1007-update-region-in-doc into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1015
2025-04-24 07:41:32 +00:00
Maximilien R.
ad151cb1dc Fix #1007: hint that region can be changed depending on cluster config 2025-04-23 23:30:16 +02:00
babykart
3c20984a08 helm-chart: Cosmetic changes
Signed-off-by: babykart <babykart@gmail.com>
2025-04-21 10:04:53 +00:00
babykart
e6e4e051a1 helm-chart: Add metadata_auto_snapshot_interval
Signed-off-by: babykart <babykart@gmail.com>
2025-04-21 10:04:53 +00:00
babykart
9b38cba6f3 helm-chart: Add livenessProbe & readinessProbe
Signed-off-by: babykart <babykart@gmail.com>
2025-04-21 10:04:53 +00:00
Alex
4ef954d176 Merge pull request 'Fix Docker run volume mappings' (#1012) from Zoob/garage:main into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1012
2025-04-19 20:05:16 +00:00
Zoob
02498a93d0 doc: fix Docker run volume mappings 2025-04-19 18:46:36 +00:00
Alex
4caad5425d Merge pull request 'metadata: Create compact LMDB snapshots' (#1008) from baptiste/garage:lmdb_compact_snapshot into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1008
2025-04-17 19:26:32 +00:00
Baptiste Jonglez
9ec3f8cc3c metadata: Create compact LMDB snapshots
See #1006

LMDB files never shrink, so we can end up with a large database that
contains a smaller amount of actual data.

Compacting the snapshots is an easy win: it will write faster to disk,
take less space, and if needed you can reimport an already-compacted
snapshot as the main database.
2025-04-12 23:18:50 +02:00
Quentin Dufour
8b35a946d9
Allow external HTTP client 2024-02-23 17:09:47 +01:00
101 changed files with 3337 additions and 1455 deletions

View file

@ -1,3 +1,6 @@
labels:
nix: "enabled"
when: when:
event: event:
- push - push
@ -9,27 +12,32 @@ when:
steps: steps:
- name: check formatting - name: check formatting
image: nixpkgs/nix:nixos-22.05 image: nixpkgs/nix:nixos-24.05
commands: commands:
- nix-shell --attr devShell --run "cargo fmt -- --check" - nix-build -j4 --attr flakePackages.fmt
- name: build - name: build
image: nixpkgs/nix:nixos-22.05 image: nixpkgs/nix:nixos-24.05
commands: commands:
- nix-build -j4 --attr flakePackages.dev - nix-build -j4 --attr flakePackages.dev
- name: unit + func tests (lmdb) - name: unit + func tests (lmdb)
image: nixpkgs/nix:nixos-22.05 image: nixpkgs/nix:nixos-24.05
commands: commands:
- nix-build -j4 --attr flakePackages.tests-lmdb - nix-build -j4 --attr flakePackages.tests-lmdb
- name: unit + func tests (sqlite) - name: unit + func tests (sqlite)
image: nixpkgs/nix:nixos-22.05 image: nixpkgs/nix:nixos-24.05
commands: commands:
- nix-build -j4 --attr flakePackages.tests-sqlite - nix-build -j4 --attr flakePackages.tests-sqlite
- name: unit + func tests (fjall)
image: nixpkgs/nix:nixos-24.05
commands:
- nix-build -j4 --attr flakePackages.tests-fjall
- name: integration tests - name: integration tests
image: nixpkgs/nix:nixos-22.05 image: nixpkgs/nix:nixos-24.05
commands: commands:
- nix-build -j4 --attr flakePackages.dev - nix-build -j4 --attr flakePackages.dev
- nix-shell --attr ci --run ./script/test-smoke.sh || (cat /tmp/garage.log; false) - nix-shell --attr ci --run ./script/test-smoke.sh || (cat /tmp/garage.log; false)

View file

@ -1,3 +1,6 @@
labels:
nix: "enabled"
when: when:
event: event:
- deployment - deployment
@ -8,7 +11,7 @@ depends_on:
steps: steps:
- name: refresh-index - name: refresh-index
image: nixpkgs/nix:nixos-22.05 image: nixpkgs/nix:nixos-24.05
environment: environment:
AWS_ACCESS_KEY_ID: AWS_ACCESS_KEY_ID:
from_secret: garagehq_aws_access_key_id from_secret: garagehq_aws_access_key_id
@ -19,7 +22,7 @@ steps:
- nix-shell --attr ci --run "refresh_index" - nix-shell --attr ci --run "refresh_index"
- name: multiarch-docker - name: multiarch-docker
image: nixpkgs/nix:nixos-22.05 image: nixpkgs/nix:nixos-24.05
environment: environment:
DOCKER_AUTH: DOCKER_AUTH:
from_secret: docker_auth from_secret: docker_auth

View file

@ -1,3 +1,6 @@
labels:
nix: "enabled"
when: when:
event: event:
- deployment - deployment
@ -16,17 +19,17 @@ matrix:
steps: steps:
- name: build - name: build
image: nixpkgs/nix:nixos-22.05 image: nixpkgs/nix:nixos-24.05
commands: commands:
- nix-build --attr releasePackages.${ARCH} --argstr git_version ${CI_COMMIT_TAG:-$CI_COMMIT_SHA} - nix-build --attr releasePackages.${ARCH} --argstr git_version ${CI_COMMIT_TAG:-$CI_COMMIT_SHA}
- name: check is static binary - name: check is static binary
image: nixpkgs/nix:nixos-22.05 image: nixpkgs/nix:nixos-24.05
commands: commands:
- nix-shell --attr ci --run "./script/not-dynamic.sh result/bin/garage" - nix-shell --attr ci --run "./script/not-dynamic.sh result/bin/garage"
- name: integration tests - name: integration tests
image: nixpkgs/nix:nixos-22.05 image: nixpkgs/nix:nixos-24.05
commands: commands:
- nix-shell --attr ci --run ./script/test-smoke.sh || (cat /tmp/garage.log; false) - nix-shell --attr ci --run ./script/test-smoke.sh || (cat /tmp/garage.log; false)
when: when:
@ -36,7 +39,7 @@ steps:
ARCH: i386 ARCH: i386
- name: upgrade tests - name: upgrade tests
image: nixpkgs/nix:nixos-22.05 image: nixpkgs/nix:nixos-24.05
commands: commands:
- nix-shell --attr ci --run "./script/test-upgrade.sh v0.8.4 x86_64-unknown-linux-musl" || (cat /tmp/garage.log; false) - nix-shell --attr ci --run "./script/test-upgrade.sh v0.8.4 x86_64-unknown-linux-musl" || (cat /tmp/garage.log; false)
when: when:
@ -44,7 +47,7 @@ steps:
ARCH: amd64 ARCH: amd64
- name: push static binary - name: push static binary
image: nixpkgs/nix:nixos-22.05 image: nixpkgs/nix:nixos-24.05
environment: environment:
TARGET: "${TARGET}" TARGET: "${TARGET}"
AWS_ACCESS_KEY_ID: AWS_ACCESS_KEY_ID:
@ -55,7 +58,7 @@ steps:
- nix-shell --attr ci --run "to_s3" - nix-shell --attr ci --run "to_s3"
- name: docker build and publish - name: docker build and publish
image: nixpkgs/nix:nixos-22.05 image: nixpkgs/nix:nixos-24.05
environment: environment:
DOCKER_PLATFORM: "linux/${ARCH}" DOCKER_PLATFORM: "linux/${ARCH}"
CONTAINER_NAME: "dxflrs/${ARCH}_garage" CONTAINER_NAME: "dxflrs/${ARCH}_garage"

1971
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -24,18 +24,18 @@ default-members = ["src/garage"]
# Internal Garage crates # Internal Garage crates
format_table = { version = "0.1.1", path = "src/format-table" } format_table = { version = "0.1.1", path = "src/format-table" }
garage_api_common = { version = "1.1.0", path = "src/api/common" } garage_api_common = { version = "1.3.1", path = "src/api/common" }
garage_api_admin = { version = "1.1.0", path = "src/api/admin" } garage_api_admin = { version = "1.3.1", path = "src/api/admin" }
garage_api_s3 = { version = "1.1.0", path = "src/api/s3" } garage_api_s3 = { version = "1.3.1", path = "src/api/s3" }
garage_api_k2v = { version = "1.1.0", path = "src/api/k2v" } garage_api_k2v = { version = "1.3.1", path = "src/api/k2v" }
garage_block = { version = "1.1.0", path = "src/block" } garage_block = { version = "1.3.1", path = "src/block" }
garage_db = { version = "1.1.0", path = "src/db", default-features = false } garage_db = { version = "1.3.1", path = "src/db", default-features = false }
garage_model = { version = "1.1.0", path = "src/model", default-features = false } garage_model = { version = "1.3.1", path = "src/model", default-features = false }
garage_net = { version = "1.1.0", path = "src/net" } garage_net = { version = "1.3.1", path = "src/net" }
garage_rpc = { version = "1.1.0", path = "src/rpc" } garage_rpc = { version = "1.3.1", path = "src/rpc" }
garage_table = { version = "1.1.0", path = "src/table" } garage_table = { version = "1.3.1", path = "src/table" }
garage_util = { version = "1.1.0", path = "src/util" } garage_util = { version = "1.3.1", path = "src/util" }
garage_web = { version = "1.1.0", path = "src/web" } garage_web = { version = "1.3.1", path = "src/web" }
k2v-client = { version = "0.0.4", path = "src/k2v-client" } k2v-client = { version = "0.0.4", path = "src/k2v-client" }
# External crates from crates.io # External crates from crates.io
@ -52,13 +52,11 @@ chrono = "0.4"
crc32fast = "1.4" crc32fast = "1.4"
crc32c = "0.6" crc32c = "0.6"
crypto-common = "0.1" crypto-common = "0.1"
err-derive = "0.3"
gethostname = "0.4" gethostname = "0.4"
git-version = "0.3.4" git-version = "0.3.4"
hex = "0.4" hex = "0.4"
hexdump = "0.1" hexdump = "0.1"
hmac = "0.12" hmac = "0.12"
idna = "0.5"
itertools = "0.12" itertools = "0.12"
ipnet = "2.9.0" ipnet = "2.9.0"
lazy_static = "1.4" lazy_static = "1.4"
@ -66,6 +64,7 @@ md-5 = "0.10"
mktemp = "0.5" mktemp = "0.5"
nix = { version = "0.29", default-features = false, features = ["fs"] } nix = { version = "0.29", default-features = false, features = ["fs"] }
nom = "7.1" nom = "7.1"
parking_lot = "0.12"
parse_duration = "2.1" parse_duration = "2.1"
pin-project = "1.0.12" pin-project = "1.0.12"
pnet_datalink = "0.34" pnet_datalink = "0.34"
@ -84,12 +83,14 @@ pretty_env_logger = "0.5"
structopt = { version = "0.3", default-features = false } structopt = { version = "0.3", default-features = false }
syslog-tracing = "0.3" syslog-tracing = "0.3"
tracing = "0.1" tracing = "0.1"
tracing-journald = "0.3.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] } tracing-subscriber = { version = "0.3", features = ["env-filter"] }
heed = { version = "0.11", default-features = false, features = ["lmdb"] } heed = { version = "0.11", default-features = false, features = ["lmdb"] }
rusqlite = "0.31.0" rusqlite = "0.37"
r2d2 = "0.8" r2d2 = "0.8"
r2d2_sqlite = "0.24" r2d2_sqlite = "0.31"
fjall = "2.4"
async-compression = { version = "0.4", features = ["tokio", "zstd"] } async-compression = { version = "0.4", features = ["tokio", "zstd"] }
zstd = { version = "0.13", default-features = false } zstd = { version = "0.13", default-features = false }
@ -135,7 +136,7 @@ prometheus = "0.13"
aws-sigv4 = { version = "1.1", default-features = false } aws-sigv4 = { version = "1.1", default-features = false }
hyper-rustls = { version = "0.26", default-features = false, features = ["http1", "http2", "ring", "rustls-native-certs"] } hyper-rustls = { version = "0.26", default-features = false, features = ["http1", "http2", "ring", "rustls-native-certs"] }
log = "0.4" log = "0.4"
thiserror = "1.0" thiserror = "2.0"
# ---- used only as build / dev dependencies ---- # ---- used only as build / dev dependencies ----
assert-json-diff = "2.0" assert-json-diff = "2.0"
@ -145,12 +146,8 @@ aws-smithy-runtime = { version = "1.8", default-features = false, features = ["t
aws-sdk-config = { version = "1.62", default-features = false } aws-sdk-config = { version = "1.62", default-features = false }
aws-sdk-s3 = { version = "1.79", default-features = false, features = ["rt-tokio"] } aws-sdk-s3 = { version = "1.79", default-features = false, features = ["rt-tokio"] }
[profile.dev]
#lto = "thin" # disabled for now, adds 2-4 min to each CI build
lto = "off"
[profile.release] [profile.release]
lto = true lto = "thin"
codegen-units = 1 codegen-units = 16
opt-level = "s" opt-level = 3
strip = true strip = "debuginfo"

View file

@ -12,7 +12,7 @@ In this section, we cover the following web applications:
| [Mastodon](#mastodon) | ✅ | Natively supported | | [Mastodon](#mastodon) | ✅ | Natively supported |
| [Matrix](#matrix) | ✅ | Tested with `synapse-s3-storage-provider` | | [Matrix](#matrix) | ✅ | Tested with `synapse-s3-storage-provider` |
| [ejabberd](#ejabberd) | ✅ | `mod_s3_upload` | | [ejabberd](#ejabberd) | ✅ | `mod_s3_upload` |
| [Pixelfed](#pixelfed) | ❓ | Not yet tested | | [Pixelfed](#pixelfed) | ✅ | Natively supported |
| [Pleroma](#pleroma) | ❓ | Not yet tested | | [Pleroma](#pleroma) | ❓ | Not yet tested |
| [Lemmy](#lemmy) | ✅ | Supported with pict-rs | | [Lemmy](#lemmy) | ✅ | Supported with pict-rs |
| [Funkwhale](#funkwhale) | ❓ | Not yet tested | | [Funkwhale](#funkwhale) | ❓ | Not yet tested |
@ -69,7 +69,7 @@ $CONFIG = array(
'hostname' => '127.0.0.1', // Can also be a domain name, eg. garage.example.com 'hostname' => '127.0.0.1', // Can also be a domain name, eg. garage.example.com
'port' => 3900, // Put your reverse proxy port or your S3 API port 'port' => 3900, // Put your reverse proxy port or your S3 API port
'use_ssl' => false, // Set it to true if you have a TLS enabled reverse proxy 'use_ssl' => false, // Set it to true if you have a TLS enabled reverse proxy
'region' => 'garage', // Garage has only one region named "garage" 'region' => 'garage', // Garage default region is named "garage", edit according to your cluster config
'use_path_style' => true // Garage supports only path style, must be set to true 'use_path_style' => true // Garage supports only path style, must be set to true
], ],
], ],
@ -135,7 +135,7 @@ bucket but doesn't also know the secret encryption key.
*Click on the picture to zoom* *Click on the picture to zoom*
Add a new external storage. Put what you want in "folder name" (eg. "shared"). Select "Amazon S3". Keep "Access Key" for the Authentication field. Add a new external storage. Put what you want in "folder name" (eg. "shared"). Select "Amazon S3". Keep "Access Key" for the Authentication field.
In Configuration, put your bucket name (eg. nextcloud), the host (eg. 127.0.0.1), the port (eg. 3900 or 443), the region (garage). Tick the SSL box if you have put an HTTPS proxy in front of garage. You must tick the "Path access" box and you must leave the "Legacy authentication (v2)" box empty. Put your Key ID (eg. GK...) and your Secret Key in the last two input boxes. Finally click on the tick symbol on the right of your screen. In Configuration, put your bucket name (eg. nextcloud), the host (eg. 127.0.0.1), the port (eg. 3900 or 443), the region ("garage" if you use the default, or the one your configured in your `garage.toml`). Tick the SSL box if you have put an HTTPS proxy in front of garage. You must tick the "Path access" box and you must leave the "Legacy authentication (v2)" box empty. Put your Key ID (eg. GK...) and your Secret Key in the last two input boxes. Finally click on the tick symbol on the right of your screen.
Now go to your "Files" app and a new "linked folder" has appeared with the name you chose earlier (eg. "shared"). Now go to your "Files" app and a new "linked folder" has appeared with the name you chose earlier (eg. "shared").
@ -191,10 +191,10 @@ garage key create peertube-key
Keep the Key ID and the Secret key in a pad, they will be needed later. Keep the Key ID and the Secret key in a pad, they will be needed later.
We need two buckets, one for normal videos (named peertube-video) and one for webtorrent videos (named peertube-playlist). We need two buckets, one for normal videos (named peertube-videos) and one for webtorrent videos (named peertube-playlists).
```bash ```bash
garage bucket create peertube-videos garage bucket create peertube-videos
garage bucket create peertube-playlist garage bucket create peertube-playlists
``` ```
Now we allow our key to read and write on these buckets: Now we allow our key to read and write on these buckets:
@ -238,7 +238,7 @@ object_storage:
# Put localhost only if you have a garage instance running on that node # Put localhost only if you have a garage instance running on that node
endpoint: 'http://localhost:3900' # or "garage.example.com" if you have TLS on port 443 endpoint: 'http://localhost:3900' # or "garage.example.com" if you have TLS on port 443
# Garage supports only one region for now, named garage # Garage default region is named "garage", edit according to your config
region: 'garage' region: 'garage'
credentials: credentials:
@ -253,7 +253,7 @@ object_storage:
proxify_private_files: false proxify_private_files: false
streaming_playlists: streaming_playlists:
bucket_name: 'peertube-playlist' bucket_name: 'peertube-playlists'
# Keep it empty for our example # Keep it empty for our example
prefix: '' prefix: ''
@ -441,7 +441,7 @@ media_storage_providers:
store_synchronous: True # do we want to wait that the file has been written before returning? store_synchronous: True # do we want to wait that the file has been written before returning?
config: config:
bucket: matrix # the name of our bucket, we chose matrix earlier bucket: matrix # the name of our bucket, we chose matrix earlier
region_name: garage # only "garage" is supported for the region field region_name: garage # "garage" by default, edit according to your cluster config
endpoint_url: http://localhost:3900 # the path to the S3 endpoint endpoint_url: http://localhost:3900 # the path to the S3 endpoint
access_key_id: "GKxxx" # your Key ID access_key_id: "GKxxx" # your Key ID
secret_access_key: "xxxx" # your Secret Key secret_access_key: "xxxx" # your Secret Key

View file

@ -161,3 +161,49 @@ kopia repository validate-provider
You can then run all the standard kopia commands: `kopia snapshot create`, `kopia mount`... You can then run all the standard kopia commands: `kopia snapshot create`, `kopia mount`...
Everything should work out-of-the-box. Everything should work out-of-the-box.
## Plakar
Create your key and bucket on Garage server:
```bash
garage key create my-plakar-key
garage bucket create plakar-backups
garage bucket allow plakar-backups --read --write --key my-plakar-key
```
On Plakar server, add your Garage as a storage location (set `region` to the value configured in your `garage.toml` if you are not using the default):
```bash
plakar store add garageS3 s3://my-garage.tld/plakar-backups \
    region=garage \
    access_key=<Key ID from "garage key info my-plakar-key"> \
    secret_access_key=<Secret key from "garage key info my-plakar-key">
```
Then create the repository.
```bash
plakar at @garageS3 create -plaintext # Unencrypted
# or
plakar at @garageS3 create #encrypted
```
If you encrypt your backups (Plakar default), you will need to define a strong passphrase. Do not forget to save your password safely. It will be needed to decrypt your backups.
After the repository has been created, check that everything works as expected (this might return an empty result since no files have been added yet, but it should print no error message):
```bash
plakar at @garageS3 check
```
Now that everything is configured, you can use Garage as your backup storage. For instance, sync it with a local backup storage:
```bash
$ plakar at ~/backups sync to @garageS3
```
Or list the S3 storage content:
```bash
$ plakar at @garageS3 ls
```
More information in Plakar documentation: https://www.plakar.io/docs/main/quickstart/

View file

@ -8,18 +8,18 @@ have published Ansible roles. We list them and compare them below.
## Comparison of Ansible roles ## Comparison of Ansible roles
| Feature | [ansible-role-garage](#zorun-ansible-role-garage) | [garage-docker-ansible-deploy](#moan0s-garage-docker-ansible-deploy) | | Feature | [ansible-role-garage](#zorun-ansible-role-garage) | [garage-docker-ansible-deploy](#moan0s-garage-docker-ansible-deploy) | [eddster ansible-role-garage](#eddster-ansible-role-garage) |
|------------------------------------|---------------------------------------------|---------------------------------------------------------------| |------------------------------------|---------------------------------------------|---------------------------------------------------------------|---------------------------------|
| **Runtime** | Systemd | Docker | | **Runtime** | Systemd | Docker | Systemd |
| **Target OS** | Any Linux | Any Linux | | **Target OS** | Any Linux | Any Linux | Any Linux |
| **Architecture** | amd64, arm64, i686 | amd64, arm64 | | **Architecture** | amd64, arm64, i686 | amd64, arm64 | arm64, arm, 386, amd64 |
| **Additional software** | None | Traefik | | **Additional software** | None | Traefik | Nginx and Keepalived (optional) |
| **Automatic node connection** | ❌ | ✅ | | **Automatic node connection** | ❌ | ✅ | ✅ |
| **Layout management** | ❌ | ✅ | | **Layout management** | ❌ | ✅ | ✅ |
| **Manage buckets & keys** | ❌ | ✅ (basic) | | **Manage buckets & keys** | ❌ | ✅ (basic) | ✅ |
| **Allow custom Garage config** | ✅ | ❌ | | **Allow custom Garage config** | ✅ | ❌ | ❌ |
| **Facilitate Garage upgrades** | ✅ | ❌ | | **Facilitate Garage upgrades** | ✅ | ❌ | ✅ |
| **Multiple instances on one host** | ✅ | ✅ | | **Multiple instances on one host** | ✅ | ✅ | ❌ |
## zorun/ansible-role-garage ## zorun/ansible-role-garage
@ -49,3 +49,15 @@ structured DNS names, etc).
As a result, this role makes it easier to start with Garage on Ansible, As a result, this role makes it easier to start with Garage on Ansible,
but is less flexible. but is less flexible.
## eddster2309/ansible-role-garage
[Source code](https://github.com/eddster2309/ansible-role-garage), [Ansible galaxy](https://galaxy.ansible.com/ui/standalone/roles/eddster2309/garage/)
This role is an opinionated but customisable role using the official Garage
static binaries and only requires Systemd. As such it should work on any
Linux based host. It includes all the necessary configuration to
automatically set up a clustered Garage deployment. Most Garage
configuration options are exposed through Ansible variables so while you
can't provide a custom config you can get very close. It can optionally
install an HA nginx deployment with Keepalived.

View file

@ -15,9 +15,10 @@ Alpine Linux repositories (available since v3.17):
apk add garage apk add garage
``` ```
The default configuration file is installed to `/etc/garage.toml`. You can run The default configuration file is installed to `/etc/garage/garage.toml`. You can run
Garage using: `rc-service garage start`. If you don't specify `rpc_secret`, it Garage using: `rc-service garage start`.
will be automatically replaced with a random string on the first start.
If you don't specify `rpc_secret`, it will be automatically replaced with a random string on the first start.
Please note that this package is built without Consul discovery, Kubernetes Please note that this package is built without Consul discovery, Kubernetes
discovery, OpenTelemetry exporter, and K2V features (K2V will be enabled once discovery, OpenTelemetry exporter, and K2V features (K2V will be enabled once
@ -26,7 +27,7 @@ it's stable).
## Arch Linux ## Arch Linux
Garage is available in the [AUR](https://aur.archlinux.org/packages/garage). Garage is available in the official repositories under [extra](https://archlinux.org/packages/extra/x86_64/garage).
## FreeBSD ## FreeBSD

View file

@ -11,7 +11,7 @@ Firstly clone the repository:
```bash ```bash
git clone https://git.deuxfleurs.fr/Deuxfleurs/garage git clone https://git.deuxfleurs.fr/Deuxfleurs/garage
cd garage/scripts/helm cd garage/script/helm
``` ```
Deploy with default options: Deploy with default options:
@ -26,6 +26,13 @@ Or deploy with custom values:
helm install --create-namespace --namespace garage garage ./garage -f values.override.yaml helm install --create-namespace --namespace garage garage ./garage -f values.override.yaml
``` ```
If you want to manage the CustomResourceDefinition used by garage for its `kubernetes_discovery` outside of the helm chart, add `garage.kubernetesSkipCrd: true` to your custom values and use the kustomization before deploying the helm chart:
```bash
kubectl apply -k ../k8s/crd
helm install --create-namespace --namespace garage garage ./garage -f values.override.yaml
```
After deploying, cluster layout must be configured manually as described in [Creating a cluster layout](@/documentation/quick-start/_index.md#creating-a-cluster-layout). Use the following command to access garage CLI: After deploying, cluster layout must be configured manually as described in [Creating a cluster layout](@/documentation/quick-start/_index.md#creating-a-cluster-layout). Use the following command to access garage CLI:
```bash ```bash

View file

@ -96,14 +96,14 @@ to store 2 TB of data in total.
## Get a Docker image ## Get a Docker image
Our docker image is currently named `dxflrs/garage` and is stored on the [Docker Hub](https://hub.docker.com/r/dxflrs/garage/tags?page=1&ordering=last_updated). Our docker image is currently named `dxflrs/garage` and is stored on the [Docker Hub](https://hub.docker.com/r/dxflrs/garage/tags?page=1&ordering=last_updated).
We encourage you to use a fixed tag (eg. `v1.1.0`) and not the `latest` tag. We encourage you to use a fixed tag (eg. `v1.3.0`) and not the `latest` tag.
For this example, we will use the latest published version at the time of the writing which is `v1.1.0` but it's up to you For this example, we will use the latest published version at the time of the writing which is `v1.3.0` but it's up to you
to check [the most recent versions on the Docker Hub](https://hub.docker.com/r/dxflrs/garage/tags?page=1&ordering=last_updated). to check [the most recent versions on the Docker Hub](https://hub.docker.com/r/dxflrs/garage/tags?page=1&ordering=last_updated).
For example: For example:
``` ```
sudo docker pull dxflrs/garage:v1.1.0 sudo docker pull dxflrs/garage:v1.3.0
``` ```
## Deploying and configuring Garage ## Deploying and configuring Garage
@ -171,7 +171,7 @@ docker run \
-v /etc/garage.toml:/etc/garage.toml \ -v /etc/garage.toml:/etc/garage.toml \
-v /var/lib/garage/meta:/var/lib/garage/meta \ -v /var/lib/garage/meta:/var/lib/garage/meta \
-v /var/lib/garage/data:/var/lib/garage/data \ -v /var/lib/garage/data:/var/lib/garage/data \
dxflrs/garage:v1.1.0 dxflrs/garage:v1.3.0
``` ```
With this command line, Garage should be started automatically at each boot. With this command line, Garage should be started automatically at each boot.
@ -185,7 +185,7 @@ If you want to use `docker-compose`, you may use the following `docker-compose.y
version: "3" version: "3"
services: services:
garage: garage:
image: dxflrs/garage:v1.1.0 image: dxflrs/garage:v1.3.0
network_mode: "host" network_mode: "host"
restart: unless-stopped restart: unless-stopped
volumes: volumes:

View file

@ -28,6 +28,7 @@ StateDirectory=garage
DynamicUser=true DynamicUser=true
ProtectHome=true ProtectHome=true
NoNewPrivileges=true NoNewPrivileges=true
LimitNOFILE=42000
[Install] [Install]
WantedBy=multi-user.target WantedBy=multi-user.target

View file

@ -129,10 +129,10 @@ docker run \
-d \ -d \
--name garaged \ --name garaged \
-p 3900:3900 -p 3901:3901 -p 3902:3902 -p 3903:3903 \ -p 3900:3900 -p 3901:3901 -p 3902:3902 -p 3903:3903 \
-v /etc/garage.toml:/path/to/garage.toml \ -v /path/to/garage.toml:/etc/garage.toml \
-v /var/lib/garage/meta:/path/to/garage/meta \ -v /path/to/garage/meta:/var/lib/garage/meta \
-v /var/lib/garage/data:/path/to/garage/data \ -v /path/to/garage/data:/var/lib/garage/data \
dxflrs/garage:v1.1.0 dxflrs/garage:v1.3.0
``` ```
Under Linux, you can substitute `--network host` for `-p 3900:3900 -p 3901:3901 -p 3902:3902 -p 3903:3903` Under Linux, you can substitute `--network host` for `-p 3900:3900 -p 3901:3901 -p 3902:3902 -p 3903:3903`
@ -182,11 +182,12 @@ ID Hostname Address Tag Zone Capacit
## Creating a cluster layout ## Creating a cluster layout
Creating a cluster layout for a Garage deployment means informing Garage Creating a cluster layout for a Garage deployment means informing Garage
of the disk space available on each node of the cluster of the disk space available on each node of the cluster, `-c`,
as well as the zone (e.g. datacenter) each machine is located in. as well as the name of the zone (e.g. datacenter), `-z`, each machine is located in.
For our test deployment, we are using only one node. The way in which we configure For our test deployment, we have only one node with zone named `dc1` and a
it does not matter, you can simply write: capacity of `1G`, though the capacity is ignored for a single node deployment
and can be changed later when adding new nodes.
```bash ```bash
garage layout assign -z dc1 -c 1G <node_id> garage layout assign -z dc1 -c 1G <node_id>

View file

@ -24,7 +24,8 @@ db_engine = "lmdb"
block_size = "1M" block_size = "1M"
block_ram_buffer_max = "256MiB" block_ram_buffer_max = "256MiB"
block_max_concurrent_reads = 16
block_max_concurrent_writes_per_request = 10
lmdb_map_size = "1T" lmdb_map_size = "1T"
compression_level = 1 compression_level = 1
@ -46,6 +47,7 @@ bootstrap_peers = [
"212fd62eeaca72c122b45a7f4fa0f55e012aa5e24ac384a72a3016413fa724ff@[fc00:F::1]:3901", "212fd62eeaca72c122b45a7f4fa0f55e012aa5e24ac384a72a3016413fa724ff@[fc00:F::1]:3901",
] ]
allow_punycode = false
[consul_discovery] [consul_discovery]
api = "catalog" api = "catalog"
@ -92,29 +94,32 @@ The following gives details about each available configuration option.
[Environment variables](#env_variables). [Environment variables](#env_variables).
Top-level configuration options: Top-level configuration options, in alphabetical order:
[`allow_punycode`](#allow_punycode),
[`allow_world_readable_secrets`](#allow_world_readable_secrets), [`allow_world_readable_secrets`](#allow_world_readable_secrets),
[`block_max_concurrent_reads`](#block_max_concurrent_reads),
[`block_ram_buffer_max`](#block_ram_buffer_max), [`block_ram_buffer_max`](#block_ram_buffer_max),
[`block_max_concurrent_writes_per_request`](#block_max_concurrent_writes_per_request),
[`block_size`](#block_size), [`block_size`](#block_size),
[`bootstrap_peers`](#bootstrap_peers), [`bootstrap_peers`](#bootstrap_peers),
[`compression_level`](#compression_level), [`compression_level`](#compression_level),
[`consistency_mode`](#consistency_mode),
[`data_dir`](#data_dir), [`data_dir`](#data_dir),
[`data_fsync`](#data_fsync), [`data_fsync`](#data_fsync),
[`db_engine`](#db_engine), [`db_engine`](#db_engine),
[`disable_scrub`](#disable_scrub), [`disable_scrub`](#disable_scrub),
[`use_local_tz`](#use_local_tz),
[`lmdb_map_size`](#lmdb_map_size), [`lmdb_map_size`](#lmdb_map_size),
[`metadata_auto_snapshot_interval`](#metadata_auto_snapshot_interval), [`metadata_auto_snapshot_interval`](#metadata_auto_snapshot_interval),
[`metadata_dir`](#metadata_dir), [`metadata_dir`](#metadata_dir),
[`metadata_fsync`](#metadata_fsync), [`metadata_fsync`](#metadata_fsync),
[`metadata_snapshots_dir`](#metadata_snapshots_dir), [`metadata_snapshots_dir`](#metadata_snapshots_dir),
[`replication_factor`](#replication_factor), [`replication_factor`](#replication_factor),
[`consistency_mode`](#consistency_mode),
[`rpc_bind_addr`](#rpc_bind_addr), [`rpc_bind_addr`](#rpc_bind_addr),
[`rpc_bind_outgoing`](#rpc_bind_outgoing), [`rpc_bind_outgoing`](#rpc_bind_outgoing),
[`rpc_public_addr`](#rpc_public_addr), [`rpc_public_addr`](#rpc_public_addr),
[`rpc_public_addr_subnet`](#rpc_public_addr_subnet) [`rpc_public_addr_subnet`](#rpc_public_addr_subnet)
[`rpc_secret`/`rpc_secret_file`](#rpc_secret). [`rpc_secret`/`rpc_secret_file`](#rpc_secret),
[`use_local_tz`](#use_local_tz).
The `[consul_discovery]` section: The `[consul_discovery]` section:
[`api`](#consul_api), [`api`](#consul_api),
@ -151,13 +156,17 @@ The `[admin]` section:
### Environment variables {#env_variables} ### Environment variables {#env_variables}
The following configuration parameter must be specified as an environment The following configuration parameters must be specified as environment variables,
variable, it does not exist in the configuration file: they do not exist in the configuration file:
- `GARAGE_LOG_TO_SYSLOG` (since `v0.9.4`): set this to `1` or `true` to make the - `GARAGE_LOG_TO_SYSLOG` (since `v0.9.4`): set this to `1` or `true` to make the
Garage daemon send its logs to `syslog` (using the libc `syslog` function) Garage daemon send its logs to `syslog` (using the libc `syslog` function)
instead of printing to stderr. instead of printing to stderr.
- `GARAGE_LOG_TO_JOURNALD` (since `v1.2.0`): set this to `1` or `true` to make the
Garage daemon send its logs to `journald` (using the native protocol of `systemd-journald`)
instead of printing to stderr.
The following environment variables can be used to override the corresponding The following environment variables can be used to override the corresponding
values in the configuration file: values in the configuration file:
@ -169,7 +178,7 @@ values in the configuration file:
### Top-level configuration options ### Top-level configuration options
#### `replication_factor` {#replication_factor} #### `replication_factor` (since `v1.0.0`) {#replication_factor}
The replication factor can be any positive integer smaller or equal the node count in your cluster. The replication factor can be any positive integer smaller or equal the node count in your cluster.
The chosen replication factor has a big impact on the cluster's failure tolerancy and performance characteristics. The chosen replication factor has a big impact on the cluster's failure tolerancy and performance characteristics.
@ -217,7 +226,7 @@ is in progress. In theory, no data should be lost as rebalancing is a
routine operation for Garage, although we cannot guarantee you that everything routine operation for Garage, although we cannot guarantee you that everything
will go right in such an extreme scenario. will go right in such an extreme scenario.
#### `consistency_mode` {#consistency_mode} #### `consistency_mode` (since `v1.0.0`) {#consistency_mode}
The consistency mode setting determines the read and write behaviour of your cluster. The consistency mode setting determines the read and write behaviour of your cluster.
@ -327,6 +336,7 @@ Since `v0.8.0`, Garage can use alternative storage backends as follows:
| --------- | ----------------- | ------------- | | --------- | ----------------- | ------------- |
| [LMDB](https://www.symas.com/lmdb) (since `v0.8.0`, default since `v0.9.0`) | `"lmdb"` | `<metadata_dir>/db.lmdb/` | | [LMDB](https://www.symas.com/lmdb) (since `v0.8.0`, default since `v0.9.0`) | `"lmdb"` | `<metadata_dir>/db.lmdb/` |
| [Sqlite](https://sqlite.org) (since `v0.8.0`) | `"sqlite"` | `<metadata_dir>/db.sqlite` | | [Sqlite](https://sqlite.org) (since `v0.8.0`) | `"sqlite"` | `<metadata_dir>/db.sqlite` |
| [Fjall](https://github.com/fjall-rs/fjall) (**experimental support** since `v1.3.0`) | `"fjall"` | `<metadata_dir>/db.fjall/` |
| [Sled](https://sled.rs) (old default, removed since `v1.0`) | `"sled"` | `<metadata_dir>/db/` | | [Sled](https://sled.rs) (old default, removed since `v1.0`) | `"sled"` | `<metadata_dir>/db/` |
Sled was supported until Garage v0.9.x, and was removed in Garage v1.0. Sled was supported until Garage v0.9.x, and was removed in Garage v1.0.
@ -363,6 +373,14 @@ LMDB works very well, but is known to have the following limitations:
so it is not the best choice for high-performance storage clusters, so it is not the best choice for high-performance storage clusters,
but it should work fine in many cases. but it should work fine in many cases.
- Fjall: a storage engine based on LSM trees, which theoretically allow for
higher write throughput than other storage engines that are based on B-trees.
Using Fjall could potentially improve Garage's performance significantly in
write-heavy workloads. **Support for Fjall is experimental at this point**,
we have added it to Garage for evaluation purposes only. **Do not use it for
production-critical workloads.**
It is possible to convert Garage's metadata directory from one format to another It is possible to convert Garage's metadata directory from one format to another
using the `garage convert-db` command, which should be used as follows: using the `garage convert-db` command, which should be used as follows:
@ -400,6 +418,7 @@ Here is how this option impacts the different database engines:
|----------|------------------------------------|-------------------------------| |----------|------------------------------------|-------------------------------|
| Sqlite | `PRAGMA synchronous = OFF` | `PRAGMA synchronous = NORMAL` | | Sqlite | `PRAGMA synchronous = OFF` | `PRAGMA synchronous = NORMAL` |
| LMDB | `MDB_NOMETASYNC` + `MDB_NOSYNC` | `MDB_NOMETASYNC` | | LMDB | `MDB_NOMETASYNC` + `MDB_NOSYNC` | `MDB_NOMETASYNC` |
| Fjall | default options | not supported |
Note that the Sqlite database is always ran in `WAL` mode (`PRAGMA journal_mode = WAL`). Note that the Sqlite database is always ran in `WAL` mode (`PRAGMA journal_mode = WAL`).
@ -506,6 +525,37 @@ node.
The default value is 256MiB. The default value is 256MiB.
#### `block_max_concurrent_reads` (since `v1.3.0` / `v2.1.0`) {#block_max_concurrent_reads}
The maximum number of blocks (individual files in the data directory) open
simultaneously for reading.
Reducing this number does not limit the number of data blocks that can be
transferred through the network simultaneously. This mechanism was just added
as a backpressure mechanism for HDD read speed: it helps avoid a situation
where too many requests are coming in and Garage is reading too many block
files simultaneously, thus not making timely progress on any of the reads.
When a request to read a data block comes in through the network, the request
waits for one of the `block_max_concurrent_reads` slots to be available
(internally implemented using a Semaphore object). Once it has acquired a read
slot, it reads the entire block file to RAM and frees the slot as soon as the
block file is finished reading. Only after the slot is released will the
block's data start being transferred over the network. If the request fails to
acquire a reading slot within 15 seconds, it fails with a timeout error.
Timeout events can be monitored through the `block_read_semaphore_timeouts`
metric in Prometheus: a non-zero number of such events indicates an I/O
bottleneck on HDD read speed.
#### `block_max_concurrent_writes_per_request` (since `v2.1.0`) {#block_max_concurrent_writes_per_request}
This parameter sets the maximum number of parallel block writes per put
request, and is designed to adapt to the concurrent write performance of
different storage media. Higher values improve throughput but increase
memory usage.

Default: 3. Recommended: 10-30 for NVMe, 3-10 for HDD.
#### `lmdb_map_size` {#lmdb_map_size} #### `lmdb_map_size` {#lmdb_map_size}
This parameters can be used to set the map size used by LMDB, This parameters can be used to set the map size used by LMDB,
@ -604,7 +654,7 @@ be obtained by running `garage node id` and then included directly in the
key will be returned by `garage node id` and you will have to add the IP key will be returned by `garage node id` and you will have to add the IP
yourself. yourself.
### `allow_world_readable_secrets` or `GARAGE_ALLOW_WORLD_READABLE_SECRETS` (env) {#allow_world_readable_secrets} #### `allow_world_readable_secrets` or `GARAGE_ALLOW_WORLD_READABLE_SECRETS` (env) {#allow_world_readable_secrets}
Garage checks the permissions of your secret files to make sure they're not Garage checks the permissions of your secret files to make sure they're not
world-readable. In some cases, the check might fail and consider your files as world-readable. In some cases, the check might fail and consider your files as
@ -616,6 +666,13 @@ permission verification.
Alternatively, you can set the `GARAGE_ALLOW_WORLD_READABLE_SECRETS` Alternatively, you can set the `GARAGE_ALLOW_WORLD_READABLE_SECRETS`
environment variable to `true` to bypass the permissions check. environment variable to `true` to bypass the permissions check.
#### `allow_punycode` {#allow_punycode}
Allow creating buckets with names containing punycode. When used for buckets served
as websites, this allows using almost any unicode character in the domain name.
Defaults to `false`.
### The `[consul_discovery]` section ### The `[consul_discovery]` section
Garage supports discovering other nodes of the cluster using Consul. For this Garage supports discovering other nodes of the cluster using Consul. For this

View file

@ -23,17 +23,17 @@ Feel free to open a PR to suggest fixes this table. Minio is missing because the
- 2022-05-25 - Many Ceph S3 endpoints are not documented but implemented. Following a notification from the Ceph community, we added them. - 2022-05-25 - Many Ceph S3 endpoints are not documented but implemented. Following a notification from the Ceph community, we added them.
## High-level features ## High-level features
| Feature | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) | | Feature | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
|------------------------------|----------------------------------|-----------------|---------------|---------|-----| |------------------------------|----------------------------------|-----------------|---------------|---------|-----|
| [signature v2](https://docs.aws.amazon.com/general/latest/gr/signature-version-2.html) (deprecated) | ❌ Missing | ✅ | ✅ | ✅ | ✅ | | [signature v2](https://docs.aws.amazon.com/AmazonS3/latest/API/Appendix-Sigv2.html) (deprecated) | ❌ Missing | ✅ | ✅ | ✅ | ✅ |
| [signature v4](https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-authenticating-requests.html) | ✅ Implemented | ✅ | ✅ | ❌ | ✅ | | [signature v4](https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-authenticating-requests.html) | ✅ Implemented | ✅ | ✅ | ❌ | ✅ |
| [URL path-style](https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#path-style-access) (eg. `host.tld/bucket/key`) | ✅ Implemented | ✅ | ✅ | ❓| ✅ | | [URL path-style](https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#path-style-access) (eg. `host.tld/bucket/key`) | ✅ Implemented | ✅ | ✅ | ❓| ✅ |
| [URL vhost-style](https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#virtual-hosted-style-access) URL (eg. `bucket.host.tld/key`) | ✅ Implemented | ❌| ✅| ✅ | ✅ | | [URL vhost-style](https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#virtual-hosted-style-access) URL (eg. `bucket.host.tld/key`) | ✅ Implemented | ❌| ✅| ✅ | ✅ |
| [Presigned URLs](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ShareObjectPreSignedURL.html) | ✅ Implemented | ❌| ✅ | ✅ | ✅(❓) | | [Presigned URLs](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ShareObjectPreSignedURL.html) | ✅ Implemented | ❌| ✅ | ✅ | ✅(❓) |
| [SSE-C encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html) | ✅ Implemented | ❓ | ✅ | ❌ | ✅ | | [SSE-C encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html) | ✅ Implemented | ❓ | ✅ | ❌ | ✅ |
| [Bucket versioning](https://docs.aws.amazon.com/AmazonS3/latest/userguide/Versioning.html) | ❌ Missing | ✅ | ✅ | ❌ | ✅ |
*Note:* OpenIO does not says if it supports presigned URLs. Because it is part *Note:* OpenIO does not says if it supports presigned URLs. Because it is part
of signature v4 and they claim they support it without additional precisions, of signature v4 and they claim they support it without additional precisions,

View file

@ -70,7 +70,7 @@ Example response body:
```json ```json
{ {
"node": "b10c110e4e854e5aa3f4637681befac755154b20059ec163254ddbfae86b09df", "node": "b10c110e4e854e5aa3f4637681befac755154b20059ec163254ddbfae86b09df",
"garageVersion": "v1.1.0", "garageVersion": "v1.3.0",
"garageFeatures": [ "garageFeatures": [
"k2v", "k2v",
"lmdb", "lmdb",

16
flake.lock generated
View file

@ -50,17 +50,17 @@
}, },
"nixpkgs": { "nixpkgs": {
"locked": { "locked": {
"lastModified": 1736692550, "lastModified": 1763977559,
"narHash": "sha256-7tk8xH+g0sJkKLTJFOxphJxxOjMDFMWv24nXslaU2ro=", "narHash": "sha256-g4MKqsIRy5yJwEsI+fYODqLUnAqIY4kZai0nldAP6EM=",
"owner": "NixOS", "owner": "NixOS",
"repo": "nixpkgs", "repo": "nixpkgs",
"rev": "7c4869c47090dd7f9f1bdfb49a22aea026996815", "rev": "cfe2c7d5b5d3032862254e68c37a6576b633d632",
"type": "github" "type": "github"
}, },
"original": { "original": {
"owner": "NixOS", "owner": "NixOS",
"repo": "nixpkgs", "repo": "nixpkgs",
"rev": "7c4869c47090dd7f9f1bdfb49a22aea026996815", "rev": "cfe2c7d5b5d3032862254e68c37a6576b633d632",
"type": "github" "type": "github"
} }
}, },
@ -80,17 +80,17 @@
] ]
}, },
"locked": { "locked": {
"lastModified": 1738549608, "lastModified": 1763952169,
"narHash": "sha256-GdyT9QEUSx5k/n8kILuNy83vxxdyUfJ8jL5mMpQZWfw=", "narHash": "sha256-+PeDBD8P+NKauH+w7eO/QWCIp8Cx4mCfWnh9sJmy9CM=",
"owner": "oxalica", "owner": "oxalica",
"repo": "rust-overlay", "repo": "rust-overlay",
"rev": "35c6f8c4352f995ecd53896200769f80a3e8f22d", "rev": "ab726555a9a72e6dc80649809147823a813fa95b",
"type": "github" "type": "github"
}, },
"original": { "original": {
"owner": "oxalica", "owner": "oxalica",
"repo": "rust-overlay", "repo": "rust-overlay",
"rev": "35c6f8c4352f995ecd53896200769f80a3e8f22d", "rev": "ab726555a9a72e6dc80649809147823a813fa95b",
"type": "github" "type": "github"
} }
}, },

View file

@ -2,13 +2,13 @@
description = description =
"Garage, an S3-compatible distributed object store for self-hosted deployments"; "Garage, an S3-compatible distributed object store for self-hosted deployments";
# Nixpkgs 24.11 as of 2025-01-12 # Nixpkgs 25.05 as of 2025-11-24
inputs.nixpkgs.url = inputs.nixpkgs.url =
"github:NixOS/nixpkgs/7c4869c47090dd7f9f1bdfb49a22aea026996815"; "github:NixOS/nixpkgs/cfe2c7d5b5d3032862254e68c37a6576b633d632";
# Rust overlay as of 2025-02-03 # Rust overlay as of 2025-11-24
inputs.rust-overlay.url = inputs.rust-overlay.url =
"github:oxalica/rust-overlay/35c6f8c4352f995ecd53896200769f80a3e8f22d"; "github:oxalica/rust-overlay/ab726555a9a72e6dc80649809147823a813fa95b";
inputs.rust-overlay.inputs.nixpkgs.follows = "nixpkgs"; inputs.rust-overlay.inputs.nixpkgs.follows = "nixpkgs";
inputs.crane.url = "github:ipetkov/crane"; inputs.crane.url = "github:ipetkov/crane";
@ -30,6 +30,10 @@
inherit system nixpkgs crane rust-overlay extraTestEnv; inherit system nixpkgs crane rust-overlay extraTestEnv;
release = false; release = false;
}).garage-test; }).garage-test;
lints = (compile {
inherit system nixpkgs crane rust-overlay;
release = false;
});
in in
{ {
packages = { packages = {
@ -53,6 +57,13 @@
tests-sqlite = testWith { tests-sqlite = testWith {
GARAGE_TEST_INTEGRATION_DB_ENGINE = "sqlite"; GARAGE_TEST_INTEGRATION_DB_ENGINE = "sqlite";
}; };
tests-fjall = testWith {
GARAGE_TEST_INTEGRATION_DB_ENGINE = "fjall";
};
# lints (fmt, clippy)
fmt = lints.garage-cargo-fmt;
clippy = lints.garage-cargo-clippy;
}; };
# ---- developpment shell, for making native builds only ---- # ---- developpment shell, for making native builds only ----

View file

@ -48,7 +48,7 @@ let
inherit (pkgs) lib stdenv; inherit (pkgs) lib stdenv;
toolchainFn = (p: p.rust-bin.stable."1.82.0".default.override { toolchainFn = (p: p.rust-bin.stable."1.91.0".default.override {
targets = lib.optionals (target != null) [ rustTarget ]; targets = lib.optionals (target != null) [ rustTarget ];
extensions = [ extensions = [
"rust-src" "rust-src"
@ -68,12 +68,13 @@ let
rootFeatures = if features != null then rootFeatures = if features != null then
features features
else else
([ "bundled-libs" "lmdb" "sqlite" "k2v" ] ++ (lib.optionals release [ ([ "bundled-libs" "lmdb" "sqlite" "fjall" "k2v" ] ++ (lib.optionals release [
"consul-discovery" "consul-discovery"
"kubernetes-discovery" "kubernetes-discovery"
"metrics" "metrics"
"telemetry-otlp" "telemetry-otlp"
"syslog" "syslog"
"journald"
])); ]));
featuresStr = lib.concatStringsSep "," rootFeatures; featuresStr = lib.concatStringsSep "," rootFeatures;
@ -189,4 +190,15 @@ in rec {
pkgs.cacert pkgs.cacert
]; ];
} // extraTestEnv); } // extraTestEnv);
# ---- source code linting ----
garage-cargo-fmt = craneLib.cargoFmt (commonArgs // {
cargoExtraArgs = "";
});
garage-cargo-clippy = craneLib.cargoClippy (commonArgs // {
cargoArtifacts = garage-deps;
cargoClippyExtraArgs = "--all-targets -- -D warnings";
});
} }

View file

@ -1,6 +1,7 @@
export AWS_ACCESS_KEY_ID=`cat /tmp/garage.s3 |cut -d' ' -f1` export AWS_ACCESS_KEY_ID=`cat /tmp/garage.s3 |cut -d' ' -f1`
export AWS_SECRET_ACCESS_KEY=`cat /tmp/garage.s3 |cut -d' ' -f2` export AWS_SECRET_ACCESS_KEY=`cat /tmp/garage.s3 |cut -d' ' -f2`
export AWS_DEFAULT_REGION='garage' export AWS_DEFAULT_REGION='garage'
export AWS_REQUEST_CHECKSUM_CALCULATION='when_required'
# FUTUREWORK: set AWS_ENDPOINT_URL instead, once nixpkgs bumps awscli to >=2.13.0. # FUTUREWORK: set AWS_ENDPOINT_URL instead, once nixpkgs bumps awscli to >=2.13.0.
function aws { command aws --endpoint-url http://127.0.0.1:3911 $@ ; } function aws { command aws --endpoint-url http://127.0.0.1:3911 $@ ; }

View file

@ -1,24 +1,18 @@
apiVersion: v2 apiVersion: v2
name: garage name: garage
description: S3-compatible object store for small self-hosted geo-distributed deployments description: S3-compatible object store for small self-hosted geo-distributed deployments
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application type: application
version: 0.7.3
appVersion: "v1.3.1"
home: https://garagehq.deuxfleurs.fr/
icon: https://garagehq.deuxfleurs.fr/images/garage-logo.svg
# This is the chart version. This version number should be incremented each time you make changes keywords:
# to the chart and its templates, including the app version. - geo-distributed
# Versions are expected to follow Semantic Versioning (https://semver.org/) - read-after-write-consistency
version: 0.7.0 - s3-compatible
# This is the version number of the application being deployed. This version number should be sources:
# incremented each time you make changes to the application. Versions are not expected to - https://git.deuxfleurs.fr/Deuxfleurs/garage.git
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes. maintainers: []
appVersion: "v1.1.0"

View file

@ -1,9 +1,15 @@
# garage # garage
![Version: 0.6.0](https://img.shields.io/badge/Version-0.6.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v1.0.1](https://img.shields.io/badge/AppVersion-v1.0.1-informational?style=flat-square) ![Version: 0.7.3](https://img.shields.io/badge/Version-0.7.3-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v1.3.1](https://img.shields.io/badge/AppVersion-v1.3.1-informational?style=flat-square)
S3-compatible object store for small self-hosted geo-distributed deployments S3-compatible object store for small self-hosted geo-distributed deployments
**Homepage:** <https://garagehq.deuxfleurs.fr/>
## Source Code
* <https://git.deuxfleurs.fr/Deuxfleurs/garage.git>
## Values ## Values
| Key | Type | Default | Description | | Key | Type | Default | Description |
@ -23,6 +29,7 @@ S3-compatible object store for small self-hosted geo-distributed deployments
| garage.existingConfigMap | string | `""` | if not empty string, allow using an existing ConfigMap for the garage.toml, if set, ignores garage.toml | | garage.existingConfigMap | string | `""` | if not empty string, allow using an existing ConfigMap for the garage.toml, if set, ignores garage.toml |
| garage.garageTomlString | string | `""` | String Template for the garage configuration if set, ignores above values. Values can be templated, see https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/ | | garage.garageTomlString | string | `""` | String Template for the garage configuration if set, ignores above values. Values can be templated, see https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/ |
| garage.kubernetesSkipCrd | bool | `false` | Set to true if you want to use k8s discovery but install the CRDs manually outside of the helm chart, for example if you operate at namespace level without cluster ressources | | garage.kubernetesSkipCrd | bool | `false` | Set to true if you want to use k8s discovery but install the CRDs manually outside of the helm chart, for example if you operate at namespace level without cluster ressources |
| garage.metadataAutoSnapshotInterval | string | `""` | If this value is set, Garage will automatically take a snapshot of the metadata DB file at a regular interval and save it in the metadata directory. https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#metadata_auto_snapshot_interval |
| garage.replicationMode | string | `"3"` | Default to 3 replicas, see the replication_mode section at https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#replication-mode | | garage.replicationMode | string | `"3"` | Default to 3 replicas, see the replication_mode section at https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#replication-mode |
| garage.rpcBindAddr | string | `"[::]:3901"` | | | garage.rpcBindAddr | string | `"[::]:3901"` | |
| garage.rpcSecret | string | `""` | If not given, a random secret will be generated and stored in a Secret object | | garage.rpcSecret | string | `""` | If not given, a random secret will be generated and stored in a Secret object |
@ -49,6 +56,7 @@ S3-compatible object store for small self-hosted geo-distributed deployments
| initImage.pullPolicy | string | `"IfNotPresent"` | | | initImage.pullPolicy | string | `"IfNotPresent"` | |
| initImage.repository | string | `"busybox"` | | | initImage.repository | string | `"busybox"` | |
| initImage.tag | string | `"stable"` | | | initImage.tag | string | `"stable"` | |
| livenessProbe | object | `{}` | Specifies a livenessProbe |
| monitoring.metrics.enabled | bool | `false` | If true, a service for monitoring is created with a prometheus.io/scrape annotation | | monitoring.metrics.enabled | bool | `false` | If true, a service for monitoring is created with a prometheus.io/scrape annotation |
| monitoring.metrics.serviceMonitor.enabled | bool | `false` | If true, a ServiceMonitor CRD is created for a prometheus operator https://github.com/coreos/prometheus-operator | | monitoring.metrics.serviceMonitor.enabled | bool | `false` | If true, a ServiceMonitor CRD is created for a prometheus operator https://github.com/coreos/prometheus-operator |
| monitoring.metrics.serviceMonitor.interval | string | `"15s"` | | | monitoring.metrics.serviceMonitor.interval | string | `"15s"` | |
@ -71,6 +79,7 @@ S3-compatible object store for small self-hosted geo-distributed deployments
| podSecurityContext.runAsGroup | int | `1000` | | | podSecurityContext.runAsGroup | int | `1000` | |
| podSecurityContext.runAsNonRoot | bool | `true` | | | podSecurityContext.runAsNonRoot | bool | `true` | |
| podSecurityContext.runAsUser | int | `1000` | | | podSecurityContext.runAsUser | int | `1000` | |
| readinessProbe | object | `{}` | Specifies a readinessProbe |
| resources | object | `{}` | | | resources | object | `{}` | |
| securityContext.capabilities | object | `{"drop":["ALL"]}` | The default security context is heavily restricted, feel free to tune it to your requirements | | securityContext.capabilities | object | `{"drop":["ALL"]}` | The default security context is heavily restricted, feel free to tune it to your requirements |
| securityContext.readOnlyRootFilesystem | bool | `true` | | | securityContext.readOnlyRootFilesystem | bool | `true` | |

View file

@ -19,6 +19,10 @@ data:
compression_level = {{ .Values.garage.compressionLevel }} compression_level = {{ .Values.garage.compressionLevel }}
{{- if .Values.garage.metadataAutoSnapshotInterval }}
metadata_auto_snapshot_interval = {{ .Values.garage.metadataAutoSnapshotInterval | quote }}
{{- end }}
rpc_bind_addr = "{{ .Values.garage.rpcBindAddr }}" rpc_bind_addr = "{{ .Values.garage.rpcBindAddr }}"
# rpc_secret will be populated by the init container from a k8s secret object # rpc_secret will be populated by the init container from a k8s secret object
rpc_secret = "__RPC_SECRET_REPLACE__" rpc_secret = "__RPC_SECRET_REPLACE__"

View file

@ -4,6 +4,10 @@ metadata:
name: {{ include "garage.fullname" . }} name: {{ include "garage.fullname" . }}
labels: labels:
{{- include "garage.labels" . | nindent 4 }} {{- include "garage.labels" . | nindent 4 }}
{{- with .Values.service.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec: spec:
type: {{ .Values.service.type }} type: {{ .Values.service.type }}
ports: ports:

View file

@ -78,15 +78,14 @@ spec:
{{- with .Values.extraVolumeMounts }} {{- with .Values.extraVolumeMounts }}
{{- toYaml . | nindent 12 }} {{- toYaml . | nindent 12 }}
{{- end }} {{- end }}
# TODO {{- with .Values.livenessProbe }}
# livenessProbe: livenessProbe:
# httpGet: {{- toYaml . | nindent 12 }}
# path: / {{- end }}
# port: 3900 {{- with .Values.readinessProbe }}
# readinessProbe: readinessProbe:
# httpGet: {{- toYaml . | nindent 12 }}
# path: / {{- end }}
# port: 3900
resources: resources:
{{- toYaml .Values.resources | nindent 12 }} {{- toYaml .Values.resources | nindent 12 }}
volumes: volumes:

View file

@ -21,6 +21,10 @@ garage:
# https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#compression-level # https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#compression-level
compressionLevel: "1" compressionLevel: "1"
# -- If this value is set, Garage will automatically take a snapshot of the metadata DB file at a regular interval and save it in the metadata directory.
# https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#metadata_auto_snapshot_interval
metadataAutoSnapshotInterval: ""
rpcBindAddr: "[::]:3901" rpcBindAddr: "[::]:3901"
# -- If not given, a random secret will be generated and stored in a Secret object # -- If not given, a random secret will be generated and stored in a Secret object
rpcSecret: "" rpcSecret: ""
@ -120,6 +124,8 @@ service:
# - NodePort (+ Ingress) # - NodePort (+ Ingress)
# - LoadBalancer # - LoadBalancer
type: ClusterIP type: ClusterIP
# -- Annotations to add to the service
annotations: {}
s3: s3:
api: api:
port: 3900 port: 3900
@ -191,6 +197,21 @@ resources: {}
# cpu: 100m # cpu: 100m
# memory: 512Mi # memory: 512Mi
# -- Specifies a livenessProbe
livenessProbe: {}
#httpGet:
# path: /health
# port: 3903
#initialDelaySeconds: 5
#periodSeconds: 30
# -- Specifies a readinessProbe
readinessProbe: {}
#httpGet:
# path: /health
# port: 3903
#initialDelaySeconds: 5
#periodSeconds: 30
nodeSelector: {} nodeSelector: {}
tolerations: [] tolerations: []

View file

@ -0,0 +1,43 @@
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: garagenodes.deuxfleurs.fr
spec:
conversion:
strategy: None
group: deuxfleurs.fr
names:
kind: GarageNode
listKind: GarageNodeList
plural: garagenodes
singular: garagenode
scope: Namespaced
versions:
- name: v1
schema:
openAPIV3Schema:
description: Auto-generated derived type for Node via `CustomResource`
properties:
spec:
properties:
address:
format: ip
type: string
hostname:
type: string
port:
format: uint16
minimum: 0
type: integer
required:
- address
- hostname
- port
type: object
required:
- spec
title: GarageNode
type: object
served: true
storage: true
subresources: {}

View file

@ -0,0 +1,5 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- garagenodes.deuxfleurs.fr.yaml

View file

@ -34,6 +34,8 @@ in
jq jq
]; ];
shellHook = '' shellHook = ''
export AWS_REQUEST_CHECKSUM_CALCULATION='when_required'
function to_s3 { function to_s3 {
aws \ aws \
--endpoint-url https://garage.deuxfleurs.fr \ --endpoint-url https://garage.deuxfleurs.fr \

View file

@ -1,6 +1,6 @@
[package] [package]
name = "garage_api_admin" name = "garage_api_admin"
version = "1.1.0" version = "1.3.1"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -22,7 +22,7 @@ garage_api_common.workspace = true
argon2.workspace = true argon2.workspace = true
async-trait.workspace = true async-trait.workspace = true
err-derive.workspace = true thiserror.workspace = true
hex.workspace = true hex.workspace = true
tracing.workspace = true tracing.workspace = true

View file

@ -277,7 +277,7 @@ pub async fn handle_create_bucket(
let helper = garage.locked_helper().await; let helper = garage.locked_helper().await;
if let Some(ga) = &req.global_alias { if let Some(ga) = &req.global_alias {
if !is_valid_bucket_name(ga) { if !is_valid_bucket_name(ga, garage.config.allow_punycode) {
return Err(Error::bad_request(format!( return Err(Error::bad_request(format!(
"{}: {}", "{}: {}",
ga, INVALID_BUCKET_NAME_MESSAGE ga, INVALID_BUCKET_NAME_MESSAGE
@ -292,7 +292,7 @@ pub async fn handle_create_bucket(
} }
if let Some(la) = &req.local_alias { if let Some(la) = &req.local_alias {
if !is_valid_bucket_name(&la.alias) { if !is_valid_bucket_name(&la.alias, garage.config.allow_punycode) {
return Err(Error::bad_request(format!( return Err(Error::bad_request(format!(
"{}: {}", "{}: {}",
la.alias, INVALID_BUCKET_NAME_MESSAGE la.alias, INVALID_BUCKET_NAME_MESSAGE
@ -382,7 +382,7 @@ pub async fn handle_delete_bucket(
for ((key_id, alias), _, active) in state.local_aliases.items().iter() { for ((key_id, alias), _, active) in state.local_aliases.items().iter() {
if *active { if *active {
helper helper
.unset_local_bucket_alias(bucket.id, key_id, alias) .purge_local_bucket_alias(bucket.id, key_id, alias)
.await?; .await?;
} }
} }

View file

@ -1,8 +1,8 @@
use std::convert::TryFrom; use std::convert::TryFrom;
use err_derive::Error;
use hyper::header::HeaderValue; use hyper::header::HeaderValue;
use hyper::{HeaderMap, StatusCode}; use hyper::{HeaderMap, StatusCode};
use thiserror::Error;
pub use garage_model::helper::error::Error as HelperError; pub use garage_model::helper::error::Error as HelperError;
@ -16,20 +16,17 @@ use garage_api_common::helpers::*;
/// Errors of this crate /// Errors of this crate
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum Error { pub enum Error {
#[error(display = "{}", _0)] #[error("{0}")]
/// Error from common error /// Error from common error
Common(#[error(source)] CommonError), Common(#[from] CommonError),
// Category: cannot process // Category: cannot process
/// The API access key does not exist /// The API access key does not exist
#[error(display = "Access key not found: {}", _0)] #[error("Access key not found: {0}")]
NoSuchAccessKey(String), NoSuchAccessKey(String),
/// In Import key, the key already exists /// In Import key, the key already exists
#[error( #[error("Key {0} already exists in data store. Even if it is deleted, we can't let you create a new key with the same ID. Sorry.")]
display = "Key {} already exists in data store. Even if it is deleted, we can't let you create a new key with the same ID. Sorry.",
_0
)]
KeyAlreadyExists(String), KeyAlreadyExists(String),
} }

View file

@ -1,6 +1,6 @@
[package] [package]
name = "garage_api_common" name = "garage_api_common"
version = "1.1.0" version = "1.3.1"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -24,11 +24,10 @@ chrono.workspace = true
crc32fast.workspace = true crc32fast.workspace = true
crc32c.workspace = true crc32c.workspace = true
crypto-common.workspace = true crypto-common.workspace = true
err-derive.workspace = true thiserror.workspace = true
hex.workspace = true hex.workspace = true
hmac.workspace = true hmac.workspace = true
md-5.workspace = true md-5.workspace = true
idna.workspace = true
tracing.workspace = true tracing.workspace = true
nom.workspace = true nom.workspace = true
pin-project.workspace = true pin-project.workspace = true

View file

@ -1,7 +1,7 @@
use std::convert::TryFrom; use std::convert::TryFrom;
use err_derive::Error;
use hyper::StatusCode; use hyper::StatusCode;
use thiserror::Error;
use garage_util::error::Error as GarageError; use garage_util::error::Error as GarageError;
@ -12,48 +12,48 @@ use garage_model::helper::error::Error as HelperError;
pub enum CommonError { pub enum CommonError {
// ---- INTERNAL ERRORS ---- // ---- INTERNAL ERRORS ----
/// Error related to deeper parts of Garage /// Error related to deeper parts of Garage
#[error(display = "Internal error: {}", _0)] #[error("Internal error: {0}")]
InternalError(#[error(source)] GarageError), InternalError(#[from] GarageError),
/// Error related to Hyper /// Error related to Hyper
#[error(display = "Internal error (Hyper error): {}", _0)] #[error("Internal error (Hyper error): {0}")]
Hyper(#[error(source)] hyper::Error), Hyper(#[from] hyper::Error),
/// Error related to HTTP /// Error related to HTTP
#[error(display = "Internal error (HTTP error): {}", _0)] #[error("Internal error (HTTP error): {0}")]
Http(#[error(source)] http::Error), Http(#[from] http::Error),
// ---- GENERIC CLIENT ERRORS ---- // ---- GENERIC CLIENT ERRORS ----
/// Proper authentication was not provided /// Proper authentication was not provided
#[error(display = "Forbidden: {}", _0)] #[error("Forbidden: {0}")]
Forbidden(String), Forbidden(String),
/// Generic bad request response with custom message /// Generic bad request response with custom message
#[error(display = "Bad request: {}", _0)] #[error("Bad request: {0}")]
BadRequest(String), BadRequest(String),
/// The client sent a header with invalid value /// The client sent a header with invalid value
#[error(display = "Invalid header value: {}", _0)] #[error("Invalid header value: {0}")]
InvalidHeader(#[error(source)] hyper::header::ToStrError), InvalidHeader(#[from] hyper::header::ToStrError),
// ---- SPECIFIC ERROR CONDITIONS ---- // ---- SPECIFIC ERROR CONDITIONS ----
// These have to be error codes referenced in the S3 spec here: // These have to be error codes referenced in the S3 spec here:
// https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html#ErrorCodeList // https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html#ErrorCodeList
/// The bucket requested don't exists /// The bucket requested don't exists
#[error(display = "Bucket not found: {}", _0)] #[error("Bucket not found: {0}")]
NoSuchBucket(String), NoSuchBucket(String),
/// Tried to create a bucket that already exist /// Tried to create a bucket that already exist
#[error(display = "Bucket already exists")] #[error("Bucket already exists")]
BucketAlreadyExists, BucketAlreadyExists,
/// Tried to delete a non-empty bucket /// Tried to delete a non-empty bucket
#[error(display = "Tried to delete a non-empty bucket")] #[error("Tried to delete a non-empty bucket")]
BucketNotEmpty, BucketNotEmpty,
// Category: bad request // Category: bad request
/// Bucket name is not valid according to AWS S3 specs /// Bucket name is not valid according to AWS S3 specs
#[error(display = "Invalid bucket name: {}", _0)] #[error("Invalid bucket name: {0}")]
InvalidBucketName(String), InvalidBucketName(String),
} }

View file

@ -58,6 +58,12 @@ pub trait ApiHandler: Send + Sync + 'static {
req: Request<IncomingBody>, req: Request<IncomingBody>,
endpoint: Self::Endpoint, endpoint: Self::Endpoint,
) -> impl Future<Output = Result<Response<BoxBody<Self::Error>>, Self::Error>> + Send; ) -> impl Future<Output = Result<Response<BoxBody<Self::Error>>, Self::Error>> + Send;
/// Returns the key id used to authenticate this request. The ID returned must be safe to
/// log.
fn key_id_from_request(&self, _req: &Request<IncomingBody>) -> Option<String> {
None
}
} }
pub struct ApiServer<A: ApiHandler> { pub struct ApiServer<A: ApiHandler> {
@ -142,19 +148,20 @@ impl<A: ApiHandler> ApiServer<A> {
) -> Result<Response<BoxBody<A::Error>>, http::Error> { ) -> Result<Response<BoxBody<A::Error>>, http::Error> {
let uri = req.uri().clone(); let uri = req.uri().clone();
if let Ok(forwarded_for_ip_addr) = let source = if let Ok(forwarded_for_ip_addr) =
forwarded_headers::handle_forwarded_for_headers(req.headers()) forwarded_headers::handle_forwarded_for_headers(req.headers())
{ {
info!( format!("{forwarded_for_ip_addr} (via {addr})")
"{} (via {}) {} {}",
forwarded_for_ip_addr,
addr,
req.method(),
uri
);
} else { } else {
info!("{} {} {}", addr, req.method(), uri); format!("{addr}")
} };
// we only do this to log the access key, so we can discard any error
let key = self
.api_handler
.key_id_from_request(&req)
.map(|k| format!("(key {k}) "))
.unwrap_or_default();
info!("{source} {key}{} {uri}", req.method());
debug!("{:?}", req); debug!("{:?}", req);
let tracer = opentelemetry::global::tracer("garage"); let tracer = opentelemetry::global::tracer("garage");
@ -343,7 +350,11 @@ where
while !*must_exit.borrow() { while !*must_exit.borrow() {
let (stream, client_addr) = tokio::select! { let (stream, client_addr) = tokio::select! {
acc = listener.accept() => acc?, acc = listener.accept() => match acc {
Ok(r) => r,
Err(e) if e.kind() == std::io::ErrorKind::ConnectionAborted => continue,
Err(e) => return Err(e.into()),
},
_ = must_exit.changed() => continue, _ = must_exit.changed() => continue,
}; };

View file

@ -8,7 +8,6 @@ use hyper::{
body::{Body, Bytes}, body::{Body, Bytes},
Request, Response, Request, Response,
}; };
use idna::domain_to_unicode;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use garage_model::bucket_table::BucketParams; use garage_model::bucket_table::BucketParams;
@ -97,7 +96,7 @@ pub fn authority_to_host(authority: &str) -> Result<String, Error> {
authority authority
))), ))),
}; };
authority.map(|h| domain_to_unicode(h).0) authority.map(|h| h.to_ascii_lowercase())
} }
/// Extract the bucket name and the key name from an HTTP path and possibly a bucket provided in /// Extract the bucket name and the key name from an HTTP path and possibly a bucket provided in

View file

@ -1,4 +1,4 @@
use err_derive::Error; use thiserror::Error;
use crate::common_error::CommonError; use crate::common_error::CommonError;
pub use crate::common_error::{CommonErrorDerivative, OkOrBadRequest, OkOrInternalError}; pub use crate::common_error::{CommonErrorDerivative, OkOrBadRequest, OkOrInternalError};
@ -6,21 +6,21 @@ pub use crate::common_error::{CommonErrorDerivative, OkOrBadRequest, OkOrInterna
/// Errors of this crate /// Errors of this crate
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum Error { pub enum Error {
#[error(display = "{}", _0)] #[error("{0}")]
/// Error from common error /// Error from common error
Common(CommonError), Common(CommonError),
/// Authorization Header Malformed /// Authorization Header Malformed
#[error(display = "Authorization header malformed, unexpected scope: {}", _0)] #[error("Authorization header malformed, unexpected scope: {0}")]
AuthorizationHeaderMalformed(String), AuthorizationHeaderMalformed(String),
// Category: bad request // Category: bad request
/// The request contained an invalid UTF-8 sequence in its path or in other parameters /// The request contained an invalid UTF-8 sequence in its path or in other parameters
#[error(display = "Invalid UTF-8: {}", _0)] #[error("Invalid UTF-8: {0}")]
InvalidUtf8Str(#[error(source)] std::str::Utf8Error), InvalidUtf8Str(#[from] std::str::Utf8Error),
/// The provided digest (checksum) value was invalid /// The provided digest (checksum) value was invalid
#[error(display = "Invalid digest: {}", _0)] #[error("Invalid digest: {0}")]
InvalidDigest(String), InvalidDigest(String),
} }

View file

@ -104,7 +104,7 @@ async fn check_standard_signature(
// Verify that all necessary request headers are included in signed_headers // Verify that all necessary request headers are included in signed_headers
// The following must be included for all signatures: // The following must be included for all signatures:
// - the Host header (mandatory) // - the Host header (mandatory)
// - all x-amz-* headers used in the request // - all x-amz-* headers used in the request (except x-amz-content-sha256)
// AWS also indicates that the Content-Type header should be signed if // AWS also indicates that the Content-Type header should be signed if
// it is used, but Minio client doesn't sign it so we don't check it for compatibility. // it is used, but Minio client doesn't sign it so we don't check it for compatibility.
let signed_headers = split_signed_headers(&authorization)?; let signed_headers = split_signed_headers(&authorization)?;
@ -151,7 +151,7 @@ async fn check_presigned_signature(
// Verify that all necessary request headers are included in signed_headers // Verify that all necessary request headers are included in signed_headers
// For AWSv4 pre-signed URLs, the following must be included: // For AWSv4 pre-signed URLs, the following must be included:
// - the Host header (mandatory) // - the Host header (mandatory)
// - all x-amz-* headers used in the request // - all x-amz-* headers used in the request (except x-amz-content-sha256)
let signed_headers = split_signed_headers(&authorization)?; let signed_headers = split_signed_headers(&authorization)?;
verify_signed_headers(request.headers(), &signed_headers)?; verify_signed_headers(request.headers(), &signed_headers)?;
@ -268,7 +268,9 @@ fn verify_signed_headers(headers: &HeaderMap, signed_headers: &[HeaderName]) ->
return Err(Error::bad_request("Header `Host` should be signed")); return Err(Error::bad_request("Header `Host` should be signed"));
} }
for (name, _) in headers.iter() { for (name, _) in headers.iter() {
if name.as_str().starts_with("x-amz-") { // Enforce signature of all x-amz-* headers, except x-amz-content-sh256
// because it is included in the canonical request in all cases
if name.as_str().starts_with("x-amz-") && name != X_AMZ_CONTENT_SHA256 {
if !signed_headers.contains(name) { if !signed_headers.contains(name) {
return Err(Error::bad_request(format!( return Err(Error::bad_request(format!(
"Header `{}` should be signed", "Header `{}` should be signed",
@ -417,7 +419,7 @@ pub async fn verify_v4(
// ============ Authorization header, or X-Amz-* query params ========= // ============ Authorization header, or X-Amz-* query params =========
pub struct Authorization { pub struct Authorization {
key_id: String, pub key_id: String,
scope: String, scope: String,
signed_headers: String, signed_headers: String,
signature: String, signature: String,
@ -426,7 +428,7 @@ pub struct Authorization {
} }
impl Authorization { impl Authorization {
fn parse_header(headers: &HeaderMap) -> Result<Self, Error> { pub fn parse_header(headers: &HeaderMap) -> Result<Self, Error> {
let authorization = headers let authorization = headers
.get(AUTHORIZATION) .get(AUTHORIZATION)
.ok_or_bad_request("Missing authorization header")? .ok_or_bad_request("Missing authorization header")?
@ -468,8 +470,7 @@ impl Authorization {
let date = headers let date = headers
.get(X_AMZ_DATE) .get(X_AMZ_DATE)
.ok_or_bad_request("Missing X-Amz-Date field") .ok_or_bad_request("Missing X-Amz-Date field")?
.map_err(Error::from)?
.to_str()?; .to_str()?;
let date = parse_date(date)?; let date = parse_date(date)?;

View file

@ -1,6 +1,6 @@
[package] [package]
name = "garage_api_k2v" name = "garage_api_k2v"
version = "1.1.0" version = "1.3.1"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -20,7 +20,7 @@ garage_util = { workspace = true, features = [ "k2v" ] }
garage_api_common.workspace = true garage_api_common.workspace = true
base64.workspace = true base64.workspace = true
err-derive.workspace = true thiserror.workspace = true
tracing.workspace = true tracing.workspace = true
futures.workspace = true futures.workspace = true

View file

@ -176,6 +176,12 @@ impl ApiHandler for K2VApiServer {
Ok(resp_ok) Ok(resp_ok)
} }
fn key_id_from_request(&self, req: &Request<IncomingBody>) -> Option<String> {
garage_api_common::signature::payload::Authorization::parse_header(req.headers())
.map(|auth| auth.key_id)
.ok()
}
} }
impl ApiEndpoint for K2VApiEndpoint { impl ApiEndpoint for K2VApiEndpoint {

View file

@ -1,6 +1,6 @@
use err_derive::Error;
use hyper::header::HeaderValue; use hyper::header::HeaderValue;
use hyper::{HeaderMap, StatusCode}; use hyper::{HeaderMap, StatusCode};
use thiserror::Error;
use garage_api_common::common_error::{commonErrorDerivative, CommonError}; use garage_api_common::common_error::{commonErrorDerivative, CommonError};
pub(crate) use garage_api_common::common_error::{helper_error_as_internal, pass_helper_error}; pub(crate) use garage_api_common::common_error::{helper_error_as_internal, pass_helper_error};
@ -14,38 +14,38 @@ use garage_api_common::signature::error::Error as SignatureError;
/// Errors of this crate /// Errors of this crate
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum Error { pub enum Error {
#[error(display = "{}", _0)] #[error("{0}")]
/// Error from common error /// Error from common error
Common(#[error(source)] CommonError), Common(#[from] CommonError),
// Category: cannot process // Category: cannot process
/// Authorization Header Malformed /// Authorization Header Malformed
#[error(display = "Authorization header malformed, unexpected scope: {}", _0)] #[error("Authorization header malformed, unexpected scope: {0}")]
AuthorizationHeaderMalformed(String), AuthorizationHeaderMalformed(String),
/// The provided digest (checksum) value was invalid /// The provided digest (checksum) value was invalid
#[error(display = "Invalid digest: {}", _0)] #[error("Invalid digest: {0}")]
InvalidDigest(String), InvalidDigest(String),
/// The object requested don't exists /// The object requested don't exists
#[error(display = "Key not found")] #[error("Key not found")]
NoSuchKey, NoSuchKey,
/// Some base64 encoded data was badly encoded /// Some base64 encoded data was badly encoded
#[error(display = "Invalid base64: {}", _0)] #[error("Invalid base64: {0}")]
InvalidBase64(#[error(source)] base64::DecodeError), InvalidBase64(#[from] base64::DecodeError),
/// Invalid causality token /// Invalid causality token
#[error(display = "Invalid causality token")] #[error("Invalid causality token")]
InvalidCausalityToken, InvalidCausalityToken,
/// The client asked for an invalid return format (invalid Accept header) /// The client asked for an invalid return format (invalid Accept header)
#[error(display = "Not acceptable: {}", _0)] #[error("Not acceptable: {0}")]
NotAcceptable(String), NotAcceptable(String),
/// The request contained an invalid UTF-8 sequence in its path or in other parameters /// The request contained an invalid UTF-8 sequence in its path or in other parameters
#[error(display = "Invalid UTF-8: {}", _0)] #[error("Invalid UTF-8: {0}")]
InvalidUtf8Str(#[error(source)] std::str::Utf8Error), InvalidUtf8Str(#[from] std::str::Utf8Error),
} }
commonErrorDerivative!(Error); commonErrorDerivative!(Error);

View file

@ -1,6 +1,6 @@
[package] [package]
name = "garage_api_s3" name = "garage_api_s3"
version = "1.1.0" version = "1.3.1"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -29,7 +29,7 @@ bytes.workspace = true
chrono.workspace = true chrono.workspace = true
crc32fast.workspace = true crc32fast.workspace = true
crc32c.workspace = true crc32c.workspace = true
err-derive.workspace = true thiserror.workspace = true
hex.workspace = true hex.workspace = true
tracing.workspace = true tracing.workspace = true
md-5.workspace = true md-5.workspace = true

View file

@ -226,6 +226,7 @@ impl ApiHandler for S3ApiServer {
Endpoint::DeleteBucket {} => handle_delete_bucket(ctx).await, Endpoint::DeleteBucket {} => handle_delete_bucket(ctx).await,
Endpoint::GetBucketLocation {} => handle_get_bucket_location(ctx), Endpoint::GetBucketLocation {} => handle_get_bucket_location(ctx),
Endpoint::GetBucketVersioning {} => handle_get_bucket_versioning(), Endpoint::GetBucketVersioning {} => handle_get_bucket_versioning(),
Endpoint::GetBucketAcl {} => handle_get_bucket_acl(ctx),
Endpoint::ListObjects { Endpoint::ListObjects {
delimiter, delimiter,
encoding_type, encoding_type,
@ -342,6 +343,12 @@ impl ApiHandler for S3ApiServer {
Ok(resp_ok) Ok(resp_ok)
} }
fn key_id_from_request(&self, req: &Request<IncomingBody>) -> Option<String> {
garage_api_common::signature::payload::Authorization::parse_header(req.headers())
.map(|auth| auth.key_id)
.ok()
}
} }
impl ApiEndpoint for S3ApiEndpoint { impl ApiEndpoint for S3ApiEndpoint {

View file

@ -5,7 +5,7 @@ use hyper::{Request, Response, StatusCode};
use garage_model::bucket_alias_table::*; use garage_model::bucket_alias_table::*;
use garage_model::bucket_table::Bucket; use garage_model::bucket_table::Bucket;
use garage_model::garage::Garage; use garage_model::garage::Garage;
use garage_model::key_table::Key; use garage_model::key_table::{Key, KeyParams};
use garage_model::permission::BucketKeyPerm; use garage_model::permission::BucketKeyPerm;
use garage_table::util::*; use garage_table::util::*;
use garage_util::crdt::*; use garage_util::crdt::*;
@ -44,6 +44,55 @@ pub fn handle_get_bucket_versioning() -> Result<Response<ResBody>, Error> {
.body(string_body(xml))?) .body(string_body(xml))?)
} }
pub fn handle_get_bucket_acl(ctx: ReqCtx) -> Result<Response<ResBody>, Error> {
let ReqCtx {
bucket_id, api_key, ..
} = ctx;
let key_p = api_key.params().ok_or_internal_error(
"Key should not be in deleted state at this point (in handle_get_bucket_acl)",
)?;
let mut grants: Vec<s3_xml::Grant> = vec![];
let kp = api_key.bucket_permissions(&bucket_id);
if kp.allow_owner {
grants.push(s3_xml::Grant {
grantee: create_grantee(&key_p, &api_key),
permission: s3_xml::Value("FULL_CONTROL".to_string()),
});
} else {
if kp.allow_read {
grants.push(s3_xml::Grant {
grantee: create_grantee(&key_p, &api_key),
permission: s3_xml::Value("READ".to_string()),
});
grants.push(s3_xml::Grant {
grantee: create_grantee(&key_p, &api_key),
permission: s3_xml::Value("READ_ACP".to_string()),
});
}
if kp.allow_write {
grants.push(s3_xml::Grant {
grantee: create_grantee(&key_p, &api_key),
permission: s3_xml::Value("WRITE".to_string()),
});
}
}
let access_control_policy = s3_xml::AccessControlPolicy {
xmlns: (),
owner: None,
acl: s3_xml::AccessControlList { entries: grants },
};
let xml = s3_xml::to_xml_with_header(&access_control_policy)?;
trace!("xml: {}", xml);
Ok(Response::builder()
.header("Content-Type", "application/xml")
.body(string_body(xml))?)
}
pub async fn handle_list_buckets( pub async fn handle_list_buckets(
garage: &Garage, garage: &Garage,
api_key: &Key, api_key: &Key,
@ -172,7 +221,7 @@ pub async fn handle_create_bucket(
} }
// Create the bucket! // Create the bucket!
if !is_valid_bucket_name(&bucket_name) { if !is_valid_bucket_name(&bucket_name, garage.config.allow_punycode) {
return Err(Error::bad_request(format!( return Err(Error::bad_request(format!(
"{}: {}", "{}: {}",
bucket_name, INVALID_BUCKET_NAME_MESSAGE bucket_name, INVALID_BUCKET_NAME_MESSAGE
@ -241,11 +290,11 @@ pub async fn handle_delete_bucket(ctx: ReqCtx) -> Result<Response<ResBody>, Erro
// 1. delete bucket alias // 1. delete bucket alias
if is_local_alias { if is_local_alias {
helper helper
.unset_local_bucket_alias(*bucket_id, &api_key.key_id, bucket_name) .purge_local_bucket_alias(*bucket_id, &api_key.key_id, bucket_name)
.await?; .await?;
} else { } else {
helper helper
.unset_global_bucket_alias(*bucket_id, bucket_name) .purge_global_bucket_alias(*bucket_id, bucket_name)
.await?; .await?;
} }
@ -311,6 +360,15 @@ fn parse_create_bucket_xml(xml_bytes: &[u8]) -> Option<Option<String>> {
Some(ret) Some(ret)
} }
fn create_grantee(key_params: &KeyParams, api_key: &Key) -> s3_xml::Grantee {
s3_xml::Grantee {
xmlns_xsi: (),
typ: "CanonicalUser".to_string(),
display_name: Some(s3_xml::Value(key_params.name.get().to_string())),
id: Some(s3_xml::Value(api_key.key_id.to_string())),
}
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;

View file

@ -26,9 +26,10 @@ use garage_api_common::signature::checksum::*;
use crate::api_server::{ReqBody, ResBody}; use crate::api_server::{ReqBody, ResBody};
use crate::encryption::EncryptionParams; use crate::encryption::EncryptionParams;
use crate::error::*; use crate::error::*;
use crate::get::{full_object_byte_stream, PreconditionHeaders}; use crate::get::{check_version_not_deleted, full_object_byte_stream, PreconditionHeaders};
use crate::multipart; use crate::multipart;
use crate::put::{extract_metadata_headers, save_stream, ChecksumMode, SaveStreamResult}; use crate::put::{extract_metadata_headers, save_stream, ChecksumMode, SaveStreamResult};
use crate::website::X_AMZ_WEBSITE_REDIRECT_LOCATION;
use crate::xml::{self as s3_xml, xmlns_tag}; use crate::xml::{self as s3_xml, xmlns_tag};
pub const X_AMZ_COPY_SOURCE_IF_MATCH: HeaderName = pub const X_AMZ_COPY_SOURCE_IF_MATCH: HeaderName =
@ -84,7 +85,18 @@ pub async fn handle_copy(
Some(v) if v == hyper::header::HeaderValue::from_static("REPLACE") => { Some(v) if v == hyper::header::HeaderValue::from_static("REPLACE") => {
extract_metadata_headers(req.headers())? extract_metadata_headers(req.headers())?
} }
_ => source_object_meta_inner.into_owned().headers, _ => {
// The x-amz-website-redirect-location header is not copied, instead
// it is replaced by the value from the request (or removed if no
// value was specified)
let is_redirect =
|(key, _): &(String, String)| key == X_AMZ_WEBSITE_REDIRECT_LOCATION.as_str();
let mut headers: Vec<_> = source_object_meta_inner.headers.clone();
headers.retain(|h| !is_redirect(h));
let new_headers = extract_metadata_headers(req.headers())?;
headers.extend(new_headers.into_iter().filter(is_redirect));
headers
}
}, },
checksum: source_checksum, checksum: source_checksum,
}; };
@ -225,6 +237,7 @@ async fn handle_copy_metaonly(
.get(&source_version.uuid, &EmptyKey) .get(&source_version.uuid, &EmptyKey)
.await?; .await?;
let source_version = source_version.ok_or(Error::NoSuchKey)?; let source_version = source_version.ok_or(Error::NoSuchKey)?;
check_version_not_deleted(&source_version)?;
// Write an "uploading" marker in Object table // Write an "uploading" marker in Object table
// This holds a reference to the object in the Version table // This holds a reference to the object in the Version table
@ -416,6 +429,7 @@ pub async fn handle_upload_part_copy(
.get(&source_object_version.uuid, &EmptyKey) .get(&source_object_version.uuid, &EmptyKey)
.await? .await?
.ok_or(Error::NoSuchKey)?; .ok_or(Error::NoSuchKey)?;
check_version_not_deleted(&source_version)?;
// We want to reuse blocks from the source version as much as possible. // We want to reuse blocks from the source version as much as possible.
// However, we still need to get the data from these blocks // However, we still need to get the data from these blocks
@ -547,6 +561,7 @@ pub async fn handle_upload_part_copy(
let mut current_offset = 0; let mut current_offset = 0;
let mut next_block = defragmenter.next().await?; let mut next_block = defragmenter.next().await?;
let mut blocks_to_dup = dest_version.clone();
// TODO this could be optimized similarly to read_and_put_blocks // TODO this could be optimized similarly to read_and_put_blocks
// low priority because uploadpartcopy is rarely used // low priority because uploadpartcopy is rarely used
@ -576,8 +591,7 @@ pub async fn handle_upload_part_copy(
.unwrap()?; .unwrap()?;
checksummer = checksummer_updated; checksummer = checksummer_updated;
dest_version.blocks.clear(); let (version_block_key, version_block) = (
dest_version.blocks.put(
VersionBlockKey { VersionBlockKey {
part_number, part_number,
offset: current_offset, offset: current_offset,
@ -589,25 +603,23 @@ pub async fn handle_upload_part_copy(
); );
current_offset += data_len; current_offset += data_len;
let next = if let Some(final_data) = data_to_upload {
dest_version.blocks.clear();
dest_version.blocks.put(version_block_key, version_block);
let block_ref = BlockRef { let block_ref = BlockRef {
block: final_hash, block: final_hash,
version: dest_version_id, version: dest_version_id,
deleted: false.into(), deleted: false.into(),
}; };
let (_, _, _, next) = futures::try_join!( let (_, _, _, next) = futures::try_join!(
// Thing 1: if the block is not exactly a block that existed before, // Thing 1: if the block is not exactly a block that existed before,
// we need to insert that data as a new block. // we need to insert that data as a new block.
async { garage.block_manager.rpc_put_block(
if let Some(final_data) = data_to_upload { final_hash,
garage final_data,
.block_manager dest_encryption.is_encrypted(),
.rpc_put_block(final_hash, final_data, dest_encryption.is_encrypted(), None) None
.await ),
} else {
Ok(())
}
},
// Thing 2: we need to insert the block in the version // Thing 2: we need to insert the block in the version
garage.version_table.insert(&dest_version), garage.version_table.insert(&dest_version),
// Thing 3: we need to add a block reference // Thing 3: we need to add a block reference
@ -615,11 +627,32 @@ pub async fn handle_upload_part_copy(
// Thing 4: we need to read the next block // Thing 4: we need to read the next block
defragmenter.next(), defragmenter.next(),
)?; )?;
next
} else {
blocks_to_dup.blocks.put(version_block_key, version_block);
defragmenter.next().await?
};
next_block = next; next_block = next;
} }
assert_eq!(current_offset, source_range.length); assert_eq!(current_offset, source_range.length);
// Put the duplicated blocks into the version & block_refs tables
let block_refs_to_put = blocks_to_dup
.blocks
.items()
.iter()
.map(|b| BlockRef {
block: b.1.hash,
version: dest_version_id,
deleted: false.into(),
})
.collect::<Vec<_>>();
futures::try_join!(
garage.version_table.insert(&blocks_to_dup),
garage.block_ref_table.insert_many(&block_refs_to_put[..]),
)?;
let checksums = checksummer.finalize(); let checksums = checksummer.finalize();
let etag = dest_encryption.etag_from_md5(&checksums.md5); let etag = dest_encryption.etag_from_md5(&checksums.md5);
let checksum = checksums.extract(dest_object_checksum_algorithm); let checksum = checksums.extract(dest_object_checksum_algorithm);

View file

@ -88,7 +88,9 @@ pub async fn handle_put_cors(
pub struct CorsConfiguration { pub struct CorsConfiguration {
#[serde(serialize_with = "xmlns_tag", skip_deserializing)] #[serde(serialize_with = "xmlns_tag", skip_deserializing)]
pub xmlns: (), pub xmlns: (),
#[serde(rename = "CORSRule")] // "default" is required to be able to parse an empty list of rules,
// cf https://docs.rs/quick-xml/latest/quick_xml/de/#sequences-xsall-and-xssequence-xml-schema-types
#[serde(rename = "CORSRule", default)]
pub cors_rules: Vec<CorsRule>, pub cors_rules: Vec<CorsRule>,
} }
@ -270,4 +272,26 @@ mod tests {
Ok(()) Ok(())
} }
#[test]
fn test_deserialize_norules() -> Result<(), Error> {
let message = r#"<?xml version="1.0" encoding="UTF-8"?>
<CORSConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/" />"#;
let conf: CorsConfiguration = from_str(message).unwrap();
let ref_value = CorsConfiguration {
xmlns: (),
cors_rules: vec![],
};
assert_eq! {
ref_value,
conf
};
let message2 = to_xml_with_header(&ref_value)?;
let cleanup = |c: &str| c.replace(char::is_whitespace, "");
assert_eq!(cleanup(message), cleanup(&message2));
Ok(())
}
} }

View file

@ -1,8 +1,8 @@
use std::convert::TryInto; use std::convert::TryInto;
use err_derive::Error;
use hyper::header::HeaderValue; use hyper::header::HeaderValue;
use hyper::{HeaderMap, StatusCode}; use hyper::{HeaderMap, StatusCode};
use thiserror::Error;
use garage_model::helper::error::Error as HelperError; use garage_model::helper::error::Error as HelperError;
@ -25,67 +25,67 @@ use crate::xml as s3_xml;
/// Errors of this crate /// Errors of this crate
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum Error { pub enum Error {
#[error(display = "{}", _0)] #[error("{0}")]
/// Error from common error /// Error from common error
Common(#[error(source)] CommonError), Common(#[from] CommonError),
// Category: cannot process // Category: cannot process
/// Authorization Header Malformed /// Authorization Header Malformed
#[error(display = "Authorization header malformed, unexpected scope: {}", _0)] #[error("Authorization header malformed, unexpected scope: {0}")]
AuthorizationHeaderMalformed(String), AuthorizationHeaderMalformed(String),
/// The object requested don't exists /// The object requested don't exists
#[error(display = "Key not found")] #[error("Key not found")]
NoSuchKey, NoSuchKey,
/// The multipart upload requested don't exists /// The multipart upload requested don't exists
#[error(display = "Upload not found")] #[error("Upload not found")]
NoSuchUpload, NoSuchUpload,
/// Precondition failed (e.g. x-amz-copy-source-if-match) /// Precondition failed (e.g. x-amz-copy-source-if-match)
#[error(display = "At least one of the preconditions you specified did not hold")] #[error("At least one of the preconditions you specified did not hold")]
PreconditionFailed, PreconditionFailed,
/// Parts specified in CMU request do not match parts actually uploaded /// Parts specified in CMU request do not match parts actually uploaded
#[error(display = "Parts given to CompleteMultipartUpload do not match uploaded parts")] #[error("Parts given to CompleteMultipartUpload do not match uploaded parts")]
InvalidPart, InvalidPart,
/// Parts given to CompleteMultipartUpload were not in ascending order /// Parts given to CompleteMultipartUpload were not in ascending order
#[error(display = "Parts given to CompleteMultipartUpload were not in ascending order")] #[error("Parts given to CompleteMultipartUpload were not in ascending order")]
InvalidPartOrder, InvalidPartOrder,
/// In CompleteMultipartUpload: not enough data /// In CompleteMultipartUpload: not enough data
/// (here we are more lenient than AWS S3) /// (here we are more lenient than AWS S3)
#[error(display = "Proposed upload is smaller than the minimum allowed object size")] #[error("Proposed upload is smaller than the minimum allowed object size")]
EntityTooSmall, EntityTooSmall,
// Category: bad request // Category: bad request
/// The request contained an invalid UTF-8 sequence in its path or in other parameters /// The request contained an invalid UTF-8 sequence in its path or in other parameters
#[error(display = "Invalid UTF-8: {}", _0)] #[error("Invalid UTF-8: {0}")]
InvalidUtf8Str(#[error(source)] std::str::Utf8Error), InvalidUtf8Str(#[from] std::str::Utf8Error),
/// The request used an invalid path /// The request used an invalid path
#[error(display = "Invalid UTF-8: {}", _0)] #[error("Invalid UTF-8: {0}")]
InvalidUtf8String(#[error(source)] std::string::FromUtf8Error), InvalidUtf8String(#[from] std::string::FromUtf8Error),
/// The client sent invalid XML data /// The client sent invalid XML data
#[error(display = "Invalid XML: {}", _0)] #[error("Invalid XML: {0}")]
InvalidXml(String), InvalidXml(String),
/// The client sent a range header with invalid value /// The client sent a range header with invalid value
#[error(display = "Invalid HTTP range: {:?}", _0)] #[error("Invalid HTTP range: {0:?}")]
InvalidRange(#[error(from)] (http_range::HttpRangeParseError, u64)), InvalidRange((http_range::HttpRangeParseError, u64)),
/// The client sent a range header with invalid value /// The client sent a range header with invalid value
#[error(display = "Invalid encryption algorithm: {:?}, should be AES256", _0)] #[error("Invalid encryption algorithm: {0:?}, should be AES256")]
InvalidEncryptionAlgorithm(String), InvalidEncryptionAlgorithm(String),
/// The provided digest (checksum) value was invalid /// The provided digest (checksum) value was invalid
#[error(display = "Invalid digest: {}", _0)] #[error("Invalid digest: {0}")]
InvalidDigest(String), InvalidDigest(String),
/// The client sent a request for an action not supported by garage /// The client sent a request for an action not supported by garage
#[error(display = "Unimplemented action: {}", _0)] #[error("Unimplemented action: {0}")]
NotImplemented(String), NotImplemented(String),
} }
@ -99,6 +99,12 @@ impl From<HelperError> for Error {
} }
} }
impl From<(http_range::HttpRangeParseError, u64)> for Error {
fn from(err: (http_range::HttpRangeParseError, u64)) -> Error {
Error::InvalidRange(err)
}
}
impl From<roxmltree::Error> for Error { impl From<roxmltree::Error> for Error {
fn from(err: roxmltree::Error) -> Self { fn from(err: roxmltree::Error) -> Self {
Self::InvalidXml(format!("{}", err)) Self::InvalidXml(format!("{}", err))

View file

@ -19,12 +19,13 @@ use garage_net::stream::ByteStream;
use garage_rpc::rpc_helper::OrderTag; use garage_rpc::rpc_helper::OrderTag;
use garage_table::EmptyKey; use garage_table::EmptyKey;
use garage_util::data::*; use garage_util::data::*;
use garage_util::error::OkOrMessage; use garage_util::error::{Error as UtilError, OkOrMessage};
use garage_model::garage::Garage; use garage_model::garage::Garage;
use garage_model::s3::object_table::*; use garage_model::s3::object_table::*;
use garage_model::s3::version_table::*; use garage_model::s3::version_table::*;
use garage_api_common::common_error::CommonError;
use garage_api_common::helpers::*; use garage_api_common::helpers::*;
use garage_api_common::signature::checksum::{add_checksum_response_headers, X_AMZ_CHECKSUM_MODE}; use garage_api_common::signature::checksum::{add_checksum_response_headers, X_AMZ_CHECKSUM_MODE};
@ -215,6 +216,7 @@ pub async fn handle_head_without_ctx(
.get(&object_version.uuid, &EmptyKey) .get(&object_version.uuid, &EmptyKey)
.await? .await?
.ok_or(Error::NoSuchKey)?; .ok_or(Error::NoSuchKey)?;
check_version_not_deleted(&version)?;
let (part_offset, part_end) = let (part_offset, part_end) =
calculate_part_bounds(&version, pn).ok_or(Error::InvalidPart)?; calculate_part_bounds(&version, pn).ok_or(Error::InvalidPart)?;
@ -365,6 +367,21 @@ pub async fn handle_get_without_ctx(
} }
} }
pub(crate) fn check_version_not_deleted(version: &Version) -> Result<(), Error> {
if version.deleted.get() {
// the version was deleted between when the object_table was consulted
// and now, this could mean the object was deleted, or overriden.
// Rather than say the key doesn't exist, return a transient error
// to signal the client to try again.
return Err(CommonError::InternalError(UtilError::Message(
"conflict/inconsistency between object and version state, version is deleted"
.to_string(),
))
.into());
}
Ok(())
}
async fn handle_get_full( async fn handle_get_full(
garage: Arc<Garage>, garage: Arc<Garage>,
version: &ObjectVersion, version: &ObjectVersion,
@ -431,6 +448,7 @@ pub fn full_object_byte_stream(
.ok_or_message("channel closed")?; .ok_or_message("channel closed")?;
let version = version_fut.await.unwrap()?.ok_or(Error::NoSuchKey)?; let version = version_fut.await.unwrap()?.ok_or(Error::NoSuchKey)?;
check_version_not_deleted(&version)?;
for (i, (_, vb)) in version.blocks.items().iter().enumerate().skip(1) { for (i, (_, vb)) in version.blocks.items().iter().enumerate().skip(1) {
let stream_block_i = encryption let stream_block_i = encryption
.get_block(&garage, &vb.hash, Some(order_stream.order(i as u64))) .get_block(&garage, &vb.hash, Some(order_stream.order(i as u64)))
@ -446,6 +464,14 @@ pub fn full_object_byte_stream(
{ {
Ok(()) => (), Ok(()) => (),
Err(e) => { Err(e) => {
// TODO i think this is a bad idea, we should log
// an error and stop there. If the error happens to
// be exactly the size of what hasn't been streamed
// yet, the client will see the request as a
// success
// instead truncating the output notify the client
// something happened with their download, so that
// they can retry it
let _ = tx.send(error_stream_item(e)).await; let _ = tx.send(error_stream_item(e)).await;
} }
} }
@ -497,7 +523,7 @@ async fn handle_get_range(
.get(&version.uuid, &EmptyKey) .get(&version.uuid, &EmptyKey)
.await? .await?
.ok_or(Error::NoSuchKey)?; .ok_or(Error::NoSuchKey)?;
check_version_not_deleted(&version)?;
let body = let body =
body_from_blocks_range(garage, encryption, version.blocks.items(), begin, end); body_from_blocks_range(garage, encryption, version.blocks.items(), begin, end);
Ok(resp_builder.body(body)?) Ok(resp_builder.body(body)?)
@ -548,6 +574,8 @@ async fn handle_get_part(
.await? .await?
.ok_or(Error::NoSuchKey)?; .ok_or(Error::NoSuchKey)?;
check_version_not_deleted(&version)?;
let (begin, end) = let (begin, end) =
calculate_part_bounds(&version, part_number).ok_or(Error::InvalidPart)?; calculate_part_bounds(&version, part_number).ok_or(Error::InvalidPart)?;
@ -817,7 +845,9 @@ impl PreconditionHeaders {
} }
fn check(&self, v: &ObjectVersion, etag: &str) -> Result<Option<StatusCode>, Error> { fn check(&self, v: &ObjectVersion, etag: &str) -> Result<Option<StatusCode>, Error> {
let v_date = UNIX_EPOCH + Duration::from_millis(v.timestamp); // we store date with ms precision, but headers are precise to the second: truncate
// the timestamp to handle the same-second edge case
let v_date = UNIX_EPOCH + Duration::from_secs(v.timestamp / 1000);
// Implemented from https://datatracker.ietf.org/doc/html/rfc7232#section-6 // Implemented from https://datatracker.ietf.org/doc/html/rfc7232#section-6

View file

@ -27,7 +27,7 @@ pub async fn handle_get_lifecycle(ctx: ReqCtx) -> Result<Response<ResBody>, Erro
.body(string_body(xml))?) .body(string_body(xml))?)
} else { } else {
Ok(Response::builder() Ok(Response::builder()
.status(StatusCode::NO_CONTENT) .status(StatusCode::NOT_FOUND)
.body(empty_body())?) .body(empty_body())?)
} }
} }

View file

@ -141,10 +141,26 @@ pub async fn handle_post_object(
let mut conditions = decoded_policy.into_conditions()?; let mut conditions = decoded_policy.into_conditions()?;
// If there are conditions on the bucket name, check these against the actual bucket_name rather
// than the one in params, which is allowed to be absent.
if let Some(conds) = conditions.params.remove("bucket") {
for cond in conds {
let ok = match cond {
Operation::Equal(s) => s.as_str() == bucket_name,
Operation::StartsWith(s) => bucket_name.starts_with(&s),
};
if !ok {
return Err(Error::bad_request(
"Key 'bucket' has value not allowed in policy",
));
}
}
}
for (param_key, value) in params.iter() { for (param_key, value) in params.iter() {
let param_key = param_key.as_str(); let param_key = param_key.as_str();
match param_key { match param_key {
"policy" | "x-amz-signature" => (), // this is always accepted, as it's required to validate other fields "policy" | "x-amz-signature" | "bucket" => (), // this is always accepted, as it's required to validate other fields
"content-type" => { "content-type" => {
let conds = conditions.params.remove("content-type").ok_or_else(|| { let conds = conditions.params.remove("content-type").ok_or_else(|| {
Error::bad_request(format!("Key '{}' is not allowed in policy", param_key)) Error::bad_request(format!("Key '{}' is not allowed in policy", param_key))

View file

@ -39,8 +39,6 @@ use crate::encryption::EncryptionParams;
use crate::error::*; use crate::error::*;
use crate::website::X_AMZ_WEBSITE_REDIRECT_LOCATION; use crate::website::X_AMZ_WEBSITE_REDIRECT_LOCATION;
const PUT_BLOCKS_MAX_PARALLEL: usize = 3;
pub(crate) struct SaveStreamResult { pub(crate) struct SaveStreamResult {
pub(crate) version_uuid: Uuid, pub(crate) version_uuid: Uuid,
pub(crate) version_timestamp: u64, pub(crate) version_timestamp: u64,
@ -493,7 +491,7 @@ pub(crate) async fn read_and_put_blocks<S: Stream<Item = Result<Bytes, Error>> +
}; };
let recv_next = async { let recv_next = async {
// If more than a maximum number of writes are in progress, don't add more for now // If more than a maximum number of writes are in progress, don't add more for now
if currently_running >= PUT_BLOCKS_MAX_PARALLEL { if currently_running >= ctx.garage.config.block_max_concurrent_writes_per_request {
futures::future::pending().await futures::future::pending().await
} else { } else {
block_rx3.recv().await block_rx3.recv().await

View file

@ -13,6 +13,10 @@ pub fn xmlns_tag<S: Serializer>(_v: &(), s: S) -> Result<S::Ok, S::Error> {
s.serialize_str("http://s3.amazonaws.com/doc/2006-03-01/") s.serialize_str("http://s3.amazonaws.com/doc/2006-03-01/")
} }
pub fn xmlns_xsi_tag<S: Serializer>(_v: &(), s: S) -> Result<S::Ok, S::Error> {
s.serialize_str("http://www.w3.org/2001/XMLSchema-instance")
}
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
pub struct Value(#[serde(rename = "$value")] pub String); pub struct Value(#[serde(rename = "$value")] pub String);
@ -319,6 +323,42 @@ pub struct PostObject {
pub etag: Value, pub etag: Value,
} }
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct Grantee {
#[serde(rename = "xmlns:xsi", serialize_with = "xmlns_xsi_tag")]
pub xmlns_xsi: (),
#[serde(rename = "xsi:type")]
pub typ: String,
#[serde(rename = "DisplayName")]
pub display_name: Option<Value>,
#[serde(rename = "ID")]
pub id: Option<Value>,
}
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct Grant {
#[serde(rename = "Grantee")]
pub grantee: Grantee,
#[serde(rename = "Permission")]
pub permission: Value,
}
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct AccessControlList {
#[serde(rename = "Grant")]
pub entries: Vec<Grant>,
}
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct AccessControlPolicy {
#[serde(serialize_with = "xmlns_tag")]
pub xmlns: (),
#[serde(rename = "Owner")]
pub owner: Option<Owner>,
#[serde(rename = "AccessControlList")]
pub acl: AccessControlList,
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@ -427,6 +467,43 @@ mod tests {
Ok(()) Ok(())
} }
#[test]
fn get_bucket_acl_result() -> Result<(), ApiError> {
let grant = Grant {
grantee: Grantee {
xmlns_xsi: (),
typ: "CanonicalUser".to_string(),
display_name: Some(Value("owner_name".to_string())),
id: Some(Value("qsdfjklm".to_string())),
},
permission: Value("FULL_CONTROL".to_string()),
};
let get_bucket_acl = AccessControlPolicy {
xmlns: (),
owner: None,
acl: AccessControlList {
entries: vec![grant],
},
};
assert_eq!(
to_xml_with_header(&get_bucket_acl)?,
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\
<AccessControlPolicy xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\">\
<AccessControlList>\
<Grant>\
<Grantee xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:type=\"CanonicalUser\">\
<DisplayName>owner_name</DisplayName>\
<ID>qsdfjklm</ID>\
</Grantee>\
<Permission>FULL_CONTROL</Permission>\
</Grant>\
</AccessControlList>\
</AccessControlPolicy>"
);
Ok(())
}
#[test] #[test]
fn delete_result() -> Result<(), ApiError> { fn delete_result() -> Result<(), ApiError> {
let delete_result = DeleteResult { let delete_result = DeleteResult {

View file

@ -1,6 +1,6 @@
[package] [package]
name = "garage_block" name = "garage_block"
version = "1.1.0" version = "1.3.1"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"

View file

@ -50,6 +50,8 @@ pub const INLINE_THRESHOLD: usize = 3072;
// to delete the block locally. // to delete the block locally.
pub(crate) const BLOCK_GC_DELAY: Duration = Duration::from_secs(600); pub(crate) const BLOCK_GC_DELAY: Duration = Duration::from_secs(600);
const BLOCK_READ_SEMAPHORE_TIMEOUT: Duration = Duration::from_secs(15);
/// RPC messages used to share blocks of data between nodes /// RPC messages used to share blocks of data between nodes
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
pub enum BlockRpc { pub enum BlockRpc {
@ -87,6 +89,7 @@ pub struct BlockManager {
disable_scrub: bool, disable_scrub: bool,
mutation_lock: Vec<Mutex<BlockManagerLocked>>, mutation_lock: Vec<Mutex<BlockManagerLocked>>,
read_semaphore: Semaphore,
pub rc: BlockRc, pub rc: BlockRc,
pub resync: BlockResyncManager, pub resync: BlockResyncManager,
@ -176,6 +179,8 @@ impl BlockManager {
.iter() .iter()
.map(|_| Mutex::new(BlockManagerLocked())) .map(|_| Mutex::new(BlockManagerLocked()))
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
read_semaphore: Semaphore::new(config.block_max_concurrent_reads),
rc, rc,
resync, resync,
system, system,
@ -408,8 +413,8 @@ impl BlockManager {
} }
/// Get number of items in the refcount table /// Get number of items in the refcount table
pub fn rc_len(&self) -> Result<usize, Error> { pub fn rc_approximate_len(&self) -> Result<usize, Error> {
Ok(self.rc.rc_table.len()?) Ok(self.rc.rc_table.approximate_len()?)
} }
/// Send command to start/stop/manager scrub worker /// Send command to start/stop/manager scrub worker
@ -427,7 +432,7 @@ impl BlockManager {
/// List all resync errors /// List all resync errors
pub fn list_resync_errors(&self) -> Result<Vec<BlockResyncErrorInfo>, Error> { pub fn list_resync_errors(&self) -> Result<Vec<BlockResyncErrorInfo>, Error> {
let mut blocks = Vec::with_capacity(self.resync.errors.len()?); let mut blocks = Vec::with_capacity(self.resync.errors.approximate_len()?);
for ent in self.resync.errors.iter()? { for ent in self.resync.errors.iter()? {
let (hash, cnt) = ent?; let (hash, cnt) = ent?;
let cnt = ErrorCounter::decode(&cnt); let cnt = ErrorCounter::decode(&cnt);
@ -557,9 +562,6 @@ impl BlockManager {
match self.find_block(hash).await { match self.find_block(hash).await {
Some(p) => self.read_block_from(hash, &p).await, Some(p) => self.read_block_from(hash, &p).await,
None => { None => {
// Not found but maybe we should have had it ??
self.resync
.put_to_resync(hash, 2 * self.system.rpc_helper().rpc_timeout())?;
return Err(Error::Message(format!( return Err(Error::Message(format!(
"block {:?} not found on node", "block {:?} not found on node",
hash hash
@ -581,6 +583,15 @@ impl BlockManager {
) -> Result<DataBlock, Error> { ) -> Result<DataBlock, Error> {
let (header, path) = block_path.as_parts_ref(); let (header, path) = block_path.as_parts_ref();
let permit = tokio::select! {
sem = self.read_semaphore.acquire() => sem.ok_or_message("acquire read semaphore")?,
_ = tokio::time::sleep(BLOCK_READ_SEMAPHORE_TIMEOUT) => {
self.metrics.block_read_semaphore_timeouts.add(1);
debug!("read block {:?}: read_semaphore acquire timeout", hash);
return Err(Error::Message("read block: read_semaphore acquire timeout".into()));
}
};
let mut f = fs::File::open(&path).await?; let mut f = fs::File::open(&path).await?;
let mut data = vec![]; let mut data = vec![];
f.read_to_end(&mut data).await?; f.read_to_end(&mut data).await?;
@ -605,6 +616,8 @@ impl BlockManager {
return Err(Error::CorruptData(*hash)); return Err(Error::CorruptData(*hash));
} }
drop(permit);
Ok(data) Ok(data)
} }
@ -770,6 +783,7 @@ impl BlockManagerLocked {
let mut f = fs::File::create(&path_tmp).await?; let mut f = fs::File::create(&path_tmp).await?;
f.write_all(data).await?; f.write_all(data).await?;
f.flush().await?;
mgr.metrics.bytes_written.add(data.len() as u64); mgr.metrics.bytes_written.add(data.len() as u64);
if mgr.data_fsync { if mgr.data_fsync {

View file

@ -22,6 +22,7 @@ pub struct BlockManagerMetrics {
pub(crate) bytes_read: BoundCounter<u64>, pub(crate) bytes_read: BoundCounter<u64>,
pub(crate) block_read_duration: BoundValueRecorder<f64>, pub(crate) block_read_duration: BoundValueRecorder<f64>,
pub(crate) block_read_semaphore_timeouts: BoundCounter<u64>,
pub(crate) bytes_written: BoundCounter<u64>, pub(crate) bytes_written: BoundCounter<u64>,
pub(crate) block_write_duration: BoundValueRecorder<f64>, pub(crate) block_write_duration: BoundValueRecorder<f64>,
pub(crate) delete_counter: BoundCounter<u64>, pub(crate) delete_counter: BoundCounter<u64>,
@ -50,7 +51,7 @@ impl BlockManagerMetrics {
.init(), .init(),
_rc_size: meter _rc_size: meter
.u64_value_observer("block.rc_size", move |observer| { .u64_value_observer("block.rc_size", move |observer| {
if let Ok(value) = rc_tree.len() { if let Ok(value) = rc_tree.approximate_len() {
observer.observe(value as u64, &[]) observer.observe(value as u64, &[])
} }
}) })
@ -58,7 +59,7 @@ impl BlockManagerMetrics {
.init(), .init(),
_resync_queue_len: meter _resync_queue_len: meter
.u64_value_observer("block.resync_queue_length", move |observer| { .u64_value_observer("block.resync_queue_length", move |observer| {
if let Ok(value) = resync_queue.len() { if let Ok(value) = resync_queue.approximate_len() {
observer.observe(value as u64, &[]); observer.observe(value as u64, &[]);
} }
}) })
@ -68,7 +69,7 @@ impl BlockManagerMetrics {
.init(), .init(),
_resync_errored_blocks: meter _resync_errored_blocks: meter
.u64_value_observer("block.resync_errored_blocks", move |observer| { .u64_value_observer("block.resync_errored_blocks", move |observer| {
if let Ok(value) = resync_errors.len() { if let Ok(value) = resync_errors.approximate_len() {
observer.observe(value as u64, &[]); observer.observe(value as u64, &[]);
} }
}) })
@ -119,6 +120,11 @@ impl BlockManagerMetrics {
.with_description("Duration of block read operations") .with_description("Duration of block read operations")
.init() .init()
.bind(&[]), .bind(&[]),
block_read_semaphore_timeouts: meter
.u64_counter("block.read_semaphore_timeouts")
.with_description("Number of block reads that failed due to semaphore acquire timeout")
.init()
.bind(&[]),
bytes_written: meter bytes_written: meter
.u64_counter("block.bytes_written") .u64_counter("block.bytes_written")
.with_description("Number of bytes written to disk") .with_description("Number of bytes written to disk")

View file

@ -106,13 +106,13 @@ impl BlockResyncManager {
} }
/// Get length of resync queue /// Get length of resync queue
pub fn queue_len(&self) -> Result<usize, Error> { pub fn queue_approximate_len(&self) -> Result<usize, Error> {
Ok(self.queue.len()?) Ok(self.queue.approximate_len()?)
} }
/// Get number of blocks that have an error /// Get number of blocks that have an error
pub fn errors_len(&self) -> Result<usize, Error> { pub fn errors_approximate_len(&self) -> Result<usize, Error> {
Ok(self.errors.len()?) Ok(self.errors.approximate_len()?)
} }
/// Clear the error counter for a block and put it in queue immediately /// Clear the error counter for a block and put it in queue immediately
@ -133,6 +133,14 @@ impl BlockResyncManager {
))) )))
} }
/// Clear the entire resync queue and list of errored blocks
/// Corresponds to `garage repair clear-resync-queue`
pub fn clear_resync_queue(&self) -> Result<(), Error> {
self.queue.clear()?;
self.errors.clear()?;
Ok(())
}
pub fn register_bg_vars(&self, vars: &mut vars::BgVars) { pub fn register_bg_vars(&self, vars: &mut vars::BgVars) {
let notify = self.notify.clone(); let notify = self.notify.clone();
vars.register_rw( vars.register_rw(
@ -548,9 +556,11 @@ impl Worker for ResyncWorker {
} }
WorkerStatus { WorkerStatus {
queue_length: Some(self.manager.resync.queue_len().unwrap_or(0) as u64), queue_length: Some(self.manager.resync.queue_approximate_len().unwrap_or(0) as u64),
tranquility: Some(tranquility), tranquility: Some(tranquility),
persistent_errors: Some(self.manager.resync.errors_len().unwrap_or(0) as u64), persistent_errors: Some(
self.manager.resync.errors_approximate_len().unwrap_or(0) as u64
),
..Default::default() ..Default::default()
} }
} }

View file

@ -1,6 +1,6 @@
[package] [package]
name = "garage_db" name = "garage_db"
version = "1.1.0" version = "1.3.1"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -12,14 +12,18 @@ readme = "../../README.md"
path = "lib.rs" path = "lib.rs"
[dependencies] [dependencies]
err-derive.workspace = true thiserror.workspace = true
tracing.workspace = true tracing.workspace = true
heed = { workspace = true, optional = true } heed = { workspace = true, optional = true }
rusqlite = { workspace = true, optional = true, features = ["backup"] } rusqlite = { workspace = true, optional = true, features = ["backup"] }
r2d2 = { workspace = true, optional = true } r2d2 = { workspace = true, optional = true }
r2d2_sqlite = { workspace = true, optional = true } r2d2_sqlite = { workspace = true, optional = true }
fjall = { workspace = true, optional = true }
parking_lot = { workspace = true, optional = true }
[dev-dependencies] [dev-dependencies]
mktemp.workspace = true mktemp.workspace = true
@ -27,4 +31,5 @@ mktemp.workspace = true
default = [ "lmdb", "sqlite" ] default = [ "lmdb", "sqlite" ]
bundled-libs = [ "rusqlite?/bundled" ] bundled-libs = [ "rusqlite?/bundled" ]
lmdb = [ "heed" ] lmdb = [ "heed" ]
fjall = [ "dep:fjall", "dep:parking_lot" ]
sqlite = [ "rusqlite", "r2d2", "r2d2_sqlite" ] sqlite = [ "rusqlite", "r2d2", "r2d2_sqlite" ]

453
src/db/fjall_adapter.rs Normal file
View file

@ -0,0 +1,453 @@
use core::ops::Bound;
use std::path::PathBuf;
use std::sync::Arc;
use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard};
use fjall::{
PartitionCreateOptions, PersistMode, TransactionalKeyspace, TransactionalPartitionHandle,
WriteTransaction,
};
use crate::{
open::{Engine, OpenOpt},
Db, Error, IDb, ITx, ITxFn, OnCommit, Result, TxError, TxFnResult, TxOpError, TxOpResult,
TxResult, TxValueIter, Value, ValueIter,
};
pub use fjall;
// --
pub(crate) fn open_db(path: &PathBuf, opt: &OpenOpt) -> Result<Db> {
info!("Opening Fjall database at: {}", path.display());
if opt.fsync {
return Err(Error(
"metadata_fsync is not supported with the Fjall database engine".into(),
));
}
let mut config = fjall::Config::new(path);
if let Some(block_cache_size) = opt.fjall_block_cache_size {
config = config.cache_size(block_cache_size as u64);
}
let keyspace = config.open_transactional()?;
Ok(FjallDb::init(keyspace))
}
// -- err
impl From<fjall::Error> for Error {
fn from(e: fjall::Error) -> Error {
Error(format!("fjall: {}", e).into())
}
}
impl From<fjall::LsmError> for Error {
fn from(e: fjall::LsmError) -> Error {
Error(format!("fjall lsm_tree: {}", e).into())
}
}
impl From<fjall::Error> for TxOpError {
fn from(e: fjall::Error) -> TxOpError {
TxOpError(e.into())
}
}
// -- db
pub struct FjallDb {
keyspace: TransactionalKeyspace,
trees: RwLock<Vec<(String, TransactionalPartitionHandle)>>,
}
type ByteRefRangeBound<'r> = (Bound<&'r [u8]>, Bound<&'r [u8]>);
impl FjallDb {
pub fn init(keyspace: TransactionalKeyspace) -> Db {
let s = Self {
keyspace,
trees: RwLock::new(Vec::new()),
};
Db(Arc::new(s))
}
fn get_tree(
&self,
i: usize,
) -> Result<MappedRwLockReadGuard<'_, TransactionalPartitionHandle>> {
RwLockReadGuard::try_map(self.trees.read(), |trees: &Vec<_>| {
trees.get(i).map(|tup| &tup.1)
})
.map_err(|_| Error("invalid tree id".into()))
}
}
impl IDb for FjallDb {
fn engine(&self) -> String {
"Fjall (EXPERIMENTAL!)".into()
}
fn open_tree(&self, name: &str) -> Result<usize> {
let mut trees = self.trees.write();
let safe_name = encode_name(name)?;
if let Some(i) = trees.iter().position(|(name, _)| *name == safe_name) {
Ok(i)
} else {
let tree = self
.keyspace
.open_partition(&safe_name, PartitionCreateOptions::default())?;
let i = trees.len();
trees.push((safe_name, tree));
Ok(i)
}
}
fn list_trees(&self) -> Result<Vec<String>> {
Ok(self
.keyspace
.list_partitions()
.iter()
.map(|n| decode_name(&n))
.collect::<Result<Vec<_>>>()?)
}
fn snapshot(&self, base_path: &PathBuf) -> Result<()> {
std::fs::create_dir_all(base_path)?;
let path = Engine::Fjall.db_path(base_path);
let source_state = self.keyspace.read_tx();
let copy_keyspace = fjall::Config::new(path).open()?;
for partition_name in self.keyspace.list_partitions() {
let source_partition = self
.keyspace
.open_partition(&partition_name, PartitionCreateOptions::default())?;
let copy_partition =
copy_keyspace.open_partition(&partition_name, PartitionCreateOptions::default())?;
for entry in source_state.iter(&source_partition) {
let (key, value) = entry?;
copy_partition.insert(key, value)?;
}
}
copy_keyspace.persist(PersistMode::SyncAll)?;
Ok(())
}
// ----
fn get(&self, tree_idx: usize, key: &[u8]) -> Result<Option<Value>> {
let tree = self.get_tree(tree_idx)?;
let tx = self.keyspace.read_tx();
let val = tx.get(&tree, key)?;
match val {
None => Ok(None),
Some(v) => Ok(Some(v.to_vec())),
}
}
fn approximate_len(&self, tree_idx: usize) -> Result<usize> {
let tree = self.get_tree(tree_idx)?;
Ok(tree.approximate_len())
}
fn is_empty(&self, tree_idx: usize) -> Result<bool> {
let tree = self.get_tree(tree_idx)?;
let tx = self.keyspace.read_tx();
Ok(tx.is_empty(&tree)?)
}
fn insert(&self, tree_idx: usize, key: &[u8], value: &[u8]) -> Result<()> {
let tree = self.get_tree(tree_idx)?;
let mut tx = self.keyspace.write_tx();
tx.insert(&tree, key, value);
tx.commit()?;
Ok(())
}
fn remove(&self, tree_idx: usize, key: &[u8]) -> Result<()> {
let tree = self.get_tree(tree_idx)?;
let mut tx = self.keyspace.write_tx();
tx.remove(&tree, key);
tx.commit()?;
Ok(())
}
fn clear(&self, tree_idx: usize) -> Result<()> {
let mut trees = self.trees.write();
if tree_idx >= trees.len() {
return Err(Error("invalid tree id".into()));
}
let (name, tree) = trees.remove(tree_idx);
self.keyspace.delete_partition(tree)?;
let tree = self
.keyspace
.open_partition(&name, PartitionCreateOptions::default())?;
trees.insert(tree_idx, (name, tree));
Ok(())
}
fn iter(&self, tree_idx: usize) -> Result<ValueIter<'_>> {
let tree = self.get_tree(tree_idx)?;
let tx = self.keyspace.read_tx();
Ok(Box::new(tx.iter(&tree).map(iterator_remap)))
}
fn iter_rev(&self, tree_idx: usize) -> Result<ValueIter<'_>> {
let tree = self.get_tree(tree_idx)?;
let tx = self.keyspace.read_tx();
Ok(Box::new(tx.iter(&tree).rev().map(iterator_remap)))
}
fn range<'r>(
&self,
tree_idx: usize,
low: Bound<&'r [u8]>,
high: Bound<&'r [u8]>,
) -> Result<ValueIter<'_>> {
let tree = self.get_tree(tree_idx)?;
let tx = self.keyspace.read_tx();
Ok(Box::new(
tx.range::<&'r [u8], ByteRefRangeBound>(&tree, (low, high))
.map(iterator_remap),
))
}
fn range_rev<'r>(
&self,
tree_idx: usize,
low: Bound<&'r [u8]>,
high: Bound<&'r [u8]>,
) -> Result<ValueIter<'_>> {
let tree = self.get_tree(tree_idx)?;
let tx = self.keyspace.read_tx();
Ok(Box::new(
tx.range::<&'r [u8], ByteRefRangeBound>(&tree, (low, high))
.rev()
.map(iterator_remap),
))
}
// ----
fn transaction(&self, f: &dyn ITxFn) -> TxResult<OnCommit, ()> {
let trees = self.trees.read();
let mut tx = FjallTx {
trees: &trees[..],
tx: self.keyspace.write_tx(),
};
let res = f.try_on(&mut tx);
match res {
TxFnResult::Ok(on_commit) => {
tx.tx.commit().map_err(Error::from).map_err(TxError::Db)?;
Ok(on_commit)
}
TxFnResult::Abort => {
tx.tx.rollback();
Err(TxError::Abort(()))
}
TxFnResult::DbErr => {
tx.tx.rollback();
Err(TxError::Db(Error(
"(this message will be discarded)".into(),
)))
}
}
}
}
// ----
struct FjallTx<'a> {
trees: &'a [(String, TransactionalPartitionHandle)],
tx: WriteTransaction<'a>,
}
impl<'a> FjallTx<'a> {
fn get_tree(&self, i: usize) -> TxOpResult<&TransactionalPartitionHandle> {
self.trees.get(i).map(|tup| &tup.1).ok_or_else(|| {
TxOpError(Error(
"invalid tree id (it might have been openned after the transaction started)".into(),
))
})
}
}
impl<'a> ITx for FjallTx<'a> {
fn get(&self, tree_idx: usize, key: &[u8]) -> TxOpResult<Option<Value>> {
let tree = self.get_tree(tree_idx)?;
match self.tx.get(tree, key)? {
Some(v) => Ok(Some(v.to_vec())),
None => Ok(None),
}
}
fn len(&self, tree_idx: usize) -> TxOpResult<usize> {
let tree = self.get_tree(tree_idx)?;
Ok(self.tx.len(tree)? as usize)
}
fn insert(&mut self, tree_idx: usize, key: &[u8], value: &[u8]) -> TxOpResult<()> {
let tree = self.get_tree(tree_idx)?.clone();
self.tx.insert(&tree, key, value);
Ok(())
}
fn remove(&mut self, tree_idx: usize, key: &[u8]) -> TxOpResult<()> {
let tree = self.get_tree(tree_idx)?.clone();
self.tx.remove(&tree, key);
Ok(())
}
fn clear(&mut self, _tree_idx: usize) -> TxOpResult<()> {
unimplemented!("LSM tree clearing in cross-partition transaction is not supported")
}
fn iter(&self, tree_idx: usize) -> TxOpResult<TxValueIter<'_>> {
let tree = self.get_tree(tree_idx)?.clone();
Ok(Box::new(self.tx.iter(&tree).map(iterator_remap_tx)))
}
fn iter_rev(&self, tree_idx: usize) -> TxOpResult<TxValueIter<'_>> {
let tree = self.get_tree(tree_idx)?.clone();
Ok(Box::new(self.tx.iter(&tree).rev().map(iterator_remap_tx)))
}
fn range<'r>(
&self,
tree_idx: usize,
low: Bound<&'r [u8]>,
high: Bound<&'r [u8]>,
) -> TxOpResult<TxValueIter<'_>> {
let tree = self.get_tree(tree_idx)?;
let low = clone_bound(low);
let high = clone_bound(high);
Ok(Box::new(
self.tx
.range::<Vec<u8>, ByteVecRangeBounds>(&tree, (low, high))
.map(iterator_remap_tx),
))
}
fn range_rev<'r>(
&self,
tree_idx: usize,
low: Bound<&'r [u8]>,
high: Bound<&'r [u8]>,
) -> TxOpResult<TxValueIter<'_>> {
let tree = self.get_tree(tree_idx)?;
let low = clone_bound(low);
let high = clone_bound(high);
Ok(Box::new(
self.tx
.range::<Vec<u8>, ByteVecRangeBounds>(&tree, (low, high))
.rev()
.map(iterator_remap_tx),
))
}
}
// -- maps fjall's (k, v) to ours
fn iterator_remap(r: fjall::Result<(fjall::Slice, fjall::Slice)>) -> Result<(Value, Value)> {
r.map(|(k, v)| (k.to_vec(), v.to_vec()))
.map_err(|e| e.into())
}
fn iterator_remap_tx(r: fjall::Result<(fjall::Slice, fjall::Slice)>) -> TxOpResult<(Value, Value)> {
r.map(|(k, v)| (k.to_vec(), v.to_vec()))
.map_err(|e| e.into())
}
// -- utils to deal with Garage's tightness on Bound lifetimes
type ByteVecBound = Bound<Vec<u8>>;
type ByteVecRangeBounds = (ByteVecBound, ByteVecBound);
fn clone_bound(bound: Bound<&[u8]>) -> ByteVecBound {
let value = match bound {
Bound::Excluded(v) | Bound::Included(v) => v.to_vec(),
Bound::Unbounded => vec![],
};
match bound {
Bound::Included(_) => Bound::Included(value),
Bound::Excluded(_) => Bound::Excluded(value),
Bound::Unbounded => Bound::Unbounded,
}
}
// -- utils to encode table names --
fn encode_name(s: &str) -> Result<String> {
let base = 'A' as u32;
let mut ret = String::with_capacity(s.len() + 10);
for c in s.chars() {
if c.is_alphanumeric() || c == '_' || c == '-' || c == '#' {
ret.push(c);
} else if c <= u8::MAX as char {
ret.push('$');
let c_hi = c as u32 / 16;
let c_lo = c as u32 % 16;
ret.push(char::from_u32(base + c_hi).unwrap());
ret.push(char::from_u32(base + c_lo).unwrap());
} else {
return Err(Error(
format!("table name {} could not be safely encoded", s).into(),
));
}
}
Ok(ret)
}
fn decode_name(s: &str) -> Result<String> {
use std::convert::TryFrom;
let errfn = || Error(format!("encoded table name {} is invalid", s).into());
let c_map = |c: char| {
let c = c as u32;
let base = 'A' as u32;
if (base..base + 16).contains(&c) {
Some(c - base)
} else {
None
}
};
let mut ret = String::with_capacity(s.len());
let mut it = s.chars();
while let Some(c) = it.next() {
if c == '$' {
let c_hi = it.next().and_then(c_map).ok_or_else(errfn)?;
let c_lo = it.next().and_then(c_map).ok_or_else(errfn)?;
let c_dec = char::try_from(c_hi * 16 + c_lo).map_err(|_| errfn())?;
ret.push(c_dec);
} else {
ret.push(c);
}
}
Ok(ret)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_encdec_name() {
for name in [
"testname",
"test_name",
"test name",
"test$name",
"test:name@help.me$get/this**right",
] {
let encname = encode_name(name).unwrap();
assert!(!encname.contains(' '));
assert!(!encname.contains('.'));
assert!(!encname.contains('*'));
assert_eq!(*name, decode_name(&encname).unwrap());
}
}
}

View file

@ -1,6 +1,8 @@
#[macro_use] #[macro_use]
extern crate tracing; extern crate tracing;
#[cfg(feature = "fjall")]
pub mod fjall_adapter;
#[cfg(feature = "lmdb")] #[cfg(feature = "lmdb")]
pub mod lmdb_adapter; pub mod lmdb_adapter;
#[cfg(feature = "sqlite")] #[cfg(feature = "sqlite")]
@ -18,7 +20,7 @@ use std::cell::Cell;
use std::path::PathBuf; use std::path::PathBuf;
use std::sync::Arc; use std::sync::Arc;
use err_derive::Error; use thiserror::Error;
pub use open::*; pub use open::*;
@ -42,7 +44,7 @@ pub type TxValueIter<'a> = Box<dyn std::iter::Iterator<Item = TxOpResult<(Value,
// ---- // ----
#[derive(Debug, Error)] #[derive(Debug, Error)]
#[error(display = "{}", _0)] #[error("{0}")]
pub struct Error(pub Cow<'static, str>); pub struct Error(pub Cow<'static, str>);
impl From<std::io::Error> for Error { impl From<std::io::Error> for Error {
@ -54,7 +56,7 @@ impl From<std::io::Error> for Error {
pub type Result<T> = std::result::Result<T, Error>; pub type Result<T> = std::result::Result<T, Error>;
#[derive(Debug, Error)] #[derive(Debug, Error)]
#[error(display = "{}", _0)] #[error("{0}")]
pub struct TxOpError(pub(crate) Error); pub struct TxOpError(pub(crate) Error);
pub type TxOpResult<T> = std::result::Result<T, TxOpError>; pub type TxOpResult<T> = std::result::Result<T, TxOpError>;
@ -104,32 +106,44 @@ impl Db {
result: Cell::new(None), result: Cell::new(None),
}; };
let tx_res = self.0.transaction(&f); let tx_res = self.0.transaction(&f);
let ret = f let fn_res = f.result.into_inner();
.result
.into_inner()
.expect("Transaction did not store result");
match tx_res { match (tx_res, fn_res) {
Ok(on_commit) => match ret { (Ok(on_commit), Some(Ok(value))) => {
Ok(value) => { // Transaction succeeded
// TxFn stored the value to return to the user in fn_res
// tx_res contains the on_commit list of callbacks, run them now
on_commit.into_iter().for_each(|f| f()); on_commit.into_iter().for_each(|f| f());
Ok(value) Ok(value)
} }
_ => unreachable!(), (Err(TxError::Abort(())), Some(Err(TxError::Abort(e)))) => {
}, // Transaction was aborted by user code
Err(TxError::Abort(())) => match ret { // The abort error value is stored in fn_res
Err(TxError::Abort(e)) => Err(TxError::Abort(e)), Err(TxError::Abort(e))
_ => unreachable!(), }
}, (Err(TxError::Db(_tx_e)), Some(Err(TxError::Db(fn_e)))) => {
Err(TxError::Db(e2)) => match ret { // Transaction encountered a DB error in user code
// Ok was stored -> the error occurred when finalizing // The error value encountered is the one in fn_res,
// transaction // tx_res contains only a dummy error message
Ok(_) => Err(TxError::Db(e2)), Err(TxError::Db(fn_e))
// An error was already stored: that's the one we want to }
// return (Err(TxError::Db(tx_e)), None) => {
Err(TxError::Db(e)) => Err(TxError::Db(e)), // Transaction encounterred a DB error when initializing the transaction,
_ => unreachable!(), // before user code was called
}, Err(TxError::Db(tx_e))
}
(Err(TxError::Db(tx_e)), Some(Ok(_))) => {
// Transaction encounterred a DB error when commiting the transaction,
// after user code was called
Err(TxError::Db(tx_e))
}
(tx_res, fn_res) => {
panic!(
"unexpected error case: tx_res={:?}, fn_res={:?}",
tx_res.map(|_| "..."),
fn_res.map(|x| x.map(|_| "...").map_err(|_| "..."))
);
}
} }
} }
@ -152,7 +166,7 @@ impl Db {
let tree_names = other.list_trees()?; let tree_names = other.list_trees()?;
for name in tree_names { for name in tree_names {
let tree = self.open_tree(&name)?; let tree = self.open_tree(&name)?;
if tree.len()? > 0 { if !tree.is_empty()? {
return Err(Error(format!("tree {} already contains data", name).into())); return Err(Error(format!("tree {} already contains data", name).into()));
} }
@ -194,8 +208,12 @@ impl Tree {
self.0.get(self.1, key.as_ref()) self.0.get(self.1, key.as_ref())
} }
#[inline] #[inline]
pub fn len(&self) -> Result<usize> { pub fn approximate_len(&self) -> Result<usize> {
self.0.len(self.1) self.0.approximate_len(self.1)
}
#[inline]
pub fn is_empty(&self) -> Result<bool> {
self.0.is_empty(self.1)
} }
#[inline] #[inline]
@ -333,7 +351,8 @@ pub(crate) trait IDb: Send + Sync {
fn snapshot(&self, path: &PathBuf) -> Result<()>; fn snapshot(&self, path: &PathBuf) -> Result<()>;
fn get(&self, tree: usize, key: &[u8]) -> Result<Option<Value>>; fn get(&self, tree: usize, key: &[u8]) -> Result<Option<Value>>;
fn len(&self, tree: usize) -> Result<usize>; fn approximate_len(&self, tree: usize) -> Result<usize>;
fn is_empty(&self, tree: usize) -> Result<bool>;
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<()>; fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<()>;
fn remove(&self, tree: usize, key: &[u8]) -> Result<()>; fn remove(&self, tree: usize, key: &[u8]) -> Result<()>;

View file

@ -1,8 +1,8 @@
use core::ops::Bound; use core::ops::Bound;
use core::ptr::NonNull;
use std::collections::HashMap; use std::collections::HashMap;
use std::convert::TryInto; use std::convert::TryInto;
use std::marker::PhantomPinned;
use std::path::PathBuf; use std::path::PathBuf;
use std::pin::Pin; use std::pin::Pin;
use std::sync::{Arc, RwLock}; use std::sync::{Arc, RwLock};
@ -11,12 +11,55 @@ use heed::types::ByteSlice;
use heed::{BytesDecode, Env, RoTxn, RwTxn, UntypedDatabase as Database}; use heed::{BytesDecode, Env, RoTxn, RwTxn, UntypedDatabase as Database};
use crate::{ use crate::{
open::{Engine, OpenOpt},
Db, Error, IDb, ITx, ITxFn, OnCommit, Result, TxError, TxFnResult, TxOpError, TxOpResult, Db, Error, IDb, ITx, ITxFn, OnCommit, Result, TxError, TxFnResult, TxOpError, TxOpResult,
TxResult, TxValueIter, Value, ValueIter, TxResult, TxValueIter, Value, ValueIter,
}; };
pub use heed; pub use heed;
// ---- top-level open function
pub(crate) fn open_db(path: &PathBuf, opt: &OpenOpt) -> Result<Db> {
info!("Opening LMDB database at: {}", path.display());
if let Err(e) = std::fs::create_dir_all(&path) {
return Err(Error(
format!("Unable to create LMDB data directory: {}", e).into(),
));
}
let map_size = match opt.lmdb_map_size {
None => recommended_map_size(),
Some(v) => v - (v % 4096),
};
let mut env_builder = heed::EnvOpenOptions::new();
env_builder.max_dbs(100);
env_builder.map_size(map_size);
env_builder.max_readers(2048);
unsafe {
env_builder.flag(heed::flags::Flags::MdbNoRdAhead);
env_builder.flag(heed::flags::Flags::MdbNoMetaSync);
if !opt.fsync {
env_builder.flag(heed::flags::Flags::MdbNoSync);
}
}
match env_builder.open(&path) {
Err(heed::Error::Io(e)) if e.kind() == std::io::ErrorKind::OutOfMemory => {
return Err(Error(
"OutOfMemory error while trying to open LMDB database. This can happen \
if your operating system is not allowing you to use sufficient virtual \
memory address space. Please check that no limit is set (ulimit -v). \
You may also try to set a smaller `lmdb_map_size` configuration parameter. \
On 32-bit machines, you should probably switch to another database engine."
.into(),
))
}
Err(e) => Err(Error(format!("Cannot open LMDB database: {}", e).into())),
Ok(db) => Ok(LmdbDb::init(db)),
}
}
// -- err // -- err
impl From<heed::Error> for Error { impl From<heed::Error> for Error {
@ -104,12 +147,11 @@ impl IDb for LmdbDb {
Ok(ret2) Ok(ret2)
} }
fn snapshot(&self, to: &PathBuf) -> Result<()> { fn snapshot(&self, base_path: &PathBuf) -> Result<()> {
std::fs::create_dir_all(to)?; std::fs::create_dir_all(base_path)?;
let mut path = to.clone(); let path = Engine::Lmdb.db_path(base_path);
path.push("data.mdb");
self.db self.db
.copy_to_path(path, heed::CompactionOption::Disabled)?; .copy_to_path(path, heed::CompactionOption::Enabled)?;
Ok(()) Ok(())
} }
@ -126,11 +168,16 @@ impl IDb for LmdbDb {
} }
} }
fn len(&self, tree: usize) -> Result<usize> { fn approximate_len(&self, tree: usize) -> Result<usize> {
let tree = self.get_tree(tree)?; let tree = self.get_tree(tree)?;
let tx = self.db.read_txn()?; let tx = self.db.read_txn()?;
Ok(tree.len(&tx)?.try_into().unwrap()) Ok(tree.len(&tx)?.try_into().unwrap())
} }
fn is_empty(&self, tree: usize) -> Result<bool> {
let tree = self.get_tree(tree)?;
let tx = self.db.read_txn()?;
Ok(tree.is_empty(&tx)?)
}
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<()> { fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<()> {
let tree = self.get_tree(tree)?; let tree = self.get_tree(tree)?;
@ -159,13 +206,15 @@ impl IDb for LmdbDb {
fn iter(&self, tree: usize) -> Result<ValueIter<'_>> { fn iter(&self, tree: usize) -> Result<ValueIter<'_>> {
let tree = self.get_tree(tree)?; let tree = self.get_tree(tree)?;
let tx = self.db.read_txn()?; let tx = self.db.read_txn()?;
TxAndIterator::make(tx, |tx| Ok(tree.iter(tx)?)) // Safety: the cloture does not store its argument anywhere,
unsafe { TxAndIterator::make(tx, |tx| Ok(tree.iter(tx)?)) }
} }
fn iter_rev(&self, tree: usize) -> Result<ValueIter<'_>> { fn iter_rev(&self, tree: usize) -> Result<ValueIter<'_>> {
let tree = self.get_tree(tree)?; let tree = self.get_tree(tree)?;
let tx = self.db.read_txn()?; let tx = self.db.read_txn()?;
TxAndIterator::make(tx, |tx| Ok(tree.rev_iter(tx)?)) // Safety: the cloture does not store its argument anywhere,
unsafe { TxAndIterator::make(tx, |tx| Ok(tree.rev_iter(tx)?)) }
} }
fn range<'r>( fn range<'r>(
@ -176,7 +225,8 @@ impl IDb for LmdbDb {
) -> Result<ValueIter<'_>> { ) -> Result<ValueIter<'_>> {
let tree = self.get_tree(tree)?; let tree = self.get_tree(tree)?;
let tx = self.db.read_txn()?; let tx = self.db.read_txn()?;
TxAndIterator::make(tx, |tx| Ok(tree.range(tx, &(low, high))?)) // Safety: the cloture does not store its argument anywhere,
unsafe { TxAndIterator::make(tx, |tx| Ok(tree.range(tx, &(low, high))?)) }
} }
fn range_rev<'r>( fn range_rev<'r>(
&self, &self,
@ -186,7 +236,8 @@ impl IDb for LmdbDb {
) -> Result<ValueIter<'_>> { ) -> Result<ValueIter<'_>> {
let tree = self.get_tree(tree)?; let tree = self.get_tree(tree)?;
let tx = self.db.read_txn()?; let tx = self.db.read_txn()?;
TxAndIterator::make(tx, |tx| Ok(tree.rev_range(tx, &(low, high))?)) // Safety: the cloture does not store its argument anywhere,
unsafe { TxAndIterator::make(tx, |tx| Ok(tree.rev_range(tx, &(low, high))?)) }
} }
// ---- // ----
@ -316,28 +367,41 @@ where
{ {
tx: RoTxn<'a>, tx: RoTxn<'a>,
iter: Option<I>, iter: Option<I>,
_pin: PhantomPinned,
} }
impl<'a, I> TxAndIterator<'a, I> impl<'a, I> TxAndIterator<'a, I>
where where
I: Iterator<Item = IteratorItem<'a>> + 'a, I: Iterator<Item = IteratorItem<'a>> + 'a,
{ {
fn make<F>(tx: RoTxn<'a>, iterfun: F) -> Result<ValueIter<'a>> fn iter(self: Pin<&mut Self>) -> &mut Option<I> {
// Safety: iter is not structural
unsafe { &mut self.get_unchecked_mut().iter }
}
/// Safety: iterfun must not store its argument anywhere but in its result.
unsafe fn make<F>(tx: RoTxn<'a>, iterfun: F) -> Result<ValueIter<'a>>
where where
F: FnOnce(&'a RoTxn<'a>) -> Result<I>, F: FnOnce(&'a RoTxn<'a>) -> Result<I>,
{ {
let res = TxAndIterator { tx, iter: None }; let res = TxAndIterator {
tx,
iter: None,
_pin: PhantomPinned,
};
let mut boxed = Box::pin(res); let mut boxed = Box::pin(res);
// This unsafe allows us to bypass lifetime checks let tx_lifetime_overextended: &'a RoTxn<'a> = {
let tx = unsafe { NonNull::from(&boxed.tx).as_ref() }; let tx = &boxed.tx;
let iter = iterfun(tx)?; // Safety: Artificially extending the lifetime because
// this reference will only be stored and accessed from the
// returned ValueIter which guarantees that it is destroyed
// before the tx it is pointing to.
unsafe { &*&raw const *tx }
};
let iter = iterfun(&tx_lifetime_overextended)?;
let mut_ref = Pin::as_mut(&mut boxed); *boxed.as_mut().iter() = Some(iter);
// This unsafe allows us to write in a field of the pinned struct
unsafe {
Pin::get_unchecked_mut(mut_ref).iter = Some(iter);
}
Ok(Box::new(TxAndIteratorPin(boxed))) Ok(Box::new(TxAndIteratorPin(boxed)))
} }
@ -348,8 +412,10 @@ where
I: Iterator<Item = IteratorItem<'a>> + 'a, I: Iterator<Item = IteratorItem<'a>> + 'a,
{ {
fn drop(&mut self) { fn drop(&mut self) {
// ensure the iterator is dropped before the RoTxn it references // Safety: `new_unchecked` is okay because we know this value is never
drop(self.iter.take()); // used again after being dropped.
let this = unsafe { Pin::new_unchecked(self) };
drop(this.iter().take());
} }
} }
@ -365,13 +431,12 @@ where
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
let mut_ref = Pin::as_mut(&mut self.0); let mut_ref = Pin::as_mut(&mut self.0);
// This unsafe allows us to mutably access the iterator field let next = mut_ref.iter().as_mut()?.next()?;
let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() }; let res = match next {
match next { Err(e) => Err(e.into()),
None => None, Ok((k, v)) => Ok((k.to_vec(), v.to_vec())),
Some(Err(e)) => Some(Err(e.into())), };
Some(Ok((k, v))) => Some(Ok((k.to_vec(), v.to_vec()))), Some(res)
}
} }
} }

View file

@ -11,6 +11,7 @@ use crate::{Db, Error, Result};
pub enum Engine { pub enum Engine {
Lmdb, Lmdb,
Sqlite, Sqlite,
Fjall,
} }
impl Engine { impl Engine {
@ -19,8 +20,26 @@ impl Engine {
match self { match self {
Self::Lmdb => "lmdb", Self::Lmdb => "lmdb",
Self::Sqlite => "sqlite", Self::Sqlite => "sqlite",
Self::Fjall => "fjall",
} }
} }
/// Return engine-specific DB path from base path
pub fn db_path(&self, base_path: &PathBuf) -> PathBuf {
let mut ret = base_path.clone();
match self {
Self::Lmdb => {
ret.push("db.lmdb");
}
Self::Sqlite => {
ret.push("db.sqlite");
}
Self::Fjall => {
ret.push("db.fjall");
}
}
ret
}
} }
impl std::fmt::Display for Engine { impl std::fmt::Display for Engine {
@ -36,10 +55,11 @@ impl std::str::FromStr for Engine {
match text { match text {
"lmdb" | "heed" => Ok(Self::Lmdb), "lmdb" | "heed" => Ok(Self::Lmdb),
"sqlite" | "sqlite3" | "rusqlite" => Ok(Self::Sqlite), "sqlite" | "sqlite3" | "rusqlite" => Ok(Self::Sqlite),
"fjall" => Ok(Self::Fjall),
"sled" => Err(Error("Sled is no longer supported as a database engine. Converting your old metadata db can be done using an older Garage binary (e.g. v0.9.4).".into())), "sled" => Err(Error("Sled is no longer supported as a database engine. Converting your old metadata db can be done using an older Garage binary (e.g. v0.9.4).".into())),
kind => Err(Error( kind => Err(Error(
format!( format!(
"Invalid DB engine: {} (options are: lmdb, sqlite)", "Invalid DB engine: {} (options are: lmdb, sqlite, fjall)",
kind kind
) )
.into(), .into(),
@ -51,6 +71,7 @@ impl std::str::FromStr for Engine {
pub struct OpenOpt { pub struct OpenOpt {
pub fsync: bool, pub fsync: bool,
pub lmdb_map_size: Option<usize>, pub lmdb_map_size: Option<usize>,
pub fjall_block_cache_size: Option<usize>,
} }
impl Default for OpenOpt { impl Default for OpenOpt {
@ -58,6 +79,7 @@ impl Default for OpenOpt {
Self { Self {
fsync: false, fsync: false,
lmdb_map_size: None, lmdb_map_size: None,
fjall_block_cache_size: None,
} }
} }
} }
@ -66,53 +88,15 @@ pub fn open_db(path: &PathBuf, engine: Engine, opt: &OpenOpt) -> Result<Db> {
match engine { match engine {
// ---- Sqlite DB ---- // ---- Sqlite DB ----
#[cfg(feature = "sqlite")] #[cfg(feature = "sqlite")]
Engine::Sqlite => { Engine::Sqlite => crate::sqlite_adapter::open_db(path, opt),
info!("Opening Sqlite database at: {}", path.display());
let manager = r2d2_sqlite::SqliteConnectionManager::file(path);
Ok(crate::sqlite_adapter::SqliteDb::new(manager, opt.fsync)?)
}
// ---- LMDB DB ---- // ---- LMDB DB ----
#[cfg(feature = "lmdb")] #[cfg(feature = "lmdb")]
Engine::Lmdb => { Engine::Lmdb => crate::lmdb_adapter::open_db(path, opt),
info!("Opening LMDB database at: {}", path.display());
if let Err(e) = std::fs::create_dir_all(&path) {
return Err(Error(
format!("Unable to create LMDB data directory: {}", e).into(),
));
}
let map_size = match opt.lmdb_map_size { // ---- Fjall DB ----
None => crate::lmdb_adapter::recommended_map_size(), #[cfg(feature = "fjall")]
Some(v) => v - (v % 4096), Engine::Fjall => crate::fjall_adapter::open_db(path, opt),
};
let mut env_builder = heed::EnvOpenOptions::new();
env_builder.max_dbs(100);
env_builder.map_size(map_size);
env_builder.max_readers(2048);
unsafe {
env_builder.flag(crate::lmdb_adapter::heed::flags::Flags::MdbNoRdAhead);
env_builder.flag(crate::lmdb_adapter::heed::flags::Flags::MdbNoMetaSync);
if !opt.fsync {
env_builder.flag(heed::flags::Flags::MdbNoSync);
}
}
match env_builder.open(&path) {
Err(heed::Error::Io(e)) if e.kind() == std::io::ErrorKind::OutOfMemory => {
return Err(Error(
"OutOfMemory error while trying to open LMDB database. This can happen \
if your operating system is not allowing you to use sufficient virtual \
memory address space. Please check that no limit is set (ulimit -v). \
You may also try to set a smaller `lmdb_map_size` configuration parameter. \
On 32-bit machines, you should probably switch to another database engine."
.into(),
))
}
Err(e) => Err(Error(format!("Cannot open LMDB database: {}", e).into())),
Ok(db) => Ok(crate::lmdb_adapter::LmdbDb::init(db)),
}
}
// Pattern is unreachable when all supported DB engines are compiled into binary. The allow // Pattern is unreachable when all supported DB engines are compiled into binary. The allow
// attribute is added so that we won't have to change this match in case stop building // attribute is added so that we won't have to change this match in case stop building

View file

@ -11,12 +11,23 @@ use r2d2_sqlite::SqliteConnectionManager;
use rusqlite::{params, Rows, Statement, Transaction}; use rusqlite::{params, Rows, Statement, Transaction};
use crate::{ use crate::{
open::{Engine, OpenOpt},
Db, Error, IDb, ITx, ITxFn, OnCommit, Result, TxError, TxFnResult, TxOpError, TxOpResult, Db, Error, IDb, ITx, ITxFn, OnCommit, Result, TxError, TxFnResult, TxOpError, TxOpResult,
TxResult, TxValueIter, Value, ValueIter, TxResult, TxValueIter, Value, ValueIter,
}; };
pub use rusqlite; pub use rusqlite;
// ---- top-level open function
pub(crate) fn open_db(path: &PathBuf, opt: &OpenOpt) -> Result<Db> {
info!("Opening Sqlite database at: {}", path.display());
let manager = r2d2_sqlite::SqliteConnectionManager::file(path);
Ok(SqliteDb::new(manager, opt.fsync)?)
}
// ----
type Connection = r2d2::PooledConnection<SqliteConnectionManager>; type Connection = r2d2::PooledConnection<SqliteConnectionManager>;
// --- err // --- err
@ -139,17 +150,18 @@ impl IDb for SqliteDb {
Ok(trees) Ok(trees)
} }
fn snapshot(&self, to: &PathBuf) -> Result<()> { fn snapshot(&self, base_path: &PathBuf) -> Result<()> {
fn progress(p: rusqlite::backup::Progress) { std::fs::create_dir_all(base_path)?;
let percent = (p.pagecount - p.remaining) * 100 / p.pagecount; let path = Engine::Sqlite
info!("Sqlite snapshot progress: {}%", percent); .db_path(&base_path)
} .into_os_string()
std::fs::create_dir_all(to)?; .into_string()
let mut path = to.clone(); .map_err(|_| Error("invalid sqlite path string".into()))?;
path.push("db.sqlite");
self.db info!("Start sqlite VACUUM INTO `{}`", path);
.get()? self.db.get()?.execute("VACUUM INTO ?1", params![path])?;
.backup(rusqlite::DatabaseName::Main, path, Some(progress))?; info!("Finished sqlite VACUUM INTO `{}`", path);
Ok(()) Ok(())
} }
@ -160,7 +172,7 @@ impl IDb for SqliteDb {
self.internal_get(&self.db.get()?, &tree, key) self.internal_get(&self.db.get()?, &tree, key)
} }
fn len(&self, tree: usize) -> Result<usize> { fn approximate_len(&self, tree: usize) -> Result<usize> {
let tree = self.get_tree(tree)?; let tree = self.get_tree(tree)?;
let db = self.db.get()?; let db = self.db.get()?;
@ -172,6 +184,10 @@ impl IDb for SqliteDb {
} }
} }
fn is_empty(&self, tree: usize) -> Result<bool> {
Ok(self.approximate_len(tree)? == 0)
}
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<()> { fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<()> {
let tree = self.get_tree(tree)?; let tree = self.get_tree(tree)?;
let db = self.db.get()?; let db = self.db.get()?;

View file

@ -1,7 +1,7 @@
use crate::*; use crate::*;
fn test_suite(db: Db) { fn test_suite(db: Db) {
let tree = db.open_tree("tree").unwrap(); let tree = db.open_tree("tree:this_is_a_tree").unwrap();
let ka: &[u8] = &b"test"[..]; let ka: &[u8] = &b"test"[..];
let kb: &[u8] = &b"zwello"[..]; let kb: &[u8] = &b"zwello"[..];
@ -14,7 +14,7 @@ fn test_suite(db: Db) {
assert!(tree.insert(ka, va).is_ok()); assert!(tree.insert(ka, va).is_ok());
assert_eq!(tree.get(ka).unwrap().unwrap(), va); assert_eq!(tree.get(ka).unwrap().unwrap(), va);
assert_eq!(tree.len().unwrap(), 1); assert_eq!(tree.iter().unwrap().count(), 1);
// ---- test transaction logic ---- // ---- test transaction logic ----
@ -148,3 +148,15 @@ fn test_sqlite_db() {
let db = SqliteDb::new(manager, false).unwrap(); let db = SqliteDb::new(manager, false).unwrap();
test_suite(db); test_suite(db);
} }
#[test]
#[cfg(feature = "fjall")]
fn test_fjall_db() {
use crate::fjall_adapter::{fjall, FjallDb};
let path = mktemp::Temp::new_dir().unwrap();
let config = fjall::Config::new(path).temporary(true);
let keyspace = config.open_transactional().unwrap();
let db = FjallDb::init(keyspace);
test_suite(db);
}

View file

@ -1,6 +1,6 @@
[package] [package]
name = "garage" name = "garage"
version = "1.1.0" version = "1.3.1"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -57,6 +57,7 @@ opentelemetry.workspace = true
opentelemetry-prometheus = { workspace = true, optional = true } opentelemetry-prometheus = { workspace = true, optional = true }
opentelemetry-otlp = { workspace = true, optional = true } opentelemetry-otlp = { workspace = true, optional = true }
syslog-tracing = { workspace = true, optional = true } syslog-tracing = { workspace = true, optional = true }
tracing-journald = { workspace = true, optional = true }
[dev-dependencies] [dev-dependencies]
garage_api_common.workspace = true garage_api_common.workspace = true
@ -90,6 +91,7 @@ k2v = [ "garage_util/k2v", "garage_api_k2v" ]
# Database engines # Database engines
lmdb = [ "garage_model/lmdb" ] lmdb = [ "garage_model/lmdb" ]
sqlite = [ "garage_model/sqlite" ] sqlite = [ "garage_model/sqlite" ]
fjall = [ "garage_model/fjall" ]
# Automatic registration and discovery via Consul API # Automatic registration and discovery via Consul API
consul-discovery = [ "garage_rpc/consul-discovery" ] consul-discovery = [ "garage_rpc/consul-discovery" ]
@ -101,6 +103,8 @@ metrics = [ "garage_api_admin/metrics", "opentelemetry-prometheus" ]
telemetry-otlp = [ "opentelemetry-otlp" ] telemetry-otlp = [ "opentelemetry-otlp" ]
# Logging to syslog # Logging to syslog
syslog = [ "syslog-tracing" ] syslog = [ "syslog-tracing" ]
# Logging to journald
journald = [ "tracing-journald" ]
# NOTE: bundled-libs and system-libs should be treat as mutually exclusive; # NOTE: bundled-libs and system-libs should be treat as mutually exclusive;
# exactly one of them should be enabled. # exactly one of them should be enabled.

View file

@ -101,6 +101,7 @@ impl AdminRpcHandler {
let mut obj_dels = 0; let mut obj_dels = 0;
let mut mpu_dels = 0; let mut mpu_dels = 0;
let mut ver_dels = 0; let mut ver_dels = 0;
let mut br_dels = 0;
for hash in blocks { for hash in blocks {
let hash = hex::decode(hash).ok_or_bad_request("invalid hash")?; let hash = hex::decode(hash).ok_or_bad_request("invalid hash")?;
@ -131,12 +132,19 @@ impl AdminRpcHandler {
ver_dels += 1; ver_dels += 1;
} }
} }
if !br.deleted.get() {
let mut br = br;
br.deleted.set();
self.garage.block_ref_table.insert(&br).await?;
br_dels += 1;
}
} }
} }
Ok(AdminRpc::Ok(format!( Ok(AdminRpc::Ok(format!(
"Purged {} blocks, {} versions, {} objects, {} multipart uploads", "Purged {} blocks: marked {} block refs, {} versions, {} objects and {} multipart uploads as deleted",
blocks.len(), blocks.len(),
br_dels,
ver_dels, ver_dels,
obj_dels, obj_dels,
mpu_dels, mpu_dels,

View file

@ -126,7 +126,7 @@ impl AdminRpcHandler {
#[allow(clippy::ptr_arg)] #[allow(clippy::ptr_arg)]
async fn handle_create_bucket(&self, name: &String) -> Result<AdminRpc, Error> { async fn handle_create_bucket(&self, name: &String) -> Result<AdminRpc, Error> {
if !is_valid_bucket_name(name) { if !is_valid_bucket_name(name, self.garage.config.allow_punycode) {
return Err(Error::BadRequest(format!( return Err(Error::BadRequest(format!(
"{}: {}", "{}: {}",
name, INVALID_BUCKET_NAME_MESSAGE name, INVALID_BUCKET_NAME_MESSAGE

View file

@ -219,7 +219,7 @@ impl AdminRpcHandler {
// Gather block manager statistics // Gather block manager statistics
writeln!(&mut ret, "\nBlock manager stats:").unwrap(); writeln!(&mut ret, "\nBlock manager stats:").unwrap();
let rc_len = self.garage.block_manager.rc_len()?.to_string(); let rc_len = self.garage.block_manager.rc_approximate_len()?.to_string();
writeln!( writeln!(
&mut ret, &mut ret,
@ -230,13 +230,13 @@ impl AdminRpcHandler {
writeln!( writeln!(
&mut ret, &mut ret,
" resync queue length: {}", " resync queue length: {}",
self.garage.block_manager.resync.queue_len()? self.garage.block_manager.resync.queue_approximate_len()?
) )
.unwrap(); .unwrap();
writeln!( writeln!(
&mut ret, &mut ret,
" blocks with resync errors: {}", " blocks with resync errors: {}",
self.garage.block_manager.resync.errors_len()? self.garage.block_manager.resync.errors_approximate_len()?
) )
.unwrap(); .unwrap();
@ -346,16 +346,21 @@ impl AdminRpcHandler {
F: TableSchema + 'static, F: TableSchema + 'static,
R: TableReplication + 'static, R: TableReplication + 'static,
{ {
let data_len = t.data.store.len().map_err(GarageError::from)?.to_string(); let data_len = t
let mkl_len = t.merkle_updater.merkle_tree_len()?.to_string(); .data
.store
.approximate_len()
.map_err(GarageError::from)?
.to_string();
let mkl_len = t.merkle_updater.merkle_tree_approximate_len()?.to_string();
Ok(format!( Ok(format!(
" {}\t{}\t{}\t{}\t{}", " {}\t{}\t{}\t{}\t{}",
F::TABLE_NAME, F::TABLE_NAME,
data_len, data_len,
mkl_len, mkl_len,
t.merkle_updater.todo_len()?, t.merkle_updater.todo_approximate_len()?,
t.data.gc_todo_len()? t.data.gc_todo_approximate_len()?
)) ))
} }

View file

@ -466,6 +466,10 @@ pub enum RepairWhat {
/// Repair (resync/rebalance) the set of stored blocks in the cluster /// Repair (resync/rebalance) the set of stored blocks in the cluster
#[structopt(name = "blocks", version = garage_version())] #[structopt(name = "blocks", version = garage_version())]
Blocks, Blocks,
/// Clear the block resync queue. The list of blocks in errored state
/// is cleared as well. You MUST run `garage repair blocks` after invoking this.
#[structopt(name = "clear-resync-queue", version = garage_version())]
ClearResyncQueue,
/// Repropagate object deletions to the version table /// Repropagate object deletions to the version table
#[structopt(name = "versions", version = garage_version())] #[structopt(name = "versions", version = garage_version())]
Versions, Versions,
@ -478,6 +482,9 @@ pub enum RepairWhat {
/// Recalculate block reference counters /// Recalculate block reference counters
#[structopt(name = "block-rc", version = garage_version())] #[structopt(name = "block-rc", version = garage_version())]
BlockRc, BlockRc,
/// Fix inconsistency in bucket aliases (WARNING: EXPERIMENTAL)
#[structopt(name = "aliases", version = garage_version())]
Aliases,
/// Verify integrity of all blocks on disc /// Verify integrity of all blocks on disc
#[structopt(name = "scrub", version = garage_version())] #[structopt(name = "scrub", version = garage_version())]
Scrub { Scrub {

View file

@ -208,6 +208,43 @@ fn init_logging(opt: &Opt) {
} }
} }
if std::env::var("GARAGE_LOG_TO_JOURNALD")
.map(|x| x == "1" || x == "true")
.unwrap_or(false)
{
#[cfg(feature = "journald")]
{
use tracing_journald::{Priority, PriorityMappings};
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;
let registry = tracing_subscriber::registry()
.with(tracing_subscriber::fmt::layer().with_writer(std::io::sink))
.with(env_filter);
match tracing_journald::layer() {
Ok(layer) => {
registry
.with(layer.with_priority_mappings(PriorityMappings {
info: Priority::Informational,
debug: Priority::Debug,
..PriorityMappings::new()
}))
.init();
}
Err(e) => {
eprintln!("Couldn't connect to journald: {}.", e);
std::process::exit(1);
}
}
return;
}
#[cfg(not(feature = "journald"))]
{
eprintln!("Journald support is not enabled in this build.");
std::process::exit(1);
}
}
tracing_subscriber::fmt() tracing_subscriber::fmt()
.with_writer(std::io::stderr) .with_writer(std::io::stderr)
.with_env_filter(env_filter) .with_env_filter(env_filter)

View file

@ -88,6 +88,15 @@ pub async fn launch_online_repair(
garage.block_manager.clone(), garage.block_manager.clone(),
)); ));
} }
RepairWhat::Aliases => {
info!("Repairing bucket aliases (foreground)");
garage.locked_helper().await.repair_aliases().await?;
}
RepairWhat::ClearResyncQueue => {
let garage = garage.clone();
tokio::task::spawn_blocking(move || garage.block_manager.resync.clear_resync_queue())
.await??
}
} }
Ok(()) Ok(())
} }

View file

@ -183,10 +183,21 @@ fn watch_shutdown_signal() -> watch::Receiver<bool> {
let mut sigterm = let mut sigterm =
signal(SignalKind::terminate()).expect("Failed to install SIGTERM handler"); signal(SignalKind::terminate()).expect("Failed to install SIGTERM handler");
let mut sighup = signal(SignalKind::hangup()).expect("Failed to install SIGHUP handler"); let mut sighup = signal(SignalKind::hangup()).expect("Failed to install SIGHUP handler");
loop {
tokio::select! { tokio::select! {
_ = sigint.recv() => info!("Received SIGINT, shutting down."), _ = sigint.recv() => {
_ = sigterm.recv() => info!("Received SIGTERM, shutting down."), info!("Received SIGINT, shutting down.");
_ = sighup.recv() => info!("Received SIGHUP, shutting down."), break
}
_ = sigterm.recv() => {
info!("Received SIGTERM, shutting down.");
break
}
_ = sighup.recv() => {
info!("Received SIGHUP, reload not supported.");
continue
}
}
} }
send_cancel.send(true).unwrap(); send_cancel.send(true).unwrap();
}); });

View file

@ -63,6 +63,8 @@ rpc_bind_addr = "127.0.0.1:{rpc_port}"
rpc_public_addr = "127.0.0.1:{rpc_port}" rpc_public_addr = "127.0.0.1:{rpc_port}"
rpc_secret = "{secret}" rpc_secret = "{secret}"
allow_punycode = true
[s3_api] [s3_api]
s3_region = "{region}" s3_region = "{region}"
api_bind_addr = "127.0.0.1:{s3_port}" api_bind_addr = "127.0.0.1:{s3_port}"

View file

@ -198,6 +198,7 @@ async fn test_precondition() {
); );
} }
let older_date = DateTime::from_secs_f64(last_modified.as_secs_f64() - 10.0); let older_date = DateTime::from_secs_f64(last_modified.as_secs_f64() - 10.0);
let same_date = DateTime::from_secs_f64(last_modified.as_secs_f64());
let newer_date = DateTime::from_secs_f64(last_modified.as_secs_f64() + 10.0); let newer_date = DateTime::from_secs_f64(last_modified.as_secs_f64() + 10.0);
{ {
let err = ctx let err = ctx
@ -212,6 +213,18 @@ async fn test_precondition() {
matches!(err, Err(SdkError::ServiceError(se)) if se.raw().status().as_u16() == 304) matches!(err, Err(SdkError::ServiceError(se)) if se.raw().status().as_u16() == 304)
); );
let err = ctx
.client
.get_object()
.bucket(&bucket)
.key(STD_KEY)
.if_modified_since(same_date)
.send()
.await;
assert!(
matches!(err, Err(SdkError::ServiceError(se)) if se.raw().status().as_u16() == 304)
);
let o = ctx let o = ctx
.client .client
.get_object() .get_object()
@ -236,6 +249,17 @@ async fn test_precondition() {
matches!(err, Err(SdkError::ServiceError(se)) if se.raw().status().as_u16() == 412) matches!(err, Err(SdkError::ServiceError(se)) if se.raw().status().as_u16() == 412)
); );
let o = ctx
.client
.get_object()
.bucket(&bucket)
.key(STD_KEY)
.if_unmodified_since(same_date)
.send()
.await
.unwrap();
assert_eq!(o.e_tag.as_ref().unwrap().as_str(), etag);
let o = ctx let o = ctx
.client .client
.get_object() .get_object()

View file

@ -533,3 +533,118 @@ async fn test_website_check_domain() {
}) })
); );
} }
#[tokio::test]
async fn test_website_puny() {
const BCKT_NAME: &str = "xn--pda.eu";
let ctx = common::context();
let bucket = ctx.create_bucket(BCKT_NAME);
let data = ByteStream::from_static(BODY);
ctx.client
.put_object()
.bucket(&bucket)
.key("index.html")
.body(data)
.send()
.await
.unwrap();
let client = Client::builder(TokioExecutor::new()).build_http();
let req = |suffix| {
Request::builder()
.method("GET")
.uri(format!("http://127.0.0.1:{}/", ctx.garage.web_port))
.header("Host", format!("{}{}", BCKT_NAME, suffix))
.body(Body::new(Bytes::new()))
.unwrap()
};
ctx.garage
.command()
.args(["bucket", "website", "--allow", BCKT_NAME])
.quiet()
.expect_success_status("Could not allow website on bucket");
let mut resp = client.request(req("")).await.unwrap();
assert_eq!(resp.status(), StatusCode::OK);
assert_eq!(
resp.into_body().collect().await.unwrap().to_bytes(),
BODY.as_ref()
);
resp = client.request(req(".web.garage")).await.unwrap();
assert_eq!(resp.status(), StatusCode::OK);
assert_eq!(
resp.into_body().collect().await.unwrap().to_bytes(),
BODY.as_ref()
);
for bname in [
BCKT_NAME.to_string(),
format!("{BCKT_NAME}.web.garage"),
format!("{BCKT_NAME}.s3.garage"),
] {
let admin_req = || {
Request::builder()
.method("GET")
.uri(format!(
"http://127.0.0.1:{0}/check?domain={1}",
ctx.garage.admin_port, bname
))
.body(Body::new(Bytes::new()))
.unwrap()
};
let admin_resp = client.request(admin_req()).await.unwrap();
assert_eq!(admin_resp.status(), StatusCode::OK);
assert_eq!(
admin_resp.into_body().collect().await.unwrap().to_bytes(),
format!("Domain '{bname}' is managed by Garage").as_bytes()
);
}
}
#[tokio::test]
async fn test_website_object_not_found() {
const BCKT_NAME: &str = "not-found";
let ctx = common::context();
let _bucket = ctx.create_bucket(BCKT_NAME);
let client = Client::builder(TokioExecutor::new()).build_http();
let req = |suffix| {
Request::builder()
.method("GET")
.uri(format!("http://127.0.0.1:{}/", ctx.garage.web_port))
.header("Host", format!("{}{}", BCKT_NAME, suffix))
.body(Body::new(Bytes::new()))
.unwrap()
};
ctx.garage
.command()
.args(["bucket", "website", "--allow", BCKT_NAME])
.quiet()
.expect_success_status("Could not allow website on bucket");
let resp = client.request(req("")).await.unwrap();
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
// the error we return by default are *not* xml
assert_eq!(
resp.headers().get(http::header::CONTENT_TYPE).unwrap(),
"text/html; charset=utf-8"
);
let result = String::from_utf8(
resp.into_body()
.collect()
.await
.unwrap()
.to_bytes()
.to_vec(),
)
.unwrap();
assert!(result.contains("not found"));
}

View file

@ -72,6 +72,16 @@ impl K2vClient {
.enable_http2() .enable_http2()
.build(); .build();
let client = HttpClient::builder(TokioExecutor::new()).build(connector); let client = HttpClient::builder(TokioExecutor::new()).build(connector);
Self::new_with_client(config, client)
}
/// Create a new K2V client with an external client.
/// Useful for example if you plan on creating many clients but you want to mutualize the
/// underlying thread pools & co.
pub fn new_with_client(
config: K2vClientConfig,
client: HttpClient<HttpsConnector<HttpConnector>, Body>,
) -> Result<Self, Error> {
let user_agent: std::borrow::Cow<str> = match &config.user_agent { let user_agent: std::borrow::Cow<str> = match &config.user_agent {
Some(ua) => ua.into(), Some(ua) => ua.into(),
None => format!("k2v/{}", env!("CARGO_PKG_VERSION")).into(), None => format!("k2v/{}", env!("CARGO_PKG_VERSION")).into(),

View file

@ -1,6 +1,6 @@
[package] [package]
name = "garage_model" name = "garage_model"
version = "1.1.0" version = "1.3.1"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -24,7 +24,7 @@ garage_net.workspace = true
async-trait.workspace = true async-trait.workspace = true
blake2.workspace = true blake2.workspace = true
chrono.workspace = true chrono.workspace = true
err-derive.workspace = true thiserror.workspace = true
hex.workspace = true hex.workspace = true
http.workspace = true http.workspace = true
base64.workspace = true base64.workspace = true
@ -44,3 +44,4 @@ default = [ "lmdb", "sqlite" ]
k2v = [ "garage_util/k2v" ] k2v = [ "garage_util/k2v" ]
lmdb = [ "garage_db/lmdb" ] lmdb = [ "garage_db/lmdb" ]
sqlite = [ "garage_db/sqlite" ] sqlite = [ "garage_db/sqlite" ]
fjall = [ "garage_db/fjall" ]

View file

@ -22,14 +22,10 @@ mod v08 {
pub use v08::*; pub use v08::*;
impl BucketAlias { impl BucketAlias {
pub fn new(name: String, ts: u64, bucket_id: Option<Uuid>) -> Option<Self> { pub fn new(name: String, ts: u64, bucket_id: Option<Uuid>) -> Self {
if !is_valid_bucket_name(&name) { BucketAlias {
None
} else {
Some(BucketAlias {
name, name,
state: crdt::Lww::raw(ts, bucket_id), state: crdt::Lww::raw(ts, bucket_id),
})
} }
} }
@ -80,7 +76,7 @@ impl TableSchema for BucketAliasTable {
/// In the case of Garage, bucket names must not be hex-encoded /// In the case of Garage, bucket names must not be hex-encoded
/// 32 byte string, which is excluded thanks to the /// 32 byte string, which is excluded thanks to the
/// maximum length of 63 bytes given in the spec. /// maximum length of 63 bytes given in the spec.
pub fn is_valid_bucket_name(n: &str) -> bool { pub fn is_valid_bucket_name(n: &str, puny: bool) -> bool {
// Bucket names must be between 3 and 63 characters // Bucket names must be between 3 and 63 characters
n.len() >= 3 && n.len() <= 63 n.len() >= 3 && n.len() <= 63
// Bucket names must be composed of lowercase letters, numbers, // Bucket names must be composed of lowercase letters, numbers,
@ -92,7 +88,9 @@ pub fn is_valid_bucket_name(n: &str) -> bool {
// Bucket names must not be formatted as an IP address // Bucket names must not be formatted as an IP address
&& n.parse::<std::net::IpAddr>().is_err() && n.parse::<std::net::IpAddr>().is_err()
// Bucket names must not start with "xn--" // Bucket names must not start with "xn--"
&& !n.starts_with("xn--") && (!n.starts_with("xn--") || puny)
// We are a bit stricter, to properly restrict punycode in all labels
&& (!n.contains(".xn--") || puny)
// Bucket names must not end with "-s3alias" // Bucket names must not end with "-s3alias"
&& !n.ends_with("-s3alias") && !n.ends_with("-s3alias")
} }

View file

@ -116,21 +116,17 @@ impl Garage {
info!("Opening database..."); info!("Opening database...");
let db_engine = db::Engine::from_str(&config.db_engine) let db_engine = db::Engine::from_str(&config.db_engine)
.ok_or_message("Invalid `db_engine` value in configuration file")?; .ok_or_message("Invalid `db_engine` value in configuration file")?;
let mut db_path = config.metadata_dir.clone(); let db_path = db_engine.db_path(&config.metadata_dir);
match db_engine {
db::Engine::Sqlite => {
db_path.push("db.sqlite");
}
db::Engine::Lmdb => {
db_path.push("db.lmdb");
}
}
let db_opt = db::OpenOpt { let db_opt = db::OpenOpt {
fsync: config.metadata_fsync, fsync: config.metadata_fsync,
lmdb_map_size: match config.lmdb_map_size { lmdb_map_size: match config.lmdb_map_size {
v if v == usize::default() => None, v if v == usize::default() => None,
v => Some(v), v => Some(v),
}, },
fjall_block_cache_size: match config.fjall_block_cache_size {
v if v == usize::default() => None,
v => Some(v),
},
}; };
let db = db::open_db(&db_path, db_engine, &db_opt) let db = db::open_db(&db_path, db_engine, &db_opt)
.ok_or_message("Unable to open metadata db")?; .ok_or_message("Unable to open metadata db")?;
@ -319,15 +315,15 @@ impl Garage {
Ok(()) Ok(())
} }
pub fn bucket_helper(&self) -> helper::bucket::BucketHelper { pub fn bucket_helper(&self) -> helper::bucket::BucketHelper<'_> {
helper::bucket::BucketHelper(self) helper::bucket::BucketHelper(self)
} }
pub fn key_helper(&self) -> helper::key::KeyHelper { pub fn key_helper(&self) -> helper::key::KeyHelper<'_> {
helper::key::KeyHelper(self) helper::key::KeyHelper(self)
} }
pub async fn locked_helper(&self) -> helper::locked::LockedHelper { pub async fn locked_helper(&self) -> helper::locked::LockedHelper<'_> {
let lock = self.bucket_lock.lock().await; let lock = self.bucket_lock.lock().await;
helper::locked::LockedHelper(self, Some(lock)) helper::locked::LockedHelper(self, Some(lock))
} }

View file

@ -1,24 +1,24 @@
use err_derive::Error;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use thiserror::Error;
use garage_util::error::Error as GarageError; use garage_util::error::Error as GarageError;
#[derive(Debug, Error, Serialize, Deserialize)] #[derive(Debug, Error, Serialize, Deserialize)]
pub enum Error { pub enum Error {
#[error(display = "Internal error: {}", _0)] #[error("Internal error: {0}")]
Internal(#[error(source)] GarageError), Internal(#[from] GarageError),
#[error(display = "Bad request: {}", _0)] #[error("Bad request: {0}")]
BadRequest(String), BadRequest(String),
/// Bucket name is not valid according to AWS S3 specs /// Bucket name is not valid according to AWS S3 specs
#[error(display = "Invalid bucket name: {}", _0)] #[error("Invalid bucket name: {0}")]
InvalidBucketName(String), InvalidBucketName(String),
#[error(display = "Access key not found: {}", _0)] #[error("Access key not found: {0}")]
NoSuchAccessKey(String), NoSuchAccessKey(String),
#[error(display = "Bucket not found: {}", _0)] #[error("Bucket not found: {0}")]
NoSuchBucket(String), NoSuchBucket(String),
} }

View file

@ -1,3 +1,7 @@
use std::collections::{HashMap, HashSet};
use garage_db as db;
use garage_util::crdt::*; use garage_util::crdt::*;
use garage_util::data::*; use garage_util::data::*;
use garage_util::error::{Error as GarageError, OkOrMessage}; use garage_util::error::{Error as GarageError, OkOrMessage};
@ -47,6 +51,10 @@ impl<'a> LockedHelper<'a> {
KeyHelper(self.0) KeyHelper(self.0)
} }
// ================================================
// global bucket aliases
// ================================================
/// Sets a new alias for a bucket in global namespace. /// Sets a new alias for a bucket in global namespace.
/// This function fails if: /// This function fails if:
/// - alias name is not valid according to S3 spec /// - alias name is not valid according to S3 spec
@ -57,7 +65,7 @@ impl<'a> LockedHelper<'a> {
bucket_id: Uuid, bucket_id: Uuid,
alias_name: &String, alias_name: &String,
) -> Result<(), Error> { ) -> Result<(), Error> {
if !is_valid_bucket_name(alias_name) { if !is_valid_bucket_name(alias_name, self.0.config.allow_punycode) {
return Err(Error::InvalidBucketName(alias_name.to_string())); return Err(Error::InvalidBucketName(alias_name.to_string()));
} }
@ -88,8 +96,7 @@ impl<'a> LockedHelper<'a> {
// writes are now done and all writes use timestamp alias_ts // writes are now done and all writes use timestamp alias_ts
let alias = match alias { let alias = match alias {
None => BucketAlias::new(alias_name.clone(), alias_ts, Some(bucket_id)) None => BucketAlias::new(alias_name.clone(), alias_ts, Some(bucket_id)),
.ok_or_else(|| Error::InvalidBucketName(alias_name.clone()))?,
Some(mut a) => { Some(mut a) => {
a.state = Lww::raw(alias_ts, Some(bucket_id)); a.state = Lww::raw(alias_ts, Some(bucket_id));
a a
@ -180,13 +187,14 @@ impl<'a> LockedHelper<'a> {
.ok_or_else(|| Error::NoSuchBucket(alias_name.to_string()))?; .ok_or_else(|| Error::NoSuchBucket(alias_name.to_string()))?;
// Checks ok, remove alias // Checks ok, remove alias
let alias_ts = match bucket.state.as_option() { let alias_ts = increment_logical_clock_2(
Some(bucket_state) => increment_logical_clock_2(
alias.state.timestamp(), alias.state.timestamp(),
bucket_state.aliases.get_timestamp(alias_name), bucket
), .state
None => increment_logical_clock(alias.state.timestamp()), .as_option()
}; .map(|p| p.aliases.get_timestamp(alias_name))
.unwrap_or(0),
);
// ---- timestamp-ensured causality barrier ---- // ---- timestamp-ensured causality barrier ----
// writes are now done and all writes use timestamp alias_ts // writes are now done and all writes use timestamp alias_ts
@ -204,6 +212,10 @@ impl<'a> LockedHelper<'a> {
Ok(()) Ok(())
} }
// ================================================
// local bucket aliases
// ================================================
/// Sets a new alias for a bucket in the local namespace of a key. /// Sets a new alias for a bucket in the local namespace of a key.
/// This function fails if: /// This function fails if:
/// - alias name is not valid according to S3 spec /// - alias name is not valid according to S3 spec
@ -216,14 +228,12 @@ impl<'a> LockedHelper<'a> {
key_id: &String, key_id: &String,
alias_name: &String, alias_name: &String,
) -> Result<(), Error> { ) -> Result<(), Error> {
let key_helper = KeyHelper(self.0); if !is_valid_bucket_name(alias_name, self.0.config.allow_punycode) {
if !is_valid_bucket_name(alias_name) {
return Err(Error::InvalidBucketName(alias_name.to_string())); return Err(Error::InvalidBucketName(alias_name.to_string()));
} }
let mut bucket = self.bucket().get_existing_bucket(bucket_id).await?; let mut bucket = self.bucket().get_existing_bucket(bucket_id).await?;
let mut key = key_helper.get_existing_key(key_id).await?; let mut key = self.key().get_existing_key(key_id).await?;
let key_param = key.state.as_option_mut().unwrap(); let key_param = key.state.as_option_mut().unwrap();
@ -272,23 +282,13 @@ impl<'a> LockedHelper<'a> {
key_id: &String, key_id: &String,
alias_name: &String, alias_name: &String,
) -> Result<(), Error> { ) -> Result<(), Error> {
let key_helper = KeyHelper(self.0);
let mut bucket = self.bucket().get_existing_bucket(bucket_id).await?; let mut bucket = self.bucket().get_existing_bucket(bucket_id).await?;
let mut key = key_helper.get_existing_key(key_id).await?; let mut key = self.key().get_existing_key(key_id).await?;
let key_p = key.state.as_option().unwrap();
let bucket_p = bucket.state.as_option_mut().unwrap(); let bucket_p = bucket.state.as_option_mut().unwrap();
if key if key_p.local_aliases.get(alias_name).cloned().flatten() != Some(bucket_id) {
.state
.as_option()
.unwrap()
.local_aliases
.get(alias_name)
.cloned()
.flatten()
!= Some(bucket_id)
{
return Err(GarageError::Message(format!( return Err(GarageError::Message(format!(
"Bucket {:?} does not have alias {} in namespace of key {}", "Bucket {:?} does not have alias {} in namespace of key {}",
bucket_id, alias_name, key_id bucket_id, alias_name, key_id
@ -305,17 +305,17 @@ impl<'a> LockedHelper<'a> {
.local_aliases .local_aliases
.items() .items()
.iter() .iter()
.any(|((k, n), _, active)| *k == key.key_id && n == alias_name && *active); .any(|((k, n), _, active)| (*k != key.key_id || n != alias_name) && *active);
if !has_other_global_aliases && !has_other_local_aliases { if !has_other_global_aliases && !has_other_local_aliases {
return Err(Error::BadRequest(format!("Bucket {} doesn't have other aliases, please delete it instead of just unaliasing.", alias_name))); return Err(Error::BadRequest(format!("Bucket {} doesn't have other aliases, please delete it instead of just unaliasing.", alias_name)));
} }
// Checks ok, remove alias // Checks ok, remove alias
let key_param = key.state.as_option_mut().unwrap();
let bucket_p_local_alias_key = (key.key_id.clone(), alias_name.clone()); let bucket_p_local_alias_key = (key.key_id.clone(), alias_name.clone());
let alias_ts = increment_logical_clock_2( let alias_ts = increment_logical_clock_2(
key_param.local_aliases.get_timestamp(alias_name), key_p.local_aliases.get_timestamp(alias_name),
bucket_p bucket_p
.local_aliases .local_aliases
.get_timestamp(&bucket_p_local_alias_key), .get_timestamp(&bucket_p_local_alias_key),
@ -324,7 +324,8 @@ impl<'a> LockedHelper<'a> {
// ---- timestamp-ensured causality barrier ---- // ---- timestamp-ensured causality barrier ----
// writes are now done and all writes use timestamp alias_ts // writes are now done and all writes use timestamp alias_ts
key_param.local_aliases = LwwMap::raw_item(alias_name.clone(), alias_ts, None); key.state.as_option_mut().unwrap().local_aliases =
LwwMap::raw_item(alias_name.clone(), alias_ts, None);
self.0.key_table.insert(&key).await?; self.0.key_table.insert(&key).await?;
bucket_p.local_aliases = LwwMap::raw_item(bucket_p_local_alias_key, alias_ts, false); bucket_p.local_aliases = LwwMap::raw_item(bucket_p_local_alias_key, alias_ts, false);
@ -333,21 +334,68 @@ impl<'a> LockedHelper<'a> {
Ok(()) Ok(())
} }
/// Ensures a bucket does not have a certain local alias.
/// Contrarily to unset_local_bucket_alias, this does not
/// fail on any condition other than:
/// - bucket cannot be found (its fine if it is in deleted state)
/// - key cannot be found (its fine if alias in key points to nothing
/// or to another bucket)
pub async fn purge_local_bucket_alias(
&self,
bucket_id: Uuid,
key_id: &String,
alias_name: &String,
) -> Result<(), Error> {
let mut bucket = self.bucket().get_internal_bucket(bucket_id).await?;
let mut key = self.key().get_internal_key(key_id).await?;
let bucket_p_local_alias_key = (key.key_id.clone(), alias_name.clone());
let alias_ts = increment_logical_clock_2(
key.state
.as_option()
.map(|p| p.local_aliases.get_timestamp(alias_name))
.unwrap_or(0),
bucket
.state
.as_option()
.map(|p| p.local_aliases.get_timestamp(&bucket_p_local_alias_key))
.unwrap_or(0),
);
// ---- timestamp-ensured causality barrier ----
// writes are now done and all writes use timestamp alias_ts
if let Some(kp) = key.state.as_option_mut() {
kp.local_aliases = LwwMap::raw_item(alias_name.clone(), alias_ts, None);
self.0.key_table.insert(&key).await?;
}
if let Some(bp) = bucket.state.as_option_mut() {
bp.local_aliases = LwwMap::raw_item(bucket_p_local_alias_key, alias_ts, false);
self.0.bucket_table.insert(&bucket).await?;
}
Ok(())
}
// ================================================
// permissions
// ================================================
/// Sets permissions for a key on a bucket. /// Sets permissions for a key on a bucket.
/// This function fails if: /// This function fails if:
/// - bucket or key cannot be found at all (its ok if they are in deleted state) /// - bucket or key cannot be found at all (its ok if they are in deleted state)
/// - bucket or key is in deleted state and we are trying to set permissions other than "deny /// - bucket or key is in deleted state and we are trying to set
/// all" /// permissions other than "deny all"
pub async fn set_bucket_key_permissions( pub async fn set_bucket_key_permissions(
&self, &self,
bucket_id: Uuid, bucket_id: Uuid,
key_id: &String, key_id: &String,
mut perm: BucketKeyPerm, mut perm: BucketKeyPerm,
) -> Result<(), Error> { ) -> Result<(), Error> {
let key_helper = KeyHelper(self.0);
let mut bucket = self.bucket().get_internal_bucket(bucket_id).await?; let mut bucket = self.bucket().get_internal_bucket(bucket_id).await?;
let mut key = key_helper.get_internal_key(key_id).await?; let mut key = self.key().get_internal_key(key_id).await?;
if let Some(bstate) = bucket.state.as_option() { if let Some(bstate) = bucket.state.as_option() {
if let Some(kp) = bstate.authorized_keys.get(key_id) { if let Some(kp) = bstate.authorized_keys.get(key_id) {
@ -384,21 +432,20 @@ impl<'a> LockedHelper<'a> {
Ok(()) Ok(())
} }
// ---- // ================================================
// keys
// ================================================
/// Deletes an API access key /// Deletes an API access key
pub async fn delete_key(&self, key: &mut Key) -> Result<(), Error> { pub async fn delete_key(&self, key: &mut Key) -> Result<(), Error> {
let state = key.state.as_option_mut().unwrap(); let state = key.state.as_option_mut().unwrap();
// --- done checking, now commit --- // --- done checking, now commit ---
// (the step at unset_local_bucket_alias will fail if a bucket
// does not have another alias, the deletion will be
// interrupted in the middle if that happens)
// 1. Delete local aliases // 1. Delete local aliases
for (alias, _, to) in state.local_aliases.items().iter() { for (alias, _, to) in state.local_aliases.items().iter() {
if let Some(bucket_id) = to { if let Some(bucket_id) = to {
self.unset_local_bucket_alias(*bucket_id, &key.key_id, alias) self.purge_local_bucket_alias(*bucket_id, &key.key_id, alias)
.await?; .await?;
} }
} }
@ -415,4 +462,193 @@ impl<'a> LockedHelper<'a> {
Ok(()) Ok(())
} }
// ================================================
// repair procedure
// ================================================
pub async fn repair_aliases(&self) -> Result<(), GarageError> {
self.0.db.transaction(|tx| {
info!("--- begin repair_aliases transaction ----");
// 1. List all non-deleted buckets, so that we can fix bad aliases
let mut all_buckets: HashSet<Uuid> = HashSet::new();
for item in tx.range::<&[u8], _>(&self.0.bucket_table.data.store, ..)? {
let bucket = self
.0
.bucket_table
.data
.decode_entry(&(item?.1))
.map_err(db::TxError::Abort)?;
if !bucket.is_deleted() {
all_buckets.insert(bucket.id);
}
}
info!("number of buckets: {}", all_buckets.len());
// 2. List all aliases declared in bucket_alias_table and key_table
// Take note of aliases that point to non-existing buckets
let mut global_aliases: HashMap<String, Uuid> = HashMap::new();
{
let mut delete_global = vec![];
for item in tx.range::<&[u8], _>(&self.0.bucket_alias_table.data.store, ..)? {
let mut alias = self
.0
.bucket_alias_table
.data
.decode_entry(&(item?.1))
.map_err(db::TxError::Abort)?;
if let Some(id) = alias.state.get() {
if all_buckets.contains(id) {
// keep aliases
global_aliases.insert(alias.name().to_string(), *id);
} else {
// delete alias
warn!(
"global alias: remove {} -> {:?} (bucket is deleted)",
alias.name(),
id
);
alias.state.update(None);
delete_global.push(alias);
}
}
}
info!("number of global aliases: {}", global_aliases.len());
info!("global alias table: {} entries fixed", delete_global.len());
for ga in delete_global {
debug!("Enqueue update to global alias table: {:?}", ga);
self.0.bucket_alias_table.queue_insert(tx, &ga)?;
}
}
let mut local_aliases: HashMap<(String, String), Uuid> = HashMap::new();
{
let mut delete_local = vec![];
for item in tx.range::<&[u8], _>(&self.0.key_table.data.store, ..)? {
let mut key = self
.0
.key_table
.data
.decode_entry(&(item?.1))
.map_err(db::TxError::Abort)?;
let Some(p) = key.state.as_option_mut() else {
continue;
};
let mut has_changes = false;
for (name, _, to) in p.local_aliases.items().to_vec() {
if let Some(id) = to {
if all_buckets.contains(&id) {
local_aliases.insert((key.key_id.clone(), name), id);
} else {
warn!(
"local alias: remove ({}, {}) -> {:?} (bucket is deleted)",
key.key_id, name, id
);
p.local_aliases.update_in_place(name, None);
has_changes = true;
}
}
}
if has_changes {
delete_local.push(key);
}
}
info!("number of local aliases: {}", local_aliases.len());
info!("key table: {} entries fixed", delete_local.len());
for la in delete_local {
debug!("Enqueue update to key table: {:?}", la);
self.0.key_table.queue_insert(tx, &la)?;
}
}
// 4. Reverse the alias maps to determine the aliases per-bucket
let mut bucket_global: HashMap<Uuid, Vec<String>> = HashMap::new();
let mut bucket_local: HashMap<Uuid, Vec<(String, String)>> = HashMap::new();
for (name, bucket) in global_aliases {
bucket_global.entry(bucket).or_default().push(name);
}
for ((key, name), bucket) in local_aliases {
bucket_local.entry(bucket).or_default().push((key, name));
}
// 5. Fix the bucket table to ensure consistency
let mut bucket_updates = vec![];
for item in tx.range::<&[u8], _>(&self.0.bucket_table.data.store, ..)? {
let bucket = self
.0
.bucket_table
.data
.decode_entry(&(item?.1))
.map_err(db::TxError::Abort)?;
let mut bucket2 = bucket.clone();
let Some(param) = bucket2.state.as_option_mut() else {
continue;
};
// fix global aliases
{
let ga = bucket_global.remove(&bucket.id).unwrap_or_default();
for (name, _, active) in param.aliases.items().to_vec() {
if active && !ga.contains(&name) {
warn!("bucket {:?}: remove global alias {}", bucket.id, name);
param.aliases.update_in_place(name, false);
}
}
for name in ga {
if param.aliases.get(&name).copied() != Some(true) {
warn!("bucket {:?}: add global alias {}", bucket.id, name);
param.aliases.update_in_place(name, true);
}
}
}
// fix local aliases
{
let la = bucket_local.remove(&bucket.id).unwrap_or_default();
for (pair, _, active) in param.local_aliases.items().to_vec() {
if active && !la.contains(&pair) {
warn!("bucket {:?}: remove local alias {:?}", bucket.id, pair);
param.local_aliases.update_in_place(pair, false);
}
}
for pair in la {
if param.local_aliases.get(&pair).copied() != Some(true) {
warn!("bucket {:?}: add local alias {:?}", bucket.id, pair);
param.local_aliases.update_in_place(pair, true);
}
}
}
if bucket2 != bucket {
bucket_updates.push(bucket2);
}
}
info!("bucket table: {} entries fixed", bucket_updates.len());
for b in bucket_updates {
debug!("Enqueue update to bucket table: {:?}", b);
self.0.bucket_table.queue_insert(tx, &b)?;
}
info!("--- end repair_aliases transaction ----");
Ok(())
})?;
info!("repair_aliases is done");
Ok(())
}
} }

View file

@ -121,13 +121,13 @@ impl Worker for LifecycleWorker {
mpu_aborted, mpu_aborted,
.. ..
} => { } => {
let n_objects = self.garage.object_table.data.store.len().ok(); let n_objects = self.garage.object_table.data.store.approximate_len().ok();
let progress = match n_objects { let progress = match n_objects {
None => "...".to_string(), Some(total) if total > 0 => format!(
Some(total) => format!(
"~{:.2}%", "~{:.2}%",
100. * std::cmp::min(*counter, total) as f32 / total as f32 100. * std::cmp::min(*counter, total) as f32 / total as f32
), ),
_ => "...".to_string(),
}; };
WorkerStatus { WorkerStatus {
progress: Some(progress), progress: Some(progress),

View file

@ -1,6 +1,6 @@
[package] [package]
name = "garage_net" name = "garage_net"
version = "1.1.0" version = "1.3.1"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -30,7 +30,7 @@ rand.workspace = true
log.workspace = true log.workspace = true
arc-swap.workspace = true arc-swap.workspace = true
err-derive.workspace = true thiserror.workspace = true
bytes.workspace = true bytes.workspace = true
cfg-if.workspace = true cfg-if.workspace = true

View file

@ -159,7 +159,7 @@ where
pub(crate) type DynEndpoint = Box<dyn GenericEndpoint + Send + Sync>; pub(crate) type DynEndpoint = Box<dyn GenericEndpoint + Send + Sync>;
pub(crate) trait GenericEndpoint { pub(crate) trait GenericEndpoint {
fn handle(&self, req_enc: ReqEnc, from: NodeID) -> BoxFuture<Result<RespEnc, Error>>; fn handle(&self, req_enc: ReqEnc, from: NodeID) -> BoxFuture<'_, Result<RespEnc, Error>>;
fn drop_handler(&self); fn drop_handler(&self);
fn clone_endpoint(&self) -> DynEndpoint; fn clone_endpoint(&self) -> DynEndpoint;
} }
@ -175,7 +175,7 @@ where
M: Message, M: Message,
H: StreamingEndpointHandler<M> + 'static, H: StreamingEndpointHandler<M> + 'static,
{ {
fn handle(&self, req_enc: ReqEnc, from: NodeID) -> BoxFuture<Result<RespEnc, Error>> { fn handle(&self, req_enc: ReqEnc, from: NodeID) -> BoxFuture<'_, Result<RespEnc, Error>> {
async move { async move {
match self.0.handler.load_full() { match self.0.handler.load_full() {
None => Err(Error::NoHandler), None => Err(Error::NoHandler),

View file

@ -1,49 +1,49 @@
use std::io; use std::io;
use err_derive::Error;
use log::error; use log::error;
use thiserror::Error;
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum Error { pub enum Error {
#[error(display = "IO error: {}", _0)] #[error("IO error: {0}")]
Io(#[error(source)] io::Error), Io(#[from] io::Error),
#[error(display = "Messagepack encode error: {}", _0)] #[error("Messagepack encode error: {0}")]
RMPEncode(#[error(source)] rmp_serde::encode::Error), RMPEncode(#[from] rmp_serde::encode::Error),
#[error(display = "Messagepack decode error: {}", _0)] #[error("Messagepack decode error: {0}")]
RMPDecode(#[error(source)] rmp_serde::decode::Error), RMPDecode(#[from] rmp_serde::decode::Error),
#[error(display = "Tokio join error: {}", _0)] #[error("Tokio join error: {0}")]
TokioJoin(#[error(source)] tokio::task::JoinError), TokioJoin(#[from] tokio::task::JoinError),
#[error(display = "oneshot receive error: {}", _0)] #[error("oneshot receive error: {0}")]
OneshotRecv(#[error(source)] tokio::sync::oneshot::error::RecvError), OneshotRecv(#[from] tokio::sync::oneshot::error::RecvError),
#[error(display = "Handshake error: {}", _0)] #[error("Handshake error: {0}")]
Handshake(#[error(source)] kuska_handshake::async_std::Error), Handshake(#[from] kuska_handshake::async_std::Error),
#[error(display = "UTF8 error: {}", _0)] #[error("UTF8 error: {0}")]
UTF8(#[error(source)] std::string::FromUtf8Error), UTF8(#[from] std::string::FromUtf8Error),
#[error(display = "Framing protocol error")] #[error("Framing protocol error")]
Framing, Framing,
#[error(display = "Remote error ({:?}): {}", _0, _1)] #[error("Remote error ({0:?}): {1}")]
Remote(io::ErrorKind, String), Remote(io::ErrorKind, String),
#[error(display = "Request ID collision")] #[error("Request ID collision")]
IdCollision, IdCollision,
#[error(display = "{}", _0)] #[error("{0}")]
Message(String), Message(String),
#[error(display = "No handler / shutting down")] #[error("No handler / shutting down")]
NoHandler, NoHandler,
#[error(display = "Connection closed")] #[error("Connection closed")]
ConnectionClosed, ConnectionClosed,
#[error(display = "Version mismatch: {}", _0)] #[error("Version mismatch: {0}")]
VersionMismatch(String), VersionMismatch(String),
} }

View file

@ -1,6 +1,6 @@
[package] [package]
name = "garage_rpc" name = "garage_rpc"
version = "1.1.0" version = "1.3.1"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -33,7 +33,7 @@ async-trait.workspace = true
serde.workspace = true serde.workspace = true
serde_bytes.workspace = true serde_bytes.workspace = true
serde_json.workspace = true serde_json.workspace = true
err-derive = { workspace = true, optional = true } thiserror = { workspace = true, optional = true }
# newer version requires rust edition 2021 # newer version requires rust edition 2021
kube = { workspace = true, optional = true } kube = { workspace = true, optional = true }
@ -49,5 +49,5 @@ opentelemetry.workspace = true
[features] [features]
kubernetes-discovery = [ "kube", "k8s-openapi", "schemars" ] kubernetes-discovery = [ "kube", "k8s-openapi", "schemars" ]
consul-discovery = [ "reqwest", "err-derive" ] consul-discovery = [ "reqwest", "thiserror" ]
system-libs = [ "sodiumoxide/use-pkg-config" ] system-libs = [ "sodiumoxide/use-pkg-config" ]

View file

@ -3,8 +3,8 @@ use std::fs::File;
use std::io::Read; use std::io::Read;
use std::net::{IpAddr, SocketAddr}; use std::net::{IpAddr, SocketAddr};
use err_derive::Error;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use thiserror::Error;
use garage_net::NodeID; use garage_net::NodeID;
@ -219,12 +219,12 @@ impl ConsulDiscovery {
/// Regroup all Consul discovery errors /// Regroup all Consul discovery errors
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum ConsulError { pub enum ConsulError {
#[error(display = "IO error: {}", _0)] #[error("IO error: {0}")]
Io(#[error(source)] std::io::Error), Io(#[from] std::io::Error),
#[error(display = "HTTP error: {}", _0)] #[error("HTTP error: {0}")]
Reqwest(#[error(source)] reqwest::Error), Reqwest(#[from] reqwest::Error),
#[error(display = "Invalid Consul TLS configuration")] #[error("Invalid Consul TLS configuration")]
InvalidTLSConfig, InvalidTLSConfig,
#[error(display = "Token error: {}", _0)] #[error("Token error: {0}")]
Token(#[error(source)] reqwest::header::InvalidHeaderValue), Token(#[from] reqwest::header::InvalidHeaderValue),
} }

View file

@ -229,13 +229,11 @@ impl LayoutManager {
} }
/// Save cluster layout data to disk /// Save cluster layout data to disk
async fn save_cluster_layout(&self) -> Result<(), Error> { async fn save_cluster_layout(&self) {
let layout = self.layout.read().unwrap().inner().clone(); let layout = self.layout.read().unwrap().inner().clone();
self.persist_cluster_layout if let Err(e) = self.persist_cluster_layout.save_async(&layout).await {
.save_async(&layout) error!("Failed to save cluster_layout: {}", e);
.await }
.expect("Cannot save current cluster layout");
Ok(())
} }
fn broadcast_update(self: &Arc<Self>, rpc: SystemRpc) { fn broadcast_update(self: &Arc<Self>, rpc: SystemRpc) {
@ -313,7 +311,7 @@ impl LayoutManager {
self.change_notify.notify_waiters(); self.change_notify.notify_waiters();
self.broadcast_update(SystemRpc::AdvertiseClusterLayout(new_layout)); self.broadcast_update(SystemRpc::AdvertiseClusterLayout(new_layout));
self.save_cluster_layout().await?; self.save_cluster_layout().await;
} }
Ok(SystemRpc::Ok) Ok(SystemRpc::Ok)
@ -328,7 +326,7 @@ impl LayoutManager {
if let Some(new_trackers) = self.merge_layout_trackers(trackers) { if let Some(new_trackers) = self.merge_layout_trackers(trackers) {
self.change_notify.notify_waiters(); self.change_notify.notify_waiters();
self.broadcast_update(SystemRpc::AdvertiseClusterLayoutTrackers(new_trackers)); self.broadcast_update(SystemRpc::AdvertiseClusterLayoutTrackers(new_trackers));
self.save_cluster_layout().await?; self.save_cluster_layout().await;
} }
Ok(SystemRpc::Ok) Ok(SystemRpc::Ok)

View file

@ -507,7 +507,7 @@ impl LayoutVersion {
g.compute_maximal_flow()?; g.compute_maximal_flow()?;
if g.get_flow_value()? < (NB_PARTITIONS * self.replication_factor) as i64 { if g.get_flow_value()? < (NB_PARTITIONS * self.replication_factor) as i64 {
return Err(Error::Message( return Err(Error::Message(
"The storage capacity of he cluster is to small. It is \ "The storage capacity of the cluster is too small. It is \
impossible to store partitions of size 1." impossible to store partitions of size 1."
.into(), .into(),
)); ));

View file

@ -1,6 +1,6 @@
[package] [package]
name = "garage_table" name = "garage_table"
version = "1.1.0" version = "1.3.1"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"

View file

@ -367,7 +367,7 @@ impl<F: TableSchema, R: TableReplication> TableData<F, R> {
} }
} }
pub fn gc_todo_len(&self) -> Result<usize, Error> { pub fn gc_todo_approximate_len(&self) -> Result<usize, Error> {
Ok(self.gc_todo.len()?) Ok(self.gc_todo.approximate_len()?)
} }
} }

View file

@ -313,7 +313,7 @@ impl<F: TableSchema, R: TableReplication> Worker for GcWorker<F, R> {
fn status(&self) -> WorkerStatus { fn status(&self) -> WorkerStatus {
WorkerStatus { WorkerStatus {
queue_length: Some(self.gc.data.gc_todo_len().unwrap_or(0) as u64), queue_length: Some(self.gc.data.gc_todo_approximate_len().unwrap_or(0) as u64),
..Default::default() ..Default::default()
} }
} }

View file

@ -287,12 +287,12 @@ impl<F: TableSchema, R: TableReplication> MerkleUpdater<F, R> {
MerkleNode::decode_opt(&ent) MerkleNode::decode_opt(&ent)
} }
pub fn merkle_tree_len(&self) -> Result<usize, Error> { pub fn merkle_tree_approximate_len(&self) -> Result<usize, Error> {
Ok(self.data.merkle_tree.len()?) Ok(self.data.merkle_tree.approximate_len()?)
} }
pub fn todo_len(&self) -> Result<usize, Error> { pub fn todo_approximate_len(&self) -> Result<usize, Error> {
Ok(self.data.merkle_todo.len()?) Ok(self.data.merkle_todo.approximate_len()?)
} }
} }
@ -306,7 +306,7 @@ impl<F: TableSchema, R: TableReplication> Worker for MerkleWorker<F, R> {
fn status(&self) -> WorkerStatus { fn status(&self) -> WorkerStatus {
WorkerStatus { WorkerStatus {
queue_length: Some(self.0.todo_len().unwrap_or(0) as u64), queue_length: Some(self.0.todo_approximate_len().unwrap_or(0) as u64),
..Default::default() ..Default::default()
} }
} }

View file

@ -34,7 +34,7 @@ impl TableMetrics {
.u64_value_observer( .u64_value_observer(
"table.size", "table.size",
move |observer| { move |observer| {
if let Ok(value) = store.len() { if let Ok(value) = store.approximate_len() {
observer.observe( observer.observe(
value as u64, value as u64,
&[KeyValue::new("table_name", table_name)], &[KeyValue::new("table_name", table_name)],
@ -48,7 +48,7 @@ impl TableMetrics {
.u64_value_observer( .u64_value_observer(
"table.merkle_tree_size", "table.merkle_tree_size",
move |observer| { move |observer| {
if let Ok(value) = merkle_tree.len() { if let Ok(value) = merkle_tree.approximate_len() {
observer.observe( observer.observe(
value as u64, value as u64,
&[KeyValue::new("table_name", table_name)], &[KeyValue::new("table_name", table_name)],
@ -62,7 +62,7 @@ impl TableMetrics {
.u64_value_observer( .u64_value_observer(
"table.merkle_updater_todo_queue_length", "table.merkle_updater_todo_queue_length",
move |observer| { move |observer| {
if let Ok(v) = merkle_todo.len() { if let Ok(v) = merkle_todo.approximate_len() {
observer.observe( observer.observe(
v as u64, v as u64,
&[KeyValue::new("table_name", table_name)], &[KeyValue::new("table_name", table_name)],
@ -76,7 +76,7 @@ impl TableMetrics {
.u64_value_observer( .u64_value_observer(
"table.gc_todo_queue_length", "table.gc_todo_queue_length",
move |observer| { move |observer| {
if let Ok(value) = gc_todo.len() { if let Ok(value) = gc_todo.approximate_len() {
observer.observe( observer.observe(
value as u64, value as u64,
&[KeyValue::new("table_name", table_name)], &[KeyValue::new("table_name", table_name)],

View file

@ -27,7 +27,7 @@ impl<F: TableSchema, R: TableReplication> Worker for InsertQueueWorker<F, R> {
fn status(&self) -> WorkerStatus { fn status(&self) -> WorkerStatus {
WorkerStatus { WorkerStatus {
queue_length: Some(self.0.data.insert_queue.len().unwrap_or(0) as u64), queue_length: Some(self.0.data.insert_queue.approximate_len().unwrap_or(0) as u64),
..Default::default() ..Default::default()
} }
} }

View file

@ -1,6 +1,6 @@
[package] [package]
name = "garage_util" name = "garage_util"
version = "1.1.0" version = "1.3.1"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -21,7 +21,7 @@ arc-swap.workspace = true
async-trait.workspace = true async-trait.workspace = true
blake2.workspace = true blake2.workspace = true
bytesize.workspace = true bytesize.workspace = true
err-derive.workspace = true thiserror.workspace = true
hexdump.workspace = true hexdump.workspace = true
xxhash-rust.workspace = true xxhash-rust.workspace = true
hex.workspace = true hex.workspace = true

View file

@ -115,6 +115,7 @@ impl WorkerProcessor {
trace!("{} (TID {}): {:?}", worker.worker.name(), worker.task_id, worker.state); trace!("{} (TID {}): {:?}", worker.worker.name(), worker.task_id, worker.state);
// Save worker info // Save worker info
{
let mut wi = self.worker_info.lock().unwrap(); let mut wi = self.worker_info.lock().unwrap();
match wi.get_mut(&worker.task_id) { match wi.get_mut(&worker.task_id) {
Some(i) => { Some(i) => {
@ -137,10 +138,16 @@ impl WorkerProcessor {
}); });
} }
} }
}
if worker.state == WorkerState::Done { if worker.state == WorkerState::Done {
info!("Worker {} (TID {}) exited", worker.worker.name(), worker.task_id); info!("Worker {} (TID {}) exited", worker.worker.name(), worker.task_id);
} else { } else {
// Yield to the Tokio scheduler between consecutive Busy steps so
// that a worker which never suspends on its own cannot starve other tasks.
if worker.state == WorkerState::Busy {
tokio::task::yield_now().await;
}
workers.push(async move { workers.push(async move {
worker.step().await; worker.step().await;
worker worker

View file

@ -45,6 +45,11 @@ pub struct Config {
)] )]
pub block_size: usize, pub block_size: usize,
/// Maximum number of parallel block writes per PUT request
/// Higher values improve throughput but increase memory usage
/// Default: 3, Recommended: 10-30 for NVMe, 3-10 for HDD
#[serde(default = "default_block_max_concurrent_writes_per_request")]
pub block_max_concurrent_writes_per_request: usize,
/// Number of replicas. Can be any positive integer, but uneven numbers are more favorable. /// Number of replicas. Can be any positive integer, but uneven numbers are more favorable.
/// - 1 for single-node clusters, or to disable replication /// - 1 for single-node clusters, or to disable replication
/// - 3 is the recommended and supported setting. /// - 3 is the recommended and supported setting.
@ -75,6 +80,10 @@ pub struct Config {
)] )]
pub block_ram_buffer_max: usize, pub block_ram_buffer_max: usize,
/// Maximum number of concurrent reads of block files on disk
#[serde(default = "default_block_max_concurrent_reads")]
pub block_max_concurrent_reads: usize,
/// Skip the permission check of secret files. Useful when /// Skip the permission check of secret files. Useful when
/// POSIX ACLs (or more complex chmods) are used. /// POSIX ACLs (or more complex chmods) are used.
#[serde(default)] #[serde(default)]
@ -122,6 +131,10 @@ pub struct Config {
#[serde(deserialize_with = "deserialize_capacity", default)] #[serde(deserialize_with = "deserialize_capacity", default)]
pub lmdb_map_size: usize, pub lmdb_map_size: usize,
/// Fjall block cache size
#[serde(deserialize_with = "deserialize_capacity", default)]
pub fjall_block_cache_size: usize,
// -- APIs // -- APIs
/// Configuration for S3 api /// Configuration for S3 api
pub s3_api: S3ApiConfig, pub s3_api: S3ApiConfig,
@ -135,6 +148,10 @@ pub struct Config {
/// Configuration for the admin API endpoint /// Configuration for the admin API endpoint
#[serde(default = "Default::default")] #[serde(default = "Default::default")]
pub admin: AdminConfig, pub admin: AdminConfig,
/// Allow punycode in bucket names
#[serde(default)]
pub allow_punycode: bool,
} }
/// Value for data_dir: either a single directory or a list of dirs with attributes /// Value for data_dir: either a single directory or a list of dirs with attributes
@ -255,6 +272,9 @@ pub struct KubernetesDiscoveryConfig {
pub skip_crd: bool, pub skip_crd: bool,
} }
pub fn default_block_max_concurrent_writes_per_request() -> usize {
3
}
/// Read and parse configuration /// Read and parse configuration
pub fn read_config(config_file: PathBuf) -> Result<Config, Error> { pub fn read_config(config_file: PathBuf) -> Result<Config, Error> {
let config = std::fs::read_to_string(config_file)?; let config = std::fs::read_to_string(config_file)?;
@ -272,6 +292,9 @@ fn default_block_size() -> usize {
fn default_block_ram_buffer_max() -> usize { fn default_block_ram_buffer_max() -> usize {
256 * 1024 * 1024 256 * 1024 * 1024
} }
fn default_block_max_concurrent_reads() -> usize {
16
}
fn default_consistency_mode() -> String { fn default_consistency_mode() -> String {
"consistent".into() "consistent".into()

View file

@ -2,7 +2,7 @@
use std::fmt; use std::fmt;
use std::io; use std::io;
use err_derive::Error; use thiserror::Error;
use serde::{de::Visitor, Deserialize, Deserializer, Serialize, Serializer}; use serde::{de::Visitor, Deserialize, Deserializer, Serialize, Serializer};
@ -12,68 +12,61 @@ use crate::encode::debug_serialize;
/// Regroup all Garage errors /// Regroup all Garage errors
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum Error { pub enum Error {
#[error(display = "IO error: {}", _0)] #[error("IO error: {0}")]
Io(#[error(source)] io::Error), Io(#[from] io::Error),
#[error(display = "Hyper error: {}", _0)] #[error("Hyper error: {0}")]
Hyper(#[error(source)] hyper::Error), Hyper(#[from] hyper::Error),
#[error(display = "HTTP error: {}", _0)] #[error("HTTP error: {0}")]
Http(#[error(source)] http::Error), Http(#[from] http::Error),
#[error(display = "Invalid HTTP header value: {}", _0)] #[error("Invalid HTTP header value: {0}")]
HttpHeader(#[error(source)] http::header::ToStrError), HttpHeader(#[from] http::header::ToStrError),
#[error(display = "Network error: {}", _0)] #[error("Network error: {0}")]
Net(#[error(source)] garage_net::error::Error), Net(#[from] garage_net::error::Error),
#[error(display = "DB error: {}", _0)] #[error("DB error: {0}")]
Db(#[error(source)] garage_db::Error), Db(#[from] garage_db::Error),
#[error(display = "Messagepack encode error: {}", _0)] #[error("Messagepack encode error: {0}")]
RmpEncode(#[error(source)] rmp_serde::encode::Error), RmpEncode(#[from] rmp_serde::encode::Error),
#[error(display = "Messagepack decode error: {}", _0)] #[error("Messagepack decode error: {0}")]
RmpDecode(#[error(source)] rmp_serde::decode::Error), RmpDecode(#[from] rmp_serde::decode::Error),
#[error(display = "JSON error: {}", _0)] #[error("JSON error: {0}")]
Json(#[error(source)] serde_json::error::Error), Json(#[from] serde_json::error::Error),
#[error(display = "TOML decode error: {}", _0)] #[error("TOML decode error: {0}")]
TomlDecode(#[error(source)] toml::de::Error), TomlDecode(#[from] toml::de::Error),
#[error(display = "Tokio join error: {}", _0)] #[error("Tokio join error: {0}")]
TokioJoin(#[error(source)] tokio::task::JoinError), TokioJoin(#[from] tokio::task::JoinError),
#[error(display = "Tokio semaphore acquire error: {}", _0)] #[error("Tokio semaphore acquire error: {0}")]
TokioSemAcquire(#[error(source)] tokio::sync::AcquireError), TokioSemAcquire(#[from] tokio::sync::AcquireError),
#[error(display = "Tokio broadcast receive error: {}", _0)] #[error("Tokio broadcast receive error: {0}")]
TokioBcastRecv(#[error(source)] tokio::sync::broadcast::error::RecvError), TokioBcastRecv(#[from] tokio::sync::broadcast::error::RecvError),
#[error(display = "Remote error: {}", _0)] #[error("Remote error: {0}")]
RemoteError(String), RemoteError(String),
#[error(display = "Timeout")] #[error("Timeout")]
Timeout, Timeout,
#[error( #[error("Could not reach quorum of {0} (sets={1:?}). {2} of {3} request succeeded, others returned errors: {4:?}")]
display = "Could not reach quorum of {} (sets={:?}). {} of {} request succeeded, others returned errors: {:?}",
_0,
_1,
_2,
_3,
_4
)]
Quorum(usize, Option<usize>, usize, usize, Vec<String>), Quorum(usize, Option<usize>, usize, usize, Vec<String>),
#[error(display = "Unexpected RPC message: {}", _0)] #[error("Unexpected RPC message: {0}")]
UnexpectedRpcMessage(String), UnexpectedRpcMessage(String),
#[error(display = "Corrupt data: does not match hash {:?}", _0)] #[error("Corrupt data: does not match hash {0:?}")]
CorruptData(Hash), CorruptData(Hash),
#[error(display = "Missing block {:?}: no node returned a valid block", _0)] #[error("Missing block {0:?}: no node returned a valid block")]
MissingBlock(Hash), MissingBlock(Hash),
#[error(display = "{}", _0)] #[error("{0}")]
Message(String), Message(String),
} }

View file

@ -1,6 +1,6 @@
[package] [package]
name = "garage_web" name = "garage_web"
version = "1.1.0" version = "1.3.1"
authors = ["Alex Auvolat <alex@adnab.me>", "Quentin Dufour <quentin@dufour.io>"] authors = ["Alex Auvolat <alex@adnab.me>", "Quentin Dufour <quentin@dufour.io>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -20,7 +20,7 @@ garage_model.workspace = true
garage_util.workspace = true garage_util.workspace = true
garage_table.workspace = true garage_table.workspace = true
err-derive.workspace = true thiserror.workspace = true
tracing.workspace = true tracing.workspace = true
percent-encoding.workspace = true percent-encoding.workspace = true

View file

@ -1,6 +1,6 @@
use err_derive::Error;
use hyper::header::HeaderValue; use hyper::header::HeaderValue;
use hyper::{HeaderMap, StatusCode}; use hyper::{HeaderMap, StatusCode};
use thiserror::Error;
use garage_api_common::generic_server::ApiError; use garage_api_common::generic_server::ApiError;
@ -8,15 +8,15 @@ use garage_api_common::generic_server::ApiError;
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum Error { pub enum Error {
/// An error received from the API crate /// An error received from the API crate
#[error(display = "API error: {}", _0)] #[error("API error: {0}")]
ApiError(garage_api_s3::error::Error), ApiError(garage_api_s3::error::Error),
/// The file does not exist /// The file does not exist
#[error(display = "Not found")] #[error("Not found")]
NotFound, NotFound,
/// The client sent a request without host, or with unsupported method /// The client sent a request without host, or with unsupported method
#[error(display = "Bad request: {}", _0)] #[error("Bad request: {0}")]
BadRequest(String), BadRequest(String),
} }

Some files were not shown because too many files have changed in this diff Show more