mirror of
https://github.com/Zygo/bees.git
synced 2025-08-02 13:53:28 +02:00
Compare commits
196 Commits
v0.9
...
ba11d733c0
Author | SHA1 | Date | |
---|---|---|---|
|
ba11d733c0 | ||
|
e87f6e9649 | ||
|
fb63bd7e06 | ||
|
27b5b4e113 | ||
|
e9e6870de8 | ||
|
16e3dd7f60 | ||
|
c658831852 | ||
|
e852e3998a | ||
|
5c0480ec59 | ||
|
1b8b7557b6 | ||
|
f9f3913c8b | ||
|
ee5c971d77 | ||
|
d37f916507 | ||
|
3a17a4dcdd | ||
|
4039ef229e | ||
|
e9d4aa4586 | ||
|
504f4cda80 | ||
|
6c36f4973f | ||
|
b1bd99c077 | ||
|
d5e805ab8d | ||
|
337bbffac1 | ||
|
527396e5cb | ||
|
bc7c35aa2d | ||
|
0953160584 | ||
|
80f9c147f7 | ||
|
50e012ad6d | ||
|
9a9644659c | ||
|
fd53bff959 | ||
|
9439dad93a | ||
|
ef9b4b3a50 | ||
|
7ca857dff0 | ||
|
8331f70db7 | ||
|
a844024395 | ||
|
47243aef14 | ||
|
a670aa5a71 | ||
|
51b3bcdbe4 | ||
|
ae58401d53 | ||
|
3e7eb43b51 | ||
|
962d94567c | ||
|
6dbef5f27b | ||
|
88b1e4ca6e | ||
|
c1d7fa13a5 | ||
|
aa39bddb2d | ||
|
1aea2d2f96 | ||
|
673b450671 | ||
|
183b6a5361 | ||
|
b6446d7316 | ||
|
d32f31f411 | ||
|
dd08f6379f | ||
|
58ee297cde | ||
|
a3c0ba0d69 | ||
|
75040789c6 | ||
|
f9a697518d | ||
|
c4ba6ec269 | ||
|
440740201a | ||
|
f6908420ad | ||
|
925b12823e | ||
|
561e604edc | ||
|
30cd375d03 | ||
|
48b7fbda9c | ||
|
85aba7b695 | ||
|
de38b46dd8 | ||
|
0abf6ebb3d | ||
|
360ce7e125 | ||
|
ad11db2ee1 | ||
|
874832dc58 | ||
|
5fe89d85c3 | ||
|
a2b3e1e0c2 | ||
|
aaec931081 | ||
|
c53fa04a2f | ||
|
d4a681c8a2 | ||
|
a819d623f7 | ||
|
de9d72da80 | ||
|
74d8bdd60f | ||
|
a5d078d48b | ||
|
e2587cae9b | ||
|
ac581273d3 | ||
|
7fcde97b70 | ||
|
e457f502b7 | ||
|
46815f1a9d | ||
|
0d251d30f4 | ||
|
b8dd9a2db0 | ||
|
8bc90b743b | ||
|
2f2a68be3d | ||
|
82f1fd8054 | ||
|
a9b07d7684 | ||
|
613ddc3c71 | ||
|
c3a39b7691 | ||
|
58db4071de | ||
|
0d3e13cc5f | ||
|
1af5fcdf34 | ||
|
87472b6086 | ||
|
ca351d389f | ||
|
1f0b8c623c | ||
|
74296c644a | ||
|
231593bfbc | ||
|
d4900cc5d5 | ||
|
81bbf7e1d4 | ||
|
bd9dc0229b | ||
|
2a1ed0b455 | ||
|
d160edc15a | ||
|
e79b242ce2 | ||
|
ea45982293 | ||
|
f209cafcd8 | ||
|
c4b31bdd5c | ||
|
08fe145988 | ||
|
bb09b1ab0e | ||
|
94d9945d04 | ||
|
a02588b16f | ||
|
21cedfb13e | ||
|
b9abcceacb | ||
|
31f3a8d67d | ||
|
9beb602b16 | ||
|
0580c10082 | ||
|
1cbc894e6f | ||
|
d74862f1fc | ||
|
e40339856f | ||
|
1dd96f20c6 | ||
|
cd7a71aba3 | ||
|
e99a505b3b | ||
|
3e89fe34ed | ||
|
dc74766179 | ||
|
3a33a5386b | ||
|
69e9bdfb0f | ||
|
7a197e2f33 | ||
|
43d38ca536 | ||
|
7b0ed6a411 | ||
|
8d4d153d1d | ||
|
d5a6c30623 | ||
|
25f7ced27b | ||
|
c1af219246 | ||
|
9c183c2c22 | ||
|
59f8a467c3 | ||
|
9987aa8583 | ||
|
da32667e02 | ||
|
8080abac97 | ||
|
1e139d0ccc | ||
|
6542917ffa | ||
|
b99d80b40f | ||
|
099ad2ce7c | ||
|
a59a02174f | ||
|
e22653e2c6 | ||
|
44810d6df8 | ||
|
8f92b1dacc | ||
|
0b974b5485 | ||
|
ce0367dafe | ||
|
54ed6e1cff | ||
|
24b08ef7b7 | ||
|
97eab9655c | ||
|
05bf1ebf76 | ||
|
606ac01d56 | ||
|
72c3bf8438 | ||
|
72958a5e47 | ||
|
f25b4c81ba | ||
|
a64603568b | ||
|
33cde5de97 | ||
|
5414c7344f | ||
|
8bac00433d | ||
|
088cbc951a | ||
|
e78e05e212 | ||
|
8d08a3c06f | ||
|
cdcdf8e218 | ||
|
37f5b1bfa8 | ||
|
abe2afaeb2 | ||
|
792fdbbb13 | ||
|
30a4fb52cb | ||
|
90d7075358 | ||
|
faac895568 | ||
|
a7baa565e4 | ||
|
b408eac98e | ||
|
75131f396f | ||
|
cfb7592859 | ||
|
3839690ba3 | ||
|
124507232f | ||
|
3c5e13c885 | ||
|
a6ca2fa2f6 | ||
|
3f23a0c73f | ||
|
d6732c58e2 | ||
|
75b2067cef | ||
|
da3ef216b1 | ||
|
b7665d49d9 | ||
|
717bdf5eb5 | ||
|
9b60f2b94d | ||
|
8978d63e75 | ||
|
82474b4ef4 | ||
|
73834beb5a | ||
|
c92ba117d8 | ||
|
c354e77634 | ||
|
f21569e88c | ||
|
3d5ebe4d40 | ||
|
3430f16998 | ||
|
7c764a73c8 | ||
|
a9a5cd03a5 | ||
|
299509ce32 | ||
|
d5a99c2f5e | ||
|
fd6c3b3769 |
@@ -4,6 +4,7 @@ define TEMPLATE_COMPILER =
|
|||||||
sed $< >$@ \
|
sed $< >$@ \
|
||||||
-e's#@DESTDIR@#$(DESTDIR)#' \
|
-e's#@DESTDIR@#$(DESTDIR)#' \
|
||||||
-e's#@PREFIX@#$(PREFIX)#' \
|
-e's#@PREFIX@#$(PREFIX)#' \
|
||||||
|
-e's#@BINDIR@#$(BINDIR)#' \
|
||||||
-e's#@ETC_PREFIX@#$(ETC_PREFIX)#' \
|
-e's#@ETC_PREFIX@#$(ETC_PREFIX)#' \
|
||||||
-e's#@LIBEXEC_PREFIX@#$(LIBEXEC_PREFIX)#'
|
-e's#@LIBEXEC_PREFIX@#$(LIBEXEC_PREFIX)#'
|
||||||
endef
|
endef
|
||||||
|
10
Makefile
10
Makefile
@@ -1,6 +1,7 @@
|
|||||||
PREFIX ?= /usr
|
PREFIX ?= /usr
|
||||||
ETC_PREFIX ?= /etc
|
ETC_PREFIX ?= /etc
|
||||||
LIBDIR ?= lib
|
LIBDIR ?= lib
|
||||||
|
BINDIR ?= sbin
|
||||||
|
|
||||||
LIB_PREFIX ?= $(PREFIX)/$(LIBDIR)
|
LIB_PREFIX ?= $(PREFIX)/$(LIBDIR)
|
||||||
LIBEXEC_PREFIX ?= $(LIB_PREFIX)/bees
|
LIBEXEC_PREFIX ?= $(LIB_PREFIX)/bees
|
||||||
@@ -49,25 +50,20 @@ scripts/%: scripts/%.in
|
|||||||
|
|
||||||
scripts: scripts/beesd scripts/beesd@.service
|
scripts: scripts/beesd scripts/beesd@.service
|
||||||
|
|
||||||
install_tools: ## Install support tools + libs
|
|
||||||
install_tools: src
|
|
||||||
install -Dm755 bin/fiemap $(DESTDIR)$(PREFIX)/bin/fiemap
|
|
||||||
install -Dm755 bin/fiewalk $(DESTDIR)$(PREFIX)/sbin/fiewalk
|
|
||||||
|
|
||||||
install_bees: ## Install bees + libs
|
install_bees: ## Install bees + libs
|
||||||
install_bees: src $(RUN_INSTALL_TESTS)
|
install_bees: src $(RUN_INSTALL_TESTS)
|
||||||
install -Dm755 bin/bees $(DESTDIR)$(LIBEXEC_PREFIX)/bees
|
install -Dm755 bin/bees $(DESTDIR)$(LIBEXEC_PREFIX)/bees
|
||||||
|
|
||||||
install_scripts: ## Install scripts
|
install_scripts: ## Install scripts
|
||||||
install_scripts: scripts
|
install_scripts: scripts
|
||||||
install -Dm755 scripts/beesd $(DESTDIR)$(PREFIX)/sbin/beesd
|
install -Dm755 scripts/beesd $(DESTDIR)$(PREFIX)/$(BINDIR)/beesd
|
||||||
install -Dm644 scripts/beesd.conf.sample $(DESTDIR)$(ETC_PREFIX)/bees/beesd.conf.sample
|
install -Dm644 scripts/beesd.conf.sample $(DESTDIR)$(ETC_PREFIX)/bees/beesd.conf.sample
|
||||||
ifneq ($(SYSTEMD_SYSTEM_UNIT_DIR),)
|
ifneq ($(SYSTEMD_SYSTEM_UNIT_DIR),)
|
||||||
install -Dm644 scripts/beesd@.service $(DESTDIR)$(SYSTEMD_SYSTEM_UNIT_DIR)/beesd@.service
|
install -Dm644 scripts/beesd@.service $(DESTDIR)$(SYSTEMD_SYSTEM_UNIT_DIR)/beesd@.service
|
||||||
endif
|
endif
|
||||||
|
|
||||||
install: ## Install distribution
|
install: ## Install distribution
|
||||||
install: install_bees install_scripts $(OPTIONAL_INSTALL_TARGETS)
|
install: install_bees install_scripts
|
||||||
|
|
||||||
help: ## Show help
|
help: ## Show help
|
||||||
@fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/\t/'
|
@fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/\t/'
|
||||||
|
27
README.md
27
README.md
@@ -6,31 +6,30 @@ Best-Effort Extent-Same, a btrfs deduplication agent.
|
|||||||
About bees
|
About bees
|
||||||
----------
|
----------
|
||||||
|
|
||||||
bees is a block-oriented userspace deduplication agent designed for large
|
bees is a block-oriented userspace deduplication agent designed to scale
|
||||||
btrfs filesystems. It is an offline dedupe combined with an incremental
|
up to large btrfs filesystems. It is an offline dedupe combined with
|
||||||
data scan capability to minimize time data spends on disk from write
|
an incremental data scan capability to minimize time data spends on disk
|
||||||
to dedupe.
|
from write to dedupe.
|
||||||
|
|
||||||
Strengths
|
Strengths
|
||||||
---------
|
---------
|
||||||
|
|
||||||
* Space-efficient hash table and matching algorithms - can use as little as 1 GB hash table per 10 TB unique data (0.1GB/TB)
|
* Space-efficient hash table - can use as little as 1 GB hash table per 10 TB unique data (0.1GB/TB)
|
||||||
* Daemon incrementally dedupes new data using btrfs tree search
|
* Daemon mode - incrementally dedupes new data as it appears
|
||||||
|
* Largest extents first - recover more free space during fixed maintenance windows
|
||||||
* Works with btrfs compression - dedupe any combination of compressed and uncompressed files
|
* Works with btrfs compression - dedupe any combination of compressed and uncompressed files
|
||||||
* **NEW** [Works around `btrfs send` problems with dedupe and incremental parent snapshots](docs/options.md)
|
* Whole-filesystem dedupe - scans data only once, even with snapshots and reflinks
|
||||||
* Works around btrfs filesystem structure to free more disk space
|
|
||||||
* Persistent hash table for rapid restart after shutdown
|
* Persistent hash table for rapid restart after shutdown
|
||||||
* Whole-filesystem dedupe - including snapshots
|
|
||||||
* Constant hash table size - no increased RAM usage if data set becomes larger
|
* Constant hash table size - no increased RAM usage if data set becomes larger
|
||||||
* Works on live data - no scheduled downtime required
|
* Works on live data - no scheduled downtime required
|
||||||
* Automatic self-throttling based on system load
|
* Automatic self-throttling - reduces system load
|
||||||
|
* btrfs support - recovers more free space from btrfs than naive dedupers
|
||||||
|
|
||||||
Weaknesses
|
Weaknesses
|
||||||
----------
|
----------
|
||||||
|
|
||||||
* Whole-filesystem dedupe - has no include/exclude filters, does not accept file lists
|
* Whole-filesystem dedupe - has no include/exclude filters, does not accept file lists
|
||||||
* Requires root privilege (or `CAP_SYS_ADMIN`)
|
* Requires root privilege (`CAP_SYS_ADMIN` plus the usual filesystem read/modify caps)
|
||||||
* First run may require temporary disk space for extent reorganization
|
|
||||||
* [First run may increase metadata space usage if many snapshots exist](docs/gotchas.md)
|
* [First run may increase metadata space usage if many snapshots exist](docs/gotchas.md)
|
||||||
* Constant hash table size - no decreased RAM usage if data set becomes smaller
|
* Constant hash table size - no decreased RAM usage if data set becomes smaller
|
||||||
* btrfs only
|
* btrfs only
|
||||||
@@ -47,7 +46,7 @@ Recommended Reading
|
|||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
* [bees Gotchas](docs/gotchas.md)
|
* [bees Gotchas](docs/gotchas.md)
|
||||||
* [btrfs kernel bugs](docs/btrfs-kernel.md) - especially DATA CORRUPTION WARNING
|
* [btrfs kernel bugs](docs/btrfs-kernel.md) - especially DATA CORRUPTION WARNING for old kernels
|
||||||
* [bees vs. other btrfs features](docs/btrfs-other.md)
|
* [bees vs. other btrfs features](docs/btrfs-other.md)
|
||||||
* [What to do when something goes wrong](docs/wrong.md)
|
* [What to do when something goes wrong](docs/wrong.md)
|
||||||
|
|
||||||
@@ -70,6 +69,6 @@ You can also use Github:
|
|||||||
Copyright & License
|
Copyright & License
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
Copyright 2015-2022 Zygo Blaxell <bees@furryterror.org>.
|
Copyright 2015-2025 Zygo Blaxell <bees@furryterror.org>.
|
||||||
|
|
||||||
GPL (version 3 or later).
|
GPL (version 3 or later).
|
||||||
|
@@ -1,30 +1,24 @@
|
|||||||
Recommended Kernel Version for bees
|
Recommended Linux Kernel Version for bees
|
||||||
===================================
|
=========================================
|
||||||
|
|
||||||
First, a warning that is not specific to bees:
|
First, a warning about old Linux kernel versions:
|
||||||
|
|
||||||
> **Kernel 5.1, 5.2, and 5.3 should not be used with btrfs due to a
|
> **Linux kernel version 5.1, 5.2, and 5.3 should not be used with btrfs
|
||||||
severe regression that can lead to fatal metadata corruption.**
|
due to a severe regression that can lead to fatal metadata corruption.**
|
||||||
This issue is fixed in kernel 5.4.14 and later.
|
This issue is fixed in version 5.4.14 and later.
|
||||||
|
|
||||||
**Recommended kernel versions for bees are 4.19, 5.4, 5.10, 5.11, or 5.12,
|
**Recommended Linux kernel versions for bees are 5.4, 5.10, 5.15, 6.1,
|
||||||
with recent LTS and -stable updates.** The latest released kernel as
|
6.6, or 6.12 with recent LTS and -stable updates.** The latest released
|
||||||
of this writing is 5.18.18.
|
kernel as of this writing is 6.12.9, and the earliest supported LTS
|
||||||
|
kernel is 5.4.
|
||||||
|
|
||||||
4.14, 4.9, and 4.4 LTS kernels with recent updates are OK with
|
Some optional bees features use kernel APIs introduced in kernel 4.15
|
||||||
some issues. Older kernels will be slower (a little slower or a lot
|
(extent scan) and 5.6 (`openat2` support). These bees features are not
|
||||||
slower depending on which issues are triggered). Not all fixes are
|
available on older kernels. Support for older kernels may be removed
|
||||||
backported.
|
in a future bees release.
|
||||||
|
|
||||||
Obsolete non-LTS kernels have a variety of unfixed issues and should
|
|
||||||
not be used with btrfs. For details see the table below.
|
|
||||||
|
|
||||||
bees requires btrfs kernel API version 4.2 or higher, and does not work
|
|
||||||
on older kernels.
|
|
||||||
|
|
||||||
bees will detect and use btrfs kernel API up to version 4.15 if present.
|
|
||||||
In some future bees release, this API version may become mandatory.
|
|
||||||
|
|
||||||
|
bees will not run at all on kernels before 4.2 due to lack of minimal
|
||||||
|
API support.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -58,14 +52,20 @@ These bugs are particularly popular among bees users, though not all are specifi
|
|||||||
| - | 5.8 | deadlock in `TREE_SEARCH` ioctl (core component of bees filesystem scanner), followed by regression in deadlock fix | 4.4.237, 4.9.237, 4.14.199, 4.19.146, 5.4.66, 5.8.10 and later | a48b73eca4ce btrfs: fix potential deadlock in the search ioctl, 1c78544eaa46 btrfs: fix wrong address when faulting in pages in the search ioctl
|
| - | 5.8 | deadlock in `TREE_SEARCH` ioctl (core component of bees filesystem scanner), followed by regression in deadlock fix | 4.4.237, 4.9.237, 4.14.199, 4.19.146, 5.4.66, 5.8.10 and later | a48b73eca4ce btrfs: fix potential deadlock in the search ioctl, 1c78544eaa46 btrfs: fix wrong address when faulting in pages in the search ioctl
|
||||||
| 5.7 | 5.10 | kernel crash if balance receives fatal signal e.g. Ctrl-C | 5.4.93, 5.10.11, 5.11 and later | 18d3bff411c8 btrfs: don't get an EINTR during drop_snapshot for reloc
|
| 5.7 | 5.10 | kernel crash if balance receives fatal signal e.g. Ctrl-C | 5.4.93, 5.10.11, 5.11 and later | 18d3bff411c8 btrfs: don't get an EINTR during drop_snapshot for reloc
|
||||||
| 5.10 | 5.10 | 20x write performance regression | 5.10.8, 5.11 and later | e076ab2a2ca7 btrfs: shrink delalloc pages instead of full inodes
|
| 5.10 | 5.10 | 20x write performance regression | 5.10.8, 5.11 and later | e076ab2a2ca7 btrfs: shrink delalloc pages instead of full inodes
|
||||||
| 5.4 | 5.11 | spurious tree checker failures on extent ref hash | 5.11.5, 5.12 and later | 1119a72e223f btrfs: tree-checker: do not error out if extent ref hash doesn't match
|
| 5.4 | 5.11 | spurious tree checker failures on extent ref hash | 5.4.125, 5.10.43, 5.11.5, 5.12 and later | 1119a72e223f btrfs: tree-checker: do not error out if extent ref hash doesn't match
|
||||||
| - | 5.11 | tree mod log issue #5 | 4.4.263, 4.9.263, 4.14.227, 4.19.183, 5.4.108, 5.10.26, 5.11.9, 5.12 and later | dbcc7d57bffc btrfs: fix race when cloning extent buffer during rewind of an old root
|
| - | 5.11 | tree mod log issue #5 | 4.4.263, 4.9.263, 4.14.227, 4.19.183, 5.4.108, 5.10.26, 5.11.9, 5.12 and later | dbcc7d57bffc btrfs: fix race when cloning extent buffer during rewind of an old root
|
||||||
| - | 5.12 | tree mod log issue #6 | 4.14.233, 4.19.191, 5.4.118, 5.10.36, 5.11.20, 5.12.3, 5.13 and later | f9690f426b21 btrfs: fix race when picking most recent mod log operation for an old root
|
| - | 5.12 | tree mod log issue #6 | 4.14.233, 4.19.191, 5.4.118, 5.10.36, 5.11.20, 5.12.3, 5.13 and later | f9690f426b21 btrfs: fix race when picking most recent mod log operation for an old root
|
||||||
|
| 5.11 | 5.12 | subvols marked for deletion with `btrfs sub del` become permanently undeletable ("ghost" subvols) | 5.12 stopped creation of new ghost subvols | Partially fixed in 8d488a8c7ba2 btrfs: fix subvolume/snapshot deletion not triggered on mount. Qu wrote a [patch](https://github.com/adam900710/linux/commit/9de990fcc8864c376eb28aa7482c54321f94acd4) to allow `btrfs sub del -i` to remove "ghost" subvols, but it was never merged upstream.
|
||||||
| 4.15 | 5.16 | spurious warnings from `fs/fs-writeback.c` when `flushoncommit` is enabled | 5.15.27, 5.16.13, 5.17 and later | a0f0cf8341e3 btrfs: get rid of warning on transaction commit when using flushoncommit
|
| 4.15 | 5.16 | spurious warnings from `fs/fs-writeback.c` when `flushoncommit` is enabled | 5.15.27, 5.16.13, 5.17 and later | a0f0cf8341e3 btrfs: get rid of warning on transaction commit when using flushoncommit
|
||||||
| - | 5.17 | crash during device removal can make filesystem unmountable | 5.15.54, 5.16.20, 5.17.3, 5.18 and later | bbac58698a55 btrfs: remove device item and update super block in the same transaction
|
| - | 5.17 | crash during device removal can make filesystem unmountable | 5.15.54, 5.16.20, 5.17.3, 5.18 and later | bbac58698a55 btrfs: remove device item and update super block in the same transaction
|
||||||
| - | 5.18 | wrong superblock num_devices makes filesystem unmountable | 4.14.283, 4.19.247, 5.4.198, 5.10.121, 5.15.46, 5.17.14, 5.18.3, 5.19 and later | d201238ccd2f btrfs: repair super block num_devices automatically
|
| - | 5.18 | wrong superblock num_devices makes filesystem unmountable | 4.14.283, 4.19.247, 5.4.198, 5.10.121, 5.15.46, 5.17.14, 5.18.3, 5.19 and later | d201238ccd2f btrfs: repair super block num_devices automatically
|
||||||
| 5.18 | 5.19 | parent transid verify failed during log tree replay after a crash during a rename operation | 5.18.18, 5.19.2, 6.0 and later | 723df2bcc9e1 btrfs: join running log transaction when logging new name
|
| 5.18 | 5.19 | parent transid verify failed during log tree replay after a crash during a rename operation | 5.18.18, 5.19.2, 6.0 and later | 723df2bcc9e1 btrfs: join running log transaction when logging new name
|
||||||
| 5.4 | - | kernel hang when multiple threads are running `LOGICAL_INO` and dedupe ioctl | - | workaround: reduce bees thread count to 1 with `-c1`
|
| 5.12 | 6.0 | space cache corruption and potential double allocations | 5.15.65, 5.19.6, 6.0 and later | ced8ecf026fd btrfs: fix space cache corruption and potential double allocations
|
||||||
|
| 6.0 | 6.5 | suboptimal allocation in multi-device filesystems due to chunk allocator regression | 6.1.60, 6.5.9, 6.6 and later | 8a540e990d7d btrfs: fix stripe length calculation for non-zoned data chunk allocation
|
||||||
|
| 6.3, backported to 5.15.107, 6.1.24, 6.2.11 | 6.3 | vmalloc error, failed to allocate pages | 6.3.10, 6.4 and later. Bug (f349b15e183d "mm: vmalloc: avoid warn_alloc noise caused by fatal signal" in v6.3-rc6) backported to 6.1.24, 6.2.11, and 5.15.107. | 95a301eefa82 mm/vmalloc: do not output a spurious warning when huge vmalloc() fails
|
||||||
|
| 6.2 | 6.3 | `IGNORE_OFFSET` flag ignored in `LOGICAL_INO` ioctl | 6.2.16, 6.3.3, 6.4 and later | 0cad8f14d70c btrfs: fix backref walking not returning all inode refs
|
||||||
|
| 6.10 | 6.11 | `adding refs to an existing tree ref`, `failed to run delayed ref`, then read-only | 6.11.10, 6.12 and later | 7d493a5ecc26 btrfs: fix incorrect comparison for delayed refs
|
||||||
|
| 5.4 | - | kernel hang when multiple threads are running `LOGICAL_INO` and dedupe/clone ioctl on the same extent | - | workaround: avoid doing that
|
||||||
|
|
||||||
"Last bad kernel" refers to that version's last stable update from
|
"Last bad kernel" refers to that version's last stable update from
|
||||||
kernel.org. Distro kernels may backport additional fixes. Consult
|
kernel.org. Distro kernels may backport additional fixes. Consult
|
||||||
@@ -80,73 +80,69 @@ through 5.4.13 inclusive.
|
|||||||
A "-" for "first bad kernel" indicates the bug has been present since
|
A "-" for "first bad kernel" indicates the bug has been present since
|
||||||
the relevant feature first appeared in btrfs.
|
the relevant feature first appeared in btrfs.
|
||||||
|
|
||||||
A "-" for "last bad kernel" indicates the bug has not yet been fixed as
|
A "-" for "last bad kernel" indicates the bug has not yet been fixed in
|
||||||
of 5.18.18.
|
current kernels (see top of this page for which kernel version that is).
|
||||||
|
|
||||||
In cases where issues are fixed by commits spread out over multiple
|
In cases where issues are fixed by commits spread out over multiple
|
||||||
kernel versions, "fixed kernel version" refers to the version that
|
kernel versions, "fixed kernel version" refers to the version that
|
||||||
contains all components of the fix.
|
contains the last committed component of the fix.
|
||||||
|
|
||||||
|
|
||||||
Workarounds for known kernel bugs
|
Workarounds for known kernel bugs
|
||||||
---------------------------------
|
---------------------------------
|
||||||
|
|
||||||
* **Hangs with high worker thread counts**: On kernels newer than
|
* **Hangs with concurrent `LOGICAL_INO` and dedupe/clone**: on all
|
||||||
5.4, multiple threads running `LOGICAL_INO` and dedupe ioctls
|
kernel versions so far, multiple threads running `LOGICAL_INO` and
|
||||||
at the same time can lead to a kernel hang. The workaround is
|
dedupe/clone ioctls at the same time on the same inodes or extents
|
||||||
to reduce the thread count to 1 with `-c1`.
|
can lead to a kernel hang. The kernel enters an infinite loop in
|
||||||
|
`add_all_parents`, where `count` is 0, `ref->count` is 1, and
|
||||||
|
`btrfs_next_item` or `btrfs_next_old_item` never find a matching ref.
|
||||||
|
|
||||||
* **Slow backrefs** (aka toxic extents): Under certain conditions,
|
bees has two workarounds for this bug: 1. schedule work so that multiple
|
||||||
if the number of references to a single shared extent grows too
|
threads do not simultaneously access the same inode or the same extent,
|
||||||
high, the kernel consumes more and more CPU while also holding locks
|
and 2. use a brute-force global lock within bees that prevents any
|
||||||
that delay write access to the filesystem. bees avoids this bug
|
thread from running `LOGICAL_INO` while any other thread is running
|
||||||
by measuring the time the kernel spends performing `LOGICAL_INO`
|
dedupe.
|
||||||
operations and permanently blacklisting any extent or hash involved
|
|
||||||
where the kernel starts to get slow. In the bees log, such blocks
|
|
||||||
are labelled as 'toxic' hash/block addresses. Toxic extents are
|
|
||||||
rare (about 1 in 100,000 extents become toxic), but toxic extents can
|
|
||||||
become 8 orders of magnitude more expensive to process than the fastest
|
|
||||||
non-toxic extents. This seems to affect all dedupe agents on btrfs;
|
|
||||||
at the time of writing, only bees has a workaround for this bug.
|
|
||||||
|
|
||||||
This workaround is less necessary for kernels 5.4.96, 5.7 and later,
|
Workaround #1 isn't really a workaround, since we want to do the same
|
||||||
though it can still take 2 ms of CPU to resolve each extent ref on a
|
thing for unrelated performance reasons. If multiple threads try to
|
||||||
fast machine on a large, heavily fragmented file.
|
perform dedupe operations on the same extent or inode, btrfs will make
|
||||||
|
all the threads wait for the same locks anyway, so it's better to have
|
||||||
|
bees find some other inode or extent to work on while waiting for btrfs
|
||||||
|
to finish.
|
||||||
|
|
||||||
|
Workaround #2 doesn't seem to be needed after implementing workaround
|
||||||
|
#1, but it's better to be slightly slower than to hang one CPU core
|
||||||
|
and the filesystem until the kernel is rebooted.
|
||||||
|
|
||||||
|
It is still theoretically possible to trigger the kernel bug when
|
||||||
|
running bees at the same time as other dedupers, or other programs
|
||||||
|
that use `LOGICAL_INO` like `btdu`, or when performing a reflink clone
|
||||||
|
operation such as `cp` or `mv`; however, it's extremely difficult to
|
||||||
|
reproduce the bug without closely cooperating threads.
|
||||||
|
|
||||||
|
* **Slow backrefs** (aka toxic extents): On older kernels, under certain
|
||||||
|
conditions, if the number of references to a single shared extent grows
|
||||||
|
too high, the kernel consumes more and more CPU while also holding
|
||||||
|
locks that delay write access to the filesystem. This is no longer
|
||||||
|
a concern on kernels after 5.7 (or an up-to-date 5.4 LTS version),
|
||||||
|
but there are still some remnants of earlier workarounds for this issue
|
||||||
|
in bees that have not been fully removed.
|
||||||
|
|
||||||
|
bees avoided this bug by measuring the time the kernel spends performing
|
||||||
|
`LOGICAL_INO` operations and permanently blacklisting any extent or
|
||||||
|
hash involved where the kernel starts to get slow. In the bees log,
|
||||||
|
such blocks are labelled as 'toxic' hash/block addresses.
|
||||||
|
|
||||||
|
Future bees releases will remove toxic extent detection (it only detects
|
||||||
|
false positives now) and clear all previously saved toxic extent bits.
|
||||||
|
|
||||||
* **dedupe breaks `btrfs send` in old kernels**. The bees option
|
* **dedupe breaks `btrfs send` in old kernels**. The bees option
|
||||||
`--workaround-btrfs-send` prevents any modification of read-only subvols
|
`--workaround-btrfs-send` prevents any modification of read-only subvols
|
||||||
in order to avoid breaking `btrfs send`.
|
in order to avoid breaking `btrfs send` on kernels before 5.2.
|
||||||
|
|
||||||
This workaround is no longer necessary to avoid kernel crashes
|
This workaround is no longer necessary to avoid kernel crashes and
|
||||||
and send performance failure on kernel 4.9.207, 4.14.159, 4.19.90,
|
send performance failure on kernel 5.4.4 and later. bees will pause
|
||||||
5.3.17, 5.4.4, 5.5 and later; however, some conflict between send
|
dedupe until the send is finished on current kernels.
|
||||||
and dedupe still remains, so the workaround is still useful.
|
|
||||||
|
|
||||||
`btrfs receive` is not and has never been affected by this issue.
|
`btrfs receive` is not and has never been affected by this issue.
|
||||||
|
|
||||||
Unfixed kernel bugs
|
|
||||||
-------------------
|
|
||||||
|
|
||||||
As of 5.18.18:
|
|
||||||
|
|
||||||
* **The kernel does not permit `btrfs send` and dedupe to run at the
|
|
||||||
same time**. Recent kernels no longer crash, but now refuse one
|
|
||||||
operation with an error if the other operation was already running.
|
|
||||||
|
|
||||||
bees has not been updated to handle the new dedupe behavior optimally.
|
|
||||||
Optimal behavior is to defer dedupe operations when send is detected,
|
|
||||||
and resume after the send is finished. Current bees behavior is to
|
|
||||||
complain loudly about each individual dedupe failure in log messages,
|
|
||||||
and abandon duplicate data references in the snapshot that send is
|
|
||||||
processing. A future bees version shall have better handling for
|
|
||||||
this situation.
|
|
||||||
|
|
||||||
Workaround: send `SIGSTOP` to bees, or terminate the bees process,
|
|
||||||
before running `btrfs send`.
|
|
||||||
|
|
||||||
This workaround is not strictly required if snapshot is deleted after
|
|
||||||
sending. In that case, any duplicate data blocks that were not removed
|
|
||||||
by dedupe will be removed by snapshot delete instead. The workaround
|
|
||||||
still saves some IO.
|
|
||||||
|
|
||||||
`btrfs receive` is not affected by this issue.
|
|
||||||
|
@@ -3,49 +3,34 @@ Good Btrfs Feature Interactions
|
|||||||
|
|
||||||
bees has been tested in combination with the following:
|
bees has been tested in combination with the following:
|
||||||
|
|
||||||
* btrfs compression (zlib, lzo, zstd), mixtures of compressed and uncompressed extents
|
* btrfs compression (zlib, lzo, zstd)
|
||||||
* PREALLOC extents (unconditionally replaced with holes)
|
* PREALLOC extents (unconditionally replaced with holes)
|
||||||
* HOLE extents and btrfs no-holes feature
|
* HOLE extents and btrfs no-holes feature
|
||||||
* Other deduplicators, reflink copies (though bees may decide to redo their work)
|
* Other deduplicators (`duperemove`, `jdupes`)
|
||||||
* btrfs snapshots and non-snapshot subvols (RW and RO)
|
* Reflink copies (modern coreutils `cp` and `mv`)
|
||||||
* Concurrent file modification (e.g. PostgreSQL and sqlite databases, build daemons)
|
* Concurrent file modification (e.g. PostgreSQL and sqlite databases, VMs, build daemons)
|
||||||
* all btrfs RAID profiles
|
* All btrfs RAID profiles: single, dup, raid0, raid1, raid10, raid1c3, raid1c4, raid5, raid6
|
||||||
* IO errors during dedupe (read errors will throw exceptions, bees will catch them and skip over the affected extent)
|
* IO errors during dedupe (affected extents are skipped)
|
||||||
* Filesystems mounted *with* the flushoncommit option ([lots of harmless kernel log warnings on 4.15 and later](btrfs-kernel.md))
|
|
||||||
* Filesystems mounted *without* the flushoncommit option
|
|
||||||
* 4K filesystem data block size / clone alignment
|
* 4K filesystem data block size / clone alignment
|
||||||
* 64-bit and 32-bit LE host CPUs (amd64, x86, arm)
|
* 64-bit and 32-bit LE host CPUs (amd64, x86, arm)
|
||||||
* Huge files (>1TB--although Btrfs performance on such files isn't great in general)
|
* Large files (kernel 5.4 or later strongly recommended)
|
||||||
* filesystems up to 30T+ bytes, 100M+ files
|
* Filesystem data sizes up to 100T+ bytes, 1000M+ files
|
||||||
|
* `open(O_DIRECT)` (seems to work as well--or as poorly--with bees as with any other btrfs feature)
|
||||||
|
* btrfs-convert from ext2/3/4
|
||||||
|
* btrfs `autodefrag` mount option
|
||||||
|
* btrfs balance (data balances cause rescan of relocated data)
|
||||||
|
* btrfs block-group-tree
|
||||||
|
* btrfs `flushoncommit` and `noflushoncommit` mount options
|
||||||
|
* btrfs mixed block groups
|
||||||
|
* btrfs `nodatacow`/`nodatasum` inode attribute or mount option (bees skips all nodatasum files)
|
||||||
|
* btrfs qgroups and quota support (_not_ squotas)
|
||||||
* btrfs receive
|
* btrfs receive
|
||||||
* btrfs nodatacow/nodatasum inode attribute or mount option (bees skips all nodatasum files)
|
* btrfs scrub
|
||||||
* open(O_DIRECT) (seems to work as well--or as poorly--with bees as with any other btrfs feature)
|
* btrfs send (dedupe pauses automatically, kernel 5.4 or later required)
|
||||||
* lvmcache: no problems observed in testing with recent kernels or reported by users in the last year.
|
* btrfs snapshot, non-snapshot subvols (RW and RO), snapshot delete
|
||||||
|
|
||||||
Bad Btrfs Feature Interactions
|
**Note:** some btrfs features have minimum kernel versions which are
|
||||||
------------------------------
|
higher than the minimum kernel version for bees.
|
||||||
|
|
||||||
bees has been tested in combination with the following, and various problems are known:
|
|
||||||
|
|
||||||
* bcache: no data-losing problems observed in testing with recent kernels
|
|
||||||
or reported by users in the last year. Some issues observed with
|
|
||||||
bcache interacting badly with some SSD models' firmware, but so far
|
|
||||||
this only causes temporary loss of service, not filesystem damage.
|
|
||||||
This behavior does not seem to be specific to bees (ordinary filesystem
|
|
||||||
tests with rsync and snapshots will reproduce it), but it does prevent
|
|
||||||
any significant testing of bees on bcache.
|
|
||||||
|
|
||||||
* btrfs send: there are bugs in `btrfs send` that can be triggered by bees.
|
|
||||||
The [`--workaround-btrfs-send` option](options.md) works around this issue
|
|
||||||
by preventing bees from modifying read-only snapshots.
|
|
||||||
|
|
||||||
* btrfs qgroups: very slow, sometimes hangs...and it's even worse when
|
|
||||||
bees is running.
|
|
||||||
|
|
||||||
* btrfs autodefrag mount option: hangs and high CPU usage problems
|
|
||||||
reported by users. bees cannot distinguish autodefrag activity from
|
|
||||||
normal filesystem activity and will likely try to undo the autodefrag
|
|
||||||
if duplicate copies of the defragmented data exist.
|
|
||||||
|
|
||||||
Untested Btrfs Feature Interactions
|
Untested Btrfs Feature Interactions
|
||||||
-----------------------------------
|
-----------------------------------
|
||||||
@@ -54,9 +39,6 @@ bees has not been tested with the following, and undesirable interactions may oc
|
|||||||
|
|
||||||
* Non-4K filesystem data block size (should work if recompiled)
|
* Non-4K filesystem data block size (should work if recompiled)
|
||||||
* Non-equal hash (SUM) and filesystem data block (CLONE) sizes (need to fix that eventually)
|
* Non-equal hash (SUM) and filesystem data block (CLONE) sizes (need to fix that eventually)
|
||||||
* btrfs seed filesystems (does anyone even use those?)
|
* btrfs seed filesystems, raid-stripe-tree, squotas (no particular reason these wouldn't work, but no one has reported trying)
|
||||||
* btrfs out-of-tree kernel patches (e.g. in-kernel dedupe or encryption)
|
* btrfs out-of-tree kernel patches (e.g. encryption, extent tree v2)
|
||||||
* btrfs-convert from ext2/3/4 (never tested, might run out of space or ignore significant portions of the filesystem due to sanity checks)
|
|
||||||
* btrfs mixed block groups (don't know a reason why it would *not* work, but never tested)
|
|
||||||
* flashcache: an out-of-tree cache-HDD-on-SSD block layer helper.
|
|
||||||
* Host CPUs with exotic page sizes, alignment requirements, or endianness (ppc, alpha, sparc, strongarm, s390, mips, m68k...)
|
* Host CPUs with exotic page sizes, alignment requirements, or endianness (ppc, alpha, sparc, strongarm, s390, mips, m68k...)
|
||||||
|
301
docs/config.md
301
docs/config.md
@@ -8,9 +8,10 @@ are reasonable in most cases.
|
|||||||
Hash Table Sizing
|
Hash Table Sizing
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
Hash table entries are 16 bytes per data block. The hash table stores
|
Hash table entries are 16 bytes per data block. The hash table stores the
|
||||||
the most recently read unique hashes. Once the hash table is full,
|
most recently read unique hashes. Once the hash table is full, each new
|
||||||
each new entry in the table evicts an old entry.
|
entry added to the table evicts an old entry. This makes the hash table
|
||||||
|
a sliding window over the most recently scanned data from the filesystem.
|
||||||
|
|
||||||
Here are some numbers to estimate appropriate hash table sizes:
|
Here are some numbers to estimate appropriate hash table sizes:
|
||||||
|
|
||||||
@@ -25,9 +26,7 @@ Here are some numbers to estimate appropriate hash table sizes:
|
|||||||
Notes:
|
Notes:
|
||||||
|
|
||||||
* If the hash table is too large, no extra dedupe efficiency is
|
* If the hash table is too large, no extra dedupe efficiency is
|
||||||
obtained, and the extra space just wastes RAM. Extra space can also slow
|
obtained, and the extra space wastes RAM.
|
||||||
bees down by preventing old data from being evicted, so bees wastes time
|
|
||||||
looking for matching data that is no longer present on the filesystem.
|
|
||||||
|
|
||||||
* If the hash table is too small, bees extrapolates from matching
|
* If the hash table is too small, bees extrapolates from matching
|
||||||
blocks to find matching adjacent blocks in the filesystem that have been
|
blocks to find matching adjacent blocks in the filesystem that have been
|
||||||
@@ -36,6 +35,10 @@ one block in common between two extents in order to be able to dedupe
|
|||||||
the entire extents. This provides significantly more dedupe hit rate
|
the entire extents. This provides significantly more dedupe hit rate
|
||||||
per hash table byte than other dedupe tools.
|
per hash table byte than other dedupe tools.
|
||||||
|
|
||||||
|
* There is a fairly wide range of usable hash sizes, and performance
|
||||||
|
degrades according to a smooth probabilistic curve in both directions.
|
||||||
|
Double or half the optimum size usually works just as well.
|
||||||
|
|
||||||
* When counting unique data in compressed data blocks to estimate
|
* When counting unique data in compressed data blocks to estimate
|
||||||
optimum hash table size, count the *uncompressed* size of the data.
|
optimum hash table size, count the *uncompressed* size of the data.
|
||||||
|
|
||||||
@@ -52,25 +55,25 @@ patterns on dedupe effectiveness without performing deep inspection of
|
|||||||
both the filesystem data and its structure--a task that is as expensive
|
both the filesystem data and its structure--a task that is as expensive
|
||||||
as performing the deduplication.
|
as performing the deduplication.
|
||||||
|
|
||||||
* **Compression** on the filesystem reduces the average extent length
|
* **Compression** in files reduces the average extent length compared
|
||||||
compared to uncompressed filesystems. The maximum compressed extent
|
to uncompressed files. The maximum compressed extent length on
|
||||||
length on btrfs is 128KB, while the maximum uncompressed extent length
|
btrfs is 128KB, while the maximum uncompressed extent length is 128MB.
|
||||||
is 128MB. Longer extents decrease the optimum hash table size while
|
Longer extents decrease the optimum hash table size while shorter extents
|
||||||
shorter extents increase the optimum hash table size because the
|
increase the optimum hash table size, because the probability of a hash
|
||||||
probability of a hash table entry being present (i.e. unevicted) in
|
table entry being present (i.e. unevicted) in each extent is proportional
|
||||||
each extent is proportional to the extent length.
|
to the extent length.
|
||||||
|
|
||||||
As a rule of thumb, the optimal hash table size for a compressed
|
As a rule of thumb, the optimal hash table size for a compressed
|
||||||
filesystem is 2-4x larger than the optimal hash table size for the same
|
filesystem is 2-4x larger than the optimal hash table size for the same
|
||||||
data on an uncompressed filesystem. Dedupe efficiency falls dramatically
|
data on an uncompressed filesystem. Dedupe efficiency falls rapidly with
|
||||||
with hash tables smaller than 128MB/TB as the average dedupe extent size
|
hash tables smaller than 128MB/TB as the average dedupe extent size is
|
||||||
is larger than the largest possible compressed extent size (128KB).
|
larger than the largest possible compressed extent size (128KB).
|
||||||
|
|
||||||
* **Short writes** also shorten the average extent length and increase
|
* **Short writes or fragmentation** also shorten the average extent
|
||||||
optimum hash table size. If a database writes to files randomly using
|
length and increase optimum hash table size. If a database writes to
|
||||||
4K page writes, all of these extents will be 4K in length, and the hash
|
files randomly using 4K page writes, all of these extents will be 4K
|
||||||
table size must be increased to retain each one (or the user must accept
|
in length, and the hash table size must be increased to retain each one
|
||||||
a lower dedupe hit rate).
|
(or the user must accept a lower dedupe hit rate).
|
||||||
|
|
||||||
Defragmenting files that have had many short writes increases the
|
Defragmenting files that have had many short writes increases the
|
||||||
extent length and therefore reduces the optimum hash table size.
|
extent length and therefore reduces the optimum hash table size.
|
||||||
@@ -91,27 +94,70 @@ code files over and over, so it will need a smaller hash table than a
|
|||||||
backup server which has to refer to the oldest data on the filesystem
|
backup server which has to refer to the oldest data on the filesystem
|
||||||
every time a new client machine's data is added to the server.
|
every time a new client machine's data is added to the server.
|
||||||
|
|
||||||
Scanning modes for multiple subvols
|
Scanning modes
|
||||||
-----------------------------------
|
--------------
|
||||||
|
|
||||||
The `--scan-mode` option affects how bees schedules worker threads
|
The `--scan-mode` option affects how bees iterates over the filesystem,
|
||||||
between subvolumes. Scan modes are an experimental feature and will
|
schedules extents for scanning, and tracks progress.
|
||||||
likely be deprecated in favor of a better solution.
|
|
||||||
|
|
||||||
Scan mode can be changed at any time by restarting bees with a different
|
There are now two kinds of scan mode: the legacy **subvol** scan modes,
|
||||||
mode option. Scan state tracking is the same for all of the currently
|
and the new **extent** scan mode.
|
||||||
implemented modes. The difference between the modes is the order in
|
|
||||||
which subvols are selected.
|
|
||||||
|
|
||||||
If a filesystem has only one subvolume with data in it, then the
|
Scan mode can be changed by restarting bees with a different scan mode
|
||||||
`--scan-mode` option has no effect. In this case, there is only one
|
option.
|
||||||
subvolume to scan, so worker threads will all scan that one.
|
|
||||||
|
|
||||||
Within a subvol, there is a single optimal scan order: files are scanned
|
Extent scan mode:
|
||||||
in ascending numerical inode order. Each worker will scan a different
|
|
||||||
inode to avoid having the threads contend with each other for locks.
|
* Works with 4.15 and later kernels.
|
||||||
File data is read sequentially and in order, but old blocks from earlier
|
* Can estimate progress and provide an ETA.
|
||||||
scans are skipped.
|
* Can optimize scanning order to dedupe large extents first.
|
||||||
|
* Can keep up with frequent creation and deletion of snapshots.
|
||||||
|
|
||||||
|
Subvol scan modes:
|
||||||
|
|
||||||
|
* Work with 4.14 and earlier kernels.
|
||||||
|
* Cannot estimate or report progress.
|
||||||
|
* Cannot optimize scanning order by extent size.
|
||||||
|
* Have problems keeping up with multiple snapshots created during a scan.
|
||||||
|
|
||||||
|
The default scan mode is 4, "extent".
|
||||||
|
|
||||||
|
If you are using bees for the first time on a filesystem with many
|
||||||
|
existing snapshots, you should read about [snapshot gotchas](gotchas.md).
|
||||||
|
|
||||||
|
Subvol scan modes
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
Subvol scan modes are maintained for compatibility with existing
|
||||||
|
installations, but will not be developed further. New installations
|
||||||
|
should use extent scan mode instead.
|
||||||
|
|
||||||
|
The _quantity_ of text below detailing the shortcomings of each subvol
|
||||||
|
scan mode should be informative all by itself.
|
||||||
|
|
||||||
|
Subvol scan modes work on any kernel version supported by bees. They
|
||||||
|
are the only scan modes usable on kernel 4.14 and earlier.
|
||||||
|
|
||||||
|
The difference between the subvol scan modes is the order in which the
|
||||||
|
files from different subvols are fed into the scanner. They all scan
|
||||||
|
files in inode number order, from low to high offset within each inode,
|
||||||
|
the same way that a program like `cat` would read files (but skipping
|
||||||
|
over old data from earlier btrfs transactions).
|
||||||
|
|
||||||
|
If a filesystem has only one subvolume with data in it, then all of
|
||||||
|
the subvol scan modes are equivalent. In this case, there is only one
|
||||||
|
subvolume to scan, so every possible ordering of subvols is the same.
|
||||||
|
|
||||||
|
The `--workaround-btrfs-send` option pauses scanning subvols that are
|
||||||
|
read-only. If the subvol is made read-write (e.g. with `btrfs prop set
|
||||||
|
$subvol ro false`), or if the `--workaround-btrfs-send` option is removed,
|
||||||
|
then the scan of that subvol is unpaused and dedupe proceeds normally.
|
||||||
|
Space will only be recovered when the last read-only subvol is deleted.
|
||||||
|
|
||||||
|
Subvol scan modes cannot efficiently or accurately calculate an ETA for
|
||||||
|
completion or estimate progress through the data. They simply request
|
||||||
|
"the next new inode" from btrfs, and they are completed when btrfs says
|
||||||
|
there is no next new inode.
|
||||||
|
|
||||||
Between subvols, there are several scheduling algorithms with different
|
Between subvols, there are several scheduling algorithms with different
|
||||||
trade-offs:
|
trade-offs:
|
||||||
@@ -119,68 +165,151 @@ trade-offs:
|
|||||||
Scan mode 0, "lockstep", scans the same inode number in each subvol at
|
Scan mode 0, "lockstep", scans the same inode number in each subvol at
|
||||||
close to the same time. This is useful if the subvols are snapshots
|
close to the same time. This is useful if the subvols are snapshots
|
||||||
with a common ancestor, since the same inode number in each subvol will
|
with a common ancestor, since the same inode number in each subvol will
|
||||||
have similar or identical contents. This maximizes the likelihood
|
have similar or identical contents. This maximizes the likelihood that
|
||||||
that all of the references to a snapshot of a file are scanned at
|
all of the references to a snapshot of a file are scanned at close to
|
||||||
close to the same time, improving dedupe hit rate and possibly taking
|
the same time, improving dedupe hit rate. If the subvols are unrelated
|
||||||
advantage of VFS caching in the Linux kernel. If the subvols are
|
(i.e. not snapshots of a single subvol) then this mode does not provide
|
||||||
unrelated (i.e. not snapshots of a single subvol) then this mode does
|
any significant advantage. This mode uses smaller amounts of temporary
|
||||||
not provide significant benefit over random selection. This mode uses
|
space for shorter periods of time when most subvols are snapshots. When a
|
||||||
smaller amounts of temporary space for shorter periods of time when most
|
new snapshot is created, this mode will stop scanning other subvols and
|
||||||
subvols are snapshots. When a new snapshot is created, this mode will
|
scan the new snapshot until the same inode number is reached in each
|
||||||
stop scanning other subvols and scan the new snapshot until the same
|
subvol, which will effectively stop dedupe temporarily as this data has
|
||||||
inode number is reached in each subvol, which will effectively stop
|
already been scanned and deduped in the other snapshots.
|
||||||
dedupe temporarily as this data has already been scanned and deduped
|
|
||||||
in the other snapshots.
|
|
||||||
|
|
||||||
Scan mode 1, "independent", scans the next inode with new data in each
|
Scan mode 1, "independent", scans the next inode with new data in
|
||||||
subvol. Each subvol's scanner shares inodes uniformly with all other
|
each subvol. There is no coordination between the subvols, other than
|
||||||
subvol scanners until the subvol has no new inodes left. This mode makes
|
round-robin distribution of files from each subvol to each worker thread.
|
||||||
continuous forward progress across the filesystem and provides average
|
This mode makes continuous forward progress in all subvols. When a new
|
||||||
performance across a variety of workloads, but is slow to respond to new
|
snapshot is created, previous subvol scans continue as before, but the
|
||||||
data, and may spend a lot of time deduping short-lived subvols that will
|
worker threads are now divided among one more subvol.
|
||||||
soon be deleted when it is preferable to dedupe long-lived subvols that
|
|
||||||
will be the origin of future snapshots. When a new snapshot is created,
|
|
||||||
previous subvol scans continue as before, but the time is now divided
|
|
||||||
among one more subvol.
|
|
||||||
|
|
||||||
Scan mode 2, "sequential", scans one subvol at a time, in numerical subvol
|
Scan mode 2, "sequential", scans one subvol at a time, in numerical subvol
|
||||||
ID order, processing each subvol completely before proceeding to the
|
ID order, processing each subvol completely before proceeding to the next
|
||||||
next subvol. This avoids spending time scanning short-lived snapshots
|
subvol. This avoids spending time scanning short-lived snapshots that
|
||||||
that will be deleted before they can be fully deduped (e.g. those used
|
will be deleted before they can be fully deduped (e.g. those used for
|
||||||
for `btrfs send`). Scanning is concentrated on older subvols that are
|
`btrfs send`). Scanning starts on older subvols that are more likely
|
||||||
more likely to be origin subvols for future snapshots, eliminating the
|
to be origin subvols for future snapshots, eliminating the need to
|
||||||
need to dedupe future snapshots separately. This mode uses the largest
|
dedupe future snapshots separately. This mode uses the largest amount
|
||||||
amount of temporary space for the longest time, and typically requires
|
of temporary space for the longest time, and typically requires a larger
|
||||||
a larger hash table to maintain dedupe hit rate.
|
hash table to maintain dedupe hit rate.
|
||||||
|
|
||||||
Scan mode 3, "recent", scans the subvols with the highest `min_transid`
|
Scan mode 3, "recent", scans the subvols with the highest `min_transid`
|
||||||
value first (i.e. the ones that were most recently completely scanned),
|
value first (i.e. the ones that were most recently completely scanned),
|
||||||
then falls back to "independent" mode to break ties. This interrupts
|
then falls back to "independent" mode to break ties. This interrupts
|
||||||
long scans of old subvols to give a rapid dedupe response to new data,
|
long scans of old subvols to give a rapid dedupe response to new data
|
||||||
then returns to the old subvols after the new data is scanned. It is
|
in previously scanned subvols, then returns to the old subvols after
|
||||||
useful for large filesystems with multiple active subvols and rotating
|
the new data is scanned.
|
||||||
snapshots, where the first-pass scan can take months, but new duplicate
|
|
||||||
data appears every day.
|
|
||||||
|
|
||||||
The default scan mode is 1, "independent".
|
Extent scan mode
|
||||||
|
----------------
|
||||||
|
|
||||||
If you are using bees for the first time on a filesystem with many
|
Scan mode 4, "extent", scans the extent tree instead of the subvol trees.
|
||||||
existing snapshots, you should read about [snapshot gotchas](gotchas.md).
|
Extent scan mode reads each extent once, regardless of the number of
|
||||||
|
reflinks or snapshots. It adapts to the creation of new snapshots
|
||||||
|
and reflinks immediately, without having to revisit old data.
|
||||||
|
|
||||||
|
In the extent scan mode, extents are separated into multiple size tiers
|
||||||
|
to prioritize large extents over small ones. Deduping large extents
|
||||||
|
keeps the metadata update cost low per block saved, resulting in faster
|
||||||
|
dedupe at the start of a scan cycle. This is important for maximizing
|
||||||
|
performance in use cases where bees runs for a limited time, such as
|
||||||
|
during an overnight maintenance window.
|
||||||
|
|
||||||
|
Once the larger size tiers are completed, dedupe space recovery speeds
|
||||||
|
slow down significantly. It may be desirable to stop bees running once
|
||||||
|
the larger size tiers are finished, then start bees running some time
|
||||||
|
later after new data has appeared.
|
||||||
|
|
||||||
|
Each extent is mapped in physical address order, and all extent references
|
||||||
|
are submitted to the scanner at the same time, resulting in much better
|
||||||
|
cache behavior and dedupe performance compared to the subvol scan modes.
|
||||||
|
|
||||||
|
The "extent" scan mode is not usable on kernels before 4.15 because
|
||||||
|
it relies on the `LOGICAL_INO_V2` ioctl added in that kernel release.
|
||||||
|
When using bees with an older kernel, only subvol scan modes will work.
|
||||||
|
|
||||||
|
Extents are divided into virtual subvols by size, using reserved btrfs
|
||||||
|
subvol IDs 250..255. The size tier groups are:
|
||||||
|
* 250: 32M+1 and larger
|
||||||
|
* 251: 8M+1..32M
|
||||||
|
* 252: 2M+1..8M
|
||||||
|
* 253: 512K+1..2M
|
||||||
|
* 254: 128K+1..512K
|
||||||
|
* 255: 128K and smaller (includes all compressed extents)
|
||||||
|
|
||||||
|
Extent scan mode can efficiently calculate dedupe progress within
|
||||||
|
the filesystem and estimate an ETA for completion within each size
|
||||||
|
tier; however, the accuracy of the ETA can be questionable due to the
|
||||||
|
non-uniform distribution of block addresses in a typical user filesystem.
|
||||||
|
|
||||||
|
Older versions of bees do not recognize the virtual subvols, so running
|
||||||
|
an old bees version after running a new bees version will reset the
|
||||||
|
"extent" scan mode's progress in `beescrawl.dat` to the beginning.
|
||||||
|
This may change in future bees releases, i.e. extent scans will store
|
||||||
|
their checkpoint data somewhere else.
|
||||||
|
|
||||||
|
The `--workaround-btrfs-send` option behaves differently in extent
|
||||||
|
scan mode: in extent scan mode, dedupe proceeds on all subvols that are
|
||||||
|
read-write, but all subvols that are read-only are excluded from dedupe.
|
||||||
|
Space will only be recovered when the last read-only subvol is deleted.
|
||||||
|
|
||||||
|
During `btrfs send`, duplicate extents in the sent subvol will not be
|
||||||
|
removed (the kernel will reject dedupe commands while send is active,
|
||||||
|
and bees currently will not re-issue them after the send is complete).
|
||||||
|
It may be preferable to terminate the bees process while running `btrfs
|
||||||
|
send` in extent scan mode, and restart bees after the `send` is complete.
|
||||||
|
|
||||||
Threads and load management
|
Threads and load management
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
By default, bees creates one worker thread for each CPU detected.
|
By default, bees creates one worker thread for each CPU detected. These
|
||||||
These threads then perform scanning and dedupe operations. The number of
|
threads then perform scanning and dedupe operations. bees attempts to
|
||||||
worker threads can be set with the [`--thread-count` and `--thread-factor`
|
maximize the amount of productive work each thread does, until either the
|
||||||
options](options.md).
|
threads are all continuously busy, or there is no remaining work to do.
|
||||||
|
|
||||||
If desired, bees can automatically increase or decrease the number
|
In many cases it is not desirable to continually run bees at maximum
|
||||||
of worker threads in response to system load. This reduces impact on
|
performance. Maximum performance is not necessary if bees can dedupe
|
||||||
the rest of the system by pausing bees when other CPU and IO intensive
|
new data faster than it appears on the filesystem. If it only takes
|
||||||
loads are active on the system, and resumes bees when the other loads
|
bees 10 minutes per day to dedupe all new data on a filesystem, then
|
||||||
are inactive. This is configured with the [`--loadavg-target` and
|
bees doesn't need to run for more than 10 minutes per day.
|
||||||
`--thread-min` options](options.md).
|
|
||||||
|
bees supports a number of options for reducing system load:
|
||||||
|
|
||||||
|
* Run bees for a few hours per day, at an off-peak time (i.e. during
|
||||||
|
a maintenance window), instead of running bees continuously. Any data
|
||||||
|
added to the filesystem while bees is not running will be scanned when
|
||||||
|
bees restarts. At the end of the maintenance window, terminate the
|
||||||
|
bees process with SIGTERM to write the hash table and scan position
|
||||||
|
for the next maintenance window.
|
||||||
|
|
||||||
|
* Temporarily pause bees operation by sending the bees process SIGUSR1,
|
||||||
|
and resume operation with SIGUSR2. This is preferable to freezing
|
||||||
|
and thawing the process, e.g. with freezer cgroups or SIGSTOP/SIGCONT
|
||||||
|
signals, because it allows bees to close open file handles that would
|
||||||
|
otherwise prevent those files from being deleted while bees is frozen.
|
||||||
|
|
||||||
|
* Reduce the number of worker threads with the [`--thread-count` or
|
||||||
|
`--thread-factor` options](options.md). This simply leaves CPU cores
|
||||||
|
idle so that other applications on the host can use them, or to save
|
||||||
|
power.
|
||||||
|
|
||||||
|
* Allow bees to automatically track system load and increase or decrease
|
||||||
|
the number of threads to reach a target system load. This reduces
|
||||||
|
impact on the rest of the system by pausing bees when other CPU and IO
|
||||||
|
intensive loads are active on the system, and resumes bees when the other
|
||||||
|
loads are inactive. This is configured with the [`--loadavg-target`
|
||||||
|
and `--thread-min` options](options.md).
|
||||||
|
|
||||||
|
* Allow bees to self-throttle operations that enqueue delayed work
|
||||||
|
within btrfs. These operations are not well controlled by Linux
|
||||||
|
features such as process priority or IO priority or IO rate-limiting,
|
||||||
|
because the enqueued work is submitted to btrfs several seconds before
|
||||||
|
btrfs performs the work. By the time btrfs performs the work, it's too
|
||||||
|
late for external throttling to be effective. The [`--throttle-factor`
|
||||||
|
option](options.md) tracks how long it takes btrfs to complete queued
|
||||||
|
operations, and reduces bees's queued work submission rate to match
|
||||||
|
btrfs's queued work completion rate (or a fraction thereof, to reduce
|
||||||
|
system load).
|
||||||
|
|
||||||
Log verbosity
|
Log verbosity
|
||||||
-------------
|
-------------
|
||||||
|
@@ -120,10 +120,14 @@ The `crawl` event group consists of operations related to scanning btrfs trees t
|
|||||||
|
|
||||||
* `crawl_again`: An inode crawl was restarted because the extent was already locked by another running crawl.
|
* `crawl_again`: An inode crawl was restarted because the extent was already locked by another running crawl.
|
||||||
* `crawl_blacklisted`: An extent was not scanned because it belongs to a blacklisted file.
|
* `crawl_blacklisted`: An extent was not scanned because it belongs to a blacklisted file.
|
||||||
* `crawl_create`: A new subvol crawler was created.
|
* `crawl_deferred_inode`: Two tasks attempted to scan the same inode at the same time, so one was deferred.
|
||||||
* `crawl_done`: One pass over all subvols on the filesystem was completed.
|
* `crawl_done`: One pass over a subvol was completed.
|
||||||
|
* `crawl_discard_high`: An extent that was too large for the crawler's size tier was discarded.
|
||||||
|
* `crawl_discard_low`: An extent that was too small for the crawler's size tier was discarded.
|
||||||
* `crawl_empty`: A `TREE_SEARCH_V2` ioctl call failed or returned an empty set (usually because all data in the subvol was scanned).
|
* `crawl_empty`: A `TREE_SEARCH_V2` ioctl call failed or returned an empty set (usually because all data in the subvol was scanned).
|
||||||
|
* `crawl_extent`: The extent crawler queued all references to an extent for processing.
|
||||||
* `crawl_fail`: A `TREE_SEARCH_V2` ioctl call failed.
|
* `crawl_fail`: A `TREE_SEARCH_V2` ioctl call failed.
|
||||||
|
* `crawl_flop`: Small extent items were not skipped because the next extent started at or before the end of the previous extent.
|
||||||
* `crawl_gen_high`: An extent item in the search results refers to an extent that is newer than the current crawl's `max_transid` allows.
|
* `crawl_gen_high`: An extent item in the search results refers to an extent that is newer than the current crawl's `max_transid` allows.
|
||||||
* `crawl_gen_low`: An extent item in the search results refers to an extent that is older than the current crawl's `min_transid` allows.
|
* `crawl_gen_low`: An extent item in the search results refers to an extent that is older than the current crawl's `min_transid` allows.
|
||||||
* `crawl_hole`: An extent item in the search results refers to a hole.
|
* `crawl_hole`: An extent item in the search results refers to a hole.
|
||||||
@@ -135,8 +139,13 @@ The `crawl` event group consists of operations related to scanning btrfs trees t
|
|||||||
* `crawl_prealloc`: An extent item in the search results refers to a `PREALLOC` extent.
|
* `crawl_prealloc`: An extent item in the search results refers to a `PREALLOC` extent.
|
||||||
* `crawl_push`: An extent item in the search results is suitable for scanning and deduplication.
|
* `crawl_push`: An extent item in the search results is suitable for scanning and deduplication.
|
||||||
* `crawl_scan`: An extent item in the search results is submitted to `BeesContext::scan_forward` for scanning and deduplication.
|
* `crawl_scan`: An extent item in the search results is submitted to `BeesContext::scan_forward` for scanning and deduplication.
|
||||||
|
* `crawl_skip`: Small extent items were skipped because no extent of sufficient size was found within the minimum search distance.
|
||||||
|
* `crawl_skip_ms`: Time spent skipping small extent items.
|
||||||
* `crawl_search`: A `TREE_SEARCH_V2` ioctl call was successful.
|
* `crawl_search`: A `TREE_SEARCH_V2` ioctl call was successful.
|
||||||
|
* `crawl_throttled`: Extent scan created too many work queue items and was prevented from creating any more.
|
||||||
|
* `crawl_tree_block`: Extent scan found and skipped a metadata tree block.
|
||||||
* `crawl_unknown`: An extent item in the search results has an unrecognized type.
|
* `crawl_unknown`: An extent item in the search results has an unrecognized type.
|
||||||
|
* `crawl_unthrottled`: Extent scan allowed to create work queue items again.
|
||||||
|
|
||||||
dedup
|
dedup
|
||||||
-----
|
-----
|
||||||
@@ -162,6 +171,25 @@ The `exception` event group consists of C++ exceptions. C++ exceptions are thro
|
|||||||
* `exception_caught`: Total number of C++ exceptions thrown and caught by a generic exception handler.
|
* `exception_caught`: Total number of C++ exceptions thrown and caught by a generic exception handler.
|
||||||
* `exception_caught_silent`: Total number of "silent" C++ exceptions thrown and caught by a generic exception handler. These are exceptions which are part of the correct and normal operation of bees. The exceptions are logged at a lower log level.
|
* `exception_caught_silent`: Total number of "silent" C++ exceptions thrown and caught by a generic exception handler. These are exceptions which are part of the correct and normal operation of bees. The exceptions are logged at a lower log level.
|
||||||
|
|
||||||
|
extent
|
||||||
|
------
|
||||||
|
|
||||||
|
The `extent` event group consists of events that occur within the extent scanner.
|
||||||
|
|
||||||
|
* `extent_deferred_inode`: A lock conflict was detected when two worker threads attempted to manipulate the same inode at the same time.
|
||||||
|
* `extent_empty`: A complete list of references to an extent was created but the list was empty, e.g. because all refs are in deleted inodes or snapshots.
|
||||||
|
* `extent_fail`: An ioctl call to `LOGICAL_INO` failed.
|
||||||
|
* `extent_forward`: An extent reference was submitted for scanning.
|
||||||
|
* `extent_mapped`: A complete map of references to an extent was created and added to the crawl queue.
|
||||||
|
* `extent_ok`: An ioctl call to `LOGICAL_INO` completed successfully.
|
||||||
|
* `extent_overflow`: A complete map of references to an extent exceeded `BEES_MAX_EXTENT_REF_COUNT`, so the extent was dropped.
|
||||||
|
* `extent_ref_missing`: An extent reference reported by `LOGICAL_INO` was not found by later `TREE_SEARCH_V2` calls.
|
||||||
|
* `extent_ref_ok`: One extent reference was queued for scanning.
|
||||||
|
* `extent_restart`: An extent reference was requeued to be scanned again after an active extent lock is released.
|
||||||
|
* `extent_retry`: An extent reference was requeued to be scanned again after an active inode lock is released.
|
||||||
|
* `extent_skip`: A 4K extent with more than 1000 refs was skipped.
|
||||||
|
* `extent_zero`: An ioctl call to `LOGICAL_INO` succeeded, but reported an empty list of extents.
|
||||||
|
|
||||||
hash
|
hash
|
||||||
----
|
----
|
||||||
|
|
||||||
@@ -180,24 +208,6 @@ The `hash` event group consists of operations related to the bees hash table.
|
|||||||
* `hash_insert`: A `(hash, address)` pair was inserted by `BeesHashTable::push_random_hash_addr`.
|
* `hash_insert`: A `(hash, address)` pair was inserted by `BeesHashTable::push_random_hash_addr`.
|
||||||
* `hash_lookup`: The hash table was searched for `(hash, address)` pairs matching a given `hash`.
|
* `hash_lookup`: The hash table was searched for `(hash, address)` pairs matching a given `hash`.
|
||||||
|
|
||||||
inserted
|
|
||||||
--------
|
|
||||||
|
|
||||||
The `inserted` event group consists of operations related to storing hash and address data in the hash table (i.e. the hash table client).
|
|
||||||
|
|
||||||
* `inserted_block`: Total number of data block references scanned and inserted into the hash table.
|
|
||||||
* `inserted_clobbered`: Total number of data block references scanned and eliminated from the filesystem.
|
|
||||||
|
|
||||||
matched
|
|
||||||
-------
|
|
||||||
|
|
||||||
The `matched` event group consists of events related to matching incoming data blocks against existing hash table entries.
|
|
||||||
|
|
||||||
* `matched_0`: A data block was scanned, hash table entries found, but no matching data blocks on the filesystem located.
|
|
||||||
* `matched_1_or_more`: A data block was scanned, hash table entries found, and one or more matching data blocks on the filesystem located.
|
|
||||||
* `matched_2_or_more`: A data block was scanned, hash table entries found, and two or more matching data blocks on the filesystem located.
|
|
||||||
* `matched_3_or_more`: A data block was scanned, hash table entries found, and three or more matching data blocks on the filesystem located.
|
|
||||||
|
|
||||||
open
|
open
|
||||||
----
|
----
|
||||||
|
|
||||||
@@ -259,12 +269,29 @@ The `pairforward` event group consists of events related to extending matching b
|
|||||||
* `pairforward_try`: Started extending a pair of matching block ranges forward.
|
* `pairforward_try`: Started extending a pair of matching block ranges forward.
|
||||||
* `pairforward_zero`: A pair of matching block ranges could not be extended forward by one block because the src block contained all zeros and was not compressed.
|
* `pairforward_zero`: A pair of matching block ranges could not be extended forward by one block because the src block contained all zeros and was not compressed.
|
||||||
|
|
||||||
|
progress
|
||||||
|
--------
|
||||||
|
|
||||||
|
The `progress` event group consists of events related to progress estimation.
|
||||||
|
|
||||||
|
* `progress_no_data_bg`: Failed to retrieve any data block groups from the filesystem.
|
||||||
|
* `progress_not_created`: A crawler for one size tier had not been created for the extent scanner.
|
||||||
|
* `progress_complete`: A crawler for one size tier has completed a scan.
|
||||||
|
* `progress_not_found`: The extent position for a crawler does not correspond to any block group.
|
||||||
|
* `progress_out_of_bg`: The extent position for a crawler does not correspond to any data block group.
|
||||||
|
* `progress_ok`: Table of progress and ETA created successfully.
|
||||||
|
|
||||||
readahead
|
readahead
|
||||||
---------
|
---------
|
||||||
|
|
||||||
The `readahead` event group consists of events related to calls to `posix_fadvise`.
|
The `readahead` event group consists of events related to data prefetching (formerly calls to `posix_fadvise` or `readahead`, but now emulated in userspace).
|
||||||
|
|
||||||
* `readahead_ms`: Total time spent running `posix_fadvise(..., POSIX_FADV_WILLNEED)` aka `readahead()`.
|
* `readahead_bytes`: Number of bytes prefetched.
|
||||||
|
* `readahead_count`: Number of read calls.
|
||||||
|
* `readahead_clear`: Number of times the duplicate read cache was cleared.
|
||||||
|
* `readahead_fail`: Number of read errors during prefetch.
|
||||||
|
* `readahead_ms`: Total time spent emulating readahead in userspace (kernel readahead is not measured).
|
||||||
|
* `readahead_skip`: Number of times a duplicate read was identified in the cache and skipped.
|
||||||
* `readahead_unread_ms`: Total time spent running `posix_fadvise(..., POSIX_FADV_DONTNEED)`.
|
* `readahead_unread_ms`: Total time spent running `posix_fadvise(..., POSIX_FADV_DONTNEED)`.
|
||||||
|
|
||||||
replacedst
|
replacedst
|
||||||
@@ -296,11 +323,12 @@ resolve
|
|||||||
|
|
||||||
The `resolve` event group consists of operations related to translating a btrfs virtual block address (i.e. physical block address) to a `(root, inode, offset)` tuple (i.e. locating and opening the file containing a matching block). `resolve` is the top level, `chase` and `adjust` are the lower two levels.
|
The `resolve` event group consists of operations related to translating a btrfs virtual block address (i.e. physical block address) to a `(root, inode, offset)` tuple (i.e. locating and opening the file containing a matching block). `resolve` is the top level, `chase` and `adjust` are the lower two levels.
|
||||||
|
|
||||||
|
* `resolve_empty`: The `LOGICAL_INO` ioctl returned successfully with an empty reference list (0 items).
|
||||||
* `resolve_fail`: The `LOGICAL_INO` ioctl returned an error.
|
* `resolve_fail`: The `LOGICAL_INO` ioctl returned an error.
|
||||||
* `resolve_large`: The `LOGICAL_INO` ioctl returned more than 2730 results (the limit of the v1 ioctl).
|
* `resolve_large`: The `LOGICAL_INO` ioctl returned more than 2730 results (the limit of the v1 ioctl).
|
||||||
* `resolve_ms`: Total time spent in the `LOGICAL_INO` ioctl (i.e. wallclock time, not kernel CPU time).
|
* `resolve_ms`: Total time spent in the `LOGICAL_INO` ioctl (i.e. wallclock time, not kernel CPU time).
|
||||||
* `resolve_ok`: The `LOGICAL_INO` ioctl returned success.
|
* `resolve_ok`: The `LOGICAL_INO` ioctl returned success.
|
||||||
* `resolve_overflow`: The `LOGICAL_INO` ioctl returned more than 655050 extents (the limit of the v2 ioctl).
|
* `resolve_overflow`: The `LOGICAL_INO` ioctl returned 9999 or more extents (the limit configured in `bees.h`).
|
||||||
* `resolve_toxic`: The `LOGICAL_INO` ioctl took more than 0.1 seconds of kernel CPU time.
|
* `resolve_toxic`: The `LOGICAL_INO` ioctl took more than 0.1 seconds of kernel CPU time.
|
||||||
|
|
||||||
root
|
root
|
||||||
@@ -328,35 +356,38 @@ The `scan` event group consists of operations related to scanning incoming data.
|
|||||||
|
|
||||||
* `scan_blacklisted`: A blacklisted extent was passed to `scan_forward` and dropped.
|
* `scan_blacklisted`: A blacklisted extent was passed to `scan_forward` and dropped.
|
||||||
* `scan_block`: A block of data was scanned.
|
* `scan_block`: A block of data was scanned.
|
||||||
* `scan_bump`: After deduping a block range, the scan pointer had to be moved past the end of the deduped byte range.
|
* `scan_compressed_no_dedup`: An extent that was compressed contained non-zero, non-duplicate data.
|
||||||
* `scan_dup_block`: Number of duplicate blocks deduped.
|
* `scan_dup_block`: Number of duplicate block references deduped.
|
||||||
* `scan_dup_hit`: A pair of duplicate block ranges was found and removed.
|
* `scan_dup_hit`: A pair of duplicate block ranges was found.
|
||||||
* `scan_dup_miss`: A pair of duplicate blocks was found in the hash table but not in the filesystem.
|
* `scan_dup_miss`: A pair of duplicate blocks was found in the hash table but not in the filesystem.
|
||||||
* `scan_eof`: Scan past EOF was attempted.
|
|
||||||
* `scan_erase_redundant`: Blocks in the hash table were removed because they were removed from the filesystem by dedupe.
|
|
||||||
* `scan_extent`: An extent was scanned (`scan_one_extent`).
|
* `scan_extent`: An extent was scanned (`scan_one_extent`).
|
||||||
* `scan_extent_tiny`: An extent below 128K that was not the beginning or end of a file was scanned. No action is currently taken for these--they are merely counted.
|
|
||||||
* `scan_forward`: A logical byte range was scanned (`scan_forward`).
|
* `scan_forward`: A logical byte range was scanned (`scan_forward`).
|
||||||
* `scan_found`: An entry was found in the hash table matching a scanned block from the filesystem.
|
* `scan_found`: An entry was found in the hash table matching a scanned block from the filesystem.
|
||||||
* `scan_hash_hit`: A block was found on the filesystem corresponding to a block found in the hash table.
|
* `scan_hash_hit`: A block was found on the filesystem corresponding to a block found in the hash table.
|
||||||
* `scan_hash_miss`: A block was not found on the filesystem corresponding to a block found in the hash table.
|
* `scan_hash_miss`: A block was not found on the filesystem corresponding to a block found in the hash table.
|
||||||
* `scan_hash_preinsert`: A block was prepared for insertion into the hash table.
|
* `scan_hash_preinsert`: A non-zero data block's hash was prepared for possible insertion into the hash table.
|
||||||
|
* `scan_hash_insert`: A non-zero data block's hash was inserted into the hash table.
|
||||||
* `scan_hole`: A hole extent was found during scan and ignored.
|
* `scan_hole`: A hole extent was found during scan and ignored.
|
||||||
* `scan_interesting`: An extent had flags that were not recognized by bees and was ignored.
|
* `scan_interesting`: An extent had flags that were not recognized by bees and was ignored.
|
||||||
* `scan_lookup`: A hash was looked up in the hash table.
|
* `scan_lookup`: A hash was looked up in the hash table.
|
||||||
* `scan_malign`: A block being scanned matched a hash at EOF in the hash table, but the EOF was not aligned to a block boundary and the two blocks did not have the same length.
|
* `scan_malign`: A block being scanned matched a hash at EOF in the hash table, but the EOF was not aligned to a block boundary and the two blocks did not have the same length.
|
||||||
* `scan_no_fd`: References to a block from the hash table were found, but a FD could not be opened.
|
|
||||||
* `scan_no_rewrite`: All blocks in an extent were removed by dedupe (i.e. no copies).
|
|
||||||
* `scan_push_front`: An entry in the hash table matched a duplicate block, so the entry was moved to the head of its LRU list.
|
* `scan_push_front`: An entry in the hash table matched a duplicate block, so the entry was moved to the head of its LRU list.
|
||||||
* `scan_reinsert`: A copied block's hash and block address were inserted into the hash table.
|
* `scan_reinsert`: A copied block's hash and block address were inserted into the hash table.
|
||||||
* `scan_resolve_hit`: A block address in the hash table was successfully resolved to an open FD and offset pair.
|
* `scan_resolve_hit`: A block address in the hash table was successfully resolved to an open FD and offset pair.
|
||||||
* `scan_resolve_zero`: A block address in the hash table was not resolved to any subvol/inode pair, so the corresponding hash table entry was removed.
|
* `scan_resolve_zero`: A block address in the hash table was not resolved to any subvol/inode pair, so the corresponding hash table entry was removed.
|
||||||
* `scan_rewrite`: A range of bytes in a file was copied, then the copy deduped over the original data.
|
* `scan_rewrite`: A range of bytes in a file was copied, then the copy deduped over the original data.
|
||||||
|
* `scan_root_dead`: A deleted subvol was detected.
|
||||||
|
* `scan_seen_clear`: The list of recently scanned extents reached maximum size and was cleared.
|
||||||
|
* `scan_seen_erase`: An extent reference was modified by scan, so all future references to the extent must be scanned.
|
||||||
|
* `scan_seen_hit`: A scan was skipped because the same extent had recently been scanned.
|
||||||
|
* `scan_seen_insert`: An extent reference was not modified by scan and its hashes have been inserted into the hash table, so all future references to the extent can be ignored.
|
||||||
|
* `scan_seen_miss`: A scan was not skipped because the same extent had not recently been scanned (i.e. the extent was scanned normally).
|
||||||
|
* `scan_skip_bytes`: Nuisance dedupe or hole-punching would save less than half of the data in an extent.
|
||||||
|
* `scan_skip_ops`: Nuisance dedupe or hole-punching would require too many dedupe/copy/hole-punch operations in an extent.
|
||||||
* `scan_toxic_hash`: A scanned block has the same hash as a hash table entry that is marked toxic.
|
* `scan_toxic_hash`: A scanned block has the same hash as a hash table entry that is marked toxic.
|
||||||
* `scan_toxic_match`: A hash table entry points to a block that is discovered to be toxic.
|
* `scan_toxic_match`: A hash table entry points to a block that is discovered to be toxic.
|
||||||
* `scan_twice`: Two references to the same block have been found in the hash table.
|
* `scan_twice`: Two references to the same block have been found in the hash table.
|
||||||
* `scan_zero_compressed`: An extent that was compressed and contained only zero bytes was found.
|
* `scan_zero`: A data block containing only zero bytes was detected.
|
||||||
* `scan_zero_uncompressed`: A block that contained only zero bytes was found in an uncompressed extent.
|
|
||||||
|
|
||||||
scanf
|
scanf
|
||||||
-----
|
-----
|
||||||
@@ -364,9 +395,10 @@ scanf
|
|||||||
The `scanf` event group consists of operations related to `BeesContext::scan_forward`. This is the entry point where `crawl` schedules new data for scanning.
|
The `scanf` event group consists of operations related to `BeesContext::scan_forward`. This is the entry point where `crawl` schedules new data for scanning.
|
||||||
|
|
||||||
* `scanf_deferred_extent`: Two tasks attempted to scan the same extent at the same time, so one was deferred.
|
* `scanf_deferred_extent`: Two tasks attempted to scan the same extent at the same time, so one was deferred.
|
||||||
* `scanf_deferred_inode`: Two tasks attempted to scan the same inode at the same time, so one was deferred.
|
* `scanf_eof`: Scan past EOF was attempted.
|
||||||
* `scanf_extent`: A btrfs extent item was scanned.
|
* `scanf_extent`: A btrfs extent item was scanned.
|
||||||
* `scanf_extent_ms`: Total thread-seconds spent scanning btrfs extent items.
|
* `scanf_extent_ms`: Total thread-seconds spent scanning btrfs extent items.
|
||||||
|
* `scanf_no_fd`: References to a block from the hash table were found, but a FD could not be opened.
|
||||||
* `scanf_total`: A logical byte range of a file was scanned.
|
* `scanf_total`: A logical byte range of a file was scanned.
|
||||||
* `scanf_total_ms`: Total thread-seconds spent scanning logical byte ranges.
|
* `scanf_total_ms`: Total thread-seconds spent scanning logical byte ranges.
|
||||||
|
|
||||||
|
@@ -51,81 +51,40 @@ loops early. The exception text in this case is:
|
|||||||
Terminating bees with SIGTERM
|
Terminating bees with SIGTERM
|
||||||
-----------------------------
|
-----------------------------
|
||||||
|
|
||||||
bees is designed to survive host crashes, so it is safe to terminate
|
bees is designed to survive host crashes, so it is safe to terminate bees
|
||||||
bees using SIGKILL; however, when bees next starts up, it will repeat
|
using SIGKILL; however, when bees next starts up, it will repeat some
|
||||||
some work that was performed between the last bees crawl state save point
|
work that was performed between the last bees crawl state save point
|
||||||
and the SIGKILL (up to 15 minutes). If bees is stopped and started less
|
and the SIGKILL (up to 15 minutes), and a large hash table may not be
|
||||||
than once per day, then this is not a problem as the proportional impact
|
completely written back to disk, so some duplicate matches will be lost.
|
||||||
is quite small; however, users who stop and start bees daily or even
|
|
||||||
more often may prefer to have a clean shutdown with SIGTERM so bees can
|
|
||||||
restart faster.
|
|
||||||
|
|
||||||
bees handling of SIGTERM can take a long time on machines with some or
|
If bees is stopped and started less than once per week, then this is not
|
||||||
all of:
|
a problem as the proportional impact is quite small; however, users who
|
||||||
|
stop and start bees daily or even more often may prefer to have a clean
|
||||||
|
shutdown with SIGTERM so bees can restart faster.
|
||||||
|
|
||||||
* Large RAM and `vm.dirty_ratio`
|
The shutdown procedure performs these steps:
|
||||||
* Large number of active bees worker threads
|
|
||||||
* Large number of bees temporary files (proportional to thread count)
|
|
||||||
* Large hash table size
|
|
||||||
* Large filesystem size
|
|
||||||
* High IO latency, especially "low power" spinning disks
|
|
||||||
* High filesystem activity, especially duplicate data writes
|
|
||||||
|
|
||||||
Each of these factors individually increases the total time required
|
1. Crawl state is saved to `$BEESHOME`. This is the most
|
||||||
to perform a clean bees shutdown. When combined, the factors can
|
|
||||||
multiply with each other, dramatically increasing the time required to
|
|
||||||
flush bees state to disk.
|
|
||||||
|
|
||||||
On a large system with many of the above factors present, a "clean"
|
|
||||||
bees shutdown can take more than 20 minutes. Even a small machine
|
|
||||||
(16GB RAM, 1GB hash table, 1TB NVME disk) can take several seconds to
|
|
||||||
complete a SIGTERM shutdown.
|
|
||||||
|
|
||||||
The shutdown procedure performs potentially long-running tasks in
|
|
||||||
this order:
|
|
||||||
|
|
||||||
1. Worker threads finish executing their current Task and exit.
|
|
||||||
Threads executing `LOGICAL_INO` ioctl calls usually finish quickly,
|
|
||||||
but btrfs imposes no limit on the ioctl's running time, so it
|
|
||||||
can take several minutes in rare bad cases. If there is a btrfs
|
|
||||||
commit already in progress on the filesystem, then most worker
|
|
||||||
threads will be blocked until the btrfs commit is finished.
|
|
||||||
|
|
||||||
2. Crawl state is saved to `$BEESHOME`. This normally completes
|
|
||||||
relatively quickly (a few seconds at most). This is the most
|
|
||||||
important bees state to save to disk as it directly impacts
|
important bees state to save to disk as it directly impacts
|
||||||
restart time, so it is done as early as possible (but no earlier).
|
restart time, so it is done as early as possible.
|
||||||
|
|
||||||
3. Hash table is written to disk. Normally the hash table is
|
2. Hash table is written to disk. Normally the hash table is
|
||||||
trickled back to disk at a rate of about 2GB per hour;
|
trickled back to disk at a rate of about 128KiB per second;
|
||||||
however, SIGTERM causes bees to attempt to flush the whole table
|
however, SIGTERM causes bees to attempt to flush the whole table
|
||||||
immediately. If bees has recently been idle then the hash table is
|
immediately. The time spent here depends on the size of RAM, speed
|
||||||
likely already flushed to disk, so this step will finish quickly;
|
of disks, and aggressiveness of competing filesystem workloads.
|
||||||
however, if bees has recently been active and the hash table is
|
It can trigger `vm.dirty_bytes` limits and block other processes
|
||||||
large relative to RAM size, the blast of rapidly written data
|
writing to the filesystem for a while.
|
||||||
can force the Linux VFS to block all writes to the filesystem
|
|
||||||
for sufficient time to complete all pending btrfs metadata
|
|
||||||
writes which accumulated during the btrfs commit before bees
|
|
||||||
received SIGTERM...and _then_ let bees write out the hash table.
|
|
||||||
The time spent here depends on the size of RAM, speed of disks,
|
|
||||||
and aggressiveness of competing filesystem workloads.
|
|
||||||
|
|
||||||
4. bees temporary files are closed, which implies deletion of their
|
3. The bees process calls `_exit`, which terminates all running
|
||||||
inodes. These are files which consist entirely of shared extent
|
worker threads, closes and deletes all temporary files. This
|
||||||
structures, and btrfs takes an unusually long time to delete such
|
can take a while _after_ the bees process exits, especially on
|
||||||
files (up to a few minutes for each on slow spinning disks).
|
slow spinning disks.
|
||||||
|
|
||||||
If bees is terminated with SIGKILL, only step #1 and #4 are performed (the
|
|
||||||
kernel performs these automatically if bees exits). This reduces the
|
|
||||||
shutdown time at the cost of increased startup time.
|
|
||||||
|
|
||||||
Balances
|
Balances
|
||||||
--------
|
--------
|
||||||
|
|
||||||
First, read [`LOGICAL_INO` and btrfs balance WARNING](btrfs-kernel.md).
|
|
||||||
bees will suspend operations during a btrfs balance to work around
|
|
||||||
kernel bugs.
|
|
||||||
|
|
||||||
A btrfs balance relocates data on disk by making a new copy of the
|
A btrfs balance relocates data on disk by making a new copy of the
|
||||||
data, replacing all references to the old data with references to the
|
data, replacing all references to the old data with references to the
|
||||||
new copy, and deleting the old copy. To bees, this is the same as any
|
new copy, and deleting the old copy. To bees, this is the same as any
|
||||||
@@ -175,7 +134,9 @@ the beginning.
|
|||||||
|
|
||||||
Each time bees dedupes an extent that is referenced by a snapshot,
|
Each time bees dedupes an extent that is referenced by a snapshot,
|
||||||
the entire metadata page in the snapshot subvol (16KB by default) must
|
the entire metadata page in the snapshot subvol (16KB by default) must
|
||||||
be CoWed in btrfs. This can result in a substantial increase in btrfs
|
be CoWed in btrfs. Since all references must be removed at the same
|
||||||
|
time, this CoW operation is repeated in every snapshot containing the
|
||||||
|
duplicate data. This can result in a substantial increase in btrfs
|
||||||
metadata size if there are many snapshots on a filesystem.
|
metadata size if there are many snapshots on a filesystem.
|
||||||
|
|
||||||
Normally, metadata is small (less than 1% of the filesystem) and dedupe
|
Normally, metadata is small (less than 1% of the filesystem) and dedupe
|
||||||
@@ -244,7 +205,7 @@ Other Gotchas
|
|||||||
|
|
||||||
* bees avoids the [slow backrefs kernel bug](btrfs-kernel.md) by
|
* bees avoids the [slow backrefs kernel bug](btrfs-kernel.md) by
|
||||||
measuring the time required to perform `LOGICAL_INO` operations.
|
measuring the time required to perform `LOGICAL_INO` operations.
|
||||||
If an extent requires over 0.1 kernel CPU seconds to perform a
|
If an extent requires over 5.0 kernel CPU seconds to perform a
|
||||||
`LOGICAL_INO` ioctl, then bees blacklists the extent and avoids
|
`LOGICAL_INO` ioctl, then bees blacklists the extent and avoids
|
||||||
referencing it in future operations. In most cases, fewer than 0.1%
|
referencing it in future operations. In most cases, fewer than 0.1%
|
||||||
of extents in a filesystem must be avoided this way. This results
|
of extents in a filesystem must be avoided this way. This results
|
||||||
@@ -252,17 +213,18 @@ Other Gotchas
|
|||||||
filesystem while `LOGICAL_INO` is running. Generally the CPU spends
|
filesystem while `LOGICAL_INO` is running. Generally the CPU spends
|
||||||
most of the runtime of the `LOGICAL_INO` ioctl running kernel code,
|
most of the runtime of the `LOGICAL_INO` ioctl running kernel code,
|
||||||
so on a single-core CPU the entire system can freeze up for a second
|
so on a single-core CPU the entire system can freeze up for a second
|
||||||
during operations on toxic extents.
|
during operations on toxic extents. Note this only occurs on older
|
||||||
|
kernels. See [the slow backrefs kernel bug section](btrfs-kernel.md).
|
||||||
|
|
||||||
* If a process holds a directory FD open, the subvol containing the
|
* If a process holds a directory FD open, the subvol containing the
|
||||||
directory cannot be deleted (`btrfs sub del` will start the deletion
|
directory cannot be deleted (`btrfs sub del` will start the deletion
|
||||||
process, but it will not proceed past the first open directory FD).
|
process, but it will not proceed past the first open directory FD).
|
||||||
`btrfs-cleaner` will simply skip over the directory *and all of its
|
`btrfs-cleaner` will simply skip over the directory *and all of its
|
||||||
children* until the FD is closed. bees avoids this gotcha by closing
|
children* until the FD is closed. bees avoids this gotcha by closing
|
||||||
all of the FDs in its directory FD cache every 10 btrfs transactions.
|
all of the FDs in its directory FD cache every btrfs transaction.
|
||||||
|
|
||||||
* If a file is deleted while bees is caching an open FD to the file,
|
* If a file is deleted while bees is caching an open FD to the file,
|
||||||
bees continues to scan the file. For very large files (e.g. VM
|
bees continues to scan the file. For very large files (e.g. VM
|
||||||
images), the deletion of the file can be delayed indefinitely.
|
images), the deletion of the file can be delayed indefinitely.
|
||||||
To limit this delay, bees closes all FDs in its file FD cache every
|
To limit this delay, bees closes all FDs in its file FD cache every
|
||||||
10 btrfs transactions.
|
btrfs transaction.
|
||||||
|
@@ -8,10 +8,12 @@ bees uses checkpoints for persistence to eliminate the IO overhead of a
|
|||||||
transactional data store. On restart, bees will dedupe any data that
|
transactional data store. On restart, bees will dedupe any data that
|
||||||
was added to the filesystem since the last checkpoint. Checkpoints
|
was added to the filesystem since the last checkpoint. Checkpoints
|
||||||
occur every 15 minutes for scan progress, stored in `beescrawl.dat`.
|
occur every 15 minutes for scan progress, stored in `beescrawl.dat`.
|
||||||
The hash table trickle-writes to disk at 4GB/hour to `beeshash.dat`.
|
The hash table trickle-writes to disk at 128KiB/s to `beeshash.dat`,
|
||||||
An hourly performance report is written to `beesstats.txt`. There are
|
but will flush immediately if bees is terminated by SIGTERM.
|
||||||
no special requirements for bees hash table storage--`.beeshome` could
|
|
||||||
be stored on a different btrfs filesystem, ext4, or even CIFS.
|
There are no special requirements for bees hash table storage--`.beeshome`
|
||||||
|
could be stored on a different btrfs filesystem, ext4, or even CIFS (but
|
||||||
|
not MS-DOS--beeshome does need filenames longer than 8.3).
|
||||||
|
|
||||||
bees uses a persistent dedupe hash table with a fixed size configured
|
bees uses a persistent dedupe hash table with a fixed size configured
|
||||||
by the user. Any size of hash table can be dedicated to dedupe. If a
|
by the user. Any size of hash table can be dedicated to dedupe. If a
|
||||||
@@ -20,7 +22,7 @@ small as 128KB.
|
|||||||
|
|
||||||
The bees hash table is loaded into RAM at startup and `mlock`ed so it
|
The bees hash table is loaded into RAM at startup and `mlock`ed so it
|
||||||
will not be swapped out by the kernel (if swap is permitted, performance
|
will not be swapped out by the kernel (if swap is permitted, performance
|
||||||
degrades to nearly zero).
|
degrades to nearly zero, for both bees and the swap device).
|
||||||
|
|
||||||
bees scans the filesystem in a single pass which removes duplicate
|
bees scans the filesystem in a single pass which removes duplicate
|
||||||
extents immediately after they are detected. There are no distinct
|
extents immediately after they are detected. There are no distinct
|
||||||
@@ -83,12 +85,12 @@ of these functions in userspace, at the expense of encountering [some
|
|||||||
kernel bugs in `LOGICAL_INO` performance](btrfs-kernel.md).
|
kernel bugs in `LOGICAL_INO` performance](btrfs-kernel.md).
|
||||||
|
|
||||||
bees uses only the data-safe `FILE_EXTENT_SAME` (aka `FIDEDUPERANGE`)
|
bees uses only the data-safe `FILE_EXTENT_SAME` (aka `FIDEDUPERANGE`)
|
||||||
kernel operations to manipulate user data, so it can dedupe live data
|
kernel ioctl to manipulate user data, so it can dedupe live data
|
||||||
(e.g. build servers, sqlite databases, VM disk images). It does not
|
(e.g. build servers, sqlite databases, VM disk images). bees does not
|
||||||
modify file attributes or timestamps.
|
modify file attributes or timestamps in deduplicated files.
|
||||||
|
|
||||||
When bees has scanned all of the data, bees will pause until 10
|
When bees has scanned all of the data, bees will pause until a new
|
||||||
transactions have been completed in the btrfs filesystem. bees tracks
|
transaction has completed in the btrfs filesystem. bees tracks
|
||||||
the current btrfs transaction ID over time so that it polls less often
|
the current btrfs transaction ID over time so that it polls less often
|
||||||
on quiescent filesystems and more often on busy filesystems.
|
on quiescent filesystems and more often on busy filesystems.
|
||||||
|
|
||||||
|
@@ -6,31 +6,30 @@ Best-Effort Extent-Same, a btrfs deduplication agent.
|
|||||||
About bees
|
About bees
|
||||||
----------
|
----------
|
||||||
|
|
||||||
bees is a block-oriented userspace deduplication agent designed for large
|
bees is a block-oriented userspace deduplication agent designed to scale
|
||||||
btrfs filesystems. It is an offline dedupe combined with an incremental
|
up to large btrfs filesystems. It is an offline dedupe combined with
|
||||||
data scan capability to minimize time data spends on disk from write
|
an incremental data scan capability to minimize time data spends on disk
|
||||||
to dedupe.
|
from write to dedupe.
|
||||||
|
|
||||||
Strengths
|
Strengths
|
||||||
---------
|
---------
|
||||||
|
|
||||||
* Space-efficient hash table and matching algorithms - can use as little as 1 GB hash table per 10 TB unique data (0.1GB/TB)
|
* Space-efficient hash table - can use as little as 1 GB hash table per 10 TB unique data (0.1GB/TB)
|
||||||
* Daemon incrementally dedupes new data using btrfs tree search
|
* Daemon mode - incrementally dedupes new data as it appears
|
||||||
|
* Largest extents first - recover more free space during fixed maintenance windows
|
||||||
* Works with btrfs compression - dedupe any combination of compressed and uncompressed files
|
* Works with btrfs compression - dedupe any combination of compressed and uncompressed files
|
||||||
* **NEW** [Works around `btrfs send` problems with dedupe and incremental parent snapshots](options.md)
|
* Whole-filesystem dedupe - scans data only once, even with snapshots and reflinks
|
||||||
* Works around btrfs filesystem structure to free more disk space
|
|
||||||
* Persistent hash table for rapid restart after shutdown
|
* Persistent hash table for rapid restart after shutdown
|
||||||
* Whole-filesystem dedupe - including snapshots
|
|
||||||
* Constant hash table size - no increased RAM usage if data set becomes larger
|
* Constant hash table size - no increased RAM usage if data set becomes larger
|
||||||
* Works on live data - no scheduled downtime required
|
* Works on live data - no scheduled downtime required
|
||||||
* Automatic self-throttling based on system load
|
* Automatic self-throttling - reduces system load
|
||||||
|
* btrfs support - recovers more free space from btrfs than naive dedupers
|
||||||
|
|
||||||
Weaknesses
|
Weaknesses
|
||||||
----------
|
----------
|
||||||
|
|
||||||
* Whole-filesystem dedupe - has no include/exclude filters, does not accept file lists
|
* Whole-filesystem dedupe - has no include/exclude filters, does not accept file lists
|
||||||
* Requires root privilege (or `CAP_SYS_ADMIN`)
|
* Requires root privilege (`CAP_SYS_ADMIN` plus the usual filesystem read/modify caps)
|
||||||
* First run may require temporary disk space for extent reorganization
|
|
||||||
* [First run may increase metadata space usage if many snapshots exist](gotchas.md)
|
* [First run may increase metadata space usage if many snapshots exist](gotchas.md)
|
||||||
* Constant hash table size - no decreased RAM usage if data set becomes smaller
|
* Constant hash table size - no decreased RAM usage if data set becomes smaller
|
||||||
* btrfs only
|
* btrfs only
|
||||||
@@ -47,7 +46,7 @@ Recommended Reading
|
|||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
* [bees Gotchas](gotchas.md)
|
* [bees Gotchas](gotchas.md)
|
||||||
* [btrfs kernel bugs](btrfs-kernel.md) - especially DATA CORRUPTION WARNING
|
* [btrfs kernel bugs](btrfs-kernel.md) - especially DATA CORRUPTION WARNING for old kernels
|
||||||
* [bees vs. other btrfs features](btrfs-other.md)
|
* [bees vs. other btrfs features](btrfs-other.md)
|
||||||
* [What to do when something goes wrong](wrong.md)
|
* [What to do when something goes wrong](wrong.md)
|
||||||
|
|
||||||
@@ -70,6 +69,6 @@ You can also use Github:
|
|||||||
Copyright & License
|
Copyright & License
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
Copyright 2015-2022 Zygo Blaxell <bees@furryterror.org>.
|
Copyright 2015-2025 Zygo Blaxell <bees@furryterror.org>.
|
||||||
|
|
||||||
GPL (version 3 or later).
|
GPL (version 3 or later).
|
||||||
|
@@ -4,7 +4,7 @@ Building bees
|
|||||||
Dependencies
|
Dependencies
|
||||||
------------
|
------------
|
||||||
|
|
||||||
* C++11 compiler (tested with GCC 4.9, 6.3.0, 8.1.0)
|
* C++11 compiler (tested with GCC 8.1.0, 12.2.0)
|
||||||
|
|
||||||
Sorry. I really like closures and shared_ptr, so support
|
Sorry. I really like closures and shared_ptr, so support
|
||||||
for earlier compiler versions is unlikely.
|
for earlier compiler versions is unlikely.
|
||||||
@@ -19,7 +19,7 @@ Dependencies
|
|||||||
|
|
||||||
* [Linux kernel version](btrfs-kernel.md) gets its own page.
|
* [Linux kernel version](btrfs-kernel.md) gets its own page.
|
||||||
|
|
||||||
* markdown for documentation
|
* markdown to build the documentation
|
||||||
|
|
||||||
* util-linux version that provides `blkid` command for the helper
|
* util-linux version that provides `blkid` command for the helper
|
||||||
script `scripts/beesd` to work
|
script `scripts/beesd` to work
|
||||||
|
@@ -2,8 +2,8 @@ Features You Might Expect That bees Doesn't Have
|
|||||||
------------------------------------------------
|
------------------------------------------------
|
||||||
|
|
||||||
* There's no configuration file (patches welcome!). There are
|
* There's no configuration file (patches welcome!). There are
|
||||||
some tunables hardcoded in the source that could eventually become
|
some tunables hardcoded in the source (`src/bees.h`) that could eventually
|
||||||
configuration options. There's also an incomplete option parser
|
become configuration options. There's also an incomplete option parser
|
||||||
(patches welcome!).
|
(patches welcome!).
|
||||||
|
|
||||||
* The bees process doesn't fork and writes its log to stdout/stderr.
|
* The bees process doesn't fork and writes its log to stdout/stderr.
|
||||||
@@ -15,16 +15,9 @@ specific files (patches welcome).
|
|||||||
* PREALLOC extents and extents containing blocks filled with zeros will
|
* PREALLOC extents and extents containing blocks filled with zeros will
|
||||||
be replaced by holes. There is no way to turn this off.
|
be replaced by holes. There is no way to turn this off.
|
||||||
|
|
||||||
* Consecutive runs of duplicate blocks that are less than 12K in length
|
* The fundamental unit of deduplication is the extent _reference_, when
|
||||||
can take 30% of the processing time while saving only 3% of the disk
|
it should be the _extent_ itself. This is an architectural limitation
|
||||||
space. There should be an option to just not bother with those, but it's
|
that results in excess reads of extent data, even in the Extent scan mode.
|
||||||
complicated by the btrfs requirement to always dedupe complete extents.
|
|
||||||
|
|
||||||
* There is a lot of duplicate reading of blocks in snapshots. bees will
|
|
||||||
scan all snapshots at close to the same time to try to get better
|
|
||||||
performance by caching, but really fixing this requires rewriting the
|
|
||||||
crawler to scan the btrfs extent tree directly instead of the subvol
|
|
||||||
FS trees.
|
|
||||||
|
|
||||||
* Block reads are currently more allocation- and CPU-intensive than they
|
* Block reads are currently more allocation- and CPU-intensive than they
|
||||||
should be, especially for filesystems on SSD where the IO overhead is
|
should be, especially for filesystems on SSD where the IO overhead is
|
||||||
@@ -33,8 +26,9 @@ much smaller. This is a problem for CPU-power-constrained environments
|
|||||||
|
|
||||||
* bees can currently fragment extents when required to remove duplicate
|
* bees can currently fragment extents when required to remove duplicate
|
||||||
blocks, but has no defragmentation capability yet. When possible, bees
|
blocks, but has no defragmentation capability yet. When possible, bees
|
||||||
will attempt to work with existing extent boundaries, but it will not
|
will attempt to work with existing extent boundaries and choose the
|
||||||
aggregate blocks together from multiple extents to create larger ones.
|
largest fragments available, but it will not aggregate blocks together
|
||||||
|
from multiple extents to create larger ones.
|
||||||
|
|
||||||
* When bees fragments an extent, the copied data is compressed. There
|
* When bees fragments an extent, the copied data is compressed. There
|
||||||
is currently no way (other than by modifying the source) to select a
|
is currently no way (other than by modifying the source) to select a
|
||||||
@@ -43,3 +37,6 @@ compression method or not compress the data (patches welcome!).
|
|||||||
* It is theoretically possible to resize the hash table without starting
|
* It is theoretically possible to resize the hash table without starting
|
||||||
over with a new full-filesystem scan; however, this feature has not been
|
over with a new full-filesystem scan; however, this feature has not been
|
||||||
implemented yet.
|
implemented yet.
|
||||||
|
|
||||||
|
* btrfs maintains csums of data blocks which bees could use to improve
|
||||||
|
scan speeds, but bees doesn't use them yet.
|
||||||
|
@@ -36,6 +36,34 @@
|
|||||||
|
|
||||||
Has no effect unless `--loadavg-target` is used to specify a target load.
|
Has no effect unless `--loadavg-target` is used to specify a target load.
|
||||||
|
|
||||||
|
* `--throttle-factor FACTOR`
|
||||||
|
|
||||||
|
In order to avoid saturating btrfs deferred work queues, bees tracks
|
||||||
|
the time that operations with delayed effect (dedupe and tmpfile copy)
|
||||||
|
and operations with long run times (`LOGICAL_INO`) run. If an operation
|
||||||
|
finishes before the average run time for that operation, bees will
|
||||||
|
sleep for the remainder of the average run time, so that operations
|
||||||
|
are submitted to btrfs at a rate similar to the rate that btrfs can
|
||||||
|
complete them.
|
||||||
|
|
||||||
|
The `FACTOR` is multiplied by the average run time for each operation
|
||||||
|
to calculate the target delay time.
|
||||||
|
|
||||||
|
`FACTOR` 0 is the default, which adds no delays. bees will attempt
|
||||||
|
to saturate btrfs delayed work queues as quickly as possible, which
|
||||||
|
may impact other processes on the same filesystem, or even slow down
|
||||||
|
bees itself.
|
||||||
|
|
||||||
|
`FACTOR` 1.0 will attempt to keep btrfs delayed work queues filled at
|
||||||
|
a steady average rate.
|
||||||
|
|
||||||
|
`FACTOR` more than 1.0 will add delays longer than the average
|
||||||
|
run time (e.g. 10.0 will delay all operations that take less than 10x
|
||||||
|
the average run time). High values of `FACTOR` may be desirable when
|
||||||
|
using bees with other applications on the same filesystem.
|
||||||
|
|
||||||
|
The maximum delay per operation is 60 seconds.
|
||||||
|
|
||||||
## Filesystem tree traversal options
|
## Filesystem tree traversal options
|
||||||
|
|
||||||
* `--scan-mode MODE` or `-m`
|
* `--scan-mode MODE` or `-m`
|
||||||
@@ -47,6 +75,7 @@
|
|||||||
* Mode 1: independent
|
* Mode 1: independent
|
||||||
* Mode 2: sequential
|
* Mode 2: sequential
|
||||||
* Mode 3: recent
|
* Mode 3: recent
|
||||||
|
* Mode 4: extent
|
||||||
|
|
||||||
For details of the different scanning modes and the default value of
|
For details of the different scanning modes and the default value of
|
||||||
this option, see [bees configuration](config.md).
|
this option, see [bees configuration](config.md).
|
||||||
@@ -55,19 +84,22 @@
|
|||||||
|
|
||||||
* `--workaround-btrfs-send` or `-a`
|
* `--workaround-btrfs-send` or `-a`
|
||||||
|
|
||||||
|
_This option is obsolete and should not be used any more._
|
||||||
|
|
||||||
Pretend that read-only snapshots are empty and silently discard any
|
Pretend that read-only snapshots are empty and silently discard any
|
||||||
request to dedupe files referenced through them. This is a workaround for
|
request to dedupe files referenced through them. This is a workaround
|
||||||
[problems with the kernel implementation of `btrfs send` and `btrfs send
|
for [problems with old kernels running `btrfs send` and `btrfs send
|
||||||
-p`](btrfs-kernel.md) which make these btrfs features unusable with bees.
|
-p`](btrfs-kernel.md) which make these btrfs features unusable with bees.
|
||||||
|
|
||||||
This option should be used to avoid breaking `btrfs send` on the same
|
This option was used to avoid breaking `btrfs send` on old kernels.
|
||||||
filesystem.
|
The affected kernels are now too old to be recommended for use with bees.
|
||||||
|
|
||||||
|
bees now waits for `btrfs send` to finish. There is no need for an
|
||||||
|
option to enable this.
|
||||||
|
|
||||||
**Note:** There is a _significant_ space tradeoff when using this option:
|
**Note:** There is a _significant_ space tradeoff when using this option:
|
||||||
it is likely no space will be recovered--and possibly significant extra
|
it is likely no space will be recovered--and possibly significant extra
|
||||||
space used--until the read-only snapshots are deleted. On the other
|
space used--until the read-only snapshots are deleted.
|
||||||
hand, if snapshots are rotated frequently then bees will spend less time
|
|
||||||
scanning them.
|
|
||||||
|
|
||||||
## Logging options
|
## Logging options
|
||||||
|
|
||||||
|
@@ -75,9 +75,8 @@ in the shell script that launches `bees`:
|
|||||||
schedtool -D -n20 $$
|
schedtool -D -n20 $$
|
||||||
ionice -c3 -p $$
|
ionice -c3 -p $$
|
||||||
|
|
||||||
You can also use the [`--loadavg-target` and `--thread-min`
|
You can also use the [load management options](options.md) to further
|
||||||
options](options.md) to further control the impact of bees on the rest
|
control the impact of bees on the rest of the system.
|
||||||
of the system.
|
|
||||||
|
|
||||||
Let the bees fly:
|
Let the bees fly:
|
||||||
|
|
||||||
|
@@ -4,16 +4,13 @@ What to do when something goes wrong with bees
|
|||||||
Hangs and excessive slowness
|
Hangs and excessive slowness
|
||||||
----------------------------
|
----------------------------
|
||||||
|
|
||||||
### Are you using qgroups or autodefrag?
|
|
||||||
|
|
||||||
Read about [bad btrfs feature interactions](btrfs-other.md).
|
|
||||||
|
|
||||||
### Use load-throttling options
|
### Use load-throttling options
|
||||||
|
|
||||||
If bees is just more aggressive than you would like, consider using
|
If bees is just more aggressive than you would like, consider using
|
||||||
[load throttling options](options.md). These are usually more effective
|
[load throttling options](options.md). These are usually more effective
|
||||||
than `ionice`, `schedtool`, and the `blkio` cgroup (though you can
|
than `ionice`, `schedtool`, and the `blkio` cgroup (though you can
|
||||||
certainly use those too).
|
certainly use those too) because they limit work that bees queues up
|
||||||
|
for later execution inside btrfs.
|
||||||
|
|
||||||
### Check `$BEESSTATUS`
|
### Check `$BEESSTATUS`
|
||||||
|
|
||||||
@@ -52,10 +49,6 @@ dst = 15 /run/bees/ede84fbd-cb59-0c60-9ea7-376fa4984887/data.new/home/builder/li
|
|||||||
|
|
||||||
Thread names of note:
|
Thread names of note:
|
||||||
|
|
||||||
* `crawl_12345`: scan/dedupe worker threads (the number is the subvol
|
|
||||||
ID which the thread is currently working on). These threads appear
|
|
||||||
and disappear from the status dynamically according to the requirements
|
|
||||||
of the work queue and loadavg throttling.
|
|
||||||
* `bees`: main thread (doesn't do anything after startup, but its task execution time is that of the whole bees process)
|
* `bees`: main thread (doesn't do anything after startup, but its task execution time is that of the whole bees process)
|
||||||
* `crawl_master`: task that finds new extents in the filesystem and populates the work queue
|
* `crawl_master`: task that finds new extents in the filesystem and populates the work queue
|
||||||
* `crawl_transid`: btrfs transid (generation number) tracker and polling thread
|
* `crawl_transid`: btrfs transid (generation number) tracker and polling thread
|
||||||
@@ -64,6 +57,13 @@ dst = 15 /run/bees/ede84fbd-cb59-0c60-9ea7-376fa4984887/data.new/home/builder/li
|
|||||||
* `hash_writeback`: trickle-writes the hash table back to `beeshash.dat`
|
* `hash_writeback`: trickle-writes the hash table back to `beeshash.dat`
|
||||||
* `hash_prefetch`: prefetches the hash table at startup and updates `beesstats.txt` hourly
|
* `hash_prefetch`: prefetches the hash table at startup and updates `beesstats.txt` hourly
|
||||||
|
|
||||||
|
Most other threads have names that are derived from the current dedupe
|
||||||
|
task that they are executing:
|
||||||
|
|
||||||
|
* `ref_205ad76b1000_24K_50`: extent scan performing dedupe of btrfs extent bytenr `205ad76b1000`, which is 24 KiB long and has 50 references
|
||||||
|
* `extent_250_32M_16E`: extent scan searching for extents between 32 MiB + 1 and 16 EiB bytes long, tracking scan position in virtual subvol `250`.
|
||||||
|
* `crawl_378_18916`: subvol scan searching for extent refs in subvol `378`, inode `18916`.
|
||||||
|
|
||||||
### Dump kernel stacks of hung processes
|
### Dump kernel stacks of hung processes
|
||||||
|
|
||||||
Check the kernel stacks of all blocked kernel processes:
|
Check the kernel stacks of all blocked kernel processes:
|
||||||
@@ -91,7 +91,7 @@ bees Crashes
|
|||||||
(gdb) thread apply all bt full
|
(gdb) thread apply all bt full
|
||||||
|
|
||||||
The last line generates megabytes of output and will often crash gdb.
|
The last line generates megabytes of output and will often crash gdb.
|
||||||
This is OK, submit whatever output gdb can produce.
|
Submit whatever output gdb can produce.
|
||||||
|
|
||||||
**Note that this output may include filenames or data from your
|
**Note that this output may include filenames or data from your
|
||||||
filesystem.**
|
filesystem.**
|
||||||
@@ -160,8 +160,7 @@ Kernel crashes, corruption, and filesystem damage
|
|||||||
-------------------------------------------------
|
-------------------------------------------------
|
||||||
|
|
||||||
bees doesn't do anything that _should_ cause corruption or data loss;
|
bees doesn't do anything that _should_ cause corruption or data loss;
|
||||||
however, [btrfs has kernel bugs](btrfs-kernel.md) and [interacts poorly
|
however, [btrfs has kernel bugs](btrfs-kernel.md), so corruption is
|
||||||
with some Linux block device layers](btrfs-other.md), so corruption is
|
|
||||||
not impossible.
|
not impossible.
|
||||||
|
|
||||||
Issues with the btrfs filesystem kernel code or other block device layers
|
Issues with the btrfs filesystem kernel code or other block device layers
|
||||||
|
@@ -49,6 +49,7 @@ namespace crucible {
|
|||||||
/// @}
|
/// @}
|
||||||
|
|
||||||
/// @{ Inode items
|
/// @{ Inode items
|
||||||
|
uint64_t inode_flags() const;
|
||||||
uint64_t inode_size() const;
|
uint64_t inode_size() const;
|
||||||
/// @}
|
/// @}
|
||||||
|
|
||||||
@@ -64,11 +65,13 @@ namespace crucible {
|
|||||||
/// @{ Extent items (EXTENT_ITEM)
|
/// @{ Extent items (EXTENT_ITEM)
|
||||||
uint64_t extent_begin() const;
|
uint64_t extent_begin() const;
|
||||||
uint64_t extent_end() const;
|
uint64_t extent_end() const;
|
||||||
|
uint64_t extent_flags() const;
|
||||||
uint64_t extent_generation() const;
|
uint64_t extent_generation() const;
|
||||||
/// @}
|
/// @}
|
||||||
|
|
||||||
/// @{ Root items
|
/// @{ Root items
|
||||||
uint64_t root_flags() const;
|
uint64_t root_flags() const;
|
||||||
|
uint64_t root_refs() const;
|
||||||
/// @}
|
/// @}
|
||||||
|
|
||||||
/// @{ Root backref items.
|
/// @{ Root backref items.
|
||||||
@@ -108,7 +111,9 @@ namespace crucible {
|
|||||||
virtual ~BtrfsTreeFetcher() = default;
|
virtual ~BtrfsTreeFetcher() = default;
|
||||||
BtrfsTreeFetcher(Fd new_fd);
|
BtrfsTreeFetcher(Fd new_fd);
|
||||||
void type(uint8_t type);
|
void type(uint8_t type);
|
||||||
|
uint8_t type();
|
||||||
void tree(uint64_t tree);
|
void tree(uint64_t tree);
|
||||||
|
uint64_t tree();
|
||||||
void transid(uint64_t min_transid, uint64_t max_transid = numeric_limits<uint64_t>::max());
|
void transid(uint64_t min_transid, uint64_t max_transid = numeric_limits<uint64_t>::max());
|
||||||
/// Block size (sectorsize) of filesystem
|
/// Block size (sectorsize) of filesystem
|
||||||
uint64_t block_size() const;
|
uint64_t block_size() const;
|
||||||
@@ -169,34 +174,42 @@ namespace crucible {
|
|||||||
void get_sums(uint64_t logical, size_t count, function<void(uint64_t logical, const uint8_t *buf, size_t count)> output);
|
void get_sums(uint64_t logical, size_t count, function<void(uint64_t logical, const uint8_t *buf, size_t count)> output);
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Fetch extent items from extent tree
|
/// Fetch extent items from extent tree.
|
||||||
|
/// Does not filter out metadata! See BtrfsDataExtentTreeFetcher for that.
|
||||||
class BtrfsExtentItemFetcher : public BtrfsTreeObjectFetcher {
|
class BtrfsExtentItemFetcher : public BtrfsTreeObjectFetcher {
|
||||||
public:
|
public:
|
||||||
BtrfsExtentItemFetcher(const Fd &fd);
|
BtrfsExtentItemFetcher(const Fd &fd);
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Fetch extent refs from an inode
|
/// Fetch extent refs from an inode. Caller must set the tree and objectid.
|
||||||
class BtrfsExtentDataFetcher : public BtrfsTreeOffsetFetcher {
|
class BtrfsExtentDataFetcher : public BtrfsTreeOffsetFetcher {
|
||||||
public:
|
public:
|
||||||
BtrfsExtentDataFetcher(const Fd &fd);
|
BtrfsExtentDataFetcher(const Fd &fd);
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Fetch inodes from a subvol
|
/// Fetch raw inode items
|
||||||
class BtrfsFsTreeFetcher : public BtrfsTreeObjectFetcher {
|
|
||||||
public:
|
|
||||||
BtrfsFsTreeFetcher(const Fd &fd, uint64_t subvol);
|
|
||||||
};
|
|
||||||
|
|
||||||
class BtrfsInodeFetcher : public BtrfsTreeObjectFetcher {
|
class BtrfsInodeFetcher : public BtrfsTreeObjectFetcher {
|
||||||
public:
|
public:
|
||||||
BtrfsInodeFetcher(const Fd &fd);
|
BtrfsInodeFetcher(const Fd &fd);
|
||||||
BtrfsTreeItem stat(uint64_t subvol, uint64_t inode);
|
BtrfsTreeItem stat(uint64_t subvol, uint64_t inode);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Fetch a root (subvol) item
|
||||||
class BtrfsRootFetcher : public BtrfsTreeObjectFetcher {
|
class BtrfsRootFetcher : public BtrfsTreeObjectFetcher {
|
||||||
public:
|
public:
|
||||||
BtrfsRootFetcher(const Fd &fd);
|
BtrfsRootFetcher(const Fd &fd);
|
||||||
BtrfsTreeItem root(uint64_t subvol);
|
BtrfsTreeItem root(uint64_t subvol);
|
||||||
|
BtrfsTreeItem root_backref(uint64_t subvol);
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Fetch data extent items from extent tree, skipping metadata-only block groups
|
||||||
|
class BtrfsDataExtentTreeFetcher : public BtrfsExtentItemFetcher {
|
||||||
|
BtrfsTreeItem m_current_bg;
|
||||||
|
BtrfsTreeOffsetFetcher m_chunk_tree;
|
||||||
|
protected:
|
||||||
|
virtual void next_sk(BtrfsIoctlSearchKey &key, const BtrfsIoctlSearchHeader &hdr) override;
|
||||||
|
public:
|
||||||
|
BtrfsDataExtentTreeFetcher(const Fd &fd);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -78,9 +78,6 @@ enum btrfs_compression_type {
|
|||||||
#define BTRFS_SHARED_BLOCK_REF_KEY 182
|
#define BTRFS_SHARED_BLOCK_REF_KEY 182
|
||||||
#define BTRFS_SHARED_DATA_REF_KEY 184
|
#define BTRFS_SHARED_DATA_REF_KEY 184
|
||||||
#define BTRFS_BLOCK_GROUP_ITEM_KEY 192
|
#define BTRFS_BLOCK_GROUP_ITEM_KEY 192
|
||||||
#define BTRFS_FREE_SPACE_INFO_KEY 198
|
|
||||||
#define BTRFS_FREE_SPACE_EXTENT_KEY 199
|
|
||||||
#define BTRFS_FREE_SPACE_BITMAP_KEY 200
|
|
||||||
#define BTRFS_DEV_EXTENT_KEY 204
|
#define BTRFS_DEV_EXTENT_KEY 204
|
||||||
#define BTRFS_DEV_ITEM_KEY 216
|
#define BTRFS_DEV_ITEM_KEY 216
|
||||||
#define BTRFS_CHUNK_ITEM_KEY 228
|
#define BTRFS_CHUNK_ITEM_KEY 228
|
||||||
@@ -94,7 +91,35 @@ enum btrfs_compression_type {
|
|||||||
#define BTRFS_UUID_KEY_SUBVOL 251
|
#define BTRFS_UUID_KEY_SUBVOL 251
|
||||||
#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252
|
#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252
|
||||||
#define BTRFS_STRING_ITEM_KEY 253
|
#define BTRFS_STRING_ITEM_KEY 253
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// BTRFS_INODE_* was added to include/uapi/btrfs_tree.h in v6.2-rc1
|
||||||
|
#ifndef BTRFS_INODE_NODATASUM
|
||||||
|
#define BTRFS_INODE_NODATASUM (1U << 0)
|
||||||
|
#define BTRFS_INODE_NODATACOW (1U << 1)
|
||||||
|
#define BTRFS_INODE_READONLY (1U << 2)
|
||||||
|
#define BTRFS_INODE_NOCOMPRESS (1U << 3)
|
||||||
|
#define BTRFS_INODE_PREALLOC (1U << 4)
|
||||||
|
#define BTRFS_INODE_SYNC (1U << 5)
|
||||||
|
#define BTRFS_INODE_IMMUTABLE (1U << 6)
|
||||||
|
#define BTRFS_INODE_APPEND (1U << 7)
|
||||||
|
#define BTRFS_INODE_NODUMP (1U << 8)
|
||||||
|
#define BTRFS_INODE_NOATIME (1U << 9)
|
||||||
|
#define BTRFS_INODE_DIRSYNC (1U << 10)
|
||||||
|
#define BTRFS_INODE_COMPRESS (1U << 11)
|
||||||
|
#define BTRFS_INODE_ROOT_ITEM_INIT (1U << 31)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef BTRFS_FREE_SPACE_INFO_KEY
|
||||||
|
#define BTRFS_FREE_SPACE_INFO_KEY 198
|
||||||
|
#define BTRFS_FREE_SPACE_EXTENT_KEY 199
|
||||||
|
#define BTRFS_FREE_SPACE_BITMAP_KEY 200
|
||||||
|
#define BTRFS_FREE_SPACE_OBJECTID -11ULL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef BTRFS_BLOCK_GROUP_RAID1C4
|
||||||
|
#define BTRFS_BLOCK_GROUP_RAID1C3 (1ULL << 9)
|
||||||
|
#define BTRFS_BLOCK_GROUP_RAID1C4 (1ULL << 10)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef BTRFS_DEFRAG_RANGE_START_IO
|
#ifndef BTRFS_DEFRAG_RANGE_START_IO
|
||||||
|
@@ -55,7 +55,6 @@ namespace crucible {
|
|||||||
Pointer m_ptr;
|
Pointer m_ptr;
|
||||||
size_t m_size = 0;
|
size_t m_size = 0;
|
||||||
mutable mutex m_mutex;
|
mutable mutex m_mutex;
|
||||||
friend ostream & operator<<(ostream &os, const ByteVector &bv);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
@@ -74,6 +73,8 @@ namespace crucible {
|
|||||||
THROW_CHECK2(out_of_range, size(), sizeof(T), size() >= sizeof(T));
|
THROW_CHECK2(out_of_range, size(), sizeof(T), size() >= sizeof(T));
|
||||||
return reinterpret_cast<T*>(data());
|
return reinterpret_cast<T*>(data());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ostream& operator<<(ostream &os, const ByteVector &bv);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // _CRUCIBLE_BYTEVECTOR_H_
|
#endif // _CRUCIBLE_BYTEVECTOR_H_
|
||||||
|
@@ -69,9 +69,11 @@ namespace crucible {
|
|||||||
|
|
||||||
uint64_t get_flags() const;
|
uint64_t get_flags() const;
|
||||||
void set_flags(uint64_t new_flags);
|
void set_flags(uint64_t new_flags);
|
||||||
|
void set_logical(uint64_t new_logical);
|
||||||
|
void set_size(uint64_t new_size);
|
||||||
|
|
||||||
virtual void do_ioctl(int fd);
|
void do_ioctl(int fd);
|
||||||
virtual bool do_ioctl_nothrow(int fd);
|
bool do_ioctl_nothrow(int fd);
|
||||||
|
|
||||||
struct BtrfsInodeOffsetRootSpan {
|
struct BtrfsInodeOffsetRootSpan {
|
||||||
using iterator = BtrfsInodeOffsetRoot*;
|
using iterator = BtrfsInodeOffsetRoot*;
|
||||||
@@ -195,11 +197,18 @@ namespace crucible {
|
|||||||
|
|
||||||
size_t m_buf_size;
|
size_t m_buf_size;
|
||||||
set<BtrfsIoctlSearchHeader> m_result;
|
set<BtrfsIoctlSearchHeader> m_result;
|
||||||
|
|
||||||
|
static thread_local size_t s_calls;
|
||||||
|
static thread_local size_t s_loops;
|
||||||
|
static thread_local size_t s_loops_empty;
|
||||||
|
static thread_local shared_ptr<ostream> s_debug_ostream;
|
||||||
};
|
};
|
||||||
|
|
||||||
ostream & operator<<(ostream &os, const btrfs_ioctl_search_key &key);
|
ostream & operator<<(ostream &os, const btrfs_ioctl_search_key &key);
|
||||||
ostream & operator<<(ostream &os, const BtrfsIoctlSearchKey &key);
|
ostream & operator<<(ostream &os, const BtrfsIoctlSearchKey &key);
|
||||||
|
|
||||||
|
string btrfs_chunk_type_ntoa(uint64_t type);
|
||||||
|
string btrfs_inode_flags_ntoa(uint64_t inode_flags);
|
||||||
string btrfs_search_type_ntoa(unsigned type);
|
string btrfs_search_type_ntoa(unsigned type);
|
||||||
string btrfs_search_objectid_ntoa(uint64_t objectid);
|
string btrfs_search_objectid_ntoa(uint64_t objectid);
|
||||||
string btrfs_compress_type_ntoa(uint8_t type);
|
string btrfs_compress_type_ntoa(uint8_t type);
|
||||||
@@ -237,14 +246,14 @@ namespace crucible {
|
|||||||
unsigned long available() const;
|
unsigned long available() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class V> ostream &hexdump(ostream &os, const V &v);
|
|
||||||
|
|
||||||
struct BtrfsIoctlFsInfoArgs : public btrfs_ioctl_fs_info_args_v3 {
|
struct BtrfsIoctlFsInfoArgs : public btrfs_ioctl_fs_info_args_v3 {
|
||||||
BtrfsIoctlFsInfoArgs();
|
BtrfsIoctlFsInfoArgs();
|
||||||
void do_ioctl(int fd);
|
void do_ioctl(int fd);
|
||||||
|
bool do_ioctl_nothrow(int fd);
|
||||||
uint16_t csum_type() const;
|
uint16_t csum_type() const;
|
||||||
uint16_t csum_size() const;
|
uint16_t csum_size() const;
|
||||||
uint64_t generation() const;
|
uint64_t generation() const;
|
||||||
|
vector<uint8_t> fsid() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
ostream & operator<<(ostream &os, const BtrfsIoctlFsInfoArgs &a);
|
ostream & operator<<(ostream &os, const BtrfsIoctlFsInfoArgs &a);
|
||||||
|
@@ -12,12 +12,14 @@ namespace crucible {
|
|||||||
ostream &
|
ostream &
|
||||||
hexdump(ostream &os, const V &v)
|
hexdump(ostream &os, const V &v)
|
||||||
{
|
{
|
||||||
os << "V { size = " << v.size() << ", data:\n";
|
const auto v_size = v.size();
|
||||||
for (size_t i = 0; i < v.size(); i += 8) {
|
const uint8_t* const v_data = reinterpret_cast<const uint8_t*>(v.data());
|
||||||
|
os << "V { size = " << v_size << ", data:\n";
|
||||||
|
for (size_t i = 0; i < v_size; i += 8) {
|
||||||
string hex, ascii;
|
string hex, ascii;
|
||||||
for (size_t j = i; j < i + 8; ++j) {
|
for (size_t j = i; j < i + 8; ++j) {
|
||||||
if (j < v.size()) {
|
if (j < v_size) {
|
||||||
uint8_t c = v[j];
|
const uint8_t c = v_data[j];
|
||||||
char buf[8];
|
char buf[8];
|
||||||
sprintf(buf, "%02x ", c);
|
sprintf(buf, "%02x ", c);
|
||||||
hex += buf;
|
hex += buf;
|
||||||
|
@@ -117,7 +117,7 @@ namespace crucible {
|
|||||||
while (full() || locked(name)) {
|
while (full() || locked(name)) {
|
||||||
m_condvar.wait(lock);
|
m_condvar.wait(lock);
|
||||||
}
|
}
|
||||||
auto rv = m_set.insert(make_pair(name, crucible::gettid()));
|
auto rv = m_set.insert(make_pair(name, gettid()));
|
||||||
THROW_CHECK0(runtime_error, rv.second);
|
THROW_CHECK0(runtime_error, rv.second);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -129,7 +129,7 @@ namespace crucible {
|
|||||||
if (full() || locked(name)) {
|
if (full() || locked(name)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
auto rv = m_set.insert(make_pair(name, crucible::gettid()));
|
auto rv = m_set.insert(make_pair(name, gettid()));
|
||||||
THROW_CHECK1(runtime_error, name, rv.second);
|
THROW_CHECK1(runtime_error, name, rv.second);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@@ -14,6 +14,7 @@ namespace crucible {
|
|||||||
mutex m_mutex;
|
mutex m_mutex;
|
||||||
condition_variable m_cv;
|
condition_variable m_cv;
|
||||||
map<string, size_t> m_counters;
|
map<string, size_t> m_counters;
|
||||||
|
bool m_do_locking = true;
|
||||||
|
|
||||||
class LockHandle {
|
class LockHandle {
|
||||||
const string m_type;
|
const string m_type;
|
||||||
@@ -33,6 +34,7 @@ namespace crucible {
|
|||||||
shared_ptr<LockHandle> get_lock_private(const string &type);
|
shared_ptr<LockHandle> get_lock_private(const string &type);
|
||||||
public:
|
public:
|
||||||
static shared_ptr<LockHandle> get_lock(const string &type);
|
static shared_ptr<LockHandle> get_lock(const string &type);
|
||||||
|
static void enable_locking(bool enabled);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
52
include/crucible/openat2.h
Normal file
52
include/crucible/openat2.h
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
#ifndef CRUCIBLE_OPENAT2_H
|
||||||
|
#define CRUCIBLE_OPENAT2_H
|
||||||
|
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
|
// Compatibility for building on old libc for new kernel
|
||||||
|
#include <linux/version.h>
|
||||||
|
|
||||||
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
|
||||||
|
|
||||||
|
#include <linux/openat2.h>
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include <linux/types.h>
|
||||||
|
|
||||||
|
#ifndef RESOLVE_NO_XDEV
|
||||||
|
#define RESOLVE_NO_XDEV 1
|
||||||
|
|
||||||
|
// RESOLVE_NO_XDEV was there from the beginning of openat2,
|
||||||
|
// so if that's missing, so is open_how
|
||||||
|
|
||||||
|
struct open_how {
|
||||||
|
__u64 flags;
|
||||||
|
__u64 mode;
|
||||||
|
__u64 resolve;
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef RESOLVE_NO_MAGICLINKS
|
||||||
|
#define RESOLVE_NO_MAGICLINKS 2
|
||||||
|
#endif
|
||||||
|
#ifndef RESOLVE_NO_SYMLINKS
|
||||||
|
#define RESOLVE_NO_SYMLINKS 4
|
||||||
|
#endif
|
||||||
|
#ifndef RESOLVE_BENEATH
|
||||||
|
#define RESOLVE_BENEATH 8
|
||||||
|
#endif
|
||||||
|
#ifndef RESOLVE_IN_ROOT
|
||||||
|
#define RESOLVE_IN_ROOT 16
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // Linux version >= v5.6
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
|
||||||
|
/// Weak symbol to support libc with no syscall wrapper
|
||||||
|
int openat2(int dirfd, const char *pathname, struct open_how *how, size_t size) throw();
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // CRUCIBLE_OPENAT2_H
|
@@ -10,6 +10,10 @@
|
|||||||
#include <sys/wait.h>
|
#include <sys/wait.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
pid_t gettid() throw();
|
||||||
|
};
|
||||||
|
|
||||||
namespace crucible {
|
namespace crucible {
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
@@ -73,7 +77,6 @@ namespace crucible {
|
|||||||
|
|
||||||
typedef ResourceHandle<Process::id, Process> Pid;
|
typedef ResourceHandle<Process::id, Process> Pid;
|
||||||
|
|
||||||
pid_t gettid();
|
|
||||||
double getloadavg1();
|
double getloadavg1();
|
||||||
double getloadavg5();
|
double getloadavg5();
|
||||||
double getloadavg15();
|
double getloadavg15();
|
||||||
|
@@ -4,13 +4,20 @@
|
|||||||
#include "crucible/error.h"
|
#include "crucible/error.h"
|
||||||
|
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <map>
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
#include <set>
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
namespace crucible {
|
namespace crucible {
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
/// A class to track progress of multiple workers using only two points:
|
||||||
|
/// the first and last incomplete state. The first incomplete
|
||||||
|
/// state can be recorded as a checkpoint to resume later on.
|
||||||
|
/// The last completed state is the starting point for workers that
|
||||||
|
/// need something to do.
|
||||||
template <class T>
|
template <class T>
|
||||||
class ProgressTracker {
|
class ProgressTracker {
|
||||||
struct ProgressTrackerState;
|
struct ProgressTrackerState;
|
||||||
@@ -19,8 +26,16 @@ namespace crucible {
|
|||||||
using value_type = T;
|
using value_type = T;
|
||||||
using ProgressHolder = shared_ptr<ProgressHolderState>;
|
using ProgressHolder = shared_ptr<ProgressHolderState>;
|
||||||
|
|
||||||
|
/// Create ProgressTracker with initial begin and end state 'v'.
|
||||||
ProgressTracker(const value_type &v);
|
ProgressTracker(const value_type &v);
|
||||||
|
|
||||||
|
/// The first incomplete state. This is not "sticky",
|
||||||
|
/// it will revert to the end state if there are no
|
||||||
|
/// items in progress.
|
||||||
value_type begin() const;
|
value_type begin() const;
|
||||||
|
|
||||||
|
/// The last incomplete state. This is "sticky",
|
||||||
|
/// it can only increase and never decrease.
|
||||||
value_type end() const;
|
value_type end() const;
|
||||||
|
|
||||||
ProgressHolder hold(const value_type &v);
|
ProgressHolder hold(const value_type &v);
|
||||||
@@ -31,7 +46,7 @@ namespace crucible {
|
|||||||
struct ProgressTrackerState {
|
struct ProgressTrackerState {
|
||||||
using key_type = pair<value_type, ProgressHolderState *>;
|
using key_type = pair<value_type, ProgressHolderState *>;
|
||||||
mutex m_mutex;
|
mutex m_mutex;
|
||||||
map<key_type, bool> m_in_progress;
|
set<key_type> m_in_progress;
|
||||||
value_type m_begin;
|
value_type m_begin;
|
||||||
value_type m_end;
|
value_type m_end;
|
||||||
};
|
};
|
||||||
@@ -39,6 +54,7 @@ namespace crucible {
|
|||||||
class ProgressHolderState {
|
class ProgressHolderState {
|
||||||
shared_ptr<ProgressTrackerState> m_state;
|
shared_ptr<ProgressTrackerState> m_state;
|
||||||
const value_type m_value;
|
const value_type m_value;
|
||||||
|
using key_type = typename ProgressTrackerState::key_type;
|
||||||
public:
|
public:
|
||||||
ProgressHolderState(shared_ptr<ProgressTrackerState> state, const value_type &v);
|
ProgressHolderState(shared_ptr<ProgressTrackerState> state, const value_type &v);
|
||||||
~ProgressHolderState();
|
~ProgressHolderState();
|
||||||
@@ -86,7 +102,11 @@ namespace crucible {
|
|||||||
m_value(v)
|
m_value(v)
|
||||||
{
|
{
|
||||||
unique_lock<mutex> lock(m_state->m_mutex);
|
unique_lock<mutex> lock(m_state->m_mutex);
|
||||||
m_state->m_in_progress[make_pair(m_value, this)] = true;
|
const auto rv = m_state->m_in_progress.insert(key_type(m_value, this));
|
||||||
|
THROW_CHECK1(runtime_error, m_value, rv.second);
|
||||||
|
// Set the beginning to the first existing in-progress item
|
||||||
|
m_state->m_begin = m_state->m_in_progress.begin()->first;
|
||||||
|
// If this value is past the end, move the end, but don't go backwards
|
||||||
if (m_state->m_end < m_value) {
|
if (m_state->m_end < m_value) {
|
||||||
m_state->m_end = m_value;
|
m_state->m_end = m_value;
|
||||||
}
|
}
|
||||||
@@ -96,17 +116,15 @@ namespace crucible {
|
|||||||
ProgressTracker<T>::ProgressHolderState::~ProgressHolderState()
|
ProgressTracker<T>::ProgressHolderState::~ProgressHolderState()
|
||||||
{
|
{
|
||||||
unique_lock<mutex> lock(m_state->m_mutex);
|
unique_lock<mutex> lock(m_state->m_mutex);
|
||||||
m_state->m_in_progress[make_pair(m_value, this)] = false;
|
const auto rv = m_state->m_in_progress.erase(key_type(m_value, this));
|
||||||
auto p = m_state->m_in_progress.begin();
|
// THROW_CHECK2(runtime_error, m_value, rv, rv == 1);
|
||||||
while (p != m_state->m_in_progress.end()) {
|
assert(rv == 1);
|
||||||
if (p->second) {
|
if (m_state->m_in_progress.empty()) {
|
||||||
break;
|
// If we made the list empty, then m_begin == m_end
|
||||||
}
|
m_state->m_begin = m_state->m_end;
|
||||||
if (m_state->m_begin < p->first.first) {
|
} else {
|
||||||
m_state->m_begin = p->first.first;
|
// If we deleted the first element, then m_begin = current first element
|
||||||
}
|
m_state->m_begin = m_state->m_in_progress.begin()->first;
|
||||||
m_state->m_in_progress.erase(p);
|
|
||||||
p = m_state->m_in_progress.begin();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -6,23 +6,23 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
|
||||||
#include <cstdint>
|
// Debug stream
|
||||||
|
#include <memory>
|
||||||
#if 1
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#define DINIT(__x) __x
|
|
||||||
#define DLOG(__x) do { logs << __x << std::endl; } while (false)
|
#include <cstdint>
|
||||||
#define DOUT(__err) do { __err << logs.str(); } while (false)
|
|
||||||
#else
|
|
||||||
#define DINIT(__x) do {} while (false)
|
|
||||||
#define DLOG(__x) do {} while (false)
|
|
||||||
#define DOUT(__x) do {} while (false)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace crucible {
|
namespace crucible {
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
extern thread_local shared_ptr<ostream> tl_seeker_debug_str;
|
||||||
|
#define SEEKER_DEBUG_LOG(__x) do { \
|
||||||
|
if (tl_seeker_debug_str) { \
|
||||||
|
(*tl_seeker_debug_str) << __x << "\n"; \
|
||||||
|
} \
|
||||||
|
} while (false)
|
||||||
|
|
||||||
// Requirements for Container<Pos> Fetch(Pos lower, Pos upper):
|
// Requirements for Container<Pos> Fetch(Pos lower, Pos upper):
|
||||||
// - fetches objects in Pos order, starting from lower (must be >= lower)
|
// - fetches objects in Pos order, starting from lower (must be >= lower)
|
||||||
// - must return upper if present, may or may not return objects after that
|
// - must return upper if present, may or may not return objects after that
|
||||||
@@ -49,113 +49,108 @@ namespace crucible {
|
|||||||
Pos
|
Pos
|
||||||
seek_backward(Pos const target_pos, Fetch fetch, Pos min_step = 1, size_t max_loops = numeric_limits<size_t>::max())
|
seek_backward(Pos const target_pos, Fetch fetch, Pos min_step = 1, size_t max_loops = numeric_limits<size_t>::max())
|
||||||
{
|
{
|
||||||
DINIT(ostringstream logs);
|
static const Pos end_pos = numeric_limits<Pos>::max();
|
||||||
try {
|
// TBH this probably won't work if begin_pos != 0, i.e. any signed type
|
||||||
static const Pos end_pos = numeric_limits<Pos>::max();
|
static const Pos begin_pos = numeric_limits<Pos>::min();
|
||||||
// TBH this probably won't work if begin_pos != 0, i.e. any signed type
|
// Run a binary search looking for the highest key below target_pos.
|
||||||
static const Pos begin_pos = numeric_limits<Pos>::min();
|
// Initial upper bound of the search is target_pos.
|
||||||
// Run a binary search looking for the highest key below target_pos.
|
// Find initial lower bound by doubling the size of the range until a key below target_pos
|
||||||
// Initial upper bound of the search is target_pos.
|
// is found, or the lower bound reaches the beginning of the search space.
|
||||||
// Find initial lower bound by doubling the size of the range until a key below target_pos
|
// If the lower bound search reaches the beginning of the search space without finding a key,
|
||||||
// is found, or the lower bound reaches the beginning of the search space.
|
// return the beginning of the search space; otherwise, perform a binary search between
|
||||||
// If the lower bound search reaches the beginning of the search space without finding a key,
|
// the bounds now established.
|
||||||
// return the beginning of the search space; otherwise, perform a binary search between
|
Pos lower_bound = 0;
|
||||||
// the bounds now established.
|
Pos upper_bound = target_pos;
|
||||||
Pos lower_bound = 0;
|
bool found_low = false;
|
||||||
Pos upper_bound = target_pos;
|
Pos probe_pos = target_pos;
|
||||||
bool found_low = false;
|
// We need one loop for each bit of the search space to find the lower bound,
|
||||||
Pos probe_pos = target_pos;
|
// one loop for each bit of the search space to find the upper bound,
|
||||||
// We need one loop for each bit of the search space to find the lower bound,
|
// and one extra loop to confirm the boundary is correct.
|
||||||
// one loop for each bit of the search space to find the upper bound,
|
for (size_t loop_count = min((1 + numeric_limits<Pos>::digits) * size_t(2), max_loops); loop_count; --loop_count) {
|
||||||
// and one extra loop to confirm the boundary is correct.
|
SEEKER_DEBUG_LOG("fetch(probe_pos = " << probe_pos << ", target_pos = " << target_pos << ")");
|
||||||
for (size_t loop_count = min(numeric_limits<Pos>::digits * size_t(2) + 1, max_loops); loop_count; --loop_count) {
|
auto result = fetch(probe_pos, target_pos);
|
||||||
DLOG("fetch(probe_pos = " << probe_pos << ", target_pos = " << target_pos << ")");
|
const Pos low_pos = result.empty() ? end_pos : *result.begin();
|
||||||
auto result = fetch(probe_pos, target_pos);
|
const Pos high_pos = result.empty() ? end_pos : *result.rbegin();
|
||||||
const Pos low_pos = result.empty() ? end_pos : *result.begin();
|
SEEKER_DEBUG_LOG(" = " << low_pos << ".." << high_pos);
|
||||||
const Pos high_pos = result.empty() ? end_pos : *result.rbegin();
|
// check for correct behavior of the fetch function
|
||||||
DLOG(" = " << low_pos << ".." << high_pos);
|
THROW_CHECK2(out_of_range, high_pos, probe_pos, probe_pos <= high_pos);
|
||||||
// check for correct behavior of the fetch function
|
THROW_CHECK2(out_of_range, low_pos, probe_pos, probe_pos <= low_pos);
|
||||||
THROW_CHECK2(out_of_range, high_pos, probe_pos, probe_pos <= high_pos);
|
THROW_CHECK2(out_of_range, low_pos, high_pos, low_pos <= high_pos);
|
||||||
THROW_CHECK2(out_of_range, low_pos, probe_pos, probe_pos <= low_pos);
|
if (!found_low) {
|
||||||
THROW_CHECK2(out_of_range, low_pos, high_pos, low_pos <= high_pos);
|
// if target_pos == end_pos then we will find it in every empty result set,
|
||||||
if (!found_low) {
|
// so in that case we force the lower bound to be lower than end_pos
|
||||||
// if target_pos == end_pos then we will find it in every empty result set,
|
if ((target_pos == end_pos) ? (low_pos < target_pos) : (low_pos <= target_pos)) {
|
||||||
// so in that case we force the lower bound to be lower than end_pos
|
// found a lower bound, set the low bound there and switch to binary search
|
||||||
if ((target_pos == end_pos) ? (low_pos < target_pos) : (low_pos <= target_pos)) {
|
found_low = true;
|
||||||
// found a lower bound, set the low bound there and switch to binary search
|
lower_bound = low_pos;
|
||||||
found_low = true;
|
SEEKER_DEBUG_LOG("found_low = true, lower_bound = " << lower_bound);
|
||||||
lower_bound = low_pos;
|
} else {
|
||||||
DLOG("found_low = true, lower_bound = " << lower_bound);
|
// still looking for lower bound
|
||||||
} else {
|
// if probe_pos was begin_pos then we can stop with no result
|
||||||
// still looking for lower bound
|
if (probe_pos == begin_pos) {
|
||||||
// if probe_pos was begin_pos then we can stop with no result
|
SEEKER_DEBUG_LOG("return: probe_pos == begin_pos " << begin_pos);
|
||||||
if (probe_pos == begin_pos) {
|
return begin_pos;
|
||||||
DLOG("return: probe_pos == begin_pos " << begin_pos);
|
|
||||||
return begin_pos;
|
|
||||||
}
|
|
||||||
// double the range size, or use the distance between objects found so far
|
|
||||||
THROW_CHECK2(out_of_range, upper_bound, probe_pos, probe_pos <= upper_bound);
|
|
||||||
// already checked low_pos <= high_pos above
|
|
||||||
const Pos want_delta = max(upper_bound - probe_pos, min_step);
|
|
||||||
// avoid underflowing the beginning of the search space
|
|
||||||
const Pos have_delta = min(want_delta, probe_pos - begin_pos);
|
|
||||||
THROW_CHECK2(out_of_range, want_delta, have_delta, have_delta <= want_delta);
|
|
||||||
// move probe and try again
|
|
||||||
probe_pos = probe_pos - have_delta;
|
|
||||||
DLOG("probe_pos " << probe_pos << " = probe_pos - have_delta " << have_delta << " (want_delta " << want_delta << ")");
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
// double the range size, or use the distance between objects found so far
|
||||||
|
THROW_CHECK2(out_of_range, upper_bound, probe_pos, probe_pos <= upper_bound);
|
||||||
|
// already checked low_pos <= high_pos above
|
||||||
|
const Pos want_delta = max(upper_bound - probe_pos, min_step);
|
||||||
|
// avoid underflowing the beginning of the search space
|
||||||
|
const Pos have_delta = min(want_delta, probe_pos - begin_pos);
|
||||||
|
THROW_CHECK2(out_of_range, want_delta, have_delta, have_delta <= want_delta);
|
||||||
|
// move probe and try again
|
||||||
|
probe_pos = probe_pos - have_delta;
|
||||||
|
SEEKER_DEBUG_LOG("probe_pos " << probe_pos << " = probe_pos - have_delta " << have_delta << " (want_delta " << want_delta << ")");
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
if (low_pos <= target_pos && target_pos <= high_pos) {
|
|
||||||
// have keys on either side of target_pos in result
|
|
||||||
// search from the high end until we find the highest key below target
|
|
||||||
for (auto i = result.rbegin(); i != result.rend(); ++i) {
|
|
||||||
// more correctness checking for fetch
|
|
||||||
THROW_CHECK2(out_of_range, *i, probe_pos, probe_pos <= *i);
|
|
||||||
if (*i <= target_pos) {
|
|
||||||
DLOG("return: *i " << *i << " <= target_pos " << target_pos);
|
|
||||||
return *i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// if the list is empty then low_pos = high_pos = end_pos
|
|
||||||
// if target_pos = end_pos also, then we will execute the loop
|
|
||||||
// above but not find any matching entries.
|
|
||||||
THROW_CHECK0(runtime_error, result.empty());
|
|
||||||
}
|
|
||||||
if (target_pos <= low_pos) {
|
|
||||||
// results are all too high, so probe_pos..low_pos is too high
|
|
||||||
// lower the high bound to the probe pos
|
|
||||||
upper_bound = probe_pos;
|
|
||||||
DLOG("upper_bound = probe_pos " << probe_pos);
|
|
||||||
}
|
|
||||||
if (high_pos < target_pos) {
|
|
||||||
// results are all too low, so probe_pos..high_pos is too low
|
|
||||||
// raise the low bound to the high_pos
|
|
||||||
DLOG("lower_bound = high_pos " << high_pos);
|
|
||||||
lower_bound = high_pos;
|
|
||||||
}
|
|
||||||
// compute a new probe pos at the middle of the range and try again
|
|
||||||
// we can't have a zero-size range here because we would not have set found_low yet
|
|
||||||
THROW_CHECK2(out_of_range, lower_bound, upper_bound, lower_bound <= upper_bound);
|
|
||||||
const Pos delta = (upper_bound - lower_bound) / 2;
|
|
||||||
probe_pos = lower_bound + delta;
|
|
||||||
if (delta < 1) {
|
|
||||||
// nothing can exist in the range (lower_bound, upper_bound)
|
|
||||||
// and an object is known to exist at lower_bound
|
|
||||||
DLOG("return: probe_pos == lower_bound " << lower_bound);
|
|
||||||
return lower_bound;
|
|
||||||
}
|
|
||||||
THROW_CHECK2(out_of_range, lower_bound, probe_pos, lower_bound <= probe_pos);
|
|
||||||
THROW_CHECK2(out_of_range, upper_bound, probe_pos, probe_pos <= upper_bound);
|
|
||||||
DLOG("loop: lower_bound " << lower_bound << ", probe_pos " << probe_pos << ", upper_bound " << upper_bound);
|
|
||||||
}
|
}
|
||||||
THROW_ERROR(runtime_error, "FIXME: should not reach this line: "
|
if (low_pos <= target_pos && target_pos <= high_pos) {
|
||||||
"lower_bound..upper_bound " << lower_bound << ".." << upper_bound << ", "
|
// have keys on either side of target_pos in result
|
||||||
"found_low " << found_low);
|
// search from the high end until we find the highest key below target
|
||||||
} catch (...) {
|
for (auto i = result.rbegin(); i != result.rend(); ++i) {
|
||||||
DOUT(cerr);
|
// more correctness checking for fetch
|
||||||
throw;
|
THROW_CHECK2(out_of_range, *i, probe_pos, probe_pos <= *i);
|
||||||
|
if (*i <= target_pos) {
|
||||||
|
SEEKER_DEBUG_LOG("return: *i " << *i << " <= target_pos " << target_pos);
|
||||||
|
return *i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// if the list is empty then low_pos = high_pos = end_pos
|
||||||
|
// if target_pos = end_pos also, then we will execute the loop
|
||||||
|
// above but not find any matching entries.
|
||||||
|
THROW_CHECK0(runtime_error, result.empty());
|
||||||
|
}
|
||||||
|
if (target_pos <= low_pos) {
|
||||||
|
// results are all too high, so probe_pos..low_pos is too high
|
||||||
|
// lower the high bound to the probe pos, low_pos cannot be lower
|
||||||
|
SEEKER_DEBUG_LOG("upper_bound = probe_pos " << probe_pos);
|
||||||
|
upper_bound = probe_pos;
|
||||||
|
}
|
||||||
|
if (high_pos < target_pos) {
|
||||||
|
// results are all too low, so probe_pos..high_pos is too low
|
||||||
|
// raise the low bound to high_pos but not above upper_bound
|
||||||
|
const auto next_pos = min(high_pos, upper_bound);
|
||||||
|
SEEKER_DEBUG_LOG("lower_bound = next_pos " << next_pos);
|
||||||
|
lower_bound = next_pos;
|
||||||
|
}
|
||||||
|
// compute a new probe pos at the middle of the range and try again
|
||||||
|
// we can't have a zero-size range here because we would not have set found_low yet
|
||||||
|
THROW_CHECK2(out_of_range, lower_bound, upper_bound, lower_bound <= upper_bound);
|
||||||
|
const Pos delta = (upper_bound - lower_bound) / 2;
|
||||||
|
probe_pos = lower_bound + delta;
|
||||||
|
if (delta < 1) {
|
||||||
|
// nothing can exist in the range (lower_bound, upper_bound)
|
||||||
|
// and an object is known to exist at lower_bound
|
||||||
|
SEEKER_DEBUG_LOG("return: probe_pos == lower_bound " << lower_bound);
|
||||||
|
return lower_bound;
|
||||||
|
}
|
||||||
|
THROW_CHECK2(out_of_range, lower_bound, probe_pos, lower_bound <= probe_pos);
|
||||||
|
THROW_CHECK2(out_of_range, upper_bound, probe_pos, probe_pos <= upper_bound);
|
||||||
|
SEEKER_DEBUG_LOG("loop bottom: lower_bound " << lower_bound << ", probe_pos " << probe_pos << ", upper_bound " << upper_bound);
|
||||||
}
|
}
|
||||||
|
THROW_ERROR(runtime_error, "FIXME: should not reach this line: "
|
||||||
|
"lower_bound..upper_bound " << lower_bound << ".." << upper_bound << ", "
|
||||||
|
"found_low " << found_low);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
106
include/crucible/table.h
Normal file
106
include/crucible/table.h
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
#ifndef CRUCIBLE_TABLE_H
|
||||||
|
#define CRUCIBLE_TABLE_H
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
#include <limits>
|
||||||
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
|
#include <ostream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace crucible {
	namespace Table {
		using namespace std;

		/// A cell renderer: called with a width and a height, returns the
		/// text to show for the cell.
		using Content = function<string(size_t width, size_t height)>;

		/// Largest representable size_t value.
		/// NOTE(review): presumably used as an "end of dimension" position
		/// marker -- confirm against table.cc.
		const size_t endpos = numeric_limits<size_t>::max();

		/// Build a Content from a single character (defined out of line).
		Content Fill(const char c);

		/// Build a Content from a string (defined out of line).
		Content Text(const string& s);

		/// Build a Content from any value that can be streamed to an
		/// ostream: the value is formatted with operator<< and the
		/// resulting string is handed to Text().
		template <class T>
		Content Number(const T& num)
		{
			ostringstream formatted;
			formatted << num;
			return Text(formatted.str());
		}

		/// One table cell, holding the Content callback that renders it.
		class Cell {
			Content m_content;
		public:
			/// The default Content ignores its arguments and yields an
			/// empty string.
			Cell(const Content &fn = [](size_t, size_t) { return string(); } );

			/// Replace this cell's Content.
			Cell& operator=(const Content &fn);

			/// Produce the cell text for the given width and height.
			string text(size_t width, size_t height) const;
		};

		/// Bookkeeping for one axis (rows or columns) of a Table.
		class Dimension {
			size_t m_next_pos = 0;
			vector<size_t> m_elements;

			// Table gets access to the private at() lookup below.
			friend class Table;
			size_t at(size_t) const;
		public:
			size_t size() const;

			/// Insert an element at pos; the returned value is what
			/// Table's insert_row/insert_col use as the new index.
			size_t insert(size_t pos);

			void erase(size_t pos);
		};

		/// A two-dimensional grid of Cells with configurable left, middle
		/// and right border strings.
		class Table {
			Dimension m_rows;
			Dimension m_cols;
			map<pair<size_t, size_t>, Cell> m_cells;
			string m_left = "|";
			string m_mid = "|";
			string m_right = "|";
		public:
			// Row and column dimensions, mutable and read-only.
			Dimension &rows();
			const Dimension& rows() const;
			Dimension &cols();
			const Dimension& cols() const;

			// Cell lookup by (row, col).
			Cell& at(size_t row, size_t col);
			const Cell& at(size_t row, size_t col) const;

			/// Insert a row at pos and fill it from container.
			template <class T> void insert_row(size_t pos, const T& container);

			/// Insert a column at pos and fill it from container.
			template <class T> void insert_col(size_t pos, const T& container);

			// Border string setters.
			void left(const string &s);
			void mid(const string &s);
			void right(const string &s);

			// Border string getters.
			const string& left() const;
			const string& mid() const;
			const string& right() const;
		};

		/// Stream a Table to os (defined out of line).
		ostream& operator<<(ostream &os, const Table &table);

		/// Insert a new row at pos, then assign the elements of container
		/// to columns 0, 1, 2, ... of that row.  Whenever the container
		/// has more elements than the table has columns, a column is
		/// inserted first so the target cell exists.
		template <class T>
		void
		Table::insert_row(size_t pos, const T& container)
		{
			// Index reported by the row dimension for the new row
			const auto row = m_rows.insert(pos);
			size_t col = 0;
			for (const auto &content : container) {
				const bool need_new_col = col >= cols().size();
				if (need_new_col) {
					cols().insert(col);
				}
				at(row, col) = content;
				++col;
			}
		}

		/// Insert a new column at pos, then assign the elements of
		/// container to rows 0, 1, 2, ... of that column.  Whenever the
		/// container has more elements than the table has rows, a row is
		/// inserted first so the target cell exists.
		template <class T>
		void
		Table::insert_col(size_t pos, const T& container)
		{
			// Index reported by the column dimension for the new column
			const auto col = m_cols.insert(pos);
			size_t row = 0;
			for (const auto &content : container) {
				const bool need_new_row = row >= rows().size();
				if (need_new_row) {
					rows().insert(row);
				}
				at(row, col) = content;
				++row;
			}
		}

	}
}
|
||||||
|
|
||||||
|
#endif // CRUCIBLE_TABLE_H
|
@@ -40,10 +40,17 @@ namespace crucible {
|
|||||||
/// after the current instance exits.
|
/// after the current instance exits.
|
||||||
void run() const;
|
void run() const;
|
||||||
|
|
||||||
|
/// Schedule task to run when no other Task is available.
|
||||||
|
void idle() const;
|
||||||
|
|
||||||
/// Schedule Task to run after this Task has run or
|
/// Schedule Task to run after this Task has run or
|
||||||
/// been destroyed.
|
/// been destroyed.
|
||||||
void append(const Task &task) const;
|
void append(const Task &task) const;
|
||||||
|
|
||||||
|
/// Schedule Task to run after this Task has run or
|
||||||
|
/// been destroyed, in Task ID order.
|
||||||
|
void insert(const Task &task) const;
|
||||||
|
|
||||||
/// Describe Task as text.
|
/// Describe Task as text.
|
||||||
string title() const;
|
string title() const;
|
||||||
|
|
||||||
@@ -163,15 +170,12 @@ namespace crucible {
|
|||||||
/// (it is the ExclusionLock that owns the lock, so it can
|
/// (it is the ExclusionLock that owns the lock, so it can
|
||||||
/// be passed to other Tasks or threads, but this is not
|
/// be passed to other Tasks or threads, but this is not
|
||||||
/// recommended practice).
|
/// recommended practice).
|
||||||
/// If not successful, current Task is appended to the
|
/// If not successful, the argument Task is appended to the
|
||||||
/// task that currently holds the lock. Current task is
|
/// task that currently holds the lock. Current task is
|
||||||
/// expected to release any other ExclusionLock
|
/// expected to immediately release any other ExclusionLock
|
||||||
/// objects it holds, and exit its Task function.
|
/// objects it holds, and exit its Task function.
|
||||||
ExclusionLock try_lock(const Task &task);
|
ExclusionLock try_lock(const Task &task);
|
||||||
|
|
||||||
/// Execute Task when Exclusion is unlocked (possibly
|
|
||||||
/// immediately).
|
|
||||||
void insert_task(const Task &t);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Wrapper around pthread_setname_np which handles length limits
|
/// Wrapper around pthread_setname_np which handles length limits
|
||||||
|
@@ -34,7 +34,7 @@ namespace crucible {
|
|||||||
double m_rate;
|
double m_rate;
|
||||||
double m_burst;
|
double m_burst;
|
||||||
double m_tokens = 0.0;
|
double m_tokens = 0.0;
|
||||||
mutex m_mutex;
|
mutable mutex m_mutex;
|
||||||
|
|
||||||
void update_tokens();
|
void update_tokens();
|
||||||
RateLimiter() = delete;
|
RateLimiter() = delete;
|
||||||
@@ -45,6 +45,8 @@ namespace crucible {
|
|||||||
double sleep_time(double cost = 1.0);
|
double sleep_time(double cost = 1.0);
|
||||||
bool is_ready();
|
bool is_ready();
|
||||||
void borrow(double cost = 1.0);
|
void borrow(double cost = 1.0);
|
||||||
|
void rate(double new_rate);
|
||||||
|
double rate() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
class RateEstimator {
|
class RateEstimator {
|
||||||
@@ -88,6 +90,9 @@ namespace crucible {
|
|||||||
// Read count
|
// Read count
|
||||||
uint64_t count() const;
|
uint64_t count() const;
|
||||||
|
|
||||||
|
/// Increment count (like update(count() + more), but atomic)
|
||||||
|
void increment(uint64_t more = 1);
|
||||||
|
|
||||||
// Convert counts to chrono types
|
// Convert counts to chrono types
|
||||||
chrono::high_resolution_clock::time_point time_point(uint64_t absolute_count) const;
|
chrono::high_resolution_clock::time_point time_point(uint64_t absolute_count) const;
|
||||||
chrono::duration<double> duration(uint64_t relative_count) const;
|
chrono::duration<double> duration(uint64_t relative_count) const;
|
||||||
|
@@ -14,9 +14,12 @@ CRUCIBLE_OBJS = \
|
|||||||
fs.o \
|
fs.o \
|
||||||
multilock.o \
|
multilock.o \
|
||||||
ntoa.o \
|
ntoa.o \
|
||||||
|
openat2.o \
|
||||||
path.o \
|
path.o \
|
||||||
process.o \
|
process.o \
|
||||||
|
seeker.o \
|
||||||
string.o \
|
string.o \
|
||||||
|
table.o \
|
||||||
task.o \
|
task.o \
|
||||||
time.o \
|
time.o \
|
||||||
uname.o \
|
uname.o \
|
||||||
|
@@ -5,6 +5,12 @@
|
|||||||
#include "crucible/hexdump.h"
|
#include "crucible/hexdump.h"
|
||||||
#include "crucible/seeker.h"
|
#include "crucible/seeker.h"
|
||||||
|
|
||||||
|
#define CRUCIBLE_BTRFS_TREE_DEBUG(x) do { \
|
||||||
|
if (BtrfsIoctlSearchKey::s_debug_ostream) { \
|
||||||
|
(*BtrfsIoctlSearchKey::s_debug_ostream) << x; \
|
||||||
|
} \
|
||||||
|
} while (false)
|
||||||
|
|
||||||
namespace crucible {
|
namespace crucible {
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
@@ -22,6 +28,13 @@ namespace crucible {
|
|||||||
return m_objectid + m_offset;
|
return m_objectid + m_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t
|
||||||
|
BtrfsTreeItem::extent_flags() const
|
||||||
|
{
|
||||||
|
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_ITEM_KEY);
|
||||||
|
return btrfs_get_member(&btrfs_extent_item::flags, m_data);
|
||||||
|
}
|
||||||
|
|
||||||
uint64_t
|
uint64_t
|
||||||
BtrfsTreeItem::extent_generation() const
|
BtrfsTreeItem::extent_generation() const
|
||||||
{
|
{
|
||||||
@@ -61,6 +74,13 @@ namespace crucible {
|
|||||||
return btrfs_get_member(&btrfs_root_item::flags, m_data);
|
return btrfs_get_member(&btrfs_root_item::flags, m_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t
|
||||||
|
BtrfsTreeItem::root_refs() const
|
||||||
|
{
|
||||||
|
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_ROOT_ITEM_KEY);
|
||||||
|
return btrfs_get_member(&btrfs_root_item::refs, m_data);
|
||||||
|
}
|
||||||
|
|
||||||
ostream &
|
ostream &
|
||||||
operator<<(ostream &os, const BtrfsTreeItem &bti)
|
operator<<(ostream &os, const BtrfsTreeItem &bti)
|
||||||
{
|
{
|
||||||
@@ -137,6 +157,13 @@ namespace crucible {
|
|||||||
return btrfs_get_member(&btrfs_inode_item::size, m_data);
|
return btrfs_get_member(&btrfs_inode_item::size, m_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t
|
||||||
|
BtrfsTreeItem::inode_flags() const
|
||||||
|
{
|
||||||
|
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_INODE_ITEM_KEY);
|
||||||
|
return btrfs_get_member(&btrfs_inode_item::flags, m_data);
|
||||||
|
}
|
||||||
|
|
||||||
uint64_t
|
uint64_t
|
||||||
BtrfsTreeItem::file_extent_logical_bytes() const
|
BtrfsTreeItem::file_extent_logical_bytes() const
|
||||||
{
|
{
|
||||||
@@ -269,12 +296,24 @@ namespace crucible {
|
|||||||
m_type = type;
|
m_type = type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint8_t
|
||||||
|
BtrfsTreeFetcher::type()
|
||||||
|
{
|
||||||
|
return m_type;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
BtrfsTreeFetcher::tree(uint64_t tree)
|
BtrfsTreeFetcher::tree(uint64_t tree)
|
||||||
{
|
{
|
||||||
m_tree = tree;
|
m_tree = tree;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t
|
||||||
|
BtrfsTreeFetcher::tree()
|
||||||
|
{
|
||||||
|
return m_tree;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
BtrfsTreeFetcher::transid(uint64_t min_transid, uint64_t max_transid)
|
BtrfsTreeFetcher::transid(uint64_t min_transid, uint64_t max_transid)
|
||||||
{
|
{
|
||||||
@@ -329,6 +368,7 @@ namespace crucible {
|
|||||||
BtrfsTreeItem
|
BtrfsTreeItem
|
||||||
BtrfsTreeFetcher::at(uint64_t logical)
|
BtrfsTreeFetcher::at(uint64_t logical)
|
||||||
{
|
{
|
||||||
|
CRUCIBLE_BTRFS_TREE_DEBUG("at " << logical);
|
||||||
BtrfsIoctlSearchKey &sk = m_sk;
|
BtrfsIoctlSearchKey &sk = m_sk;
|
||||||
fill_sk(sk, logical);
|
fill_sk(sk, logical);
|
||||||
// Exact match, should return 0 or 1 items
|
// Exact match, should return 0 or 1 items
|
||||||
@@ -371,53 +411,59 @@ namespace crucible {
|
|||||||
BtrfsTreeFetcher::rlower_bound(uint64_t logical)
|
BtrfsTreeFetcher::rlower_bound(uint64_t logical)
|
||||||
{
|
{
|
||||||
#if 0
|
#if 0
|
||||||
#define BTFRLB_DEBUG(x) do { cerr << x; } while (false)
|
static bool btfrlb_debug = getenv("BTFLRB_DEBUG");
|
||||||
|
#define BTFRLB_DEBUG(x) do { if (btfrlb_debug) cerr << x; } while (false)
|
||||||
#else
|
#else
|
||||||
#define BTFRLB_DEBUG(x) do { } while (false)
|
#define BTFRLB_DEBUG(x) CRUCIBLE_BTRFS_TREE_DEBUG(x)
|
||||||
#endif
|
#endif
|
||||||
BtrfsTreeItem closest_item;
|
BtrfsTreeItem closest_item;
|
||||||
uint64_t closest_logical = 0;
|
uint64_t closest_logical = 0;
|
||||||
BtrfsIoctlSearchKey &sk = m_sk;
|
BtrfsIoctlSearchKey &sk = m_sk;
|
||||||
size_t loops = 0;
|
size_t loops = 0;
|
||||||
BTFRLB_DEBUG("rlower_bound: " << to_hex(logical) << endl);
|
BTFRLB_DEBUG("rlower_bound: " << to_hex(logical) << " in tree " << tree() << endl);
|
||||||
seek_backward(scale_logical(logical), [&](uint64_t lower_bound, uint64_t upper_bound) {
|
seek_backward(scale_logical(logical), [&](uint64_t const lower_bound, uint64_t const upper_bound) {
|
||||||
++loops;
|
++loops;
|
||||||
fill_sk(sk, unscale_logical(min(scaled_max_logical(), lower_bound)));
|
fill_sk(sk, unscale_logical(min(scaled_max_logical(), lower_bound)));
|
||||||
set<uint64_t> rv;
|
set<uint64_t> rv;
|
||||||
|
bool too_far = false;
|
||||||
do {
|
do {
|
||||||
sk.nr_items = 4;
|
sk.nr_items = 4;
|
||||||
sk.do_ioctl(fd());
|
sk.do_ioctl(fd());
|
||||||
BTFRLB_DEBUG("fetch: loop " << loops << " lower_bound..upper_bound " << to_hex(lower_bound) << ".." << to_hex(upper_bound));
|
BTFRLB_DEBUG("fetch: loop " << loops << " lower_bound..upper_bound " << to_hex(lower_bound) << ".." << to_hex(upper_bound));
|
||||||
for (auto &i : sk.m_result) {
|
for (auto &i : sk.m_result) {
|
||||||
next_sk(sk, i);
|
next_sk(sk, i);
|
||||||
const auto this_logical = hdr_logical(i);
|
// If hdr_stop or !hdr_match, don't inspect the item
|
||||||
const auto scaled_hdr_logical = scale_logical(this_logical);
|
if (hdr_stop(i)) {
|
||||||
BTFRLB_DEBUG(" " << to_hex(scaled_hdr_logical));
|
too_far = true;
|
||||||
if (hdr_match(i)) {
|
rv.insert(numeric_limits<uint64_t>::max());
|
||||||
if (this_logical <= logical && this_logical > closest_logical) {
|
BTFRLB_DEBUG("(stop)");
|
||||||
closest_logical = this_logical;
|
|
||||||
closest_item = i;
|
|
||||||
}
|
|
||||||
BTFRLB_DEBUG("(match)");
|
|
||||||
rv.insert(scaled_hdr_logical);
|
|
||||||
}
|
|
||||||
if (scaled_hdr_logical > upper_bound || hdr_stop(i)) {
|
|
||||||
if (scaled_hdr_logical >= upper_bound) {
|
|
||||||
BTFRLB_DEBUG("(" << to_hex(scaled_hdr_logical) << " >= " << to_hex(upper_bound) << ")");
|
|
||||||
}
|
|
||||||
if (hdr_stop(i)) {
|
|
||||||
rv.insert(numeric_limits<uint64_t>::max());
|
|
||||||
BTFRLB_DEBUG("(stop)");
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
} else {
|
|
||||||
BTFRLB_DEBUG("(cont'd)");
|
|
||||||
}
|
}
|
||||||
|
if (!hdr_match(i)) {
|
||||||
|
BTFRLB_DEBUG("(no match)");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const auto this_logical = hdr_logical(i);
|
||||||
|
BTFRLB_DEBUG(" " << to_hex(this_logical) << " " << i);
|
||||||
|
const auto scaled_hdr_logical = scale_logical(this_logical);
|
||||||
|
BTFRLB_DEBUG(" " << "(match)");
|
||||||
|
if (scaled_hdr_logical > upper_bound) {
|
||||||
|
too_far = true;
|
||||||
|
BTFRLB_DEBUG("(" << to_hex(scaled_hdr_logical) << " >= " << to_hex(upper_bound) << ")");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (this_logical <= logical && this_logical > closest_logical) {
|
||||||
|
closest_logical = this_logical;
|
||||||
|
closest_item = i;
|
||||||
|
BTFRLB_DEBUG("(closest)");
|
||||||
|
}
|
||||||
|
rv.insert(scaled_hdr_logical);
|
||||||
|
BTFRLB_DEBUG("(cont'd)");
|
||||||
}
|
}
|
||||||
BTFRLB_DEBUG(endl);
|
BTFRLB_DEBUG(endl);
|
||||||
// We might get a search result that contains only non-matching items.
|
// We might get a search result that contains only non-matching items.
|
||||||
// Keep looping until we find any matching item or we run out of tree.
|
// Keep looping until we find any matching item or we run out of tree.
|
||||||
} while (rv.empty() && !sk.m_result.empty());
|
} while (!too_far && rv.empty() && !sk.m_result.empty());
|
||||||
return rv;
|
return rv;
|
||||||
}, scale_logical(lookbehind_size()));
|
}, scale_logical(lookbehind_size()));
|
||||||
return closest_item;
|
return closest_item;
|
||||||
@@ -448,6 +494,7 @@ namespace crucible {
|
|||||||
BtrfsTreeItem
|
BtrfsTreeItem
|
||||||
BtrfsTreeFetcher::next(uint64_t logical)
|
BtrfsTreeFetcher::next(uint64_t logical)
|
||||||
{
|
{
|
||||||
|
CRUCIBLE_BTRFS_TREE_DEBUG("next " << logical);
|
||||||
const auto scaled_logical = scale_logical(logical);
|
const auto scaled_logical = scale_logical(logical);
|
||||||
if (scaled_logical + 1 > scaled_max_logical()) {
|
if (scaled_logical + 1 > scaled_max_logical()) {
|
||||||
return BtrfsTreeItem();
|
return BtrfsTreeItem();
|
||||||
@@ -458,6 +505,7 @@ namespace crucible {
|
|||||||
BtrfsTreeItem
|
BtrfsTreeItem
|
||||||
BtrfsTreeFetcher::prev(uint64_t logical)
|
BtrfsTreeFetcher::prev(uint64_t logical)
|
||||||
{
|
{
|
||||||
|
CRUCIBLE_BTRFS_TREE_DEBUG("prev " << logical);
|
||||||
const auto scaled_logical = scale_logical(logical);
|
const auto scaled_logical = scale_logical(logical);
|
||||||
if (scaled_logical < 1) {
|
if (scaled_logical < 1) {
|
||||||
return BtrfsTreeItem();
|
return BtrfsTreeItem();
|
||||||
@@ -542,13 +590,14 @@ namespace crucible {
|
|||||||
BtrfsCsumTreeFetcher::get_sums(uint64_t const logical, size_t count, function<void(uint64_t logical, const uint8_t *buf, size_t bytes)> output)
|
BtrfsCsumTreeFetcher::get_sums(uint64_t const logical, size_t count, function<void(uint64_t logical, const uint8_t *buf, size_t bytes)> output)
|
||||||
{
|
{
|
||||||
#if 0
|
#if 0
|
||||||
#define BCTFGS_DEBUG(x) do { cerr << x; } while (false)
|
static bool bctfgs_debug = getenv("BCTFGS_DEBUG");
|
||||||
|
#define BCTFGS_DEBUG(x) do { if (bctfgs_debug) cerr << x; } while (false)
|
||||||
#else
|
#else
|
||||||
#define BCTFGS_DEBUG(x) do { } while (false)
|
#define BCTFGS_DEBUG(x) CRUCIBLE_BTRFS_TREE_DEBUG(x)
|
||||||
#endif
|
#endif
|
||||||
const uint64_t logical_end = logical + count * block_size();
|
const uint64_t logical_end = logical + count * block_size();
|
||||||
BtrfsTreeItem bti = rlower_bound(logical);
|
BtrfsTreeItem bti = rlower_bound(logical);
|
||||||
size_t loops = 0;
|
size_t __attribute__((unused)) loops = 0;
|
||||||
BCTFGS_DEBUG("get_sums " << to_hex(logical) << ".." << to_hex(logical_end) << endl);
|
BCTFGS_DEBUG("get_sums " << to_hex(logical) << ".." << to_hex(logical_end) << endl);
|
||||||
while (!!bti) {
|
while (!!bti) {
|
||||||
BCTFGS_DEBUG("get_sums[" << loops << "]: " << bti << endl);
|
BCTFGS_DEBUG("get_sums[" << loops << "]: " << bti << endl);
|
||||||
@@ -636,14 +685,6 @@ namespace crucible {
|
|||||||
type(BTRFS_EXTENT_DATA_KEY);
|
type(BTRFS_EXTENT_DATA_KEY);
|
||||||
}
|
}
|
||||||
|
|
||||||
BtrfsFsTreeFetcher::BtrfsFsTreeFetcher(const Fd &new_fd, uint64_t subvol) :
|
|
||||||
BtrfsTreeObjectFetcher(new_fd)
|
|
||||||
{
|
|
||||||
tree(subvol);
|
|
||||||
type(BTRFS_EXTENT_DATA_KEY);
|
|
||||||
scale_size(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
BtrfsInodeFetcher::BtrfsInodeFetcher(const Fd &fd) :
|
BtrfsInodeFetcher::BtrfsInodeFetcher(const Fd &fd) :
|
||||||
BtrfsTreeObjectFetcher(fd)
|
BtrfsTreeObjectFetcher(fd)
|
||||||
{
|
{
|
||||||
@@ -667,18 +708,86 @@ namespace crucible {
|
|||||||
BtrfsTreeObjectFetcher(fd)
|
BtrfsTreeObjectFetcher(fd)
|
||||||
{
|
{
|
||||||
tree(BTRFS_ROOT_TREE_OBJECTID);
|
tree(BTRFS_ROOT_TREE_OBJECTID);
|
||||||
type(BTRFS_ROOT_ITEM_KEY);
|
|
||||||
scale_size(1);
|
scale_size(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
BtrfsTreeItem
|
BtrfsTreeItem
|
||||||
BtrfsRootFetcher::root(uint64_t subvol)
|
BtrfsRootFetcher::root(const uint64_t subvol)
|
||||||
{
|
{
|
||||||
|
const auto my_type = BTRFS_ROOT_ITEM_KEY;
|
||||||
|
type(my_type);
|
||||||
const auto item = at(subvol);
|
const auto item = at(subvol);
|
||||||
if (!!item) {
|
if (!!item) {
|
||||||
THROW_CHECK2(runtime_error, item.objectid(), subvol, subvol == item.objectid());
|
THROW_CHECK2(runtime_error, item.objectid(), subvol, subvol == item.objectid());
|
||||||
THROW_CHECK2(runtime_error, item.type(), BTRFS_ROOT_ITEM_KEY, item.type() == BTRFS_ROOT_ITEM_KEY);
|
THROW_CHECK2(runtime_error, item.type(), my_type, item.type() == my_type);
|
||||||
}
|
}
|
||||||
return item;
|
return item;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BtrfsTreeItem
|
||||||
|
BtrfsRootFetcher::root_backref(const uint64_t subvol)
|
||||||
|
{
|
||||||
|
const auto my_type = BTRFS_ROOT_BACKREF_KEY;
|
||||||
|
type(my_type);
|
||||||
|
const auto item = at(subvol);
|
||||||
|
if (!!item) {
|
||||||
|
THROW_CHECK2(runtime_error, item.objectid(), subvol, subvol == item.objectid());
|
||||||
|
THROW_CHECK2(runtime_error, item.type(), my_type, item.type() == my_type);
|
||||||
|
}
|
||||||
|
return item;
|
||||||
|
}
|
||||||
|
|
||||||
|
BtrfsDataExtentTreeFetcher::BtrfsDataExtentTreeFetcher(const Fd &fd) :
|
||||||
|
BtrfsExtentItemFetcher(fd),
|
||||||
|
m_chunk_tree(fd)
|
||||||
|
{
|
||||||
|
tree(BTRFS_EXTENT_TREE_OBJECTID);
|
||||||
|
type(BTRFS_EXTENT_ITEM_KEY);
|
||||||
|
m_chunk_tree.tree(BTRFS_CHUNK_TREE_OBJECTID);
|
||||||
|
m_chunk_tree.type(BTRFS_CHUNK_ITEM_KEY);
|
||||||
|
m_chunk_tree.objectid(BTRFS_FIRST_CHUNK_TREE_OBJECTID);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
BtrfsDataExtentTreeFetcher::next_sk(BtrfsIoctlSearchKey &key, const BtrfsIoctlSearchHeader &hdr)
|
||||||
|
{
|
||||||
|
key.min_type = key.max_type = type();
|
||||||
|
key.max_objectid = key.max_offset = numeric_limits<uint64_t>::max();
|
||||||
|
key.min_offset = 0;
|
||||||
|
key.min_objectid = hdr.objectid;
|
||||||
|
const auto step = scale_size();
|
||||||
|
if (key.min_objectid < numeric_limits<uint64_t>::max() - step) {
|
||||||
|
key.min_objectid += step;
|
||||||
|
} else {
|
||||||
|
key.min_objectid = numeric_limits<uint64_t>::max();
|
||||||
|
}
|
||||||
|
// If we're still in our current block group, check here
|
||||||
|
if (!!m_current_bg) {
|
||||||
|
const auto bg_begin = m_current_bg.offset();
|
||||||
|
const auto bg_end = bg_begin + m_current_bg.chunk_length();
|
||||||
|
// If we are still in our current block group, return early
|
||||||
|
if (key.min_objectid >= bg_begin && key.min_objectid < bg_end) return;
|
||||||
|
}
|
||||||
|
// We don't have a current block group or we're out of range
|
||||||
|
// Find the chunk that this bytenr belongs to
|
||||||
|
m_current_bg = m_chunk_tree.rlower_bound(key.min_objectid);
|
||||||
|
// Make sure it's a data block group
|
||||||
|
while (!!m_current_bg) {
|
||||||
|
// Data block group, stop here
|
||||||
|
if (m_current_bg.chunk_type() & BTRFS_BLOCK_GROUP_DATA) break;
|
||||||
|
// Not a data block group, skip to end
|
||||||
|
key.min_objectid = m_current_bg.offset() + m_current_bg.chunk_length();
|
||||||
|
m_current_bg = m_chunk_tree.lower_bound(key.min_objectid);
|
||||||
|
}
|
||||||
|
if (!m_current_bg) {
|
||||||
|
// Ran out of data block groups, stop here
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Check to see if bytenr is in the current data block group
|
||||||
|
const auto bg_begin = m_current_bg.offset();
|
||||||
|
if (key.min_objectid < bg_begin) {
|
||||||
|
// Move forward to start of data block group
|
||||||
|
key.min_objectid = bg_begin;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@@ -44,10 +44,10 @@ namespace crucible {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ByteVector::value_type&
|
ByteVector::value_type&
|
||||||
ByteVector::operator[](size_t size) const
|
ByteVector::operator[](size_t index) const
|
||||||
{
|
{
|
||||||
unique_lock<mutex> lock(m_mutex);
|
unique_lock<mutex> lock(m_mutex);
|
||||||
return m_ptr.get()[size];
|
return m_ptr.get()[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
ByteVector::ByteVector(const ByteVector &that)
|
ByteVector::ByteVector(const ByteVector &that)
|
||||||
@@ -183,7 +183,6 @@ namespace crucible {
|
|||||||
|
|
||||||
ostream&
|
ostream&
|
||||||
operator<<(ostream &os, const ByteVector &bv) {
|
operator<<(ostream &os, const ByteVector &bv) {
|
||||||
unique_lock<mutex> lock(bv.m_mutex);
|
|
||||||
hexdump(os, bv);
|
hexdump(os, bv);
|
||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
|
@@ -76,7 +76,7 @@ namespace crucible {
|
|||||||
DIE_IF_ZERO(strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", <m));
|
DIE_IF_ZERO(strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", <m));
|
||||||
|
|
||||||
header_stream << buf;
|
header_stream << buf;
|
||||||
header_stream << " " << getpid() << "." << crucible::gettid();
|
header_stream << " " << getpid() << "." << gettid();
|
||||||
if (add_prefix_level) {
|
if (add_prefix_level) {
|
||||||
header_stream << "<" << m_loglevel << ">";
|
header_stream << "<" << m_loglevel << ">";
|
||||||
}
|
}
|
||||||
@@ -88,7 +88,7 @@ namespace crucible {
|
|||||||
header_stream << "<" << m_loglevel << ">";
|
header_stream << "<" << m_loglevel << ">";
|
||||||
}
|
}
|
||||||
header_stream << (m_name.empty() ? "thread" : m_name);
|
header_stream << (m_name.empty() ? "thread" : m_name);
|
||||||
header_stream << "[" << crucible::gettid() << "]";
|
header_stream << "[" << gettid() << "]";
|
||||||
}
|
}
|
||||||
|
|
||||||
header_stream << ": ";
|
header_stream << ": ";
|
||||||
|
110
lib/fs.cc
110
lib/fs.cc
@@ -159,12 +159,13 @@ namespace crucible {
|
|||||||
{
|
{
|
||||||
THROW_CHECK1(invalid_argument, src_length, src_length > 0);
|
THROW_CHECK1(invalid_argument, src_length, src_length > 0);
|
||||||
while (src_length > 0) {
|
while (src_length > 0) {
|
||||||
off_t length = min(off_t(BTRFS_MAX_DEDUPE_LEN), src_length);
|
BtrfsExtentSame bes(src_fd, src_offset, src_length);
|
||||||
BtrfsExtentSame bes(src_fd, src_offset, length);
|
|
||||||
bes.add(dst_fd, dst_offset);
|
bes.add(dst_fd, dst_offset);
|
||||||
bes.do_ioctl();
|
bes.do_ioctl();
|
||||||
auto status = bes.m_info.at(0).status;
|
const auto status = bes.m_info.at(0).status;
|
||||||
if (status == 0) {
|
if (status == 0) {
|
||||||
|
const off_t length = bes.m_info.at(0).bytes_deduped;
|
||||||
|
THROW_CHECK0(invalid_argument, length > 0);
|
||||||
src_offset += length;
|
src_offset += length;
|
||||||
dst_offset += length;
|
dst_offset += length;
|
||||||
src_length -= length;
|
src_length -= length;
|
||||||
@@ -315,13 +316,25 @@ namespace crucible {
|
|||||||
return m_flags;
|
return m_flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
BtrfsIoctlLogicalInoArgs::set_logical(uint64_t new_logical)
|
||||||
|
{
|
||||||
|
m_logical = new_logical;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
BtrfsIoctlLogicalInoArgs::set_size(uint64_t new_size)
|
||||||
|
{
|
||||||
|
m_container_size = new_size;
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
BtrfsIoctlLogicalInoArgs::do_ioctl_nothrow(int fd)
|
BtrfsIoctlLogicalInoArgs::do_ioctl_nothrow(int fd)
|
||||||
{
|
{
|
||||||
btrfs_ioctl_logical_ino_args args = (btrfs_ioctl_logical_ino_args) {
|
btrfs_ioctl_logical_ino_args args = (btrfs_ioctl_logical_ino_args) {
|
||||||
.logical = m_logical,
|
.logical = m_logical,
|
||||||
.size = m_container_size,
|
.size = m_container_size,
|
||||||
.inodes = reinterpret_cast<uint64_t>(m_container.prepare(m_container_size)),
|
.inodes = reinterpret_cast<uintptr_t>(m_container.prepare(m_container_size)),
|
||||||
};
|
};
|
||||||
// We are still supporting building with old headers that don't have .flags yet
|
// We are still supporting building with old headers that don't have .flags yet
|
||||||
*(&args.reserved[0] + 3) = m_flags;
|
*(&args.reserved[0] + 3) = m_flags;
|
||||||
@@ -404,7 +417,7 @@ namespace crucible {
|
|||||||
{
|
{
|
||||||
btrfs_ioctl_ino_path_args *p = static_cast<btrfs_ioctl_ino_path_args *>(this);
|
btrfs_ioctl_ino_path_args *p = static_cast<btrfs_ioctl_ino_path_args *>(this);
|
||||||
BtrfsDataContainer container(m_container_size);
|
BtrfsDataContainer container(m_container_size);
|
||||||
fspath = reinterpret_cast<uint64_t>(container.prepare(m_container_size));
|
fspath = reinterpret_cast<uintptr_t>(container.prepare(m_container_size));
|
||||||
size = container.get_size();
|
size = container.get_size();
|
||||||
|
|
||||||
m_paths.clear();
|
m_paths.clear();
|
||||||
@@ -741,6 +754,11 @@ namespace crucible {
|
|||||||
return offset + len;
|
return offset + len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
thread_local size_t BtrfsIoctlSearchKey::s_calls = 0;
|
||||||
|
thread_local size_t BtrfsIoctlSearchKey::s_loops = 0;
|
||||||
|
thread_local size_t BtrfsIoctlSearchKey::s_loops_empty = 0;
|
||||||
|
thread_local shared_ptr<ostream> BtrfsIoctlSearchKey::s_debug_ostream;
|
||||||
|
|
||||||
bool
|
bool
|
||||||
BtrfsIoctlSearchKey::do_ioctl_nothrow(int fd)
|
BtrfsIoctlSearchKey::do_ioctl_nothrow(int fd)
|
||||||
{
|
{
|
||||||
@@ -759,8 +777,17 @@ namespace crucible {
|
|||||||
ioctl_ptr = ioctl_arg.get<btrfs_ioctl_search_args_v2>();
|
ioctl_ptr = ioctl_arg.get<btrfs_ioctl_search_args_v2>();
|
||||||
ioctl_ptr->key = static_cast<const btrfs_ioctl_search_key&>(*this);
|
ioctl_ptr->key = static_cast<const btrfs_ioctl_search_key&>(*this);
|
||||||
ioctl_ptr->buf_size = buf_size;
|
ioctl_ptr->buf_size = buf_size;
|
||||||
|
if (s_debug_ostream) {
|
||||||
|
(*s_debug_ostream) << "bisk " << (ioctl_ptr->key) << "\n";
|
||||||
|
}
|
||||||
// Don't bother supporting V1. Kernels that old have other problems.
|
// Don't bother supporting V1. Kernels that old have other problems.
|
||||||
int rv = ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, ioctl_arg.data());
|
int rv = ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, ioctl_arg.data());
|
||||||
|
++s_calls;
|
||||||
|
if (rv != 0 && errno == ENOENT) {
|
||||||
|
// If we are searching a tree that is deleted or no longer exists, just return an empty list
|
||||||
|
ioctl_ptr->key.nr_items = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
if (rv != 0 && errno != EOVERFLOW) {
|
if (rv != 0 && errno != EOVERFLOW) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -782,6 +809,10 @@ namespace crucible {
|
|||||||
buf_size *= 2;
|
buf_size *= 2;
|
||||||
}
|
}
|
||||||
// don't automatically raise the buf size higher than 64K, the largest possible btrfs item
|
// don't automatically raise the buf size higher than 64K, the largest possible btrfs item
|
||||||
|
++s_loops;
|
||||||
|
if (ioctl_ptr->key.nr_items == 0) {
|
||||||
|
++s_loops_empty;
|
||||||
|
}
|
||||||
} while (buf_size < 65536);
|
} while (buf_size < 65536);
|
||||||
|
|
||||||
// ioctl changes nr_items, this has to be copied back
|
// ioctl changes nr_items, this has to be copied back
|
||||||
@@ -854,6 +885,26 @@ namespace crucible {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
string
|
||||||
|
btrfs_chunk_type_ntoa(uint64_t type)
|
||||||
|
{
|
||||||
|
static const bits_ntoa_table table[] = {
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_BLOCK_GROUP_DATA),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_BLOCK_GROUP_METADATA),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_BLOCK_GROUP_SYSTEM),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_BLOCK_GROUP_DUP),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_BLOCK_GROUP_RAID0),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_BLOCK_GROUP_RAID1),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_BLOCK_GROUP_RAID10),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_BLOCK_GROUP_RAID1C3),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_BLOCK_GROUP_RAID1C4),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_BLOCK_GROUP_RAID5),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_BLOCK_GROUP_RAID6),
|
||||||
|
NTOA_TABLE_ENTRY_END()
|
||||||
|
};
|
||||||
|
return bits_ntoa(type, table);
|
||||||
|
}
|
||||||
|
|
||||||
string
|
string
|
||||||
btrfs_search_type_ntoa(unsigned type)
|
btrfs_search_type_ntoa(unsigned type)
|
||||||
{
|
{
|
||||||
@@ -881,15 +932,9 @@ namespace crucible {
|
|||||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_SHARED_BLOCK_REF_KEY),
|
NTOA_TABLE_ENTRY_ENUM(BTRFS_SHARED_BLOCK_REF_KEY),
|
||||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_SHARED_DATA_REF_KEY),
|
NTOA_TABLE_ENTRY_ENUM(BTRFS_SHARED_DATA_REF_KEY),
|
||||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_BLOCK_GROUP_ITEM_KEY),
|
NTOA_TABLE_ENTRY_ENUM(BTRFS_BLOCK_GROUP_ITEM_KEY),
|
||||||
#ifdef BTRFS_FREE_SPACE_INFO_KEY
|
|
||||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_FREE_SPACE_INFO_KEY),
|
NTOA_TABLE_ENTRY_ENUM(BTRFS_FREE_SPACE_INFO_KEY),
|
||||||
#endif
|
|
||||||
#ifdef BTRFS_FREE_SPACE_EXTENT_KEY
|
|
||||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_FREE_SPACE_EXTENT_KEY),
|
NTOA_TABLE_ENTRY_ENUM(BTRFS_FREE_SPACE_EXTENT_KEY),
|
||||||
#endif
|
|
||||||
#ifdef BTRFS_FREE_SPACE_BITMAP_KEY
|
|
||||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_FREE_SPACE_BITMAP_KEY),
|
NTOA_TABLE_ENTRY_ENUM(BTRFS_FREE_SPACE_BITMAP_KEY),
|
||||||
#endif
|
|
||||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_DEV_EXTENT_KEY),
|
NTOA_TABLE_ENTRY_ENUM(BTRFS_DEV_EXTENT_KEY),
|
||||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_DEV_ITEM_KEY),
|
NTOA_TABLE_ENTRY_ENUM(BTRFS_DEV_ITEM_KEY),
|
||||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_CHUNK_ITEM_KEY),
|
NTOA_TABLE_ENTRY_ENUM(BTRFS_CHUNK_ITEM_KEY),
|
||||||
@@ -921,9 +966,7 @@ namespace crucible {
|
|||||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_CSUM_TREE_OBJECTID),
|
NTOA_TABLE_ENTRY_ENUM(BTRFS_CSUM_TREE_OBJECTID),
|
||||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_QUOTA_TREE_OBJECTID),
|
NTOA_TABLE_ENTRY_ENUM(BTRFS_QUOTA_TREE_OBJECTID),
|
||||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_UUID_TREE_OBJECTID),
|
NTOA_TABLE_ENTRY_ENUM(BTRFS_UUID_TREE_OBJECTID),
|
||||||
#ifdef BTRFS_FREE_SPACE_TREE_OBJECTID
|
|
||||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_FREE_SPACE_TREE_OBJECTID),
|
NTOA_TABLE_ENTRY_ENUM(BTRFS_FREE_SPACE_TREE_OBJECTID),
|
||||||
#endif
|
|
||||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_BALANCE_OBJECTID),
|
NTOA_TABLE_ENTRY_ENUM(BTRFS_BALANCE_OBJECTID),
|
||||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_ORPHAN_OBJECTID),
|
NTOA_TABLE_ENTRY_ENUM(BTRFS_ORPHAN_OBJECTID),
|
||||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_TREE_LOG_OBJECTID),
|
NTOA_TABLE_ENTRY_ENUM(BTRFS_TREE_LOG_OBJECTID),
|
||||||
@@ -944,6 +987,28 @@ namespace crucible {
|
|||||||
return bits_ntoa(objectid, table);
|
return bits_ntoa(objectid, table);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
string
|
||||||
|
btrfs_inode_flags_ntoa(uint64_t const inode_flags)
|
||||||
|
{
|
||||||
|
static const bits_ntoa_table table[] = {
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_INODE_NODATASUM),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_INODE_NODATACOW),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_INODE_READONLY),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_INODE_NOCOMPRESS),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_INODE_PREALLOC),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_INODE_SYNC),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_INODE_IMMUTABLE),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_INODE_APPEND),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_INODE_NODUMP),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_INODE_NOATIME),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_INODE_DIRSYNC),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_INODE_COMPRESS),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(BTRFS_INODE_ROOT_ITEM_INIT),
|
||||||
|
NTOA_TABLE_ENTRY_END()
|
||||||
|
};
|
||||||
|
return bits_ntoa(inode_flags, table);
|
||||||
|
}
|
||||||
|
|
||||||
ostream &
|
ostream &
|
||||||
operator<<(ostream &os, const btrfs_ioctl_search_key &key)
|
operator<<(ostream &os, const btrfs_ioctl_search_key &key)
|
||||||
{
|
{
|
||||||
@@ -1111,11 +1176,17 @@ namespace crucible {
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
bool
|
||||||
BtrfsIoctlFsInfoArgs::do_ioctl(int fd)
|
BtrfsIoctlFsInfoArgs::do_ioctl_nothrow(int const fd)
|
||||||
{
|
{
|
||||||
btrfs_ioctl_fs_info_args_v3 *p = static_cast<btrfs_ioctl_fs_info_args_v3 *>(this);
|
btrfs_ioctl_fs_info_args_v3 *p = static_cast<btrfs_ioctl_fs_info_args_v3 *>(this);
|
||||||
if (ioctl(fd, BTRFS_IOC_FS_INFO, p)) {
|
return 0 == ioctl(fd, BTRFS_IOC_FS_INFO, p);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
BtrfsIoctlFsInfoArgs::do_ioctl(int const fd)
|
||||||
|
{
|
||||||
|
if (!do_ioctl_nothrow(fd)) {
|
||||||
THROW_ERRNO("BTRFS_IOC_FS_INFO: fd " << fd);
|
THROW_ERRNO("BTRFS_IOC_FS_INFO: fd " << fd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1132,6 +1203,13 @@ namespace crucible {
|
|||||||
return this->btrfs_ioctl_fs_info_args_v3::csum_size;
|
return this->btrfs_ioctl_fs_info_args_v3::csum_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vector<uint8_t>
|
||||||
|
BtrfsIoctlFsInfoArgs::fsid() const
|
||||||
|
{
|
||||||
|
const auto begin = btrfs_ioctl_fs_info_args_v3::fsid;
|
||||||
|
return vector<uint8_t>(begin, begin + BTRFS_FSID_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
uint64_t
|
uint64_t
|
||||||
BtrfsIoctlFsInfoArgs::generation() const
|
BtrfsIoctlFsInfoArgs::generation() const
|
||||||
{
|
{
|
||||||
|
@@ -62,11 +62,22 @@ namespace crucible {
|
|||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static MultiLocker s_process_instance;
|
||||||
|
|
||||||
shared_ptr<MultiLocker::LockHandle>
|
shared_ptr<MultiLocker::LockHandle>
|
||||||
MultiLocker::get_lock(const string &type)
|
MultiLocker::get_lock(const string &type)
|
||||||
{
|
{
|
||||||
static MultiLocker s_process_instance;
|
if (s_process_instance.m_do_locking) {
|
||||||
return s_process_instance.get_lock_private(type);
|
return s_process_instance.get_lock_private(type);
|
||||||
|
} else {
|
||||||
|
return shared_ptr<MultiLocker::LockHandle>();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
MultiLocker::enable_locking(const bool enabled)
|
||||||
|
{
|
||||||
|
s_process_instance.m_do_locking = enabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
31
lib/openat2.cc
Normal file
31
lib/openat2.cc
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
#include "crucible/openat2.h"
|
||||||
|
|
||||||
|
#include <sys/syscall.h>
|
||||||
|
|
||||||
|
// Compatibility for building on old libc for new kernel
|
||||||
|
|
||||||
|
// Every arch that defines this (so far) uses 437, except Alpha, where 437 is
|
||||||
|
// mq_getsetattr.
|
||||||
|
|
||||||
|
#ifndef SYS_openat2
|
||||||
|
#ifdef __alpha__
|
||||||
|
#define SYS_openat2 547
|
||||||
|
#else
|
||||||
|
#define SYS_openat2 437
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
|
||||||
|
int
|
||||||
|
__attribute__((weak))
|
||||||
|
openat2(int const dirfd, const char *const pathname, struct open_how *const how, size_t const size)
|
||||||
|
throw()
|
||||||
|
{
|
||||||
|
return syscall(SYS_openat2, dirfd, pathname, how, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
@@ -7,13 +7,18 @@
|
|||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
// for gettid()
|
|
||||||
#ifndef _GNU_SOURCE
|
|
||||||
#define _GNU_SOURCE
|
|
||||||
#endif
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <sys/syscall.h>
|
#include <sys/syscall.h>
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
pid_t
|
||||||
|
__attribute__((weak))
|
||||||
|
gettid() throw()
|
||||||
|
{
|
||||||
|
return syscall(SYS_gettid);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
namespace crucible {
|
namespace crucible {
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
@@ -111,12 +116,6 @@ namespace crucible {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pid_t
|
|
||||||
gettid()
|
|
||||||
{
|
|
||||||
return syscall(SYS_gettid);
|
|
||||||
}
|
|
||||||
|
|
||||||
double
|
double
|
||||||
getloadavg1()
|
getloadavg1()
|
||||||
{
|
{
|
||||||
|
7
lib/seeker.cc
Normal file
7
lib/seeker.cc
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
#include "crucible/seeker.h"
|
||||||
|
|
||||||
|
namespace crucible {
|
||||||
|
|
||||||
|
thread_local shared_ptr<ostream> tl_seeker_debug_str;
|
||||||
|
|
||||||
|
};
|
254
lib/table.cc
Normal file
254
lib/table.cc
Normal file
@@ -0,0 +1,254 @@
|
|||||||
|
#include "crucible/table.h"
|
||||||
|
|
||||||
|
#include "crucible/string.h"
|
||||||
|
|
||||||
|
namespace crucible {
|
||||||
|
namespace Table {
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
Content
|
||||||
|
Fill(const char c)
|
||||||
|
{
|
||||||
|
return [=](size_t width, size_t height) -> string {
|
||||||
|
string rv;
|
||||||
|
while (height--) {
|
||||||
|
rv += string(width, c);
|
||||||
|
if (height) {
|
||||||
|
rv += "\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
Content
|
||||||
|
Text(const string &s)
|
||||||
|
{
|
||||||
|
return [=](size_t width, size_t height) -> string {
|
||||||
|
const auto lines = split("\n", s);
|
||||||
|
string rv;
|
||||||
|
size_t line_count = 0;
|
||||||
|
for (const auto &i : lines) {
|
||||||
|
if (line_count++) {
|
||||||
|
rv += "\n";
|
||||||
|
}
|
||||||
|
if (i.length() < width) {
|
||||||
|
rv += string(width - i.length(), ' ');
|
||||||
|
}
|
||||||
|
rv += i;
|
||||||
|
}
|
||||||
|
while (line_count < height) {
|
||||||
|
if (line_count++) {
|
||||||
|
rv += "\n";
|
||||||
|
}
|
||||||
|
rv += string(width, ' ');
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
Content
|
||||||
|
Number(const string &s)
|
||||||
|
{
|
||||||
|
return [=](size_t width, size_t height) -> string {
|
||||||
|
const auto lines = split("\n", s);
|
||||||
|
string rv;
|
||||||
|
size_t line_count = 0;
|
||||||
|
for (const auto &i : lines) {
|
||||||
|
if (line_count++) {
|
||||||
|
rv += "\n";
|
||||||
|
}
|
||||||
|
if (i.length() < width) {
|
||||||
|
rv += string(width - i.length(), ' ');
|
||||||
|
}
|
||||||
|
rv += i;
|
||||||
|
}
|
||||||
|
while (line_count < height) {
|
||||||
|
if (line_count++) {
|
||||||
|
rv += "\n";
|
||||||
|
}
|
||||||
|
rv += string(width, ' ');
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
Cell::Cell(const Content &fn) :
|
||||||
|
m_content(fn)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
Cell&
|
||||||
|
Cell::operator=(const Content &fn)
|
||||||
|
{
|
||||||
|
m_content = fn;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
string
|
||||||
|
Cell::text(size_t width, size_t height) const
|
||||||
|
{
|
||||||
|
return m_content(width, height);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t
|
||||||
|
Dimension::size() const
|
||||||
|
{
|
||||||
|
return m_elements.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t
|
||||||
|
Dimension::insert(size_t pos)
|
||||||
|
{
|
||||||
|
++m_next_pos;
|
||||||
|
const auto insert_pos = min(m_elements.size(), pos);
|
||||||
|
const auto it = m_elements.begin() + insert_pos;
|
||||||
|
m_elements.insert(it, m_next_pos);
|
||||||
|
return insert_pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Dimension::erase(size_t pos)
|
||||||
|
{
|
||||||
|
const auto it = m_elements.begin() + min(m_elements.size(), pos);
|
||||||
|
m_elements.erase(it);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t
|
||||||
|
Dimension::at(size_t pos) const
|
||||||
|
{
|
||||||
|
return m_elements.at(pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
Dimension&
|
||||||
|
Table::rows()
|
||||||
|
{
|
||||||
|
return m_rows;
|
||||||
|
};
|
||||||
|
|
||||||
|
const Dimension&
|
||||||
|
Table::rows() const
|
||||||
|
{
|
||||||
|
return m_rows;
|
||||||
|
};
|
||||||
|
|
||||||
|
Dimension&
|
||||||
|
Table::cols()
|
||||||
|
{
|
||||||
|
return m_cols;
|
||||||
|
};
|
||||||
|
|
||||||
|
const Dimension&
|
||||||
|
Table::cols() const
|
||||||
|
{
|
||||||
|
return m_cols;
|
||||||
|
};
|
||||||
|
|
||||||
|
const Cell&
|
||||||
|
Table::at(size_t row, size_t col) const
|
||||||
|
{
|
||||||
|
const auto row_idx = m_rows.at(row);
|
||||||
|
const auto col_idx = m_cols.at(col);
|
||||||
|
const auto found = m_cells.find(make_pair(row_idx, col_idx));
|
||||||
|
if (found == m_cells.end()) {
|
||||||
|
static const Cell s_empty(Fill('.'));
|
||||||
|
return s_empty;
|
||||||
|
}
|
||||||
|
return found->second;
|
||||||
|
};
|
||||||
|
|
||||||
|
Cell&
|
||||||
|
Table::at(size_t row, size_t col)
|
||||||
|
{
|
||||||
|
const auto row_idx = m_rows.at(row);
|
||||||
|
const auto col_idx = m_cols.at(col);
|
||||||
|
return m_cells[make_pair(row_idx, col_idx)];
|
||||||
|
};
|
||||||
|
|
||||||
|
static
|
||||||
|
pair<size_t, size_t>
|
||||||
|
text_size(const string &s)
|
||||||
|
{
|
||||||
|
const auto s_split = split("\n", s);
|
||||||
|
size_t width = 0;
|
||||||
|
for (const auto &i : s_split) {
|
||||||
|
width = max(width, i.length());
|
||||||
|
}
|
||||||
|
return make_pair(width, s_split.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
ostream& operator<<(ostream &os, const Table &table)
|
||||||
|
{
|
||||||
|
const auto rows = table.rows().size();
|
||||||
|
const auto cols = table.cols().size();
|
||||||
|
vector<size_t> row_heights(rows, 1);
|
||||||
|
vector<size_t> col_widths(cols, 1);
|
||||||
|
// Get the size of all fixed- and minimum-sized content cells
|
||||||
|
for (size_t row = 0; row < table.rows().size(); ++row) {
|
||||||
|
vector<string> col_text;
|
||||||
|
for (size_t col = 0; col < table.cols().size(); ++col) {
|
||||||
|
col_text.push_back(table.at(row, col).text(0, 0));
|
||||||
|
const auto tsize = text_size(*col_text.rbegin());
|
||||||
|
row_heights[row] = max(row_heights[row], tsize.second);
|
||||||
|
col_widths[col] = max(col_widths[col], tsize.first);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Render the table
|
||||||
|
for (size_t row = 0; row < table.rows().size(); ++row) {
|
||||||
|
vector<string> lines(row_heights[row], "");
|
||||||
|
for (size_t col = 0; col < table.cols().size(); ++col) {
|
||||||
|
const auto& table_cell = table.at(row, col);
|
||||||
|
const auto table_text = table_cell.text(col_widths[col], row_heights[row]);
|
||||||
|
auto col_lines = split("\n", table_text);
|
||||||
|
col_lines.resize(row_heights[row], "");
|
||||||
|
for (size_t line = 0; line < row_heights[row]; ++line) {
|
||||||
|
if (col > 0) {
|
||||||
|
lines[line] += table.mid();
|
||||||
|
}
|
||||||
|
lines[line] += col_lines[line];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (const auto &line : lines) {
|
||||||
|
os << table.left() << line << table.right() << "\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Table::left(const string &s)
|
||||||
|
{
|
||||||
|
m_left = s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Table::mid(const string &s)
|
||||||
|
{
|
||||||
|
m_mid = s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Table::right(const string &s)
|
||||||
|
{
|
||||||
|
m_right = s;
|
||||||
|
}
|
||||||
|
|
||||||
|
const string&
|
||||||
|
Table::left() const
|
||||||
|
{
|
||||||
|
return m_left;
|
||||||
|
}
|
||||||
|
|
||||||
|
const string&
|
||||||
|
Table::mid() const
|
||||||
|
{
|
||||||
|
return m_mid;
|
||||||
|
}
|
||||||
|
|
||||||
|
const string&
|
||||||
|
Table::right() const
|
||||||
|
{
|
||||||
|
return m_right;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
187
lib/task.cc
187
lib/task.cc
@@ -76,13 +76,24 @@ namespace crucible {
|
|||||||
/// Tasks to be executed after the current task is executed
|
/// Tasks to be executed after the current task is executed
|
||||||
list<TaskStatePtr> m_post_exec_queue;
|
list<TaskStatePtr> m_post_exec_queue;
|
||||||
|
|
||||||
/// Set by run() and append(). Cleared by exec().
|
/// Set by run(), append(), and insert(). Cleared by exec().
|
||||||
bool m_run_now = false;
|
bool m_run_now = false;
|
||||||
|
|
||||||
|
/// Set by insert(). Cleared by exec() and destructor.
|
||||||
|
bool m_sort_queue = false;
|
||||||
|
|
||||||
/// Set when task starts execution by exec().
|
/// Set when task starts execution by exec().
|
||||||
/// Cleared when exec() ends.
|
/// Cleared when exec() ends.
|
||||||
bool m_is_running = false;
|
bool m_is_running = false;
|
||||||
|
|
||||||
|
/// Set when task is queued while already running.
|
||||||
|
/// Cleared when task is requeued.
|
||||||
|
bool m_run_again = false;
|
||||||
|
|
||||||
|
/// Set when task is queued as idle task while already running.
|
||||||
|
/// Cleared when task is queued as non-idle task.
|
||||||
|
bool m_idle = false;
|
||||||
|
|
||||||
/// Sequential identifier for next task
|
/// Sequential identifier for next task
|
||||||
static atomic<TaskId> s_next_id;
|
static atomic<TaskId> s_next_id;
|
||||||
|
|
||||||
@@ -107,7 +118,7 @@ namespace crucible {
|
|||||||
static void clear_queue(TaskQueue &tq);
|
static void clear_queue(TaskQueue &tq);
|
||||||
|
|
||||||
/// Rescue any TaskQueue, not just this one.
|
/// Rescue any TaskQueue, not just this one.
|
||||||
static void rescue_queue(TaskQueue &tq);
|
static void rescue_queue(TaskQueue &tq, const bool sort_queue);
|
||||||
|
|
||||||
TaskState &operator=(const TaskState &) = delete;
|
TaskState &operator=(const TaskState &) = delete;
|
||||||
TaskState(const TaskState &) = delete;
|
TaskState(const TaskState &) = delete;
|
||||||
@@ -124,6 +135,9 @@ namespace crucible {
|
|||||||
/// instance at the end of TaskMaster's global queue.
|
/// instance at the end of TaskMaster's global queue.
|
||||||
void run();
|
void run();
|
||||||
|
|
||||||
|
/// Run the task when there are no more Tasks on the main queue.
|
||||||
|
void idle();
|
||||||
|
|
||||||
/// Execute task immediately in current thread if it is not already
|
/// Execute task immediately in current thread if it is not already
|
||||||
/// executing in another thread; otherwise, append the current task
|
/// executing in another thread; otherwise, append the current task
|
||||||
/// to itself to be executed immediately in the other thread.
|
/// to itself to be executed immediately in the other thread.
|
||||||
@@ -139,6 +153,10 @@ namespace crucible {
|
|||||||
/// or is destroyed.
|
/// or is destroyed.
|
||||||
void append(const TaskStatePtr &task);
|
void append(const TaskStatePtr &task);
|
||||||
|
|
||||||
|
/// Queue task to execute after current task finishes executing
|
||||||
|
/// or is destroyed, in task ID order.
|
||||||
|
void insert(const TaskStatePtr &task);
|
||||||
|
|
||||||
/// How many Tasks are there? Good for catching leaks
|
/// How many Tasks are there? Good for catching leaks
|
||||||
static size_t instance_count();
|
static size_t instance_count();
|
||||||
};
|
};
|
||||||
@@ -150,6 +168,7 @@ namespace crucible {
|
|||||||
mutex m_mutex;
|
mutex m_mutex;
|
||||||
condition_variable m_condvar;
|
condition_variable m_condvar;
|
||||||
TaskQueue m_queue;
|
TaskQueue m_queue;
|
||||||
|
TaskQueue m_idle_queue;
|
||||||
size_t m_thread_max;
|
size_t m_thread_max;
|
||||||
size_t m_thread_min = 0;
|
size_t m_thread_min = 0;
|
||||||
set<TaskConsumerPtr> m_threads;
|
set<TaskConsumerPtr> m_threads;
|
||||||
@@ -184,6 +203,7 @@ namespace crucible {
|
|||||||
TaskMasterState(size_t thread_max = thread::hardware_concurrency());
|
TaskMasterState(size_t thread_max = thread::hardware_concurrency());
|
||||||
|
|
||||||
static void push_back(const TaskStatePtr &task);
|
static void push_back(const TaskStatePtr &task);
|
||||||
|
static void push_back_idle(const TaskStatePtr &task);
|
||||||
static void push_front(TaskQueue &queue);
|
static void push_front(TaskQueue &queue);
|
||||||
size_t get_queue_count();
|
size_t get_queue_count();
|
||||||
size_t get_thread_count();
|
size_t get_thread_count();
|
||||||
@@ -214,16 +234,21 @@ namespace crucible {
|
|||||||
static auto s_tms = make_shared<TaskMasterState>();
|
static auto s_tms = make_shared<TaskMasterState>();
|
||||||
|
|
||||||
void
|
void
|
||||||
TaskState::rescue_queue(TaskQueue &queue)
|
TaskState::rescue_queue(TaskQueue &queue, const bool sort_queue)
|
||||||
{
|
{
|
||||||
if (queue.empty()) {
|
if (queue.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const auto tlcc = tl_current_consumer;
|
const auto &tlcc = tl_current_consumer;
|
||||||
if (tlcc) {
|
if (tlcc) {
|
||||||
// We are executing under a TaskConsumer, splice our post-exec queue at front.
|
// We are executing under a TaskConsumer, splice our post-exec queue at front.
|
||||||
// No locks needed because we are using only thread-local objects.
|
// No locks needed because we are using only thread-local objects.
|
||||||
tlcc->m_local_queue.splice(tlcc->m_local_queue.begin(), queue);
|
tlcc->m_local_queue.splice(tlcc->m_local_queue.begin(), queue);
|
||||||
|
if (sort_queue) {
|
||||||
|
tlcc->m_local_queue.sort([&](const TaskStatePtr &a, const TaskStatePtr &b) {
|
||||||
|
return a->m_id < b->m_id;
|
||||||
|
});
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// We are not executing under a TaskConsumer.
|
// We are not executing under a TaskConsumer.
|
||||||
// If there is only one task, then just insert it at the front of the queue.
|
// If there is only one task, then just insert it at the front of the queue.
|
||||||
@@ -234,6 +259,8 @@ namespace crucible {
|
|||||||
// then push it to the front of the global queue using normal locking methods.
|
// then push it to the front of the global queue using normal locking methods.
|
||||||
TaskStatePtr rescue_task(make_shared<TaskState>("rescue_task", [](){}));
|
TaskStatePtr rescue_task(make_shared<TaskState>("rescue_task", [](){}));
|
||||||
swap(rescue_task->m_post_exec_queue, queue);
|
swap(rescue_task->m_post_exec_queue, queue);
|
||||||
|
// Do the sort--once--when a new Consumer has picked up the Task
|
||||||
|
rescue_task->m_sort_queue = sort_queue;
|
||||||
TaskQueue tq_one { rescue_task };
|
TaskQueue tq_one { rescue_task };
|
||||||
TaskMasterState::push_front(tq_one);
|
TaskMasterState::push_front(tq_one);
|
||||||
}
|
}
|
||||||
@@ -246,7 +273,8 @@ namespace crucible {
|
|||||||
--s_instance_count;
|
--s_instance_count;
|
||||||
unique_lock<mutex> lock(m_mutex);
|
unique_lock<mutex> lock(m_mutex);
|
||||||
// If any dependent Tasks were appended since the last exec, run them now
|
// If any dependent Tasks were appended since the last exec, run them now
|
||||||
TaskState::rescue_queue(m_post_exec_queue);
|
TaskState::rescue_queue(m_post_exec_queue, m_sort_queue);
|
||||||
|
// No need to clear m_sort_queue here, it won't exist soon
|
||||||
}
|
}
|
||||||
|
|
||||||
TaskState::TaskState(string title, function<void()> exec_fn) :
|
TaskState::TaskState(string title, function<void()> exec_fn) :
|
||||||
@@ -305,6 +333,24 @@ namespace crucible {
|
|||||||
task->m_run_now = true;
|
task->m_run_now = true;
|
||||||
append_nolock(task);
|
append_nolock(task);
|
||||||
}
|
}
|
||||||
|
task->m_idle = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Queue `task` behind this task and request that the queue be sorted.
///
/// If `task` is not already marked m_run_now, it is marked, pushed onto
/// this task's m_post_exec_queue, and its own post-exec queue is spliced
/// in after it; m_sort_queue is set so the queue gets sorted when it is
/// later handed to rescue_queue.  If `task` is already marked m_run_now,
/// the queues are left untouched.  In both cases task->m_idle is cleared.
///
/// The two THROW_CHECK lines reject a null `task` and a `task` whose ID
/// equals this task's ID (presumably by throwing invalid_argument; the
/// macros are defined outside this view -- TODO confirm).
/// PairLock is given both tasks' mutexes (presumably locking both for
/// the duration of the call -- defined outside this view).
void
|
||||||
|
TaskState::insert(const TaskStatePtr &task)
|
||||||
|
{
|
||||||
|
THROW_CHECK0(invalid_argument, task);
|
||||||
|
THROW_CHECK2(invalid_argument, m_id, task->m_id, m_id != task->m_id);
|
||||||
|
PairLock lock(m_mutex, task->m_mutex);
|
||||||
|
if (!task->m_run_now) {
|
||||||
|
task->m_run_now = true;
|
||||||
|
// Move the task and its post-exec queue to follow this task,
|
||||||
|
// and request a sort of the flattened list.
|
||||||
|
m_sort_queue = true;
|
||||||
|
m_post_exec_queue.push_back(task);
|
||||||
|
m_post_exec_queue.splice(m_post_exec_queue.end(), task->m_post_exec_queue);
|
||||||
|
}
|
||||||
|
task->m_idle = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -315,7 +361,7 @@ namespace crucible {
|
|||||||
|
|
||||||
unique_lock<mutex> lock(m_mutex);
|
unique_lock<mutex> lock(m_mutex);
|
||||||
if (m_is_running) {
|
if (m_is_running) {
|
||||||
append_nolock(shared_from_this());
|
m_run_again = true;
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
m_run_now = false;
|
m_run_now = false;
|
||||||
@@ -339,8 +385,20 @@ namespace crucible {
|
|||||||
swap(this_task, tl_current_task);
|
swap(this_task, tl_current_task);
|
||||||
m_is_running = false;
|
m_is_running = false;
|
||||||
|
|
||||||
|
if (m_run_again) {
|
||||||
|
m_run_again = false;
|
||||||
|
if (m_idle) {
|
||||||
|
// All the way back to the end of the line
|
||||||
|
TaskMasterState::push_back_idle(shared_from_this());
|
||||||
|
} else {
|
||||||
|
// Insert after any dependents waiting for this Task
|
||||||
|
m_post_exec_queue.push_back(shared_from_this());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Splice task post_exec queue at front of local queue
|
// Splice task post_exec queue at front of local queue
|
||||||
TaskState::rescue_queue(m_post_exec_queue);
|
TaskState::rescue_queue(m_post_exec_queue, m_sort_queue);
|
||||||
|
m_sort_queue = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
string
|
string
|
||||||
@@ -360,11 +418,32 @@ namespace crucible {
|
|||||||
TaskState::run()
|
TaskState::run()
|
||||||
{
|
{
|
||||||
unique_lock<mutex> lock(m_mutex);
|
unique_lock<mutex> lock(m_mutex);
|
||||||
|
m_idle = false;
|
||||||
if (m_run_now) {
|
if (m_run_now) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
m_run_now = true;
|
m_run_now = true;
|
||||||
TaskMasterState::push_back(shared_from_this());
|
if (m_is_running) {
|
||||||
|
m_run_again = true;
|
||||||
|
} else {
|
||||||
|
TaskMasterState::push_back(shared_from_this());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Schedule this task as an idle task.
///
/// Always sets m_idle.  If the task is already scheduled (m_run_now is
/// set) nothing else happens.  Otherwise m_run_now is set and either:
///  - the task is currently executing (m_is_running): only m_run_again
///    is set, so the task is not queued here; or
///  - the task is not executing: it is handed to
///    TaskMasterState::push_back_idle().
/// All of this happens while holding m_mutex.
void
|
||||||
|
TaskState::idle()
|
||||||
|
{
|
||||||
|
unique_lock<mutex> lock(m_mutex);
|
||||||
|
m_idle = true;
|
||||||
|
if (m_run_now) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
m_run_now = true;
|
||||||
|
if (m_is_running) {
|
||||||
|
m_run_again = true;
|
||||||
|
} else {
|
||||||
|
TaskMasterState::push_back_idle(shared_from_this());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TaskMasterState::TaskMasterState(size_t thread_max) :
|
TaskMasterState::TaskMasterState(size_t thread_max) :
|
||||||
@@ -410,6 +489,20 @@ namespace crucible {
|
|||||||
s_tms->start_threads_nolock();
|
s_tms->start_threads_nolock();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Append `task` to the idle queue of the shared TaskMasterState (s_tms).
///
/// Runs under s_tms->m_mutex.  If the master has been cancelled, the
/// task is cleared and not queued.  Otherwise the task is pushed to the
/// back of m_idle_queue, all waiters on m_condvar are notified, and
/// start_threads_nolock() is called.
/// THROW_CHECK0 rejects a null `task` (presumably by throwing
/// runtime_error; the macro is defined outside this view -- TODO confirm).
void
|
||||||
|
TaskMasterState::push_back_idle(const TaskStatePtr &task)
|
||||||
|
{
|
||||||
|
THROW_CHECK0(runtime_error, task);
|
||||||
|
unique_lock<mutex> lock(s_tms->m_mutex);
|
||||||
|
if (s_tms->m_cancelled) {
|
||||||
|
task->clear();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
s_tms->m_idle_queue.push_back(task);
|
||||||
|
s_tms->m_condvar.notify_all();
|
||||||
|
s_tms->start_threads_nolock();
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
TaskMasterState::push_front(TaskQueue &queue)
|
TaskMasterState::push_front(TaskQueue &queue)
|
||||||
{
|
{
|
||||||
@@ -456,12 +549,26 @@ namespace crucible {
|
|||||||
TaskMaster::print_queue(ostream &os)
|
TaskMaster::print_queue(ostream &os)
|
||||||
{
|
{
|
||||||
unique_lock<mutex> lock(s_tms->m_mutex);
|
unique_lock<mutex> lock(s_tms->m_mutex);
|
||||||
os << "Queue (size " << s_tms->m_queue.size() << "):" << endl;
|
auto queue_copy = s_tms->m_queue;
|
||||||
|
lock.unlock();
|
||||||
|
os << "Queue (size " << queue_copy.size() << "):" << endl;
|
||||||
size_t counter = 0;
|
size_t counter = 0;
|
||||||
for (auto i : s_tms->m_queue) {
|
for (auto i : queue_copy) {
|
||||||
os << "Queue #" << ++counter << " Task ID " << i->id() << " " << i->title() << endl;
|
os << "Queue #" << ++counter << " Task ID " << i->id() << " " << i->title() << endl;
|
||||||
}
|
}
|
||||||
return os << "Queue End" << endl;
|
os << "Queue End" << endl;
|
||||||
|
|
||||||
|
lock.lock();
|
||||||
|
queue_copy = s_tms->m_idle_queue;
|
||||||
|
lock.unlock();
|
||||||
|
os << "Idle (size " << queue_copy.size() << "):" << endl;
|
||||||
|
counter = 0;
|
||||||
|
for (const auto &i : queue_copy) {
|
||||||
|
os << "Idle #" << ++counter << " Task ID " << i->id() << " " << i->title() << endl;
|
||||||
|
}
|
||||||
|
os << "Idle End" << endl;
|
||||||
|
|
||||||
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
ostream &
|
ostream &
|
||||||
@@ -486,11 +593,6 @@ namespace crucible {
|
|||||||
size_t
|
size_t
|
||||||
TaskMasterState::calculate_thread_count_nolock()
|
TaskMasterState::calculate_thread_count_nolock()
|
||||||
{
|
{
|
||||||
if (m_paused) {
|
|
||||||
// No threads running while paused or cancelled
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (m_load_target == 0) {
|
if (m_load_target == 0) {
|
||||||
// No limits, no stats, use configured thread count
|
// No limits, no stats, use configured thread count
|
||||||
return m_configured_thread_max;
|
return m_configured_thread_max;
|
||||||
@@ -583,6 +685,7 @@ namespace crucible {
|
|||||||
m_cancelled = true;
|
m_cancelled = true;
|
||||||
decltype(m_queue) empty_queue;
|
decltype(m_queue) empty_queue;
|
||||||
m_queue.swap(empty_queue);
|
m_queue.swap(empty_queue);
|
||||||
|
empty_queue.splice(empty_queue.end(), m_idle_queue);
|
||||||
m_condvar.notify_all();
|
m_condvar.notify_all();
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
TaskState::clear_queue(empty_queue);
|
TaskState::clear_queue(empty_queue);
|
||||||
@@ -600,6 +703,9 @@ namespace crucible {
|
|||||||
unique_lock<mutex> lock(m_mutex);
|
unique_lock<mutex> lock(m_mutex);
|
||||||
m_paused = paused;
|
m_paused = paused;
|
||||||
m_condvar.notify_all();
|
m_condvar.notify_all();
|
||||||
|
if (!m_paused) {
|
||||||
|
start_threads_nolock();
|
||||||
|
}
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -648,7 +754,7 @@ namespace crucible {
|
|||||||
m_prev_loadavg = getloadavg1();
|
m_prev_loadavg = getloadavg1();
|
||||||
|
|
||||||
if (target && !m_load_tracking_thread) {
|
if (target && !m_load_tracking_thread) {
|
||||||
m_load_tracking_thread = make_shared<thread>([=] () { loadavg_thread_fn(); });
|
m_load_tracking_thread = make_shared<thread>([this] () { loadavg_thread_fn(); });
|
||||||
m_load_tracking_thread->detach();
|
m_load_tracking_thread->detach();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -682,6 +788,13 @@ namespace crucible {
|
|||||||
m_task_state->run();
|
m_task_state->run();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Forward to idle() on the underlying task state.
/// THROW_CHECK0 rejects a Task whose m_task_state is empty (presumably
/// by throwing runtime_error; the macro is defined outside this view).
void
|
||||||
|
Task::idle() const
|
||||||
|
{
|
||||||
|
THROW_CHECK0(runtime_error, m_task_state);
|
||||||
|
m_task_state->idle();
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
Task::append(const Task &that) const
|
Task::append(const Task &that) const
|
||||||
{
|
{
|
||||||
@@ -690,6 +803,14 @@ namespace crucible {
|
|||||||
m_task_state->append(that.m_task_state);
|
m_task_state->append(that.m_task_state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Forward to insert() on the underlying task state, passing the task
/// state held by `that`.
/// The THROW_CHECK0 lines reject an empty m_task_state and a false-valued
/// `that` (presumably by throwing runtime_error; the macro and Task's
/// boolean conversion are defined outside this view -- TODO confirm).
void
|
||||||
|
Task::insert(const Task &that) const
|
||||||
|
{
|
||||||
|
THROW_CHECK0(runtime_error, m_task_state);
|
||||||
|
THROW_CHECK0(runtime_error, that);
|
||||||
|
m_task_state->insert(that.m_task_state);
|
||||||
|
}
|
||||||
|
|
||||||
Task
|
Task
|
||||||
Task::current_task()
|
Task::current_task()
|
||||||
{
|
{
|
||||||
@@ -772,6 +893,9 @@ namespace crucible {
|
|||||||
} else if (!master_copy->m_queue.empty()) {
|
} else if (!master_copy->m_queue.empty()) {
|
||||||
m_current_task = *master_copy->m_queue.begin();
|
m_current_task = *master_copy->m_queue.begin();
|
||||||
master_copy->m_queue.pop_front();
|
master_copy->m_queue.pop_front();
|
||||||
|
} else if (!master_copy->m_idle_queue.empty()) {
|
||||||
|
m_current_task = *master_copy->m_idle_queue.begin();
|
||||||
|
master_copy->m_idle_queue.pop_front();
|
||||||
} else {
|
} else {
|
||||||
master_copy->m_condvar.wait(lock);
|
master_copy->m_condvar.wait(lock);
|
||||||
continue;
|
continue;
|
||||||
@@ -801,11 +925,13 @@ namespace crucible {
|
|||||||
swap(this_consumer, tl_current_consumer);
|
swap(this_consumer, tl_current_consumer);
|
||||||
assert(!tl_current_consumer);
|
assert(!tl_current_consumer);
|
||||||
|
|
||||||
// Release lock to rescue queue (may attempt to queue a new task at TaskMaster).
|
// Release lock to rescue queue (may attempt to queue a
|
||||||
// rescue_queue normally sends tasks to the local queue of the current TaskConsumer thread,
|
// new task at TaskMaster). rescue_queue normally sends
|
||||||
// but we just disconnected ourselves from that.
|
// tasks to the local queue of the current TaskConsumer
|
||||||
|
// thread, but we just disconnected ourselves from that.
|
||||||
|
// No sorting here because this is not a TaskState.
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
TaskState::rescue_queue(m_local_queue);
|
TaskState::rescue_queue(m_local_queue, false);
|
||||||
|
|
||||||
// Hold lock so we can erase ourselves
|
// Hold lock so we can erase ourselves
|
||||||
lock.lock();
|
lock.lock();
|
||||||
@@ -818,7 +944,7 @@ namespace crucible {
|
|||||||
TaskConsumer::TaskConsumer(const shared_ptr<TaskMasterState> &tms) :
|
TaskConsumer::TaskConsumer(const shared_ptr<TaskMasterState> &tms) :
|
||||||
m_master(tms)
|
m_master(tms)
|
||||||
{
|
{
|
||||||
m_thread = make_shared<thread>([=](){ consumer_thread(); });
|
m_thread = make_shared<thread>([this](){ consumer_thread(); });
|
||||||
}
|
}
|
||||||
|
|
||||||
class BarrierState {
|
class BarrierState {
|
||||||
@@ -883,21 +1009,6 @@ namespace crucible {
|
|||||||
m_owner.reset();
|
m_owner.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Run `task` now, or defer it behind the current owner of this Exclusion.
/// m_owner.lock() is sampled while holding m_mutex, and the mutex is
/// released before acting on the result.  A non-null owner gets `task`
/// appended to it; with no owner, `task` is run directly.
/// NOTE(review): m_owner looks like a weak_ptr to the owning Task --
/// its declaration is outside this view, confirm.
void
|
|
||||||
Exclusion::insert_task(const Task &task)
|
|
||||||
{
|
|
||||||
unique_lock<mutex> lock(m_mutex);
|
|
||||||
const auto sp = m_owner.lock();
|
|
||||||
lock.unlock();
|
|
||||||
if (sp) {
|
|
||||||
// If Exclusion is locked then queue task for release;
|
|
||||||
sp->append(task);
|
|
||||||
} else {
|
|
||||||
// otherwise, run the inserted task immediately
|
|
||||||
task.run();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ExclusionLock
|
ExclusionLock
|
||||||
Exclusion::try_lock(const Task &task)
|
Exclusion::try_lock(const Task &task)
|
||||||
{
|
{
|
||||||
@@ -905,7 +1016,7 @@ namespace crucible {
|
|||||||
const auto sp = m_owner.lock();
|
const auto sp = m_owner.lock();
|
||||||
if (sp) {
|
if (sp) {
|
||||||
if (task) {
|
if (task) {
|
||||||
sp->append(task);
|
sp->insert(task);
|
||||||
}
|
}
|
||||||
return ExclusionLock();
|
return ExclusionLock();
|
||||||
} else {
|
} else {
|
||||||
|
27
lib/time.cc
27
lib/time.cc
@@ -98,12 +98,16 @@ namespace crucible {
|
|||||||
m_rate(rate),
|
m_rate(rate),
|
||||||
m_burst(burst)
|
m_burst(burst)
|
||||||
{
|
{
|
||||||
|
THROW_CHECK1(invalid_argument, m_rate, m_rate > 0);
|
||||||
|
THROW_CHECK1(invalid_argument, m_burst, m_burst >= 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
RateLimiter::RateLimiter(double rate) :
|
RateLimiter::RateLimiter(double rate) :
|
||||||
m_rate(rate),
|
m_rate(rate),
|
||||||
m_burst(rate)
|
m_burst(rate)
|
||||||
{
|
{
|
||||||
|
THROW_CHECK1(invalid_argument, m_rate, m_rate > 0);
|
||||||
|
THROW_CHECK1(invalid_argument, m_burst, m_burst >= 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -119,6 +123,7 @@ namespace crucible {
|
|||||||
double
|
double
|
||||||
RateLimiter::sleep_time(double cost)
|
RateLimiter::sleep_time(double cost)
|
||||||
{
|
{
|
||||||
|
THROW_CHECK1(invalid_argument, m_rate, m_rate > 0);
|
||||||
borrow(cost);
|
borrow(cost);
|
||||||
unique_lock<mutex> lock(m_mutex);
|
unique_lock<mutex> lock(m_mutex);
|
||||||
update_tokens();
|
update_tokens();
|
||||||
@@ -154,6 +159,21 @@ namespace crucible {
|
|||||||
m_tokens -= cost;
|
m_tokens -= cost;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Replace the limiter's rate with `new_rate`.
/// The THROW_CHECK1 line requires new_rate > 0 (presumably throwing
/// invalid_argument otherwise; the macro is defined outside this view).
/// The check runs before m_mutex is taken; the assignment to m_rate
/// happens while holding m_mutex.
void
|
||||||
|
RateLimiter::rate(double const new_rate)
|
||||||
|
{
|
||||||
|
THROW_CHECK1(invalid_argument, new_rate, new_rate > 0);
|
||||||
|
unique_lock<mutex> lock(m_mutex);
|
||||||
|
m_rate = new_rate;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the current value of m_rate, read while holding m_mutex.
double
|
||||||
|
RateLimiter::rate() const
|
||||||
|
{
|
||||||
|
unique_lock<mutex> lock(m_mutex);
|
||||||
|
return m_rate;
|
||||||
|
}
|
||||||
|
|
||||||
RateEstimator::RateEstimator(double min_delay, double max_delay) :
|
RateEstimator::RateEstimator(double min_delay, double max_delay) :
|
||||||
m_min_delay(min_delay),
|
m_min_delay(min_delay),
|
||||||
m_max_delay(max_delay)
|
m_max_delay(max_delay)
|
||||||
@@ -202,6 +222,13 @@ namespace crucible {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Advance the estimator's count by `more`: takes m_mutex, then calls
/// update_unlocked() with m_last_count + more.
/// The `return` of a call expression in a void function compiles only if
/// update_unlocked() itself returns void (its declaration is outside
/// this view).
void
|
||||||
|
RateEstimator::increment(const uint64_t more)
|
||||||
|
{
|
||||||
|
unique_lock<mutex> lock(m_mutex);
|
||||||
|
return update_unlocked(m_last_count + more);
|
||||||
|
}
|
||||||
|
|
||||||
uint64_t
|
uint64_t
|
||||||
RateEstimator::count() const
|
RateEstimator::count() const
|
||||||
{
|
{
|
||||||
|
@@ -1,5 +1,13 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
|
# if not called from systemd try to replicate mount unsharing on ctrl+c
|
||||||
|
# see: https://github.com/Zygo/bees/issues/281
|
||||||
|
if [ -z "${SYSTEMD_EXEC_PID}" -a -z "${UNSHARE_DONE}" ]; then
|
||||||
|
UNSHARE_DONE=true
|
||||||
|
export UNSHARE_DONE
|
||||||
|
exec unshare -m --propagation private -- "$0" "$@"
|
||||||
|
fi
|
||||||
|
|
||||||
## Helpful functions
|
## Helpful functions
|
||||||
INFO(){ echo "INFO:" "$@"; }
|
INFO(){ echo "INFO:" "$@"; }
|
||||||
ERRO(){ echo "ERROR:" "$@"; exit 1; }
|
ERRO(){ echo "ERROR:" "$@"; exit 1; }
|
||||||
@@ -108,13 +116,11 @@ mkdir -p "$WORK_DIR" || exit 1
|
|||||||
INFO "MOUNT DIR: $MNT_DIR"
|
INFO "MOUNT DIR: $MNT_DIR"
|
||||||
mkdir -p "$MNT_DIR" || exit 1
|
mkdir -p "$MNT_DIR" || exit 1
|
||||||
|
|
||||||
mount --make-private -osubvolid=5 /dev/disk/by-uuid/$UUID "$MNT_DIR" || exit 1
|
mount --make-private -osubvolid=5,nodev,noexec /dev/disk/by-uuid/$UUID "$MNT_DIR" || exit 1
|
||||||
|
|
||||||
if [ ! -d "$BEESHOME" ]; then
|
if [ ! -d "$BEESHOME" ]; then
|
||||||
INFO "Create subvol $BEESHOME for store bees data"
|
INFO "Create subvol $BEESHOME for store bees data"
|
||||||
btrfs sub cre "$BEESHOME"
|
btrfs sub cre "$BEESHOME"
|
||||||
else
|
|
||||||
btrfs sub show "$BEESHOME" &> /dev/null || ERRO "$BEESHOME MUST BE A SUBVOL!"
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Check DB size
|
# Check DB size
|
||||||
|
@@ -5,7 +5,7 @@ After=sysinit.target
|
|||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=simple
|
Type=simple
|
||||||
ExecStart=@PREFIX@/sbin/beesd --no-timestamps %i
|
ExecStart=@PREFIX@/@BINDIR@/beesd --no-timestamps %i
|
||||||
CPUAccounting=true
|
CPUAccounting=true
|
||||||
CPUSchedulingPolicy=batch
|
CPUSchedulingPolicy=batch
|
||||||
CPUWeight=12
|
CPUWeight=12
|
||||||
@@ -17,6 +17,7 @@ KillSignal=SIGTERM
|
|||||||
MemoryAccounting=true
|
MemoryAccounting=true
|
||||||
Nice=19
|
Nice=19
|
||||||
Restart=on-abnormal
|
Restart=on-abnormal
|
||||||
|
RuntimeDirectoryMode=0700
|
||||||
RuntimeDirectory=bees
|
RuntimeDirectory=bees
|
||||||
StartupCPUWeight=25
|
StartupCPUWeight=25
|
||||||
StartupIOWeight=25
|
StartupIOWeight=25
|
||||||
|
File diff suppressed because it is too large
Load Diff
@@ -356,6 +356,8 @@ BeesHashTable::prefetch_loop()
|
|||||||
auto avg_rates = thisStats / m_ctx->total_timer().age();
|
auto avg_rates = thisStats / m_ctx->total_timer().age();
|
||||||
graph_blob << "\t" << avg_rates << "\n";
|
graph_blob << "\t" << avg_rates << "\n";
|
||||||
|
|
||||||
|
graph_blob << m_ctx->get_progress();
|
||||||
|
|
||||||
BEESLOGINFO(graph_blob.str());
|
BEESLOGINFO(graph_blob.str());
|
||||||
catch_all([&]() {
|
catch_all([&]() {
|
||||||
m_stats_file.write(graph_blob.str());
|
m_stats_file.write(graph_blob.str());
|
||||||
@@ -446,10 +448,38 @@ BeesHashTable::fetch_missing_extent_by_index(uint64_t extent_index)
|
|||||||
|
|
||||||
// If we are in prefetch, give the kernel a hint about the next extent
|
// If we are in prefetch, give the kernel a hint about the next extent
|
||||||
if (m_prefetch_running) {
|
if (m_prefetch_running) {
|
||||||
// XXX: don't call this if bees_readahead is implemented by pread()
|
// Use the kernel readahead here, because it might work for this use case
|
||||||
bees_readahead(m_fd, dirty_extent_offset + dirty_extent_size, dirty_extent_size);
|
readahead(m_fd, dirty_extent_offset + dirty_extent_size, dirty_extent_size);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
Cell *cell = m_extent_ptr[extent_index ].p_buckets[0].p_cells;
|
||||||
|
Cell *cell_end = m_extent_ptr[extent_index + 1].p_buckets[0].p_cells;
|
||||||
|
size_t toxic_cleared_count = 0;
|
||||||
|
set<BeesHashTable::Cell> seen_it(cell, cell_end);
|
||||||
|
while (cell < cell_end) {
|
||||||
|
if (cell->e_addr & BeesAddress::c_toxic_mask) {
|
||||||
|
++toxic_cleared_count;
|
||||||
|
cell->e_addr &= ~BeesAddress::c_toxic_mask;
|
||||||
|
// Clearing the toxic bit might mean we now have a duplicate.
|
||||||
|
// This could be due to a race between two
|
||||||
|
// inserts, one finds the extent toxic while the
|
||||||
|
// other does not. That's arguably a bug elsewhere,
|
||||||
|
// but we should rewrite the whole extent lookup/insert
|
||||||
|
// loop, not spend time fixing code that will be
|
||||||
|
// thrown out later anyway.
|
||||||
|
// If there is a cell that is identical to this one
|
||||||
|
// except for the toxic bit, then we don't need this one.
|
||||||
|
if (seen_it.count(*cell)) {
|
||||||
|
cell->e_addr = 0;
|
||||||
|
cell->e_hash = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
++cell;
|
||||||
|
}
|
||||||
|
if (toxic_cleared_count) {
|
||||||
|
BEESLOGDEBUG("Cleared " << toxic_cleared_count << " hashes while fetching hash table extent " << extent_index);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -767,7 +797,7 @@ BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename, off_t
|
|||||||
for (auto fp = madv_flags; fp->value; ++fp) {
|
for (auto fp = madv_flags; fp->value; ++fp) {
|
||||||
BEESTOOLONG("madvise(" << fp->name << ")");
|
BEESTOOLONG("madvise(" << fp->name << ")");
|
||||||
if (madvise(m_byte_ptr, m_size, fp->value)) {
|
if (madvise(m_byte_ptr, m_size, fp->value)) {
|
||||||
BEESLOGWARN("madvise(..., " << fp->name << "): " << strerror(errno) << " (ignored)");
|
BEESLOGNOTICE("madvise(..., " << fp->name << "): " << strerror(errno) << " (ignored)");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -781,8 +811,19 @@ BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename, off_t
|
|||||||
prefetch_loop();
|
prefetch_loop();
|
||||||
});
|
});
|
||||||
|
|
||||||
// Blacklist might fail if the hash table is not stored on a btrfs
|
// Blacklist might fail if the hash table is not stored on a btrfs,
|
||||||
|
// or if it's on a _different_ btrfs
|
||||||
catch_all([&]() {
|
catch_all([&]() {
|
||||||
|
// Root is definitely a btrfs
|
||||||
|
BtrfsIoctlFsInfoArgs root_info;
|
||||||
|
root_info.do_ioctl(m_ctx->root_fd());
|
||||||
|
// Hash might not be a btrfs
|
||||||
|
BtrfsIoctlFsInfoArgs hash_info;
|
||||||
|
// If btrfs fs_info ioctl fails, it must be a different fs
|
||||||
|
if (!hash_info.do_ioctl_nothrow(m_fd)) return;
|
||||||
|
// If Hash is a btrfs, Root must be the same one
|
||||||
|
if (root_info.fsid() != hash_info.fsid()) return;
|
||||||
|
// Hash is on the same one, blacklist it
|
||||||
m_ctx->blacklist_insert(BeesFileId(m_fd));
|
m_ctx->blacklist_insert(BeesFileId(m_fd));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@@ -384,7 +384,7 @@ BeesResolver::for_each_extent_ref(BeesBlockData bbd, function<bool(const BeesFil
|
|||||||
return stop_now;
|
return stop_now;
|
||||||
}
|
}
|
||||||
|
|
||||||
BeesFileRange
|
BeesRangePair
|
||||||
BeesResolver::replace_dst(const BeesFileRange &dst_bfr_in)
|
BeesResolver::replace_dst(const BeesFileRange &dst_bfr_in)
|
||||||
{
|
{
|
||||||
BEESTRACE("replace_dst dst_bfr " << dst_bfr_in);
|
BEESTRACE("replace_dst dst_bfr " << dst_bfr_in);
|
||||||
@@ -400,6 +400,7 @@ BeesResolver::replace_dst(const BeesFileRange &dst_bfr_in)
|
|||||||
BEESTRACE("overlap_bfr " << overlap_bfr);
|
BEESTRACE("overlap_bfr " << overlap_bfr);
|
||||||
|
|
||||||
BeesBlockData bbd(dst_bfr);
|
BeesBlockData bbd(dst_bfr);
|
||||||
|
BeesRangePair rv = { BeesFileRange(), BeesFileRange() };
|
||||||
|
|
||||||
for_each_extent_ref(bbd, [&](const BeesFileRange &src_bfr_in) -> bool {
|
for_each_extent_ref(bbd, [&](const BeesFileRange &src_bfr_in) -> bool {
|
||||||
// Open src
|
// Open src
|
||||||
@@ -436,21 +437,12 @@ BeesResolver::replace_dst(const BeesFileRange &dst_bfr_in)
|
|||||||
BEESCOUNT(replacedst_grown);
|
BEESCOUNT(replacedst_grown);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Dedup
|
rv = brp;
|
||||||
BEESNOTE("dedup " << brp);
|
m_found_dup = true;
|
||||||
if (m_ctx->dedup(brp)) {
|
return true;
|
||||||
BEESCOUNT(replacedst_dedup_hit);
|
|
||||||
m_found_dup = true;
|
|
||||||
overlap_bfr = brp.second;
|
|
||||||
// FIXME: find best range first, then dedupe that
|
|
||||||
return true; // i.e. break
|
|
||||||
} else {
|
|
||||||
BEESCOUNT(replacedst_dedup_miss);
|
|
||||||
return false; // i.e. continue
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
// BEESLOG("overlap_bfr after " << overlap_bfr);
|
// BEESLOG("overlap_bfr after " << overlap_bfr);
|
||||||
return overlap_bfr.copy_closed();
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
BeesFileRange
|
BeesFileRange
|
||||||
|
1621
src/bees-roots.cc
1621
src/bees-roots.cc
File diff suppressed because it is too large
Load Diff
@@ -14,7 +14,7 @@ BeesThread::exec(function<void()> func)
|
|||||||
{
|
{
|
||||||
m_timer.reset();
|
m_timer.reset();
|
||||||
BEESLOGDEBUG("BeesThread exec " << m_name);
|
BEESLOGDEBUG("BeesThread exec " << m_name);
|
||||||
m_thread_ptr = make_shared<thread>([=]() {
|
m_thread_ptr = make_shared<thread>([this, func]() {
|
||||||
BeesNote::set_name(m_name);
|
BeesNote::set_name(m_name);
|
||||||
BEESLOGDEBUG("Starting thread " << m_name);
|
BEESLOGDEBUG("Starting thread " << m_name);
|
||||||
BEESNOTE("thread function");
|
BEESNOTE("thread function");
|
||||||
|
@@ -8,38 +8,32 @@ thread_local BeesTracer *BeesTracer::tl_next_tracer = nullptr;
|
|||||||
thread_local bool BeesTracer::tl_first = true;
|
thread_local bool BeesTracer::tl_first = true;
|
||||||
thread_local bool BeesTracer::tl_silent = false;
|
thread_local bool BeesTracer::tl_silent = false;
|
||||||
|
|
||||||
|
bool
|
||||||
|
exception_check()
|
||||||
|
{
|
||||||
#if __cplusplus >= 201703
|
#if __cplusplus >= 201703
|
||||||
static
|
|
||||||
bool
|
|
||||||
exception_check()
|
|
||||||
{
|
|
||||||
return uncaught_exceptions();
|
return uncaught_exceptions();
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
static
|
|
||||||
bool
|
|
||||||
exception_check()
|
|
||||||
{
|
|
||||||
return uncaught_exception();
|
return uncaught_exception();
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
BeesTracer::~BeesTracer()
|
BeesTracer::~BeesTracer()
|
||||||
{
|
{
|
||||||
if (!tl_silent && exception_check()) {
|
if (!tl_silent && exception_check()) {
|
||||||
if (tl_first) {
|
if (tl_first) {
|
||||||
BEESLOGNOTICE("--- BEGIN TRACE --- exception ---");
|
BEESLOG(BEES_TRACE_LEVEL, "TRACE: --- BEGIN TRACE --- exception ---");
|
||||||
tl_first = false;
|
tl_first = false;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
m_func();
|
m_func();
|
||||||
} catch (exception &e) {
|
} catch (exception &e) {
|
||||||
BEESLOGNOTICE("Nested exception: " << e.what());
|
BEESLOG(BEES_TRACE_LEVEL, "TRACE: Nested exception: " << e.what());
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
BEESLOGNOTICE("Nested exception ...");
|
BEESLOG(BEES_TRACE_LEVEL, "TRACE: Nested exception ...");
|
||||||
}
|
}
|
||||||
if (!m_next_tracer) {
|
if (!m_next_tracer) {
|
||||||
BEESLOGNOTICE("--- END TRACE --- exception ---");
|
BEESLOG(BEES_TRACE_LEVEL, "TRACE: --- END TRACE --- exception ---");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tl_next_tracer = m_next_tracer;
|
tl_next_tracer = m_next_tracer;
|
||||||
@@ -49,7 +43,7 @@ BeesTracer::~BeesTracer()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
BeesTracer::BeesTracer(function<void()> f, bool silent) :
|
BeesTracer::BeesTracer(const function<void()> &f, bool silent) :
|
||||||
m_func(f)
|
m_func(f)
|
||||||
{
|
{
|
||||||
m_next_tracer = tl_next_tracer;
|
m_next_tracer = tl_next_tracer;
|
||||||
@@ -61,12 +55,12 @@ void
|
|||||||
BeesTracer::trace_now()
|
BeesTracer::trace_now()
|
||||||
{
|
{
|
||||||
BeesTracer *tp = tl_next_tracer;
|
BeesTracer *tp = tl_next_tracer;
|
||||||
BEESLOGNOTICE("--- BEGIN TRACE ---");
|
BEESLOG(BEES_TRACE_LEVEL, "TRACE: --- BEGIN TRACE ---");
|
||||||
while (tp) {
|
while (tp) {
|
||||||
tp->m_func();
|
tp->m_func();
|
||||||
tp = tp->m_next_tracer;
|
tp = tp->m_next_tracer;
|
||||||
}
|
}
|
||||||
BEESLOGNOTICE("--- END TRACE ---");
|
BEESLOG(BEES_TRACE_LEVEL, "TRACE: --- END TRACE ---");
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
@@ -91,9 +85,9 @@ BeesNote::~BeesNote()
|
|||||||
tl_next = m_prev;
|
tl_next = m_prev;
|
||||||
unique_lock<mutex> lock(s_mutex);
|
unique_lock<mutex> lock(s_mutex);
|
||||||
if (tl_next) {
|
if (tl_next) {
|
||||||
s_status[crucible::gettid()] = tl_next;
|
s_status[gettid()] = tl_next;
|
||||||
} else {
|
} else {
|
||||||
s_status.erase(crucible::gettid());
|
s_status.erase(gettid());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -104,7 +98,7 @@ BeesNote::BeesNote(function<void(ostream &os)> f) :
|
|||||||
m_prev = tl_next;
|
m_prev = tl_next;
|
||||||
tl_next = this;
|
tl_next = this;
|
||||||
unique_lock<mutex> lock(s_mutex);
|
unique_lock<mutex> lock(s_mutex);
|
||||||
s_status[crucible::gettid()] = tl_next;
|
s_status[gettid()] = tl_next;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@@ -183,6 +183,24 @@ BeesFileRange::grow_begin(off_t delta)
|
|||||||
return m_begin;
|
return m_begin;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
off_t
|
||||||
|
BeesFileRange::shrink_begin(off_t delta)
|
||||||
|
{
|
||||||
|
THROW_CHECK1(invalid_argument, delta, delta > 0);
|
||||||
|
THROW_CHECK3(invalid_argument, delta, m_begin, m_end, delta + m_begin < m_end);
|
||||||
|
m_begin += delta;
|
||||||
|
return m_begin;
|
||||||
|
}
|
||||||
|
|
||||||
|
off_t
|
||||||
|
BeesFileRange::shrink_end(off_t delta)
|
||||||
|
{
|
||||||
|
THROW_CHECK1(invalid_argument, delta, delta > 0);
|
||||||
|
THROW_CHECK2(invalid_argument, delta, m_end, m_end >= delta);
|
||||||
|
m_end -= delta;
|
||||||
|
return m_end;
|
||||||
|
}
|
||||||
|
|
||||||
BeesFileRange::BeesFileRange(const BeesBlockData &bbd) :
|
BeesFileRange::BeesFileRange(const BeesBlockData &bbd) :
|
||||||
m_fd(bbd.fd()),
|
m_fd(bbd.fd()),
|
||||||
m_begin(bbd.begin()),
|
m_begin(bbd.begin()),
|
||||||
@@ -349,8 +367,8 @@ BeesRangePair::grow(shared_ptr<BeesContext> ctx, bool constrained)
|
|||||||
BEESTRACE("e_second " << e_second);
|
BEESTRACE("e_second " << e_second);
|
||||||
|
|
||||||
// Preread entire extent
|
// Preread entire extent
|
||||||
bees_readahead(second.fd(), e_second.begin(), e_second.size());
|
bees_readahead_pair(second.fd(), e_second.begin(), e_second.size(),
|
||||||
bees_readahead(first.fd(), e_second.begin() + first.begin() - second.begin(), e_second.size());
|
first.fd(), e_second.begin() + first.begin() - second.begin(), e_second.size());
|
||||||
|
|
||||||
auto hash_table = ctx->hash_table();
|
auto hash_table = ctx->hash_table();
|
||||||
|
|
||||||
@@ -388,17 +406,6 @@ BeesRangePair::grow(shared_ptr<BeesContext> ctx, bool constrained)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Source extent cannot be toxic
|
|
||||||
BeesAddress first_addr(first.fd(), new_first.begin());
|
|
||||||
if (!first_addr.is_magic()) {
|
|
||||||
auto first_resolved = ctx->resolve_addr(first_addr);
|
|
||||||
if (first_resolved.is_toxic()) {
|
|
||||||
BEESLOGWARN("WORKAROUND: not growing matching pair backward because src addr is toxic:\n" << *this);
|
|
||||||
BEESCOUNT(pairbackward_toxic_addr);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extend second range. If we hit BOF we can go no further.
|
// Extend second range. If we hit BOF we can go no further.
|
||||||
BeesFileRange new_second = second;
|
BeesFileRange new_second = second;
|
||||||
BEESTRACE("new_second = " << new_second);
|
BEESTRACE("new_second = " << new_second);
|
||||||
@@ -434,6 +441,7 @@ BeesRangePair::grow(shared_ptr<BeesContext> ctx, bool constrained)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Source block cannot be zero in a non-compressed non-magic extent
|
// Source block cannot be zero in a non-compressed non-magic extent
|
||||||
|
BeesAddress first_addr(first.fd(), new_first.begin());
|
||||||
if (first_bbd.is_data_zero() && !first_addr.is_magic() && !first_addr.is_compressed()) {
|
if (first_bbd.is_data_zero() && !first_addr.is_magic() && !first_addr.is_compressed()) {
|
||||||
BEESCOUNT(pairbackward_zero);
|
BEESCOUNT(pairbackward_zero);
|
||||||
break;
|
break;
|
||||||
@@ -449,7 +457,7 @@ BeesRangePair::grow(shared_ptr<BeesContext> ctx, bool constrained)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (found_toxic) {
|
if (found_toxic) {
|
||||||
BEESLOGWARN("WORKAROUND: found toxic hash in " << first_bbd << " while extending backward:\n" << *this);
|
BEESLOGDEBUG("WORKAROUND: found toxic hash in " << first_bbd << " while extending backward:\n" << *this);
|
||||||
BEESCOUNT(pairbackward_toxic_hash);
|
BEESCOUNT(pairbackward_toxic_hash);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -491,17 +499,6 @@ BeesRangePair::grow(shared_ptr<BeesContext> ctx, bool constrained)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Source extent cannot be toxic
|
|
||||||
BeesAddress first_addr(first.fd(), new_first.begin());
|
|
||||||
if (!first_addr.is_magic()) {
|
|
||||||
auto first_resolved = ctx->resolve_addr(first_addr);
|
|
||||||
if (first_resolved.is_toxic()) {
|
|
||||||
BEESLOGWARN("WORKAROUND: not growing matching pair forward because src is toxic:\n" << *this);
|
|
||||||
BEESCOUNT(pairforward_toxic);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extend second range. If we hit EOF we can go no further.
|
// Extend second range. If we hit EOF we can go no further.
|
||||||
BeesFileRange new_second = second;
|
BeesFileRange new_second = second;
|
||||||
BEESTRACE("new_second = " << new_second);
|
BEESTRACE("new_second = " << new_second);
|
||||||
@@ -545,6 +542,7 @@ BeesRangePair::grow(shared_ptr<BeesContext> ctx, bool constrained)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Source block cannot be zero in a non-compressed non-magic extent
|
// Source block cannot be zero in a non-compressed non-magic extent
|
||||||
|
BeesAddress first_addr(first.fd(), new_first.begin());
|
||||||
if (first_bbd.is_data_zero() && !first_addr.is_magic() && !first_addr.is_compressed()) {
|
if (first_bbd.is_data_zero() && !first_addr.is_magic() && !first_addr.is_compressed()) {
|
||||||
BEESCOUNT(pairforward_zero);
|
BEESCOUNT(pairforward_zero);
|
||||||
break;
|
break;
|
||||||
@@ -560,7 +558,7 @@ BeesRangePair::grow(shared_ptr<BeesContext> ctx, bool constrained)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (found_toxic) {
|
if (found_toxic) {
|
||||||
BEESLOGWARN("WORKAROUND: found toxic hash in " << first_bbd << " while extending forward:\n" << *this);
|
BEESLOGDEBUG("WORKAROUND: found toxic hash in " << first_bbd << " while extending forward:\n" << *this);
|
||||||
BEESCOUNT(pairforward_toxic_hash);
|
BEESCOUNT(pairforward_toxic_hash);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -574,7 +572,7 @@ BeesRangePair::grow(shared_ptr<BeesContext> ctx, bool constrained)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (first.overlaps(second)) {
|
if (first.overlaps(second)) {
|
||||||
BEESLOGTRACE("after grow, first " << first << "\n\toverlaps " << second);
|
BEESLOGDEBUG("after grow, first " << first << "\n\toverlaps " << second);
|
||||||
BEESCOUNT(bug_grow_pair_overlaps);
|
BEESCOUNT(bug_grow_pair_overlaps);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -589,6 +587,22 @@ BeesRangePair::copy_closed() const
|
|||||||
return BeesRangePair(first.copy_closed(), second.copy_closed());
|
return BeesRangePair(first.copy_closed(), second.copy_closed());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
BeesRangePair::shrink_begin(off_t const delta)
|
||||||
|
{
|
||||||
|
first.shrink_begin(delta);
|
||||||
|
second.shrink_begin(delta);
|
||||||
|
THROW_CHECK2(runtime_error, first.size(), second.size(), first.size() == second.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
BeesRangePair::shrink_end(off_t const delta)
|
||||||
|
{
|
||||||
|
first.shrink_end(delta);
|
||||||
|
second.shrink_end(delta);
|
||||||
|
THROW_CHECK2(runtime_error, first.size(), second.size(), first.size() == second.size());
|
||||||
|
}
|
||||||
|
|
||||||
ostream &
|
ostream &
|
||||||
operator<<(ostream &os, const BeesAddress &ba)
|
operator<<(ostream &os, const BeesAddress &ba)
|
||||||
{
|
{
|
||||||
@@ -660,7 +674,7 @@ BeesAddress::magic_check(uint64_t flags)
|
|||||||
static const unsigned recognized_flags = compressed_flags | delalloc_flags | ignore_flags | unusable_flags;
|
static const unsigned recognized_flags = compressed_flags | delalloc_flags | ignore_flags | unusable_flags;
|
||||||
|
|
||||||
if (flags & ~recognized_flags) {
|
if (flags & ~recognized_flags) {
|
||||||
BEESLOGTRACE("Unrecognized flags in " << fiemap_extent_flags_ntoa(flags));
|
BEESLOGNOTICE("Unrecognized flags in " << fiemap_extent_flags_ntoa(flags));
|
||||||
m_addr = UNUSABLE;
|
m_addr = UNUSABLE;
|
||||||
// maybe we throw here?
|
// maybe we throw here?
|
||||||
BEESCOUNT(addr_unrecognized);
|
BEESCOUNT(addr_unrecognized);
|
||||||
|
@@ -12,9 +12,10 @@ Load management options:
|
|||||||
-C, --thread-factor Worker thread factor (default 1)
|
-C, --thread-factor Worker thread factor (default 1)
|
||||||
-G, --thread-min Minimum worker thread count (default 0)
|
-G, --thread-min Minimum worker thread count (default 0)
|
||||||
-g, --loadavg-target Target load average for worker threads (default none)
|
-g, --loadavg-target Target load average for worker threads (default none)
|
||||||
|
--throttle-factor Idle time between operations (default 1.0)
|
||||||
|
|
||||||
Filesystem tree traversal options:
|
Filesystem tree traversal options:
|
||||||
-m, --scan-mode Scanning mode (0..2, default 0)
|
-m, --scan-mode Scanning mode (0..4, default 4)
|
||||||
|
|
||||||
Workarounds:
|
Workarounds:
|
||||||
-a, --workaround-btrfs-send Workaround for btrfs send
|
-a, --workaround-btrfs-send Workaround for btrfs send
|
||||||
|
344
src/bees.cc
344
src/bees.cc
@@ -4,6 +4,7 @@
|
|||||||
#include "crucible/process.h"
|
#include "crucible/process.h"
|
||||||
#include "crucible/string.h"
|
#include "crucible/string.h"
|
||||||
#include "crucible/task.h"
|
#include "crucible/task.h"
|
||||||
|
#include "crucible/uname.h"
|
||||||
|
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
@@ -11,17 +12,19 @@
|
|||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <regex>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
||||||
// PRIx64
|
// PRIx64
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
|
|
||||||
#include <sched.h>
|
|
||||||
#include <sys/fanotify.h>
|
|
||||||
|
|
||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
|
|
||||||
|
// statfs
|
||||||
|
#include <linux/magic.h>
|
||||||
|
#include <sys/statfs.h>
|
||||||
|
|
||||||
// setrlimit
|
// setrlimit
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#include <sys/resource.h>
|
#include <sys/resource.h>
|
||||||
@@ -198,7 +201,7 @@ BeesTooLong::check() const
|
|||||||
if (age() > m_limit) {
|
if (age() > m_limit) {
|
||||||
ostringstream oss;
|
ostringstream oss;
|
||||||
m_func(oss);
|
m_func(oss);
|
||||||
BEESLOGWARN("PERFORMANCE: " << *this << " sec: " << oss.str());
|
BEESLOGINFO("PERFORMANCE: " << *this << " sec: " << oss.str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -214,23 +217,45 @@ BeesTooLong::operator=(const func_type &f)
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
static
|
||||||
bees_readahead(int const fd, const off_t offset, const size_t size)
|
bool
|
||||||
|
bees_readahead_check(int const fd, off_t const offset, size_t const size)
|
||||||
{
|
{
|
||||||
|
// FIXME: the rest of the code calls this function more often than necessary,
|
||||||
|
// usually back-to-back calls on the same range in a loop.
|
||||||
|
// Simply discard requests that are identical to recent requests.
|
||||||
|
const Stat stat_rv(fd);
|
||||||
|
auto tup = make_tuple(offset, size, stat_rv.st_dev, stat_rv.st_ino);
|
||||||
|
static mutex s_recent_mutex;
|
||||||
|
static set<decltype(tup)> s_recent;
|
||||||
|
static Timer s_recent_timer;
|
||||||
|
unique_lock<mutex> lock(s_recent_mutex);
|
||||||
|
if (s_recent_timer.age() > 5.0) {
|
||||||
|
s_recent_timer.reset();
|
||||||
|
s_recent.clear();
|
||||||
|
BEESCOUNT(readahead_clear);
|
||||||
|
}
|
||||||
|
const auto rv = s_recent.insert(tup);
|
||||||
|
// If we recently did this readahead, we're done here
|
||||||
|
if (!rv.second) {
|
||||||
|
BEESCOUNT(readahead_skip);
|
||||||
|
}
|
||||||
|
return rv.second;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void
|
||||||
|
bees_readahead_nolock(int const fd, const off_t offset, const size_t size)
|
||||||
|
{
|
||||||
|
if (!bees_readahead_check(fd, offset, size)) return;
|
||||||
Timer readahead_timer;
|
Timer readahead_timer;
|
||||||
BEESNOTE("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
BEESNOTE("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
||||||
BEESTOOLONG("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
BEESTOOLONG("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
||||||
#if 0
|
|
||||||
// In the kernel, readahead() is identical to posix_fadvise(..., POSIX_FADV_DONTNEED)
|
|
||||||
DIE_IF_NON_ZERO(readahead(fd, offset, size));
|
|
||||||
#else
|
|
||||||
// Make sure this data is in page cache by brute force
|
// Make sure this data is in page cache by brute force
|
||||||
// This isn't necessary and it might even be slower,
|
// The btrfs kernel code does readahead with lower ioprio
|
||||||
// but the btrfs kernel code does readahead with lower ioprio
|
// and might discard the readahead request entirely.
|
||||||
// and might discard the readahead request entirely,
|
|
||||||
// so it's maybe, *maybe*, worth doing both.
|
|
||||||
BEESNOTE("emulating readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
BEESNOTE("emulating readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
||||||
auto working_size = size;
|
auto working_size = min(size, uint64_t(128 * 1024 * 1024));
|
||||||
auto working_offset = offset;
|
auto working_offset = offset;
|
||||||
while (working_size) {
|
while (working_size) {
|
||||||
// don't care about multithreaded writes to this buffer--it is garbage anyway
|
// don't care about multithreaded writes to this buffer--it is garbage anyway
|
||||||
@@ -239,16 +264,41 @@ bees_readahead(int const fd, const off_t offset, const size_t size)
|
|||||||
// Ignore errors and short reads. It turns out our size
|
// Ignore errors and short reads. It turns out our size
|
||||||
// parameter isn't all that accurate, so we can't use
|
// parameter isn't all that accurate, so we can't use
|
||||||
// the pread_or_die template.
|
// the pread_or_die template.
|
||||||
(void)!pread(fd, dummy, this_read_size, working_offset);
|
const auto pr_rv = pread(fd, dummy, this_read_size, working_offset);
|
||||||
BEESCOUNT(readahead_count);
|
if (pr_rv >= 0) {
|
||||||
BEESCOUNTADD(readahead_bytes, this_read_size);
|
BEESCOUNT(readahead_count);
|
||||||
|
BEESCOUNTADD(readahead_bytes, pr_rv);
|
||||||
|
} else {
|
||||||
|
BEESCOUNT(readahead_fail);
|
||||||
|
}
|
||||||
working_offset += this_read_size;
|
working_offset += this_read_size;
|
||||||
working_size -= this_read_size;
|
working_size -= this_read_size;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
BEESCOUNTADD(readahead_ms, readahead_timer.age() * 1000);
|
BEESCOUNTADD(readahead_ms, readahead_timer.age() * 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static mutex s_only_one;
|
||||||
|
|
||||||
|
void
|
||||||
|
bees_readahead_pair(int fd, off_t offset, size_t size, int fd2, off_t offset2, size_t size2)
|
||||||
|
{
|
||||||
|
if (!bees_readahead_check(fd, offset, size) && !bees_readahead_check(fd2, offset2, size2)) return;
|
||||||
|
BEESNOTE("waiting to readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size) << ","
|
||||||
|
<< "\n\t" << name_fd(fd2) << " offset " << to_hex(offset2) << " len " << pretty(size2));
|
||||||
|
unique_lock<mutex> m_lock(s_only_one);
|
||||||
|
bees_readahead_nolock(fd, offset, size);
|
||||||
|
bees_readahead_nolock(fd2, offset2, size2);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
bees_readahead(int const fd, const off_t offset, const size_t size)
|
||||||
|
{
|
||||||
|
if (!bees_readahead_check(fd, offset, size)) return;
|
||||||
|
BEESNOTE("waiting to readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
||||||
|
unique_lock<mutex> m_lock(s_only_one);
|
||||||
|
bees_readahead_nolock(fd, offset, size);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
bees_unreadahead(int const fd, off_t offset, size_t size)
|
bees_unreadahead(int const fd, off_t offset, size_t size)
|
||||||
{
|
{
|
||||||
@@ -259,6 +309,48 @@ bees_unreadahead(int const fd, off_t offset, size_t size)
|
|||||||
BEESCOUNTADD(readahead_unread_ms, unreadahead_timer.age() * 1000);
|
BEESCOUNTADD(readahead_unread_ms, unreadahead_timer.age() * 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static double bees_throttle_factor = 0.0;
|
||||||
|
|
||||||
|
void
|
||||||
|
bees_throttle(const double time_used, const char *const context)
|
||||||
|
{
|
||||||
|
static mutex s_mutex;
|
||||||
|
unique_lock<mutex> throttle_lock(s_mutex);
|
||||||
|
struct time_pair {
|
||||||
|
double time_used = 0;
|
||||||
|
double time_count = 0;
|
||||||
|
double longest_sleep_time = 0;
|
||||||
|
};
|
||||||
|
static map<string, time_pair> s_time_map;
|
||||||
|
auto &this_time = s_time_map[context];
|
||||||
|
auto &this_time_used = this_time.time_used;
|
||||||
|
auto &this_time_count = this_time.time_count;
|
||||||
|
auto &longest_sleep_time = this_time.longest_sleep_time;
|
||||||
|
this_time_used += time_used;
|
||||||
|
++this_time_count;
|
||||||
|
// Keep the timing data fresh
|
||||||
|
static Timer s_fresh_timer;
|
||||||
|
if (s_fresh_timer.age() > 60) {
|
||||||
|
s_fresh_timer.reset();
|
||||||
|
this_time_count *= 0.9;
|
||||||
|
this_time_used *= 0.9;
|
||||||
|
}
|
||||||
|
// Wait for enough data to calculate rates
|
||||||
|
if (this_time_used < 1.0 || this_time_count < 1.0) return;
|
||||||
|
const auto avg_time = this_time_used / this_time_count;
|
||||||
|
const auto sleep_time = min(60.0, bees_throttle_factor * avg_time - time_used);
|
||||||
|
if (sleep_time <= 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (sleep_time > longest_sleep_time) {
|
||||||
|
BEESLOGDEBUG(context << ": throttle delay " << sleep_time << " s, time used " << time_used << " s, avg time " << avg_time << " s");
|
||||||
|
longest_sleep_time = sleep_time;
|
||||||
|
}
|
||||||
|
throttle_lock.unlock();
|
||||||
|
BEESNOTE(context << ": throttle delay " << sleep_time << " s, time used " << time_used << " s, avg time " << avg_time << " s");
|
||||||
|
nanosleep(sleep_time);
|
||||||
|
}
|
||||||
|
|
||||||
thread_local random_device bees_random_device;
|
thread_local random_device bees_random_device;
|
||||||
thread_local uniform_int_distribution<default_random_engine::result_type> bees_random_seed_dist(
|
thread_local uniform_int_distribution<default_random_engine::result_type> bees_random_seed_dist(
|
||||||
numeric_limits<default_random_engine::result_type>::min(),
|
numeric_limits<default_random_engine::result_type>::min(),
|
||||||
@@ -304,6 +396,73 @@ BeesStringFile::read()
|
|||||||
return read_string(fd, st.st_size);
|
return read_string(fd, st.st_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void
|
||||||
|
bees_fsync(int const fd)
|
||||||
|
{
|
||||||
|
|
||||||
|
// Note that when btrfs renames a temporary over an existing file,
|
||||||
|
// it flushes the temporary, so we get the right behavior if we
|
||||||
|
// just do nothing here (except when the file is first created;
|
||||||
|
// however, in that case the result is the same as if the file
|
||||||
|
// did not exist, was empty, or was filled with garbage).
|
||||||
|
//
|
||||||
|
// Kernel versions prior to 5.16 had bugs which would put ghost
|
||||||
|
// dirents in $BEESHOME if there was a crash when we called
|
||||||
|
// fsync() here.
|
||||||
|
//
|
||||||
|
// Some other filesystems will throw our data away if we don't
|
||||||
|
// call fsync, so we do need to call fsync() on those filesystems.
|
||||||
|
//
|
||||||
|
// Newer btrfs kernel versions rely on fsync() to report
|
||||||
|
// unrecoverable write errors. If we don't check the fsync()
|
||||||
|
// result, we'll lose the data when we rename(). Kernel 6.2 added
|
||||||
|
// a number of new root causes for the class of "unrecoverable
|
||||||
|
// write errors" so we need to check this now.
|
||||||
|
|
||||||
|
BEESNOTE("checking filesystem type for " << name_fd(fd));
|
||||||
|
// LSB deprecated statfs without providing a replacement that
|
||||||
|
// can fill in the f_type field.
|
||||||
|
struct statfs stf = { 0 };
|
||||||
|
DIE_IF_NON_ZERO(fstatfs(fd, &stf));
|
||||||
|
if (static_cast<decltype(BTRFS_SUPER_MAGIC)>(stf.f_type) != BTRFS_SUPER_MAGIC) {
|
||||||
|
BEESLOGONCE("Using fsync on non-btrfs filesystem type " << to_hex(stf.f_type));
|
||||||
|
BEESNOTE("fsync non-btrfs " << name_fd(fd));
|
||||||
|
DIE_IF_NON_ZERO(fsync(fd));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool did_uname = false;
|
||||||
|
static bool do_fsync = false;
|
||||||
|
|
||||||
|
if (!did_uname) {
|
||||||
|
Uname uname;
|
||||||
|
const string version(uname.release);
|
||||||
|
static const regex version_re(R"/(^(\d+)\.(\d+)\.)/", regex::optimize | regex::ECMAScript);
|
||||||
|
smatch m;
|
||||||
|
// Last known bug in the fsync-rename use case was fixed in kernel 5.16
|
||||||
|
static const auto min_major = 5, min_minor = 16;
|
||||||
|
if (regex_search(version, m, version_re)) {
|
||||||
|
const auto major = stoul(m[1]);
|
||||||
|
const auto minor = stoul(m[2]);
|
||||||
|
if (tie(major, minor) > tie(min_major, min_minor)) {
|
||||||
|
BEESLOGONCE("Using fsync on btrfs because kernel version is " << major << "." << minor);
|
||||||
|
do_fsync = true;
|
||||||
|
} else {
|
||||||
|
BEESLOGONCE("Not using fsync on btrfs because kernel version is " << major << "." << minor);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
BEESLOGONCE("Not using fsync on btrfs because can't parse kernel version '" << version << "'");
|
||||||
|
}
|
||||||
|
did_uname = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (do_fsync) {
|
||||||
|
BEESNOTE("fsync btrfs " << name_fd(fd));
|
||||||
|
DIE_IF_NON_ZERO(fsync(fd));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
BeesStringFile::write(string contents)
|
BeesStringFile::write(string contents)
|
||||||
{
|
{
|
||||||
@@ -319,19 +478,8 @@ BeesStringFile::write(string contents)
|
|||||||
Fd ofd = openat_or_die(m_dir_fd, tmpname, FLAGS_CREATE_FILE, S_IRUSR | S_IWUSR);
|
Fd ofd = openat_or_die(m_dir_fd, tmpname, FLAGS_CREATE_FILE, S_IRUSR | S_IWUSR);
|
||||||
BEESNOTE("writing " << tmpname << " in " << name_fd(m_dir_fd));
|
BEESNOTE("writing " << tmpname << " in " << name_fd(m_dir_fd));
|
||||||
write_or_die(ofd, contents);
|
write_or_die(ofd, contents);
|
||||||
#if 0
|
|
||||||
// This triggers too many btrfs bugs. I wish I was kidding.
|
|
||||||
// Forget snapshots, balance, compression, and dedupe:
|
|
||||||
// the system call you have to fear on btrfs is fsync().
|
|
||||||
// Also note that when bees renames a temporary over an
|
|
||||||
// existing file, it flushes the temporary, so we get
|
|
||||||
// the right behavior if we just do nothing here
|
|
||||||
// (except when the file is first created; however,
|
|
||||||
// in that case the result is the same as if the file
|
|
||||||
// did not exist, was empty, or was filled with garbage).
|
|
||||||
BEESNOTE("fsyncing " << tmpname << " in " << name_fd(m_dir_fd));
|
BEESNOTE("fsyncing " << tmpname << " in " << name_fd(m_dir_fd));
|
||||||
DIE_IF_NON_ZERO(fsync(ofd));
|
bees_fsync(ofd);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
BEESNOTE("renaming " << tmpname << " to " << m_name << " in FD " << name_fd(m_dir_fd));
|
BEESNOTE("renaming " << tmpname << " to " << m_name << " in FD " << name_fd(m_dir_fd));
|
||||||
BEESTRACE("renaming " << tmpname << " to " << m_name << " in FD " << name_fd(m_dir_fd));
|
BEESTRACE("renaming " << tmpname << " to " << m_name << " in FD " << name_fd(m_dir_fd));
|
||||||
@@ -355,6 +503,25 @@ BeesTempFile::resize(off_t offset)
|
|||||||
|
|
||||||
// Count time spent here
|
// Count time spent here
|
||||||
BEESCOUNTADD(tmp_resize_ms, resize_timer.age() * 1000);
|
BEESCOUNTADD(tmp_resize_ms, resize_timer.age() * 1000);
|
||||||
|
|
||||||
|
// Modify flags - every time
|
||||||
|
// - btrfs will keep trying to set FS_NOCOMP_FL behind us when compression heuristics identify
|
||||||
|
// the data as compressible, but it fails to compress
|
||||||
|
// - clear FS_NOCOW_FL because we can only dedupe between files with the same FS_NOCOW_FL state,
|
||||||
|
// and we don't open FS_NOCOW_FL files for dedupe.
|
||||||
|
BEESTRACE("Getting FS_COMPR_FL and FS_NOCOMP_FL on m_fd " << name_fd(m_fd));
|
||||||
|
int flags = ioctl_iflags_get(m_fd);
|
||||||
|
const auto orig_flags = flags;
|
||||||
|
|
||||||
|
flags |= FS_COMPR_FL;
|
||||||
|
flags &= ~(FS_NOCOMP_FL | FS_NOCOW_FL);
|
||||||
|
if (flags != orig_flags) {
|
||||||
|
BEESTRACE("Setting FS_COMPR_FL and clearing FS_NOCOMP_FL | FS_NOCOW_FL on m_fd " << name_fd(m_fd) << " flags " << to_hex(flags));
|
||||||
|
ioctl_iflags_set(m_fd, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
// That may have queued some delayed ref deletes, so throttle them
|
||||||
|
bees_throttle(resize_timer.age(), "tmpfile_resize");
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -395,13 +562,6 @@ BeesTempFile::BeesTempFile(shared_ptr<BeesContext> ctx) :
|
|||||||
// Add this file to open_root_ino lookup table
|
// Add this file to open_root_ino lookup table
|
||||||
m_roots->insert_tmpfile(m_fd);
|
m_roots->insert_tmpfile(m_fd);
|
||||||
|
|
||||||
// Set compression attribute
|
|
||||||
BEESTRACE("Getting FS_COMPR_FL on m_fd " << name_fd(m_fd));
|
|
||||||
int flags = ioctl_iflags_get(m_fd);
|
|
||||||
flags |= FS_COMPR_FL;
|
|
||||||
BEESTRACE("Setting FS_COMPR_FL on m_fd " << name_fd(m_fd) << " flags " << to_hex(flags));
|
|
||||||
ioctl_iflags_set(m_fd, flags);
|
|
||||||
|
|
||||||
// Count time spent here
|
// Count time spent here
|
||||||
BEESCOUNTADD(tmp_create_ms, create_timer.age() * 1000);
|
BEESCOUNTADD(tmp_create_ms, create_timer.age() * 1000);
|
||||||
|
|
||||||
@@ -490,6 +650,8 @@ BeesTempFile::make_copy(const BeesFileRange &src)
|
|||||||
}
|
}
|
||||||
BEESCOUNTADD(tmp_copy_ms, copy_timer.age() * 1000);
|
BEESCOUNTADD(tmp_copy_ms, copy_timer.age() * 1000);
|
||||||
|
|
||||||
|
bees_throttle(copy_timer.age(), "tmpfile_copy");
|
||||||
|
|
||||||
BEESCOUNT(tmp_copy);
|
BEESCOUNT(tmp_copy);
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
@@ -528,19 +690,23 @@ operator<<(ostream &os, const siginfo_t &si)
|
|||||||
|
|
||||||
static sigset_t new_sigset, old_sigset;
|
static sigset_t new_sigset, old_sigset;
|
||||||
|
|
||||||
|
static
|
||||||
void
|
void
|
||||||
block_term_signal()
|
block_signals()
|
||||||
{
|
{
|
||||||
BEESLOGDEBUG("Masking signals");
|
BEESLOGDEBUG("Masking signals");
|
||||||
|
|
||||||
DIE_IF_NON_ZERO(sigemptyset(&new_sigset));
|
DIE_IF_NON_ZERO(sigemptyset(&new_sigset));
|
||||||
DIE_IF_NON_ZERO(sigaddset(&new_sigset, SIGTERM));
|
DIE_IF_NON_ZERO(sigaddset(&new_sigset, SIGTERM));
|
||||||
DIE_IF_NON_ZERO(sigaddset(&new_sigset, SIGINT));
|
DIE_IF_NON_ZERO(sigaddset(&new_sigset, SIGINT));
|
||||||
|
DIE_IF_NON_ZERO(sigaddset(&new_sigset, SIGUSR1));
|
||||||
|
DIE_IF_NON_ZERO(sigaddset(&new_sigset, SIGUSR2));
|
||||||
DIE_IF_NON_ZERO(sigprocmask(SIG_BLOCK, &new_sigset, &old_sigset));
|
DIE_IF_NON_ZERO(sigprocmask(SIG_BLOCK, &new_sigset, &old_sigset));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
void
|
void
|
||||||
wait_for_term_signal()
|
wait_for_signals()
|
||||||
{
|
{
|
||||||
BEESNOTE("waiting for signals");
|
BEESNOTE("waiting for signals");
|
||||||
BEESLOGDEBUG("Waiting for signals...");
|
BEESLOGDEBUG("Waiting for signals...");
|
||||||
@@ -557,14 +723,28 @@ wait_for_term_signal()
|
|||||||
THROW_ERRNO("sigwaitinfo errno = " << errno);
|
THROW_ERRNO("sigwaitinfo errno = " << errno);
|
||||||
} else {
|
} else {
|
||||||
BEESLOGNOTICE("Received signal " << rv << " info " << info);
|
BEESLOGNOTICE("Received signal " << rv << " info " << info);
|
||||||
// Unblock so we die immediately if signalled again
|
// If SIGTERM or SIGINT, unblock so we die immediately if signalled again
|
||||||
DIE_IF_NON_ZERO(sigprocmask(SIG_BLOCK, &old_sigset, &new_sigset));
|
switch (info.si_signo) {
|
||||||
break;
|
case SIGUSR1:
|
||||||
|
BEESLOGNOTICE("Received SIGUSR1 - pausing workers");
|
||||||
|
TaskMaster::pause(true);
|
||||||
|
break;
|
||||||
|
case SIGUSR2:
|
||||||
|
BEESLOGNOTICE("Received SIGUSR2 - unpausing workers");
|
||||||
|
TaskMaster::pause(false);
|
||||||
|
break;
|
||||||
|
case SIGTERM:
|
||||||
|
case SIGINT:
|
||||||
|
default:
|
||||||
|
DIE_IF_NON_ZERO(sigprocmask(SIG_BLOCK, &old_sigset, &new_sigset));
|
||||||
|
BEESLOGDEBUG("Signal catcher exiting");
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
BEESLOGDEBUG("Signal catcher exiting");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
int
|
int
|
||||||
bees_main(int argc, char *argv[])
|
bees_main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
@@ -573,7 +753,7 @@ bees_main(int argc, char *argv[])
|
|||||||
BEESLOGDEBUG("exception (ignored): " << s);
|
BEESLOGDEBUG("exception (ignored): " << s);
|
||||||
BEESCOUNT(exception_caught_silent);
|
BEESCOUNT(exception_caught_silent);
|
||||||
} else {
|
} else {
|
||||||
BEESLOGNOTICE("\n\n*** EXCEPTION ***\n\t" << s << "\n***\n");
|
BEESLOG(BEES_TRACE_LEVEL, "TRACE: EXCEPTION: " << s);
|
||||||
BEESCOUNT(exception_caught);
|
BEESCOUNT(exception_caught);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@@ -588,47 +768,51 @@ bees_main(int argc, char *argv[])
|
|||||||
|
|
||||||
// Have to block signals now before we create a bunch of threads
|
// Have to block signals now before we create a bunch of threads
|
||||||
// so the threads will also have the signals blocked.
|
// so the threads will also have the signals blocked.
|
||||||
block_term_signal();
|
block_signals();
|
||||||
|
|
||||||
// Create a context so we can apply configuration to it
|
// Create a context so we can apply configuration to it
|
||||||
shared_ptr<BeesContext> bc = make_shared<BeesContext>();
|
shared_ptr<BeesContext> bc = make_shared<BeesContext>();
|
||||||
BEESLOGDEBUG("context constructed");
|
BEESLOGDEBUG("context constructed");
|
||||||
|
|
||||||
string cwd(readlink_or_die("/proc/self/cwd"));
|
|
||||||
|
|
||||||
// Defaults
|
// Defaults
|
||||||
|
bool use_relative_paths = false;
|
||||||
bool chatter_prefix_timestamp = true;
|
bool chatter_prefix_timestamp = true;
|
||||||
double thread_factor = 0;
|
double thread_factor = 0;
|
||||||
unsigned thread_count = 0;
|
unsigned thread_count = 0;
|
||||||
unsigned thread_min = 0;
|
unsigned thread_min = 0;
|
||||||
double load_target = 0;
|
double load_target = 0;
|
||||||
bool workaround_btrfs_send = false;
|
bool workaround_btrfs_send = false;
|
||||||
BeesRoots::ScanMode root_scan_mode = BeesRoots::SCAN_MODE_INDEPENDENT;
|
BeesRoots::ScanMode root_scan_mode = BeesRoots::SCAN_MODE_EXTENT;
|
||||||
|
|
||||||
// Configure getopt_long
|
// Configure getopt_long
|
||||||
|
// Options with no short form
|
||||||
|
enum {
|
||||||
|
BEES_OPT_THROTTLE_FACTOR = 256,
|
||||||
|
};
|
||||||
static const struct option long_options[] = {
|
static const struct option long_options[] = {
|
||||||
{ "thread-factor", required_argument, NULL, 'C' },
|
{ .name = "thread-factor", .has_arg = required_argument, .val = 'C' },
|
||||||
{ "thread-min", required_argument, NULL, 'G' },
|
{ .name = "throttle-factor", .has_arg = required_argument, .val = BEES_OPT_THROTTLE_FACTOR },
|
||||||
{ "strip-paths", no_argument, NULL, 'P' },
|
{ .name = "thread-min", .has_arg = required_argument, .val = 'G' },
|
||||||
{ "no-timestamps", no_argument, NULL, 'T' },
|
{ .name = "strip-paths", .has_arg = no_argument, .val = 'P' },
|
||||||
{ "workaround-btrfs-send", no_argument, NULL, 'a' },
|
{ .name = "no-timestamps", .has_arg = no_argument, .val = 'T' },
|
||||||
{ "thread-count", required_argument, NULL, 'c' },
|
{ .name = "workaround-btrfs-send", .has_arg = no_argument, .val = 'a' },
|
||||||
{ "loadavg-target", required_argument, NULL, 'g' },
|
{ .name = "thread-count", .has_arg = required_argument, .val = 'c' },
|
||||||
{ "help", no_argument, NULL, 'h' },
|
{ .name = "loadavg-target", .has_arg = required_argument, .val = 'g' },
|
||||||
{ "scan-mode", required_argument, NULL, 'm' },
|
{ .name = "help", .has_arg = no_argument, .val = 'h' },
|
||||||
{ "absolute-paths", no_argument, NULL, 'p' },
|
{ .name = "scan-mode", .has_arg = required_argument, .val = 'm' },
|
||||||
{ "timestamps", no_argument, NULL, 't' },
|
{ .name = "absolute-paths", .has_arg = no_argument, .val = 'p' },
|
||||||
{ "verbose", required_argument, NULL, 'v' },
|
{ .name = "timestamps", .has_arg = no_argument, .val = 't' },
|
||||||
{ 0, 0, 0, 0 },
|
{ .name = "verbose", .has_arg = required_argument, .val = 'v' },
|
||||||
|
{ 0 },
|
||||||
};
|
};
|
||||||
|
|
||||||
// Build getopt_long's short option list from the long_options table.
|
// Build getopt_long's short option list from the long_options table.
|
||||||
// While we're at it, make sure we didn't duplicate any options.
|
// While we're at it, make sure we didn't duplicate any options.
|
||||||
string getopt_list;
|
string getopt_list;
|
||||||
set<decltype(option::val)> option_vals;
|
map<decltype(option::val), string> option_vals;
|
||||||
for (const struct option *op = long_options; op->val; ++op) {
|
for (const struct option *op = long_options; op->val; ++op) {
|
||||||
THROW_CHECK1(runtime_error, op->val, !option_vals.count(op->val));
|
const auto ins_rv = option_vals.insert(make_pair(op->val, op->name));
|
||||||
option_vals.insert(op->val);
|
THROW_CHECK1(runtime_error, op->val, ins_rv.second);
|
||||||
if ((op->val & 0xff) != op->val) {
|
if ((op->val & 0xff) != op->val) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -639,27 +823,31 @@ bees_main(int argc, char *argv[])
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Parse options
|
// Parse options
|
||||||
int c;
|
|
||||||
while (true) {
|
while (true) {
|
||||||
int option_index = 0;
|
int option_index = 0;
|
||||||
|
|
||||||
c = getopt_long(argc, argv, getopt_list.c_str(), long_options, &option_index);
|
const auto c = getopt_long(argc, argv, getopt_list.c_str(), long_options, &option_index);
|
||||||
if (-1 == c) {
|
if (-1 == c) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
BEESLOGDEBUG("Parsing option '" << static_cast<char>(c) << "'");
|
// getopt_long should have weeded out any invalid options,
|
||||||
|
// so we can go ahead and throw here
|
||||||
|
BEESLOGDEBUG("Parsing option '" << option_vals.at(c) << "'");
|
||||||
|
|
||||||
switch (c) {
|
switch (c) {
|
||||||
|
|
||||||
case 'C':
|
case 'C':
|
||||||
thread_factor = stod(optarg);
|
thread_factor = stod(optarg);
|
||||||
break;
|
break;
|
||||||
|
case BEES_OPT_THROTTLE_FACTOR:
|
||||||
|
bees_throttle_factor = stod(optarg);
|
||||||
|
break;
|
||||||
case 'G':
|
case 'G':
|
||||||
thread_min = stoul(optarg);
|
thread_min = stoul(optarg);
|
||||||
break;
|
break;
|
||||||
case 'P':
|
case 'P':
|
||||||
crucible::set_relative_path(cwd);
|
use_relative_paths = true;
|
||||||
break;
|
break;
|
||||||
case 'T':
|
case 'T':
|
||||||
chatter_prefix_timestamp = false;
|
chatter_prefix_timestamp = false;
|
||||||
@@ -677,7 +865,7 @@ bees_main(int argc, char *argv[])
|
|||||||
root_scan_mode = static_cast<BeesRoots::ScanMode>(stoul(optarg));
|
root_scan_mode = static_cast<BeesRoots::ScanMode>(stoul(optarg));
|
||||||
break;
|
break;
|
||||||
case 'p':
|
case 'p':
|
||||||
crucible::set_relative_path("");
|
use_relative_paths = false;
|
||||||
break;
|
break;
|
||||||
case 't':
|
case 't':
|
||||||
chatter_prefix_timestamp = true;
|
chatter_prefix_timestamp = true;
|
||||||
@@ -695,12 +883,12 @@ bees_main(int argc, char *argv[])
|
|||||||
case 'h':
|
case 'h':
|
||||||
default:
|
default:
|
||||||
do_cmd_help(argv);
|
do_cmd_help(argv);
|
||||||
return EXIT_FAILURE;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (optind + 1 != argc) {
|
if (optind + 1 != argc) {
|
||||||
BEESLOGERR("Only one filesystem path per bees process");
|
BEESLOGERR("Exactly one filesystem path required");
|
||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -740,22 +928,32 @@ bees_main(int argc, char *argv[])
|
|||||||
BEESLOGNOTICE("setting worker thread pool maximum size to " << thread_count);
|
BEESLOGNOTICE("setting worker thread pool maximum size to " << thread_count);
|
||||||
TaskMaster::set_thread_count(thread_count);
|
TaskMaster::set_thread_count(thread_count);
|
||||||
|
|
||||||
|
BEESLOGNOTICE("setting throttle factor to " << bees_throttle_factor);
|
||||||
|
|
||||||
// Set root path
|
// Set root path
|
||||||
string root_path = argv[optind++];
|
string root_path = argv[optind++];
|
||||||
BEESLOGNOTICE("setting root path to '" << root_path << "'");
|
BEESLOGNOTICE("setting root path to '" << root_path << "'");
|
||||||
bc->set_root_path(root_path);
|
bc->set_root_path(root_path);
|
||||||
|
|
||||||
|
// Set path prefix
|
||||||
|
if (use_relative_paths) {
|
||||||
|
crucible::set_relative_path(name_fd(bc->root_fd()));
|
||||||
|
}
|
||||||
|
|
||||||
// Workaround for btrfs send
|
// Workaround for btrfs send
|
||||||
bc->roots()->set_workaround_btrfs_send(workaround_btrfs_send);
|
bc->roots()->set_workaround_btrfs_send(workaround_btrfs_send);
|
||||||
|
|
||||||
// Set root scan mode
|
// Set root scan mode
|
||||||
bc->roots()->set_scan_mode(root_scan_mode);
|
bc->roots()->set_scan_mode(root_scan_mode);
|
||||||
|
|
||||||
|
// Workaround for the logical-ino-vs-clone kernel bug
|
||||||
|
MultiLocker::enable_locking(true);
|
||||||
|
|
||||||
// Start crawlers
|
// Start crawlers
|
||||||
bc->start();
|
bc->start();
|
||||||
|
|
||||||
// Now we just wait forever
|
// Now we just wait forever
|
||||||
wait_for_term_signal();
|
wait_for_signals();
|
||||||
|
|
||||||
// Shut it down
|
// Shut it down
|
||||||
bc->stop();
|
bc->stop();
|
||||||
|
76
src/bees.h
76
src/bees.h
@@ -78,13 +78,13 @@ const int BEES_PROGRESS_INTERVAL = BEES_STATS_INTERVAL;
|
|||||||
const int BEES_STATUS_INTERVAL = 1;
|
const int BEES_STATUS_INTERVAL = 1;
|
||||||
|
|
||||||
// Number of file FDs to cache when not in active use
|
// Number of file FDs to cache when not in active use
|
||||||
const size_t BEES_FILE_FD_CACHE_SIZE = 4096;
|
const size_t BEES_FILE_FD_CACHE_SIZE = 524288;
|
||||||
|
|
||||||
// Number of root FDs to cache when not in active use
|
// Number of root FDs to cache when not in active use
|
||||||
const size_t BEES_ROOT_FD_CACHE_SIZE = 1024;
|
const size_t BEES_ROOT_FD_CACHE_SIZE = 65536;
|
||||||
|
|
||||||
// Number of FDs to open (rlimit)
|
// Number of FDs to open (rlimit)
|
||||||
const size_t BEES_OPEN_FILE_LIMIT = (BEES_FILE_FD_CACHE_SIZE + BEES_ROOT_FD_CACHE_SIZE) * 2 + 100;
|
const size_t BEES_OPEN_FILE_LIMIT = BEES_FILE_FD_CACHE_SIZE + BEES_ROOT_FD_CACHE_SIZE + 100;
|
||||||
|
|
||||||
// Worker thread factor (multiplied by detected number of CPU cores)
|
// Worker thread factor (multiplied by detected number of CPU cores)
|
||||||
const double BEES_DEFAULT_THREAD_FACTOR = 1.0;
|
const double BEES_DEFAULT_THREAD_FACTOR = 1.0;
|
||||||
@@ -93,10 +93,11 @@ const double BEES_DEFAULT_THREAD_FACTOR = 1.0;
|
|||||||
const double BEES_TOO_LONG = 5.0;
|
const double BEES_TOO_LONG = 5.0;
|
||||||
|
|
||||||
// Avoid any extent where LOGICAL_INO takes this much kernel CPU time
|
// Avoid any extent where LOGICAL_INO takes this much kernel CPU time
|
||||||
const double BEES_TOXIC_SYS_DURATION = 0.1;
|
const double BEES_TOXIC_SYS_DURATION = 5.0;
|
||||||
|
|
||||||
// Maximum number of refs to a single extent
|
// Maximum number of refs to a single extent before we have other problems
|
||||||
const size_t BEES_MAX_EXTENT_REF_COUNT = (16 * 1024 * 1024 / 24) - 1;
|
// If we have more than 10K refs to an extent, adding another will save 0.01% space
|
||||||
|
const size_t BEES_MAX_EXTENT_REF_COUNT = 9999; // (16 * 1024 * 1024 / 24);
|
||||||
|
|
||||||
// How long between hash table histograms
|
// How long between hash table histograms
|
||||||
const double BEES_HASH_TABLE_ANALYZE_INTERVAL = BEES_STATS_INTERVAL;
|
const double BEES_HASH_TABLE_ANALYZE_INTERVAL = BEES_STATS_INTERVAL;
|
||||||
@@ -121,9 +122,9 @@ const int FLAGS_OPEN_FANOTIFY = O_RDWR | O_NOATIME | O_CLOEXEC | O_LARGEFILE;
|
|||||||
// macros ----------------------------------------
|
// macros ----------------------------------------
|
||||||
|
|
||||||
#define BEESLOG(lv,x) do { if (lv < bees_log_level) { Chatter __chatter(lv, BeesNote::get_name()); __chatter << x; } } while (0)
|
#define BEESLOG(lv,x) do { if (lv < bees_log_level) { Chatter __chatter(lv, BeesNote::get_name()); __chatter << x; } } while (0)
|
||||||
#define BEESLOGTRACE(x) do { BEESLOG(LOG_DEBUG, x); BeesTracer::trace_now(); } while (0)
|
|
||||||
|
|
||||||
#define BEESTRACE(x) BeesTracer SRSLY_WTF_C(beesTracer_, __LINE__) ([&]() { BEESLOG(LOG_ERR, x); })
|
#define BEES_TRACE_LEVEL LOG_DEBUG
|
||||||
|
#define BEESTRACE(x) BeesTracer SRSLY_WTF_C(beesTracer_, __LINE__) ([&]() { BEESLOG(BEES_TRACE_LEVEL, "TRACE: " << x << " at " << __FILE__ << ":" << __LINE__); })
|
||||||
#define BEESTOOLONG(x) BeesTooLong SRSLY_WTF_C(beesTooLong_, __LINE__) ([&](ostream &_btl_os) { _btl_os << x; })
|
#define BEESTOOLONG(x) BeesTooLong SRSLY_WTF_C(beesTooLong_, __LINE__) ([&](ostream &_btl_os) { _btl_os << x; })
|
||||||
#define BEESNOTE(x) BeesNote SRSLY_WTF_C(beesNote_, __LINE__) ([&](ostream &_btl_os) { _btl_os << x; })
|
#define BEESNOTE(x) BeesNote SRSLY_WTF_C(beesNote_, __LINE__) ([&](ostream &_btl_os) { _btl_os << x; })
|
||||||
|
|
||||||
@@ -133,6 +134,14 @@ const int FLAGS_OPEN_FANOTIFY = O_RDWR | O_NOATIME | O_CLOEXEC | O_LARGEFILE;
|
|||||||
#define BEESLOGINFO(x) BEESLOG(LOG_INFO, x)
|
#define BEESLOGINFO(x) BEESLOG(LOG_INFO, x)
|
||||||
#define BEESLOGDEBUG(x) BEESLOG(LOG_DEBUG, x)
|
#define BEESLOGDEBUG(x) BEESLOG(LOG_DEBUG, x)
|
||||||
|
|
||||||
|
#define BEESLOGONCE(__x) do { \
|
||||||
|
static bool already_logged = false; \
|
||||||
|
if (!already_logged) { \
|
||||||
|
already_logged = true; \
|
||||||
|
BEESLOGNOTICE(__x); \
|
||||||
|
} \
|
||||||
|
} while (false)
|
||||||
|
|
||||||
#define BEESCOUNT(stat) do { \
|
#define BEESCOUNT(stat) do { \
|
||||||
BeesStats::s_global.add_count(#stat); \
|
BeesStats::s_global.add_count(#stat); \
|
||||||
} while (0)
|
} while (0)
|
||||||
@@ -184,7 +193,7 @@ class BeesTracer {
|
|||||||
thread_local static bool tl_silent;
|
thread_local static bool tl_silent;
|
||||||
thread_local static bool tl_first;
|
thread_local static bool tl_first;
|
||||||
public:
|
public:
|
||||||
BeesTracer(function<void()> f, bool silent = false);
|
BeesTracer(const function<void()> &f, bool silent = false);
|
||||||
~BeesTracer();
|
~BeesTracer();
|
||||||
static void trace_now();
|
static void trace_now();
|
||||||
static bool get_silent();
|
static bool get_silent();
|
||||||
@@ -299,6 +308,11 @@ public:
|
|||||||
off_t grow_begin(off_t delta);
|
off_t grow_begin(off_t delta);
|
||||||
/// @}
|
/// @}
|
||||||
|
|
||||||
|
/// @{ Make range smaller
|
||||||
|
off_t shrink_end(off_t delta);
|
||||||
|
off_t shrink_begin(off_t delta);
|
||||||
|
/// @}
|
||||||
|
|
||||||
friend ostream & operator<<(ostream &os, const BeesFileRange &bfr);
|
friend ostream & operator<<(ostream &os, const BeesFileRange &bfr);
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -515,7 +529,7 @@ class BeesCrawl {
|
|||||||
|
|
||||||
bool fetch_extents();
|
bool fetch_extents();
|
||||||
void fetch_extents_harder();
|
void fetch_extents_harder();
|
||||||
bool next_transid();
|
bool restart_crawl_unlocked();
|
||||||
BeesFileRange bti_to_bfr(const BtrfsTreeItem &bti) const;
|
BeesFileRange bti_to_bfr(const BtrfsTreeItem &bti) const;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@@ -527,6 +541,9 @@ public:
|
|||||||
BeesCrawlState get_state_end() const;
|
BeesCrawlState get_state_end() const;
|
||||||
void set_state(const BeesCrawlState &bcs);
|
void set_state(const BeesCrawlState &bcs);
|
||||||
void deferred(bool def_setting);
|
void deferred(bool def_setting);
|
||||||
|
bool deferred() const;
|
||||||
|
bool finished() const;
|
||||||
|
bool restart_crawl();
|
||||||
};
|
};
|
||||||
|
|
||||||
class BeesScanMode;
|
class BeesScanMode;
|
||||||
@@ -534,9 +551,9 @@ class BeesScanMode;
|
|||||||
class BeesRoots : public enable_shared_from_this<BeesRoots> {
|
class BeesRoots : public enable_shared_from_this<BeesRoots> {
|
||||||
shared_ptr<BeesContext> m_ctx;
|
shared_ptr<BeesContext> m_ctx;
|
||||||
|
|
||||||
BtrfsRootFetcher m_root_fetcher;
|
|
||||||
BeesStringFile m_crawl_state_file;
|
BeesStringFile m_crawl_state_file;
|
||||||
map<uint64_t, shared_ptr<BeesCrawl>> m_root_crawl_map;
|
using CrawlMap = map<uint64_t, shared_ptr<BeesCrawl>>;
|
||||||
|
CrawlMap m_root_crawl_map;
|
||||||
mutex m_mutex;
|
mutex m_mutex;
|
||||||
uint64_t m_crawl_dirty = 0;
|
uint64_t m_crawl_dirty = 0;
|
||||||
uint64_t m_crawl_clean = 0;
|
uint64_t m_crawl_clean = 0;
|
||||||
@@ -555,17 +572,13 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> {
|
|||||||
condition_variable m_stop_condvar;
|
condition_variable m_stop_condvar;
|
||||||
bool m_stop_requested = false;
|
bool m_stop_requested = false;
|
||||||
|
|
||||||
void insert_new_crawl();
|
CrawlMap insert_new_crawl();
|
||||||
void insert_root(const BeesCrawlState &bcs);
|
|
||||||
Fd open_root_nocache(uint64_t root);
|
Fd open_root_nocache(uint64_t root);
|
||||||
Fd open_root_ino_nocache(uint64_t root, uint64_t ino);
|
Fd open_root_ino_nocache(uint64_t root, uint64_t ino);
|
||||||
uint64_t transid_min();
|
|
||||||
uint64_t transid_max();
|
|
||||||
uint64_t transid_max_nocache();
|
uint64_t transid_max_nocache();
|
||||||
void state_load();
|
void state_load();
|
||||||
ostream &state_to_stream(ostream &os);
|
ostream &state_to_stream(ostream &os);
|
||||||
void state_save();
|
void state_save();
|
||||||
bool crawl_roots();
|
|
||||||
string crawl_state_filename() const;
|
string crawl_state_filename() const;
|
||||||
void crawl_state_set_dirty();
|
void crawl_state_set_dirty();
|
||||||
void crawl_state_erase(const BeesCrawlState &bcs);
|
void crawl_state_erase(const BeesCrawlState &bcs);
|
||||||
@@ -573,13 +586,16 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> {
|
|||||||
void writeback_thread();
|
void writeback_thread();
|
||||||
uint64_t next_root(uint64_t root = 0);
|
uint64_t next_root(uint64_t root = 0);
|
||||||
void current_state_set(const BeesCrawlState &bcs);
|
void current_state_set(const BeesCrawlState &bcs);
|
||||||
RateEstimator& transid_re();
|
|
||||||
bool crawl_batch(shared_ptr<BeesCrawl> crawl);
|
bool crawl_batch(shared_ptr<BeesCrawl> crawl);
|
||||||
void clear_caches();
|
void clear_caches();
|
||||||
|
shared_ptr<BeesCrawl> insert_root(const BeesCrawlState &bcs);
|
||||||
|
bool up_to_date(const BeesCrawlState &bcs);
|
||||||
|
|
||||||
friend class BeesCrawl;
|
friend class BeesCrawl;
|
||||||
friend class BeesFdCache;
|
friend class BeesFdCache;
|
||||||
friend class BeesScanMode;
|
friend class BeesScanMode;
|
||||||
|
friend class BeesScanModeSubvol;
|
||||||
|
friend class BeesScanModeExtent;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
BeesRoots(shared_ptr<BeesContext> ctx);
|
BeesRoots(shared_ptr<BeesContext> ctx);
|
||||||
@@ -595,17 +611,22 @@ public:
|
|||||||
Fd open_root_ino(const BeesFileId &bfi) { return open_root_ino(bfi.root(), bfi.ino()); }
|
Fd open_root_ino(const BeesFileId &bfi) { return open_root_ino(bfi.root(), bfi.ino()); }
|
||||||
bool is_root_ro(uint64_t root);
|
bool is_root_ro(uint64_t root);
|
||||||
|
|
||||||
// TODO: do extent-tree scans instead
|
|
||||||
enum ScanMode {
|
enum ScanMode {
|
||||||
SCAN_MODE_LOCKSTEP,
|
SCAN_MODE_LOCKSTEP,
|
||||||
SCAN_MODE_INDEPENDENT,
|
SCAN_MODE_INDEPENDENT,
|
||||||
SCAN_MODE_SEQUENTIAL,
|
SCAN_MODE_SEQUENTIAL,
|
||||||
SCAN_MODE_RECENT,
|
SCAN_MODE_RECENT,
|
||||||
|
SCAN_MODE_EXTENT,
|
||||||
SCAN_MODE_COUNT, // must be last
|
SCAN_MODE_COUNT, // must be last
|
||||||
};
|
};
|
||||||
|
|
||||||
void set_scan_mode(ScanMode new_mode);
|
void set_scan_mode(ScanMode new_mode);
|
||||||
void set_workaround_btrfs_send(bool do_avoid);
|
void set_workaround_btrfs_send(bool do_avoid);
|
||||||
|
|
||||||
|
uint64_t transid_min();
|
||||||
|
uint64_t transid_max();
|
||||||
|
|
||||||
|
void wait_for_transid(const uint64_t count);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BeesHash {
|
struct BeesHash {
|
||||||
@@ -665,6 +686,8 @@ class BeesRangePair : public pair<BeesFileRange, BeesFileRange> {
|
|||||||
public:
|
public:
|
||||||
BeesRangePair(const BeesFileRange &src, const BeesFileRange &dst);
|
BeesRangePair(const BeesFileRange &src, const BeesFileRange &dst);
|
||||||
bool grow(shared_ptr<BeesContext> ctx, bool constrained);
|
bool grow(shared_ptr<BeesContext> ctx, bool constrained);
|
||||||
|
void shrink_begin(const off_t delta);
|
||||||
|
void shrink_end(const off_t delta);
|
||||||
BeesRangePair copy_closed() const;
|
BeesRangePair copy_closed() const;
|
||||||
bool operator<(const BeesRangePair &that) const;
|
bool operator<(const BeesRangePair &that) const;
|
||||||
friend ostream & operator<<(ostream &os, const BeesRangePair &brp);
|
friend ostream & operator<<(ostream &os, const BeesRangePair &brp);
|
||||||
@@ -715,6 +738,7 @@ class BeesContext : public enable_shared_from_this<BeesContext> {
|
|||||||
shared_ptr<BeesHashTable> m_hash_table;
|
shared_ptr<BeesHashTable> m_hash_table;
|
||||||
shared_ptr<BeesRoots> m_roots;
|
shared_ptr<BeesRoots> m_roots;
|
||||||
Pool<BeesTempFile> m_tmpfile_pool;
|
Pool<BeesTempFile> m_tmpfile_pool;
|
||||||
|
Pool<BtrfsIoctlLogicalInoArgs> m_logical_ino_pool;
|
||||||
|
|
||||||
LRUCache<BeesResolveAddrResult, BeesAddress> m_resolve_cache;
|
LRUCache<BeesResolveAddrResult, BeesAddress> m_resolve_cache;
|
||||||
|
|
||||||
@@ -737,11 +761,14 @@ class BeesContext : public enable_shared_from_this<BeesContext> {
|
|||||||
shared_ptr<BeesThread> m_progress_thread;
|
shared_ptr<BeesThread> m_progress_thread;
|
||||||
shared_ptr<BeesThread> m_status_thread;
|
shared_ptr<BeesThread> m_status_thread;
|
||||||
|
|
||||||
|
mutex m_progress_mtx;
|
||||||
|
string m_progress_str;
|
||||||
|
|
||||||
void set_root_fd(Fd fd);
|
void set_root_fd(Fd fd);
|
||||||
|
|
||||||
BeesResolveAddrResult resolve_addr_uncached(BeesAddress addr);
|
BeesResolveAddrResult resolve_addr_uncached(BeesAddress addr);
|
||||||
|
|
||||||
BeesFileRange scan_one_extent(const BeesFileRange &bfr, const Extent &e);
|
void scan_one_extent(const BeesFileRange &bfr, const Extent &e);
|
||||||
void rewrite_file_range(const BeesFileRange &bfr);
|
void rewrite_file_range(const BeesFileRange &bfr);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@@ -754,6 +781,8 @@ public:
|
|||||||
|
|
||||||
bool scan_forward(const BeesFileRange &bfr);
|
bool scan_forward(const BeesFileRange &bfr);
|
||||||
|
|
||||||
|
shared_ptr<BtrfsIoctlLogicalInoArgs> logical_ino(uint64_t bytenr, bool all_refs);
|
||||||
|
|
||||||
bool is_root_ro(uint64_t root);
|
bool is_root_ro(uint64_t root);
|
||||||
BeesRangePair dup_extent(const BeesFileRange &src, const shared_ptr<BeesTempFile> &tmpfile);
|
BeesRangePair dup_extent(const BeesFileRange &src, const shared_ptr<BeesTempFile> &tmpfile);
|
||||||
bool dedup(const BeesRangePair &brp);
|
bool dedup(const BeesRangePair &brp);
|
||||||
@@ -770,6 +799,8 @@ public:
|
|||||||
|
|
||||||
void dump_status();
|
void dump_status();
|
||||||
void show_progress();
|
void show_progress();
|
||||||
|
void set_progress(const string &str);
|
||||||
|
string get_progress();
|
||||||
|
|
||||||
void start();
|
void start();
|
||||||
void stop();
|
void stop();
|
||||||
@@ -832,7 +863,7 @@ public:
|
|||||||
BeesFileRange find_one_match(BeesHash hash);
|
BeesFileRange find_one_match(BeesHash hash);
|
||||||
|
|
||||||
void replace_src(const BeesFileRange &src_bfr);
|
void replace_src(const BeesFileRange &src_bfr);
|
||||||
BeesFileRange replace_dst(const BeesFileRange &dst_bfr);
|
BeesRangePair replace_dst(const BeesFileRange &dst_bfr);
|
||||||
|
|
||||||
bool found_addr() const { return m_found_addr; }
|
bool found_addr() const { return m_found_addr; }
|
||||||
bool found_data() const { return m_found_data; }
|
bool found_data() const { return m_found_data; }
|
||||||
@@ -866,7 +897,10 @@ extern const char *BEES_VERSION;
|
|||||||
extern thread_local default_random_engine bees_generator;
|
extern thread_local default_random_engine bees_generator;
|
||||||
string pretty(double d);
|
string pretty(double d);
|
||||||
void bees_readahead(int fd, off_t offset, size_t size);
|
void bees_readahead(int fd, off_t offset, size_t size);
|
||||||
|
void bees_readahead_pair(int fd, off_t offset, size_t size, int fd2, off_t offset2, size_t size2);
|
||||||
void bees_unreadahead(int fd, off_t offset, size_t size);
|
void bees_unreadahead(int fd, off_t offset, size_t size);
|
||||||
|
void bees_throttle(double time_used, const char *context);
|
||||||
string format_time(time_t t);
|
string format_time(time_t t);
|
||||||
|
bool exception_check();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -8,6 +8,7 @@ PROGRAMS = \
|
|||||||
process \
|
process \
|
||||||
progress \
|
progress \
|
||||||
seeker \
|
seeker \
|
||||||
|
table \
|
||||||
task \
|
task \
|
||||||
|
|
||||||
all: test
|
all: test
|
||||||
|
@@ -3,6 +3,7 @@
|
|||||||
#include "crucible/limits.h"
|
#include "crucible/limits.h"
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
using namespace crucible;
|
using namespace crucible;
|
||||||
|
|
||||||
|
@@ -12,23 +12,49 @@ using namespace std;
|
|||||||
void
|
void
|
||||||
test_progress()
|
test_progress()
|
||||||
{
|
{
|
||||||
|
// On create, begin == end == constructor argument
|
||||||
ProgressTracker<uint64_t> pt(123);
|
ProgressTracker<uint64_t> pt(123);
|
||||||
auto hold = pt.hold(234);
|
|
||||||
auto hold2 = pt.hold(345);
|
|
||||||
assert(pt.begin() == 123);
|
assert(pt.begin() == 123);
|
||||||
assert(pt.end() == 345);
|
assert(pt.end() == 123);
|
||||||
auto hold3 = pt.hold(456);
|
|
||||||
assert(pt.begin() == 123);
|
// Holding a position past the end increases the end (and moves begin to match)
|
||||||
assert(pt.end() == 456);
|
auto hold345 = pt.hold(345);
|
||||||
hold2.reset();
|
|
||||||
assert(pt.begin() == 123);
|
|
||||||
assert(pt.end() == 456);
|
|
||||||
hold.reset();
|
|
||||||
assert(pt.begin() == 345);
|
assert(pt.begin() == 345);
|
||||||
|
assert(pt.end() == 345);
|
||||||
|
|
||||||
|
// Holding a position before begin reduces begin, without changing end
|
||||||
|
auto hold234 = pt.hold(234);
|
||||||
|
assert(pt.begin() == 234);
|
||||||
|
assert(pt.end() == 345);
|
||||||
|
|
||||||
|
// Holding a position past the end increases the end, without affecting begin
|
||||||
|
auto hold456 = pt.hold(456);
|
||||||
|
assert(pt.begin() == 234);
|
||||||
assert(pt.end() == 456);
|
assert(pt.end() == 456);
|
||||||
hold3.reset();
|
|
||||||
|
// Releasing a position in the middle affects neither begin nor end
|
||||||
|
hold345.reset();
|
||||||
|
assert(pt.begin() == 234);
|
||||||
|
assert(pt.end() == 456);
|
||||||
|
|
||||||
|
// Hold another position in the middle to test begin moving forward
|
||||||
|
auto hold400 = pt.hold(400);
|
||||||
|
|
||||||
|
// Releasing a position at the beginning moves begin forward
|
||||||
|
hold234.reset();
|
||||||
|
assert(pt.begin() == 400);
|
||||||
|
assert(pt.end() == 456);
|
||||||
|
|
||||||
|
// Releasing a position at the end doesn't move end backward
|
||||||
|
hold456.reset();
|
||||||
|
assert(pt.begin() == 400);
|
||||||
|
assert(pt.end() == 456);
|
||||||
|
|
||||||
|
// Releasing a position in the middle doesn't move end backward but does move begin forward
|
||||||
|
hold400.reset();
|
||||||
assert(pt.begin() == 456);
|
assert(pt.begin() == 456);
|
||||||
assert(pt.end() == 456);
|
assert(pt.end() == 456);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
@@ -19,7 +19,9 @@ seeker_finder(const vector<uint64_t> &vec, uint64_t lower, uint64_t upper)
|
|||||||
if (ub != s.end()) ++ub;
|
if (ub != s.end()) ++ub;
|
||||||
if (ub != s.end()) ++ub;
|
if (ub != s.end()) ++ub;
|
||||||
for (; ub != s.end(); ++ub) {
|
for (; ub != s.end(); ++ub) {
|
||||||
if (*ub > upper) break;
|
if (*ub > upper) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return set<uint64_t>(lb, ub);
|
return set<uint64_t>(lb, ub);
|
||||||
}
|
}
|
||||||
@@ -28,7 +30,7 @@ static bool test_fails = false;
|
|||||||
|
|
||||||
static
|
static
|
||||||
void
|
void
|
||||||
seeker_test(const vector<uint64_t> &vec, size_t target)
|
seeker_test(const vector<uint64_t> &vec, uint64_t const target, bool const always_out = false)
|
||||||
{
|
{
|
||||||
cerr << "Find " << target << " in {";
|
cerr << "Find " << target << " in {";
|
||||||
for (auto i : vec) {
|
for (auto i : vec) {
|
||||||
@@ -36,13 +38,15 @@ seeker_test(const vector<uint64_t> &vec, size_t target)
|
|||||||
}
|
}
|
||||||
cerr << " } = ";
|
cerr << " } = ";
|
||||||
size_t loops = 0;
|
size_t loops = 0;
|
||||||
|
tl_seeker_debug_str = make_shared<ostringstream>();
|
||||||
|
bool local_test_fails = false;
|
||||||
bool excepted = catch_all([&]() {
|
bool excepted = catch_all([&]() {
|
||||||
auto found = seek_backward(target, [&](uint64_t lower, uint64_t upper) {
|
const auto found = seek_backward(target, [&](uint64_t lower, uint64_t upper) {
|
||||||
++loops;
|
++loops;
|
||||||
return seeker_finder(vec, lower, upper);
|
return seeker_finder(vec, lower, upper);
|
||||||
});
|
}, uint64_t(32));
|
||||||
cerr << found;
|
cerr << found;
|
||||||
size_t my_found = 0;
|
uint64_t my_found = 0;
|
||||||
for (auto i : vec) {
|
for (auto i : vec) {
|
||||||
if (i <= target) {
|
if (i <= target) {
|
||||||
my_found = i;
|
my_found = i;
|
||||||
@@ -52,13 +56,15 @@ seeker_test(const vector<uint64_t> &vec, size_t target)
|
|||||||
cerr << " (correct)";
|
cerr << " (correct)";
|
||||||
} else {
|
} else {
|
||||||
cerr << " (INCORRECT - right answer is " << my_found << ")";
|
cerr << " (INCORRECT - right answer is " << my_found << ")";
|
||||||
test_fails = true;
|
local_test_fails = true;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
cerr << " (" << loops << " loops)" << endl;
|
cerr << " (" << loops << " loops)" << endl;
|
||||||
if (excepted) {
|
if (excepted || local_test_fails || always_out) {
|
||||||
test_fails = true;
|
cerr << dynamic_pointer_cast<ostringstream>(tl_seeker_debug_str)->str();
|
||||||
}
|
}
|
||||||
|
test_fails = test_fails || local_test_fails;
|
||||||
|
tl_seeker_debug_str.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
@@ -89,6 +95,39 @@ test_seeker()
|
|||||||
seeker_test(vector<uint64_t> { 0, numeric_limits<uint64_t>::max() }, numeric_limits<uint64_t>::max());
|
seeker_test(vector<uint64_t> { 0, numeric_limits<uint64_t>::max() }, numeric_limits<uint64_t>::max());
|
||||||
seeker_test(vector<uint64_t> { 0, numeric_limits<uint64_t>::max() }, numeric_limits<uint64_t>::max() - 1);
|
seeker_test(vector<uint64_t> { 0, numeric_limits<uint64_t>::max() }, numeric_limits<uint64_t>::max() - 1);
|
||||||
seeker_test(vector<uint64_t> { 0, numeric_limits<uint64_t>::max() - 1 }, numeric_limits<uint64_t>::max());
|
seeker_test(vector<uint64_t> { 0, numeric_limits<uint64_t>::max() - 1 }, numeric_limits<uint64_t>::max());
|
||||||
|
|
||||||
|
seeker_test(vector<uint64_t> { 0, 1, 2, 4, 8 }, 0);
|
||||||
|
seeker_test(vector<uint64_t> { 0, 1, 2, 4, 8 }, 1);
|
||||||
|
seeker_test(vector<uint64_t> { 0, 1, 2, 4, 8 }, 2);
|
||||||
|
seeker_test(vector<uint64_t> { 0, 1, 2, 4, 8 }, 3);
|
||||||
|
seeker_test(vector<uint64_t> { 0, 1, 2, 4, 8 }, 4);
|
||||||
|
seeker_test(vector<uint64_t> { 0, 1, 2, 4, 8 }, 5);
|
||||||
|
seeker_test(vector<uint64_t> { 0, 1, 2, 4, 8 }, 6);
|
||||||
|
seeker_test(vector<uint64_t> { 0, 1, 2, 4, 8 }, 7);
|
||||||
|
seeker_test(vector<uint64_t> { 0, 1, 2, 4, 8 }, 8);
|
||||||
|
seeker_test(vector<uint64_t> { 0, 1, 2, 4, 8 }, 9);
|
||||||
|
seeker_test(vector<uint64_t> { 0, 1, 2, 4, 8 }, numeric_limits<uint64_t>::max() );
|
||||||
|
seeker_test(vector<uint64_t> { 0, 1, 2, 4, 8 }, numeric_limits<uint64_t>::max() - 1 );
|
||||||
|
seeker_test(vector<uint64_t> { 0, 1, 2, 4, 8 }, numeric_limits<uint64_t>::max() - 2 );
|
||||||
|
seeker_test(vector<uint64_t> { 0, 1, 2, 4, 8 }, numeric_limits<uint64_t>::max() - 3 );
|
||||||
|
seeker_test(vector<uint64_t> { 0, 1, 2, 4, 8 }, numeric_limits<uint64_t>::max() - 4 );
|
||||||
|
seeker_test(vector<uint64_t> { 0, 1, 2, 4, 8 }, numeric_limits<uint64_t>::max() - 5 );
|
||||||
|
seeker_test(vector<uint64_t> { 0, 1, 2, 4, 8 }, numeric_limits<uint64_t>::max() - 6 );
|
||||||
|
seeker_test(vector<uint64_t> { 0, 1, 2, 4, 8 }, numeric_limits<uint64_t>::max() - 7 );
|
||||||
|
seeker_test(vector<uint64_t> { 0, 1, 2, 4, 8 }, numeric_limits<uint64_t>::max() - 8 );
|
||||||
|
|
||||||
|
// Pulled from a bees debug log
|
||||||
|
seeker_test(vector<uint64_t> {
|
||||||
|
6821962845,
|
||||||
|
6821962848,
|
||||||
|
6821963411,
|
||||||
|
6821963422,
|
||||||
|
6821963536,
|
||||||
|
6821963539,
|
||||||
|
6821963835, // <- appeared during the search, causing an exception
|
||||||
|
6821963841,
|
||||||
|
6822575316,
|
||||||
|
}, 6821971036, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
63
test/table.cc
Normal file
63
test/table.cc
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
#include "tests.h"
|
||||||
|
|
||||||
|
#include "crucible/table.h"
|
||||||
|
|
||||||
|
using namespace crucible;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
void
|
||||||
|
print_table(const Table::Table& t)
|
||||||
|
{
|
||||||
|
cerr << "BEGIN TABLE\n";
|
||||||
|
cerr << t;
|
||||||
|
cerr << "END TABLE\n";
|
||||||
|
cerr << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
test_table()
|
||||||
|
{
|
||||||
|
Table::Table t;
|
||||||
|
t.insert_row(Table::endpos, vector<Table::Content> {
|
||||||
|
Table::Text("Hello, World!"),
|
||||||
|
Table::Text("2"),
|
||||||
|
Table::Text("3"),
|
||||||
|
Table::Text("4"),
|
||||||
|
});
|
||||||
|
print_table(t);
|
||||||
|
t.insert_row(Table::endpos, vector<Table::Content> {
|
||||||
|
Table::Text("Greeting"),
|
||||||
|
Table::Text("two"),
|
||||||
|
Table::Text("three"),
|
||||||
|
Table::Text("four"),
|
||||||
|
});
|
||||||
|
print_table(t);
|
||||||
|
t.insert_row(Table::endpos, vector<Table::Content> {
|
||||||
|
Table::Fill('-'),
|
||||||
|
Table::Text("ii"),
|
||||||
|
Table::Text("iii"),
|
||||||
|
Table::Text("iv"),
|
||||||
|
});
|
||||||
|
print_table(t);
|
||||||
|
t.mid(" | ");
|
||||||
|
t.left("| ");
|
||||||
|
t.right(" |");
|
||||||
|
print_table(t);
|
||||||
|
t.insert_col(1, vector<Table::Content> {
|
||||||
|
Table::Text("1"),
|
||||||
|
Table::Text("one"),
|
||||||
|
Table::Text("i"),
|
||||||
|
Table::Text("I"),
|
||||||
|
});
|
||||||
|
print_table(t);
|
||||||
|
t.at(2, 1) = Table::Text("Two\nLines");
|
||||||
|
print_table(t);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main(int, char**)
|
||||||
|
{
|
||||||
|
RUN_A_TEST(test_table());
|
||||||
|
|
||||||
|
exit(EXIT_SUCCESS);
|
||||||
|
}
|
Reference in New Issue
Block a user