Mirror of https://github.com/Zygo/bees.git

Compare commits (23 commits)
Commits:

124507232f
3c5e13c885
a6ca2fa2f6
3f23a0c73f
d6732c58e2
75b2067cef
da3ef216b1
b7665d49d9
717bdf5eb5
9b60f2b94d
8978d63e75
82474b4ef4
73834beb5a
c92ba117d8
c354e77634
f21569e88c
3d5ebe4d40
3430f16998
7c764a73c8
a9a5cd03a5
299509ce32
d5a99c2f5e
fd6c3b3769
Makefile (7 changed lines)
@@ -49,11 +49,6 @@ scripts/%: scripts/%.in

scripts: scripts/beesd scripts/beesd@.service

install_tools: ## Install support tools + libs
install_tools: src
install -Dm755 bin/fiemap $(DESTDIR)$(PREFIX)/bin/fiemap
install -Dm755 bin/fiewalk $(DESTDIR)$(PREFIX)/sbin/fiewalk

install_bees: ## Install bees + libs
install_bees: src $(RUN_INSTALL_TESTS)
install -Dm755 bin/bees $(DESTDIR)$(LIBEXEC_PREFIX)/bees

@@ -67,7 +62,7 @@ ifneq ($(SYSTEMD_SYSTEM_UNIT_DIR),)

endif

install: ## Install distribution
install: install_bees install_scripts $(OPTIONAL_INSTALL_TARGETS)
install: install_bees install_scripts

help: ## Show help
@fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/\t/'
@@ -17,7 +17,6 @@ Strengths

* Space-efficient hash table and matching algorithms - can use as little as 1 GB hash table per 10 TB unique data (0.1GB/TB)
* Daemon incrementally dedupes new data using btrfs tree search
* Works with btrfs compression - dedupe any combination of compressed and uncompressed files
* **NEW** [Works around `btrfs send` problems with dedupe and incremental parent snapshots](docs/options.md)
* Works around btrfs filesystem structure to free more disk space
* Persistent hash table for rapid restart after shutdown
* Whole-filesystem dedupe - including snapshots

@@ -70,6 +69,6 @@ You can also use Github:

Copyright & License
-------------------

Copyright 2015-2022 Zygo Blaxell <bees@furryterror.org>.
Copyright 2015-2023 Zygo Blaxell <bees@furryterror.org>.

GPL (version 3 or later).
@@ -7,23 +7,24 @@ First, a warning that is not specific to bees:

severe regression that can lead to fatal metadata corruption.**
This issue is fixed in kernel 5.4.14 and later.

**Recommended kernel versions for bees are 4.19, 5.4, 5.10, 5.11, or 5.12,
with recent LTS and -stable updates.** The latest released kernel as
of this writing is 5.18.18.
**Recommended kernel versions for bees are 4.19, 5.4, 5.10, 5.11, 5.15,
6.0, or 6.1, with recent LTS and -stable updates.** The latest released
kernel as of this writing is 6.4.1.

4.14, 4.9, and 4.4 LTS kernels with recent updates are OK with
some issues. Older kernels will be slower (a little slower or a lot
slower depending on which issues are triggered). Not all fixes are
backported.
4.14, 4.9, and 4.4 LTS kernels with recent updates are OK with some
issues. Older kernels will be slower (a little slower or a lot slower
depending on which issues are triggered). Not all fixes are backported.

Obsolete non-LTS kernels have a variety of unfixed issues and should
not be used with btrfs. For details see the table below.

bees requires btrfs kernel API version 4.2 or higher, and does not work
on older kernels.
at all on older kernels.

bees will detect and use btrfs kernel API up to version 4.15 if present.
In some future bees release, this API version may become mandatory.
Some bees features rely on kernel 4.15 to work, and these features will
not be available on older kernels. Currently, bees is still usable on
older kernels with degraded performance or with options disabled, but
support for older kernels may be removed.
@@ -58,14 +59,17 @@ These bugs are particularly popular among bees users, though not all are specifi

| - | 5.8 | deadlock in `TREE_SEARCH` ioctl (core component of bees filesystem scanner), followed by regression in deadlock fix | 4.4.237, 4.9.237, 4.14.199, 4.19.146, 5.4.66, 5.8.10 and later | a48b73eca4ce btrfs: fix potential deadlock in the search ioctl, 1c78544eaa46 btrfs: fix wrong address when faulting in pages in the search ioctl
| 5.7 | 5.10 | kernel crash if balance receives fatal signal e.g. Ctrl-C | 5.4.93, 5.10.11, 5.11 and later | 18d3bff411c8 btrfs: don't get an EINTR during drop_snapshot for reloc
| 5.10 | 5.10 | 20x write performance regression | 5.10.8, 5.11 and later | e076ab2a2ca7 btrfs: shrink delalloc pages instead of full inodes
| 5.4 | 5.11 | spurious tree checker failures on extent ref hash | 5.11.5, 5.12 and later | 1119a72e223f btrfs: tree-checker: do not error out if extent ref hash doesn't match
| 5.4 | 5.11 | spurious tree checker failures on extent ref hash | 5.4.125, 5.10.43, 5.11.5, 5.12 and later | 1119a72e223f btrfs: tree-checker: do not error out if extent ref hash doesn't match
| - | 5.11 | tree mod log issue #5 | 4.4.263, 4.9.263, 4.14.227, 4.19.183, 5.4.108, 5.10.26, 5.11.9, 5.12 and later | dbcc7d57bffc btrfs: fix race when cloning extent buffer during rewind of an old root
| - | 5.12 | tree mod log issue #6 | 4.14.233, 4.19.191, 5.4.118, 5.10.36, 5.11.20, 5.12.3, 5.13 and later | f9690f426b21 btrfs: fix race when picking most recent mod log operation for an old root
| 4.15 | 5.16 | spurious warnings from `fs/fs-writeback.c` when `flushoncommit` is enabled | 5.15.27, 5.16.13, 5.17 and later | a0f0cf8341e3 btrfs: get rid of warning on transaction commit when using flushoncommit
| - | 5.17 | crash during device removal can make filesystem unmountable | 5.15.54, 5.16.20, 5.17.3, 5.18 and later | bbac58698a55 btrfs: remove device item and update super block in the same transaction
| - | 5.18 | wrong superblock num_devices makes filesystem unmountable | 4.14.283, 4.19.247, 5.4.198, 5.10.121, 5.15.46, 5.17.14, 5.18.3, 5.19 and later | d201238ccd2f btrfs: repair super block num_devices automatically
| 5.18 | 5.19 | parent transid verify failed during log tree replay after a crash during a rename operation | 5.18.18, 5.19.2, 6.0 and later | 723df2bcc9e1 btrfs: join running log transaction when logging new name
| 5.4 | - | kernel hang when multiple threads are running `LOGICAL_INO` and dedupe ioctl | - | workaround: reduce bees thread count to 1 with `-c1`
| 5.12 | 6.0 | space cache corruption and potential double allocations | 5.15.65, 5.19.6, 6.0 and later | ced8ecf026fd btrfs: fix space cache corruption and potential double allocations
| 6.3, backported to 5.15.107, 6.1.24, 6.2.11 | 6.3 | vmalloc error, failed to allocate pages | 6.3.10, 6.4 and later. Bug (f349b15e183d "mm: vmalloc: avoid warn_alloc noise caused by fatal signal" in v6.3-rc6) backported to 6.1.24, 6.2.11, and 5.15.107. | 95a301eefa82 mm/vmalloc: do not output a spurious warning when huge vmalloc() fails
| 6.2 | 6.3 | `IGNORE_OFFSET` flag ignored in `LOGICAL_INO` ioctl | 6.2.16, 6.3.3, 6.4 and later | 0cad8f14d70c btrfs: fix backref walking not returning all inode refs
| 5.4 | - | kernel hang when multiple threads are running `LOGICAL_INO` and dedupe ioctl on the same extent | - | workaround: avoid doing that

"Last bad kernel" refers to that version's last stable update from
kernel.org. Distro kernels may backport additional fixes. Consult
@@ -80,21 +84,45 @@ through 5.4.13 inclusive.

A "-" for "first bad kernel" indicates the bug has been present since
the relevant feature first appeared in btrfs.

A "-" for "last bad kernel" indicates the bug has not yet been fixed as
of 5.18.18.
A "-" for "last bad kernel" indicates the bug has not yet been fixed in
current kernels (see top of this page for which kernel version that is).

In cases where issues are fixed by commits spread out over multiple
kernel versions, "fixed kernel version" refers to the version that
contains all components of the fix.
contains the last committed component of the fix.
Workarounds for known kernel bugs
---------------------------------

* **Hangs with high worker thread counts**: On kernels newer than
5.4, multiple threads running `LOGICAL_INO` and dedupe ioctls
at the same time can lead to a kernel hang. The workaround is
to reduce the thread count to 1 with `-c1`.
* **Hangs with concurrent `LOGICAL_INO` and dedupe**: on all
kernel versions so far, multiple threads running `LOGICAL_INO`
and dedupe ioctls at the same time on the same inodes or extents
can lead to a kernel hang. The kernel enters an infinite loop in
`add_all_parents`, where `count` is 0, `ref->count` is 1, and
`btrfs_next_item` or `btrfs_next_old_item` never find a matching ref.
bees has two workarounds for this bug: 1. schedule work so that multiple
threads do not simultaneously access the same inode or the same extent,
and 2. use a brute-force global lock within bees that prevents any
thread from running `LOGICAL_INO` while any other thread is running
dedupe.

Workaround #1 isn't really a workaround, since we want to do the same
thing for unrelated performance reasons. If multiple threads try to
perform dedupe operations on the same extent or inode, btrfs will make
all the threads wait for the same locks anyway, so it's better to have
bees find some other inode or extent to work on while waiting for btrfs
to finish.

Workaround #2 doesn't seem to be needed after implementing workaround
#1, but it's better to be slightly slower than to hang one CPU core
and the filesystem until the kernel is rebooted.

It is still theoretically possible to trigger the kernel bug when
running bees at the same time as other dedupers, or other programs
that use `LOGICAL_INO` like `btdu`; however, it's extremely difficult
to reproduce the bug without closely cooperating threads.
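The sketch below illustrates the idea behind workaround #2 under stated assumptions: it is not bees' actual implementation, and the class and function names are made up for this example. It shows a two-group exclusion in which any number of `LOGICAL_INO` calls may overlap with each other, or any number of dedupe calls may overlap with each other, but the two ioctls never run at the same time.

```cpp
// Illustrative sketch only (not bees' code): group exclusion between
// LOGICAL_INO and dedupe ioctls. Threads of the same kind may overlap;
// threads of different kinds never do.
#include <condition_variable>
#include <mutex>
#include <string>

class IoctlExclusion {
	std::mutex              m_mutex;
	std::condition_variable m_cv;
	std::string             m_active_kind;      // e.g. "logical_ino" or "dedupe"
	int                     m_active_count = 0;
public:
	void enter(const std::string &kind) {
		std::unique_lock<std::mutex> lock(m_mutex);
		// Wait until nothing is running, or only our own kind is running
		m_cv.wait(lock, [&] { return m_active_count == 0 || m_active_kind == kind; });
		m_active_kind = kind;
		++m_active_count;
	}
	void leave() {
		std::unique_lock<std::mutex> lock(m_mutex);
		if (--m_active_count == 0) {
			m_active_kind.clear();
			m_cv.notify_all();
		}
	}
};
```

A worker would call `enter("logical_ino")` or `enter("dedupe")` before issuing the corresponding ioctl and `leave()` afterwards; combined with workaround #1 (never touching the same extent from two threads), this trades a little concurrency for not hanging a CPU core and the filesystem.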
* **Slow backrefs** (aka toxic extents): Under certain conditions,
if the number of references to a single shared extent grows too

@@ -110,8 +138,8 @@ Workarounds for known kernel bugs

at this time of writing only bees has a workaround for this bug.

This workaround is less necessary for kernels 5.4.96, 5.7 and later,
though it can still take 2 ms of CPU to resolve each extent ref on a
fast machine on a large, heavily fragmented file.
though the bees workaround can still be triggered on newer kernels
by changes in btrfs since kernel version 5.1.

* **dedupe breaks `btrfs send` in old kernels**. The bees option
`--workaround-btrfs-send` prevents any modification of read-only subvols

@@ -127,8 +155,6 @@ Workarounds for known kernel bugs

Unfixed kernel bugs
-------------------

As of 5.18.18:

* **The kernel does not permit `btrfs send` and dedupe to run at the
same time**. Recent kernels no longer crash, but now refuse one
operation with an error if the other operation was already running.
@@ -8,44 +8,35 @@ bees has been tested in combination with the following:

* HOLE extents and btrfs no-holes feature
* Other deduplicators, reflink copies (though bees may decide to redo their work)
* btrfs snapshots and non-snapshot subvols (RW and RO)
* Concurrent file modification (e.g. PostgreSQL and sqlite databases, build daemons)
* all btrfs RAID profiles
* Concurrent file modification (e.g. PostgreSQL and sqlite databases, VMs, build daemons)
* All btrfs RAID profiles
* IO errors during dedupe (read errors will throw exceptions, bees will catch them and skip over the affected extent)
* Filesystems mounted *with* the flushoncommit option ([lots of harmless kernel log warnings on 4.15 and later](btrfs-kernel.md))
* Filesystems mounted *without* the flushoncommit option
* Filesystems mounted with or without the `flushoncommit` option
* 4K filesystem data block size / clone alignment
* 64-bit and 32-bit LE host CPUs (amd64, x86, arm)
* Huge files (>1TB--although Btrfs performance on such files isn't great in general)
* filesystems up to 30T+ bytes, 100M+ files
* Large files (kernel 5.4 or later strongly recommended)
* Filesystems up to 90T+ bytes, 1000M+ files
* btrfs receive
* btrfs nodatacow/nodatasum inode attribute or mount option (bees skips all nodatasum files)
* open(O_DIRECT) (seems to work as well--or as poorly--with bees as with any other btrfs feature)
* lvmcache: no problems observed in testing with recent kernels or reported by users in the last year.
* lvm dm-cache, writecache

Bad Btrfs Feature Interactions
------------------------------

bees has been tested in combination with the following, and various problems are known:

* bcache: no data-losing problems observed in testing with recent kernels
or reported by users in the last year. Some issues observed with
bcache interacting badly with some SSD models' firmware, but so far
this only causes temporary loss of service, not filesystem damage.
This behavior does not seem to be specific to bees (ordinary filesystem
tests with rsync and snapshots will reproduce it), but it does prevent
any significant testing of bees on bcache.

* btrfs send: there are bugs in `btrfs send` that can be triggered by bees.
The [`--workaround-btrfs-send` option](options.md) works around this issue
by preventing bees from modifying read-only snapshots.
* btrfs send: there are bugs in `btrfs send` that can be triggered by
bees on old kernels. The [`--workaround-btrfs-send` option](options.md)
works around this issue by preventing bees from modifying read-only
snapshots.

* btrfs qgroups: very slow, sometimes hangs...and it's even worse when
bees is running.

* btrfs autodefrag mount option: hangs and high CPU usage problems
reported by users. bees cannot distinguish autodefrag activity from
normal filesystem activity and will likely try to undo the autodefrag
if duplicate copies of the defragmented data exist.
* btrfs autodefrag mount option: bees cannot distinguish autodefrag
activity from normal filesystem activity, and may try to undo the
autodefrag if duplicate copies of the defragmented data exist.

Untested Btrfs Feature Interactions
-----------------------------------

@@ -54,9 +45,10 @@ bees has not been tested with the following, and undesirable interactions may oc

* Non-4K filesystem data block size (should work if recompiled)
* Non-equal hash (SUM) and filesystem data block (CLONE) sizes (need to fix that eventually)
* btrfs seed filesystems (does anyone even use those?)
* btrfs out-of-tree kernel patches (e.g. in-kernel dedupe or encryption)
* btrfs seed filesystems (no particular reason it wouldn't work, but no one has reported trying)
* btrfs out-of-tree kernel patches (e.g. in-kernel dedupe, encryption, extent tree v2)
* btrfs-convert from ext2/3/4 (never tested, might run out of space or ignore significant portions of the filesystem due to sanity checks)
* btrfs mixed block groups (don't know a reason why it would *not* work, but never tested)
* flashcache: an out-of-tree cache-HDD-on-SSD block layer helper.
* Host CPUs with exotic page sizes, alignment requirements, or endianness (ppc, alpha, sparc, strongarm, s390, mips, m68k...)
* bcache: used to be in the "bad" list, now in the "untested" list because nobody is rigorously testing, and bcache bugs come and go
* flashcache: an out-of-tree cache-HDD-on-SSD block layer helper
@@ -8,9 +8,10 @@ are reasonable in most cases.

Hash Table Sizing
-----------------

Hash table entries are 16 bytes per data block. The hash table stores
the most recently read unique hashes. Once the hash table is full,
each new entry in the table evicts an old entry.
Hash table entries are 16 bytes per data block. The hash table stores the
most recently read unique hashes. Once the hash table is full, each new
entry added to the table evicts an old entry. This makes the hash table
a sliding window over the most recently scanned data from the filesystem.

Here are some numbers to estimate appropriate hash table sizes:

@@ -25,9 +26,11 @@ Here are some numbers to estimate appropriate hash table sizes:

Notes:

* If the hash table is too large, no extra dedupe efficiency is
obtained, and the extra space just wastes RAM. Extra space can also slow
bees down by preventing old data from being evicted, so bees wastes time
looking for matching data that is no longer present on the filesystem.
obtained, and the extra space wastes RAM. If the hash table contains
more block records than there are blocks in the filesystem, the extra
space can slow bees down. A table that is too large prevents obsolete
data from being evicted, so bees wastes time looking for matching data
that is no longer present on the filesystem.

* If the hash table is too small, bees extrapolates from matching
blocks to find matching adjacent blocks in the filesystem that have been

@@ -36,6 +39,10 @@ one block in common between two extents in order to be able to dedupe

the entire extents. This provides significantly more dedupe hit rate
per hash table byte than other dedupe tools.
* There is a fairly wide range of usable hash sizes, and performance
degrades according to a smooth probabilistic curve in both directions.
Double or half the optimum size usually works just as well.
* When counting unique data in compressed data blocks to estimate
optimum hash table size, count the *uncompressed* size of the data.

@@ -66,11 +73,11 @@ data on an uncompressed filesystem. Dedupe efficiency falls dramatically

with hash tables smaller than 128MB/TB as the average dedupe extent size
is larger than the largest possible compressed extent size (128KB).

* **Short writes** also shorten the average extent length and increase
optimum hash table size. If a database writes to files randomly using
4K page writes, all of these extents will be 4K in length, and the hash
table size must be increased to retain each one (or the user must accept
a lower dedupe hit rate).
* **Short writes or fragmentation** also shorten the average extent
length and increase optimum hash table size. If a database writes to
files randomly using 4K page writes, all of these extents will be 4K
in length, and the hash table size must be increased to retain each one
(or the user must accept a lower dedupe hit rate).

Defragmenting files that have had many short writes increases the
extent length and therefore reduces the optimum hash table size.
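To make the arithmetic above concrete, here is a small back-of-the-envelope sketch. It is an estimate only, not a bees interface; the function name and the example extent sizes are assumptions. It turns the 16-bytes-per-block entry size and the one-surviving-entry-per-extent reasoning into a suggested table size.

```cpp
// Rough estimate only: bees needs roughly one surviving hash table entry per
// average-length extent to dedupe the whole extent by extrapolation, so a
// usable table size is (unique data / average extent length) * 16 bytes.
#include <cstdint>
#include <iostream>

constexpr uint64_t hash_table_bytes(uint64_t unique_data_bytes, uint64_t avg_extent_bytes)
{
	return unique_data_bytes / avg_extent_bytes * 16;  // 16 bytes per hash table entry
}

int main()
{
	constexpr uint64_t KiB = 1ULL << 10, MiB = 1ULL << 20, TiB = 1ULL << 40;
	// Compressed filesystem: extents are at most 128KB, hence roughly 128MB per TB.
	std::cout << hash_table_bytes(1 * TiB, 128 * KiB) / MiB << " MiB per TiB\n"; // 128
	// Database doing random 4K writes: every extent is 4K, so the table must be much larger.
	std::cout << hash_table_bytes(1 * TiB, 4 * KiB) / MiB << " MiB per TiB\n";   // 4096
	return 0;
}
```

Halving or doubling the result is fine in practice, per the note above about the wide range of usable sizes.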
@@ -296,6 +296,7 @@ resolve

The `resolve` event group consists of operations related to translating a btrfs virtual block address (i.e. physical block address) to a `(root, inode, offset)` tuple (i.e. locating and opening the file containing a matching block). `resolve` is the top level, `chase` and `adjust` are the lower two levels.

* `resolve_empty`: The `LOGICAL_INO` ioctl returned successfully with an empty reference list (0 items).
* `resolve_fail`: The `LOGICAL_INO` ioctl returned an error.
* `resolve_large`: The `LOGICAL_INO` ioctl returned more than 2730 results (the limit of the v1 ioctl).
* `resolve_ms`: Total time spent in the `LOGICAL_INO` ioctl (i.e. wallclock time, not kernel CPU time).
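For readers unfamiliar with the ioctl these counters describe, the sketch below shows roughly what one `LOGICAL_INO_V2` call looks like. It is an illustration, not bees' code, and the helper name `resolve_logical` is made up; a 64KiB output buffer is what produces the 2730-reference limit of the v1 ioctl mentioned above.

```cpp
// Illustrative sketch (not bees' code): resolve a btrfs logical address into
// (inode, offset, root) references with the LOGICAL_INO_V2 ioctl.
#include <linux/btrfs.h>
#include <sys/ioctl.h>
#include <array>
#include <cstdint>
#include <vector>

std::vector<std::array<uint64_t, 3>>
resolve_logical(int fs_fd, uint64_t logical)
{
	std::vector<char> out(65536);                  // btrfs_data_container + val[]
	btrfs_ioctl_logical_ino_args args = {};
	args.logical = logical;                        // byte address to resolve
	args.size = out.size();
	args.inodes = reinterpret_cast<uintptr_t>(out.data());
	args.flags = BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;   // v2-only: all refs to the extent

	std::vector<std::array<uint64_t, 3>> refs;
	if (ioctl(fs_fd, BTRFS_IOC_LOGICAL_INO_V2, &args) < 0) {
		return refs;                           // would be counted as resolve_fail
	}
	const auto *dc = reinterpret_cast<const btrfs_data_container *>(out.data());
	// val[] holds (inum, offset, root) triples; elem_cnt counts u64 elements
	for (uint32_t i = 0; i + 2 < dc->elem_cnt; i += 3) {
		refs.push_back({dc->val[i], dc->val[i + 1], dc->val[i + 2]});
	}
	return refs;                                   // an empty list maps to resolve_empty
}
```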
@@ -51,81 +51,40 @@ loops early. The exception text in this case is:

Terminating bees with SIGTERM
-----------------------------

bees is designed to survive host crashes, so it is safe to terminate
bees using SIGKILL; however, when bees next starts up, it will repeat
some work that was performed between the last bees crawl state save point
and the SIGKILL (up to 15 minutes). If bees is stopped and started less
than once per day, then this is not a problem as the proportional impact
is quite small; however, users who stop and start bees daily or even
more often may prefer to have a clean shutdown with SIGTERM so bees can
restart faster.
bees is designed to survive host crashes, so it is safe to terminate bees
using SIGKILL; however, when bees next starts up, it will repeat some
work that was performed between the last bees crawl state save point
and the SIGKILL (up to 15 minutes), and a large hash table may not be
completely written back to disk, so some duplicate matches will be lost.

bees handling of SIGTERM can take a long time on machines with some or
all of:
If bees is stopped and started less than once per week, then this is not
a problem as the proportional impact is quite small; however, users who
stop and start bees daily or even more often may prefer to have a clean
shutdown with SIGTERM so bees can restart faster.

* Large RAM and `vm.dirty_ratio`
* Large number of active bees worker threads
* Large number of bees temporary files (proportional to thread count)
* Large hash table size
* Large filesystem size
* High IO latency, especially "low power" spinning disks
* High filesystem activity, especially duplicate data writes

The shutdown procedure performs these steps:

Each of these factors individually increases the total time required
to perform a clean bees shutdown. When combined, the factors can
multiply with each other, dramatically increasing the time required to
flush bees state to disk.

On a large system with many of the above factors present, a "clean"
bees shutdown can take more than 20 minutes. Even a small machine
(16GB RAM, 1GB hash table, 1TB NVME disk) can take several seconds to
complete a SIGTERM shutdown.

The shutdown procedure performs potentially long-running tasks in
this order:

1. Worker threads finish executing their current Task and exit.
Threads executing `LOGICAL_INO` ioctl calls usually finish quickly,
but btrfs imposes no limit on the ioctl's running time, so it
can take several minutes in rare bad cases. If there is a btrfs
commit already in progress on the filesystem, then most worker
threads will be blocked until the btrfs commit is finished.

2. Crawl state is saved to `$BEESHOME`. This normally completes
relatively quickly (a few seconds at most). This is the most
1. Crawl state is saved to `$BEESHOME`. This is the most
important bees state to save to disk as it directly impacts
restart time, so it is done as early as possible (but no earlier).
restart time, so it is done as early as possible

3. Hash table is written to disk. Normally the hash table is
trickled back to disk at a rate of about 2GB per hour;
2. Hash table is written to disk. Normally the hash table is
trickled back to disk at a rate of about 128KiB per second;
however, SIGTERM causes bees to attempt to flush the whole table
immediately. If bees has recently been idle then the hash table is
likely already flushed to disk, so this step will finish quickly;
however, if bees has recently been active and the hash table is
large relative to RAM size, the blast of rapidly written data
can force the Linux VFS to block all writes to the filesystem
for sufficient time to complete all pending btrfs metadata
writes which accumulated during the btrfs commit before bees
received SIGTERM...and _then_ let bees write out the hash table.
The time spent here depends on the size of RAM, speed of disks,
and aggressiveness of competing filesystem workloads.
immediately. The time spent here depends on the size of RAM, speed
of disks, and aggressiveness of competing filesystem workloads.
It can trigger `vm.dirty_bytes` limits and block other processes
writing to the filesystem for a while.

4. bees temporary files are closed, which implies deletion of their
inodes. These are files which consist entirely of shared extent
structures, and btrfs takes an unusually long time to delete such
files (up to a few minutes for each on slow spinning disks).
3. The bees process calls `_exit`, which terminates all running
worker threads, closes and deletes all temporary files. This
can take a while _after_ the bees process exits, especially on
slow spinning disks.

If bees is terminated with SIGKILL, only step #1 and #4 are performed (the
kernel performs these automatically if bees exits). This reduces the
shutdown time at the cost of increased startup time.
Balances
--------

First, read [`LOGICAL_INO` and btrfs balance WARNING](btrfs-kernel.md).
bees will suspend operations during a btrfs balance to work around
kernel bugs.

A btrfs balance relocates data on disk by making a new copy of the
data, replacing all references to the old data with references to the
new copy, and deleting the old copy. To bees, this is the same as any

@@ -175,7 +134,9 @@ the beginning.

Each time bees dedupes an extent that is referenced by a snapshot,
the entire metadata page in the snapshot subvol (16KB by default) must
be CoWed in btrfs. This can result in a substantial increase in btrfs
be CoWed in btrfs. Since all references must be removed at the same
time, this CoW operation is repeated in every snapshot containing the
duplicate data. This can result in a substantial increase in btrfs
metadata size if there are many snapshots on a filesystem.

Normally, metadata is small (less than 1% of the filesystem) and dedupe

@@ -252,17 +213,18 @@ Other Gotchas

filesystem while `LOGICAL_INO` is running. Generally the CPU spends
most of the runtime of the `LOGICAL_INO` ioctl running the kernel,
so on a single-core CPU the entire system can freeze up for a second
during operations on toxic extents.
during operations on toxic extents. Note this only occurs on older
kernels. See [the slow backrefs kernel bug section](btrfs-kernel.md).

* If a process holds a directory FD open, the subvol containing the
directory cannot be deleted (`btrfs sub del` will start the deletion
process, but it will not proceed past the first open directory FD).
`btrfs-cleaner` will simply skip over the directory *and all of its
children* until the FD is closed. bees avoids this gotcha by closing
all of the FDs in its directory FD cache every 10 btrfs transactions.
all of the FDs in its directory FD cache every btrfs transaction.

* If a file is deleted while bees is caching an open FD to the file,
bees continues to scan the file. For very large files (e.g. VM
images), the deletion of the file can be delayed indefinitely.
To limit this delay, bees closes all FDs in its file FD cache every
10 btrfs transactions.
btrfs transaction.
@@ -8,10 +8,12 @@ bees uses checkpoints for persistence to eliminate the IO overhead of a

transactional data store. On restart, bees will dedupe any data that
was added to the filesystem since the last checkpoint. Checkpoints
occur every 15 minutes for scan progress, stored in `beescrawl.dat`.
The hash table trickle-writes to disk at 4GB/hour to `beeshash.dat`.
An hourly performance report is written to `beesstats.txt`. There are
no special requirements for bees hash table storage--`.beeshome` could
be stored on a different btrfs filesystem, ext4, or even CIFS.
The hash table trickle-writes to disk at 128KiB/s to `beeshash.dat`,
but will flush immediately if bees is terminated by SIGTERM.

There are no special requirements for bees hash table storage--`.beeshome`
could be stored on a different btrfs filesystem, ext4, or even CIFS (but
not MS-DOS--beeshome does need filenames longer than 8.3).

bees uses a persistent dedupe hash table with a fixed size configured
by the user. Any size of hash table can be dedicated to dedupe. If a

@@ -20,7 +22,7 @@ small as 128KB.

The bees hash table is loaded into RAM at startup and `mlock`ed so it
will not be swapped out by the kernel (if swap is permitted, performance
degrades to nearly zero).
degrades to nearly zero, for both bees and the swap device).

bees scans the filesystem in a single pass which removes duplicate
extents immediately after they are detected. There are no distinct

@@ -83,12 +85,12 @@ of these functions in userspace, at the expense of encountering [some

kernel bugs in `LOGICAL_INO` performance](btrfs-kernel.md).

bees uses only the data-safe `FILE_EXTENT_SAME` (aka `FIDEDUPERANGE`)
kernel operations to manipulate user data, so it can dedupe live data
(e.g. build servers, sqlite databases, VM disk images). It does not
modify file attributes or timestamps.
kernel ioctl to manipulate user data, so it can dedupe live data
(e.g. build servers, sqlite databases, VM disk images). bees does not
modify file attributes or timestamps in deduplicated files.
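For reference, a minimal sketch of that ioctl follows. It is not bees' own code; the function name `dedupe_one_range` and the fixed offsets are assumptions for illustration.

```cpp
// Minimal sketch of the data-safe FIDEDUPERANGE (FILE_EXTENT_SAME) ioctl:
// ask the kernel to share `length` bytes at offset 0 of src_fd with
// offset 0 of dst_fd. Not bees' code; error handling kept to a minimum.
#include <linux/fs.h>
#include <sys/ioctl.h>
#include <cstdint>
#include <vector>

bool dedupe_one_range(int src_fd, int dst_fd, uint64_t length)
{
	// Header plus one file_dedupe_range_info, zero-initialized.
	std::vector<char> buf(sizeof(file_dedupe_range) + sizeof(file_dedupe_range_info));
	auto *args = reinterpret_cast<file_dedupe_range *>(buf.data());
	args->src_offset = 0;
	args->src_length = length;
	args->dest_count = 1;
	args->info[0].dest_fd = dst_fd;
	args->info[0].dest_offset = 0;

	if (ioctl(src_fd, FIDEDUPERANGE, args) < 0) {
		return false;   // e.g. EOPNOTSUPP, EXDEV, EINVAL
	}
	// The kernel compares the ranges itself and only shares extents if identical.
	return args->info[0].status == FILE_DEDUPE_RANGE_SAME;
}
```

Because the kernel verifies the data itself before sharing extents, a stale or colliding hash table entry can only waste an ioctl, never corrupt data, which is why the documentation calls the operation data-safe.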
When bees has scanned all of the data, bees will pause until 10
transactions have been completed in the btrfs filesystem. bees tracks
When bees has scanned all of the data, bees will pause until a new
transaction has completed in the btrfs filesystem. bees tracks
the current btrfs transaction ID over time so that it polls less often
on quiescent filesystems and more often on busy filesystems.
@@ -17,7 +17,6 @@ Strengths

* Space-efficient hash table and matching algorithms - can use as little as 1 GB hash table per 10 TB unique data (0.1GB/TB)
* Daemon incrementally dedupes new data using btrfs tree search
* Works with btrfs compression - dedupe any combination of compressed and uncompressed files
* **NEW** [Works around `btrfs send` problems with dedupe and incremental parent snapshots](options.md)
* Works around btrfs filesystem structure to free more disk space
* Persistent hash table for rapid restart after shutdown
* Whole-filesystem dedupe - including snapshots

@@ -70,6 +69,6 @@ You can also use Github:

Copyright & License
-------------------

Copyright 2015-2022 Zygo Blaxell <bees@furryterror.org>.
Copyright 2015-2023 Zygo Blaxell <bees@furryterror.org>.

GPL (version 3 or later).
@@ -4,7 +4,7 @@ Building bees

Dependencies
------------

* C++11 compiler (tested with GCC 4.9, 6.3.0, 8.1.0)
* C++11 compiler (tested with GCC 8.1.0, 12.2.0)

Sorry. I really like closures and shared_ptr, so support
for earlier compiler versions is unlikely.

@@ -19,7 +19,7 @@ Dependencies

* [Linux kernel version](btrfs-kernel.md) gets its own page.

* markdown for documentation
* markdown to build the documentation

* util-linux version that provides `blkid` command for the helper
script `scripts/beesd` to work
@@ -2,8 +2,8 @@ Features You Might Expect That bees Doesn't Have
------------------------------------------------

* There's no configuration file (patches welcome!). There are
some tunables hardcoded in the source that could eventually become
configuration options. There's also an incomplete option parser
some tunables hardcoded in the source (`src/bees.h`) that could eventually
become configuration options. There's also an incomplete option parser
(patches welcome!).

* The bees process doesn't fork and writes its log to stdout/stderr.

@@ -43,3 +43,6 @@ compression method or not compress the data (patches welcome!).

* It is theoretically possible to resize the hash table without starting
over with a new full-filesystem scan; however, this feature has not been
implemented yet.

* btrfs maintains csums of data blocks which bees could use to improve
scan speeds, but bees doesn't use them yet.
@@ -69,9 +69,11 @@ namespace crucible {

uint64_t get_flags() const;
void set_flags(uint64_t new_flags);
void set_logical(uint64_t new_logical);
void set_size(uint64_t new_size);

virtual void do_ioctl(int fd);
virtual bool do_ioctl_nothrow(int fd);
void do_ioctl(int fd);
bool do_ioctl_nothrow(int fd);

struct BtrfsInodeOffsetRootSpan {
using iterator = BtrfsInodeOffsetRoot*;
@@ -4,13 +4,20 @@

#include "crucible/error.h"

#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <set>

#include <cassert>

namespace crucible {
using namespace std;

/// A class to track progress of multiple workers using only two points:
/// the first and last incomplete state. The first incomplete
/// state can be recorded as a checkpoint to resume later on.
/// The last completed state is the starting point for workers that
/// need something to do.
template <class T>
class ProgressTracker {
struct ProgressTrackerState;

@@ -19,8 +26,16 @@ namespace crucible {

using value_type = T;
using ProgressHolder = shared_ptr<ProgressHolderState>;

/// Create ProgressTracker with initial begin and end state 'v'.
ProgressTracker(const value_type &v);

/// The first incomplete state. This is not "sticky",
/// it will revert to the end state if there are no
/// items in progress.
value_type begin() const;

/// The last incomplete state. This is "sticky",
/// it can only increase and never decrease.
value_type end() const;

ProgressHolder hold(const value_type &v);

@@ -31,7 +46,7 @@ namespace crucible {

struct ProgressTrackerState {
using key_type = pair<value_type, ProgressHolderState *>;
mutex m_mutex;
map<key_type, bool> m_in_progress;
set<key_type> m_in_progress;
value_type m_begin;
value_type m_end;
};

@@ -39,6 +54,7 @@ namespace crucible {

class ProgressHolderState {
shared_ptr<ProgressTrackerState> m_state;
const value_type m_value;
using key_type = typename ProgressTrackerState::key_type;
public:
ProgressHolderState(shared_ptr<ProgressTrackerState> state, const value_type &v);
~ProgressHolderState();

@@ -86,7 +102,11 @@ namespace crucible {

m_value(v)
{
unique_lock<mutex> lock(m_state->m_mutex);
m_state->m_in_progress[make_pair(m_value, this)] = true;
const auto rv = m_state->m_in_progress.insert(key_type(m_value, this));
THROW_CHECK1(runtime_error, m_value, rv.second);
// Set the beginning to the first existing in-progress item
m_state->m_begin = m_state->m_in_progress.begin()->first;
// If this value is past the end, move the end, but don't go backwards
if (m_state->m_end < m_value) {
m_state->m_end = m_value;
}

@@ -96,17 +116,15 @@ namespace crucible {

ProgressTracker<T>::ProgressHolderState::~ProgressHolderState()
{
unique_lock<mutex> lock(m_state->m_mutex);
m_state->m_in_progress[make_pair(m_value, this)] = false;
auto p = m_state->m_in_progress.begin();
while (p != m_state->m_in_progress.end()) {
if (p->second) {
break;
}
if (m_state->m_begin < p->first.first) {
m_state->m_begin = p->first.first;
}
m_state->m_in_progress.erase(p);
p = m_state->m_in_progress.begin();
const auto rv = m_state->m_in_progress.erase(key_type(m_value, this));
// THROW_CHECK2(runtime_error, m_value, rv, rv == 1);
assert(rv == 1);
if (m_state->m_in_progress.empty()) {
// If we made the list empty, then m_begin == m_end
m_state->m_begin = m_state->m_end;
} else {
// If we deleted the first element, then m_begin = current first element
m_state->m_begin = m_state->m_in_progress.begin()->first;
}
}
@@ -548,7 +548,7 @@ namespace crucible {

#endif
const uint64_t logical_end = logical + count * block_size();
BtrfsTreeItem bti = rlower_bound(logical);
size_t loops = 0;
size_t __attribute__((unused)) loops = 0;
BCTFGS_DEBUG("get_sums " << to_hex(logical) << ".." << to_hex(logical_end) << endl);
while (!!bti) {
BCTFGS_DEBUG("get_sums[" << loops << "]: " << bti << endl);
lib/fs.cc (12 changed lines)
@@ -315,6 +315,18 @@ namespace crucible {

return m_flags;
}

void
BtrfsIoctlLogicalInoArgs::set_logical(uint64_t new_logical)
{
m_logical = new_logical;
}

void
BtrfsIoctlLogicalInoArgs::set_size(uint64_t new_size)
{
m_container_size = new_size;
}

bool
BtrfsIoctlLogicalInoArgs::do_ioctl_nothrow(int fd)
{
@@ -757,6 +757,15 @@ BeesResolveAddrResult::BeesResolveAddrResult()

{
}

shared_ptr<BtrfsIoctlLogicalInoArgs>
BeesContext::logical_ino(const uint64_t logical, const bool all_refs)
{
const auto rv = m_logical_ino_pool();
rv->set_logical(logical);
rv->set_flags(all_refs ? BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET : 0);
return rv;
}

BeesResolveAddrResult
BeesContext::resolve_addr_uncached(BeesAddress addr)
{

@@ -768,7 +777,8 @@ BeesContext::resolve_addr_uncached(BeesAddress addr)

// transaction latency, competing threads, and freeze/SIGSTOP
// pausing the bees process.

BtrfsIoctlLogicalInoArgs log_ino(addr.get_physical_or_zero());
const auto log_ino_ptr = logical_ino(addr.get_physical_or_zero(), false);
auto &log_ino = *log_ino_ptr;

// Time how long this takes
Timer resolve_timer;

@@ -811,6 +821,10 @@ BeesContext::resolve_addr_uncached(BeesAddress addr)

// Avoid performance problems - pretend resolve failed if there are too many refs
const size_t rv_count = log_ino.m_iors.size();
if (!rv_count) {
BEESLOGDEBUG("LOGICAL_INO returned 0 refs at " << to_hex(addr));
BEESCOUNT(resolve_empty);
}
if (rv_count < BEES_MAX_EXTENT_REF_COUNT) {
rv.m_biors = vector<BtrfsInodeOffsetRoot>(log_ino.m_iors.begin(), log_ino.m_iors.end());
} else {

@@ -822,7 +836,7 @@ BeesContext::resolve_addr_uncached(BeesAddress addr)

if (sys_usage_delta < BEES_TOXIC_SYS_DURATION) {
rv.m_is_toxic = false;
} else {
BEESLOGNOTICE("WORKAROUND: toxic address: addr = " << addr << ", sys_usage_delta = " << round(sys_usage_delta* 1000.0) / 1000.0 << ", user_usage_delta = " << round(user_usage_delta * 1000.0) / 1000.0 << ", rt_age = " << rt_age << ", refs " << rv_count);
BEESLOGDEBUG("WORKAROUND: toxic address: addr = " << addr << ", sys_usage_delta = " << round(sys_usage_delta* 1000.0) / 1000.0 << ", user_usage_delta = " << round(user_usage_delta * 1000.0) / 1000.0 << ", rt_age = " << rt_age << ", refs " << rv_count);
BEESCOUNT(resolve_toxic);
rv.m_is_toxic = true;
}

@@ -910,6 +924,9 @@ BeesContext::start()

m_tmpfile_pool.generator([=]() -> shared_ptr<BeesTempFile> {
return make_shared<BeesTempFile>(shared_from_this());
});
m_logical_ino_pool.generator([]() {
return make_shared<BtrfsIoctlLogicalInoArgs>(0);
});
m_tmpfile_pool.checkin([](const shared_ptr<BeesTempFile> &btf) {
catch_all([&](){
btf->reset();
@@ -500,7 +500,8 @@ BeesRoots::transid_max_nocache()

// We look for the root of the extent tree and read its transid.
// Should run in O(1) time and be fairly reliable.
const auto bti = m_root_fetcher.root(BTRFS_EXTENT_TREE_OBJECTID);
BtrfsRootFetcher root_fetcher(m_ctx->root_fd());
const auto bti = root_fetcher.root(BTRFS_EXTENT_TREE_OBJECTID);
BEESTRACE("extracting transid from " << bti);
const auto rv = bti.transid();

@@ -514,7 +515,12 @@ BeesRoots::transid_max_nocache()

uint64_t
BeesRoots::transid_max()
{
return m_transid_re.count();
const auto rv = m_transid_re.count();
// transid must be greater than zero, or we did something very wrong
THROW_CHECK1(runtime_error, rv, rv > 0);
// transid must be less than max, or we did something very wrong
THROW_CHECK1(runtime_error, rv, rv < numeric_limits<uint64_t>::max());
return rv;
}

struct BeesFileCrawl {

@@ -927,7 +933,6 @@ BeesRoots::state_load()

BeesRoots::BeesRoots(shared_ptr<BeesContext> ctx) :
m_ctx(ctx),
m_root_fetcher(ctx->root_fd()),
m_crawl_state_file(ctx->home_fd(), crawl_state_filename()),
m_crawl_thread("crawl_transid"),
m_writeback_thread("crawl_writeback")

@@ -1101,7 +1106,8 @@ BeesRoots::is_root_ro(uint64_t root)

BEESTRACE("checking subvol flags on root " << root);

const auto item = m_root_fetcher.root(root);
BtrfsRootFetcher root_fetcher(m_ctx->root_fd());
const auto item = root_fetcher.root(root);
// If we can't access the subvol's root item...guess it's ro?
if (!item || item.root_flags() & BTRFS_ROOT_SUBVOL_RDONLY) {
return true;
@@ -534,7 +534,6 @@ class BeesScanMode;

class BeesRoots : public enable_shared_from_this<BeesRoots> {
shared_ptr<BeesContext> m_ctx;

BtrfsRootFetcher m_root_fetcher;
BeesStringFile m_crawl_state_file;
map<uint64_t, shared_ptr<BeesCrawl>> m_root_crawl_map;
mutex m_mutex;

@@ -715,6 +714,7 @@ class BeesContext : public enable_shared_from_this<BeesContext> {

shared_ptr<BeesHashTable> m_hash_table;
shared_ptr<BeesRoots> m_roots;
Pool<BeesTempFile> m_tmpfile_pool;
Pool<BtrfsIoctlLogicalInoArgs> m_logical_ino_pool;

LRUCache<BeesResolveAddrResult, BeesAddress> m_resolve_cache;

@@ -754,6 +754,8 @@ public:

bool scan_forward(const BeesFileRange &bfr);

shared_ptr<BtrfsIoctlLogicalInoArgs> logical_ino(uint64_t bytenr, bool all_refs);

bool is_root_ro(uint64_t root);
BeesRangePair dup_extent(const BeesFileRange &src, const shared_ptr<BeesTempFile> &tmpfile);
bool dedup(const BeesRangePair &brp);
@@ -3,6 +3,7 @@

#include "crucible/limits.h"

#include <cassert>
#include <cstdint>

using namespace crucible;

@@ -12,23 +12,49 @@ using namespace std;

void
test_progress()
{
// On create, begin == end == constructor argument
ProgressTracker<uint64_t> pt(123);
auto hold = pt.hold(234);
auto hold2 = pt.hold(345);
assert(pt.begin() == 123);
assert(pt.end() == 345);
auto hold3 = pt.hold(456);
assert(pt.begin() == 123);
assert(pt.end() == 456);
hold2.reset();
assert(pt.begin() == 123);
assert(pt.end() == 456);
hold.reset();
assert(pt.end() == 123);

// Holding a position past the end increases the end (and moves begin to match)
auto hold345 = pt.hold(345);
assert(pt.begin() == 345);
assert(pt.end() == 345);

// Holding a position before begin reduces begin, without changing end
auto hold234 = pt.hold(234);
assert(pt.begin() == 234);
assert(pt.end() == 345);

// Holding a position past the end increases the end, without affecting begin
auto hold456 = pt.hold(456);
assert(pt.begin() == 234);
assert(pt.end() == 456);
hold3.reset();

// Releasing a position in the middle affects neither begin nor end
hold345.reset();
assert(pt.begin() == 234);
assert(pt.end() == 456);

// Hold another position in the middle to test begin moving forward
auto hold400 = pt.hold(400);

// Releasing a position at the beginning moves begin forward
hold234.reset();
assert(pt.begin() == 400);
assert(pt.end() == 456);

// Releasing a position at the end doesn't move end backward
hold456.reset();
assert(pt.begin() == 400);
assert(pt.end() == 456);

// Releasing a position in the middle doesn't move end backward but does move begin forward
hold400.reset();
assert(pt.begin() == 456);
assert(pt.end() == 456);

}

int
@@ -28,7 +28,7 @@ static bool test_fails = false;

static
void
seeker_test(const vector<uint64_t> &vec, size_t target)
seeker_test(const vector<uint64_t> &vec, uint64_t const target)
{
cerr << "Find " << target << " in {";
for (auto i : vec) {

@@ -42,7 +42,7 @@ seeker_test(const vector<uint64_t> &vec, size_t target)

return seeker_finder(vec, lower, upper);
});
cerr << found;
size_t my_found = 0;
uint64_t my_found = 0;
for (auto i : vec) {
if (i <= target) {
my_found = i;