Fixes a bad grep pattern caused by dffd6e0

Fixes #233
Get rid of errors by using grep -E
2025-08-02 13:53:28 +02:00 · 2022-10-13 16:32:48 -04:00 · 2022-10-05 22:36:33 -03:00 · 2022-10-05 22:36:33 -03:00 · 2022-10-05 22:36:33 -03:00
57 changed files with 1812 additions and 3319 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,7 @@
 *.[ao]
 *.bak
-*.dep
 *.new
-*.tmp
+*.dep
 *.so*
 Doxyfile
 README.html
--- a/Defines.mk
+++ b/Defines.mk
@@ -2,7 +2,6 @@ MAKE += PREFIX=$(PREFIX) LIBEXEC_PREFIX=$(LIBEXEC_PREFIX) ETC_PREFIX=$(ETC_PREFI

 define TEMPLATE_COMPILER =
 sed $< >$@ \
-		-e's#@DESTDIR@#$(DESTDIR)#' \
 		-e's#@PREFIX@#$(PREFIX)#' \
 		-e's#@ETC_PREFIX@#$(ETC_PREFIX)#' \
 		-e's#@LIBEXEC_PREFIX@#$(LIBEXEC_PREFIX)#'
--- a/Makefile
+++ b/Makefile
@@ -49,6 +49,11 @@ scripts/%: scripts/%.in

 scripts: scripts/beesd scripts/beesd@.service

+install_tools: ## Install support tools + libs
+install_tools: src
+	install -Dm755 bin/fiemap $(DESTDIR)$(PREFIX)/bin/fiemap
+	install -Dm755 bin/fiewalk $(DESTDIR)$(PREFIX)/sbin/fiewalk
+
 install_bees: ## Install bees + libs
 install_bees: src $(RUN_INSTALL_TESTS)
 	install -Dm755 bin/bees	$(DESTDIR)$(LIBEXEC_PREFIX)/bees
@@ -56,13 +61,13 @@ install_bees: src $(RUN_INSTALL_TESTS)
 install_scripts: ## Install scipts
 install_scripts: scripts
 	install -Dm755 scripts/beesd $(DESTDIR)$(PREFIX)/sbin/beesd
-	install -Dm644 scripts/beesd.conf.sample $(DESTDIR)$(ETC_PREFIX)/bees/beesd.conf.sample
+	install -Dm644 scripts/beesd.conf.sample $(DESTDIR)/$(ETC_PREFIX)/bees/beesd.conf.sample
 ifneq ($(SYSTEMD_SYSTEM_UNIT_DIR),)
 	install -Dm644 scripts/beesd@.service $(DESTDIR)$(SYSTEMD_SYSTEM_UNIT_DIR)/beesd@.service
 endif

 install: ## Install distribution
-install: install_bees install_scripts
+install: install_bees install_scripts $(OPTIONAL_INSTALL_TARGETS)

 help: ## Show help
 	@fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/\t/'
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@ Strengths
 * Space-efficient hash table and matching algorithms - can use as little as 1 GB hash table per 10 TB unique data (0.1GB/TB)
 * Daemon incrementally dedupes new data using btrfs tree search
 * Works with btrfs compression - dedupe any combination of compressed and uncompressed files
- * **NEW** [Works around `btrfs send` problems with dedupe and incremental parent snapshots](docs/options.md)
+ * **NEW** [Works around `btrfs send` problems with dedupe and incremental parent snapshots](docs/options.md)
 * Works around btrfs filesystem structure to free more disk space
 * Persistent hash table for rapid restart after shutdown
 * Whole-filesystem dedupe - including snapshots
@@ -70,6 +70,6 @@ You can also use Github:
 Copyright & License
 -------------------

-Copyright 2015-2022 Zygo Blaxell <bees@furryterror.org>.
+Copyright 2015-2018 Zygo Blaxell <bees@furryterror.org>.

 GPL (version 3 or later).
--- a/docs/btrfs-kernel.md
+++ b/docs/btrfs-kernel.md
@@ -9,7 +9,7 @@ This issue is fixed in kernel 5.4.14 and later.

 **Recommended kernel versions for bees are 4.19, 5.4, 5.10, 5.11, or 5.12,
 with recent LTS and -stable updates.**  The latest released kernel as
-of this writing is 5.18.18.
+of this writing is 5.12.3.

 4.14, 4.9, and 4.4 LTS kernels with recent updates are OK with
 some issues.  Older kernels will be slower (a little slower or a lot
@@ -31,7 +31,7 @@ In some future bees release, this API version may become mandatory.
 Kernel Bug Tracking Table
 -------------------------

-These bugs are particularly popular among bees users, though not all are specifically relevant to bees:
+These bugs are particularly popular among bees users:

 | First bad kernel | Last bad kernel | Issue Description | Fixed Kernel Versions | Fix Commit
 | :---: | :---: | --- | :---: | ---
@@ -61,11 +61,7 @@ These bugs are particularly popular among bees users, though not all are specifi
 | 5.4 | 5.11 | spurious tree checker failures on extent ref hash | 5.11.5, 5.12 and later | 1119a72e223f btrfs: tree-checker: do not error out if extent ref hash doesn't match
 | - | 5.11 | tree mod log issue #5 | 4.4.263, 4.9.263, 4.14.227, 4.19.183, 5.4.108, 5.10.26, 5.11.9, 5.12 and later | dbcc7d57bffc btrfs: fix race when cloning extent buffer during rewind of an old root
 | - | 5.12 | tree mod log issue #6 | 4.14.233, 4.19.191, 5.4.118, 5.10.36, 5.11.20, 5.12.3, 5.13 and later | f9690f426b21 btrfs: fix race when picking most recent mod log operation for an old root
-| 4.15 | 5.16 | spurious warnings from `fs/fs-writeback.c` when `flushoncommit` is enabled | 5.15.27, 5.16.13, 5.17 and later | a0f0cf8341e3 btrfs: get rid of warning on transaction commit when using flushoncommit
-| - | 5.17 | crash during device removal can make filesystem unmountable | 5.15.54, 5.16.20, 5.17.3, 5.18 and later | bbac58698a55 btrfs: remove device item and update super block in the same transaction
-| - | 5.18 | wrong superblock num_devices makes filesystem unmountable | 4.14.283, 4.19.247, 5.4.198, 5.10.121, 5.15.46, 5.17.14, 5.18.3, 5.19 and later | d201238ccd2f btrfs: repair super block num_devices automatically
-| 5.18 | 5.19 | parent transid verify failed during log tree replay after a crash during a rename operation | 5.18.18, 5.19.2, 6.0 and later | 723df2bcc9e1 btrfs: join running log transaction when logging new name
-| 5.4 | - | kernel hang when multiple threads are running `LOGICAL_INO` and dedupe ioctl | - | workaround: reduce bees thread count to 1 with `-c1`
+| 4.15 | - | spurious warnings from `fs/fs-writeback.c` when `flushoncommit` is enabled | - | workaround:  comment out the `WARN_ON`

 "Last bad kernel" refers to that version's last stable update from
 kernel.org.  Distro kernels may backport additional fixes.  Consult
@@ -81,7 +77,7 @@ A "-" for "first bad kernel" indicates the bug has been present since
 the relevant feature first appeared in btrfs.

 A "-" for "last bad kernel" indicates the bug has not yet been fixed as
-of 5.18.18.
+of 5.8.14.

 In cases where issues are fixed by commits spread out over multiple
 kernel versions, "fixed kernel version" refers to the version that
@@ -91,10 +87,15 @@ contains all components of the fix.
 Workarounds for known kernel bugs
 ---------------------------------

-* **Hangs with high worker thread counts**:  On kernels newer than
-  5.4, multiple threads running `LOGICAL_INO` and dedupe ioctls
-  at the same time can lead to a kernel hang.  The workaround is
-  to reduce the thread count to 1 with `-c1`.
+* **Tree mod log issues**:  bees will detect that a btrfs balance is
+  running, and pause bees activity until the balance is done.  This avoids
+  running both the `LOGICAL_INO` ioctl and btrfs balance at the same time,
+  which avoids kernel crashes on old kernel versions.
+
+  The numbers for "tree mod log issue #" in the above table are arbitrary.
+  There are a lot of them, and they all behave fairly similarly.
+
+  This workaround is less necessary for kernels 5.4.19 and later.

 * **Slow backrefs** (aka toxic extents):  Under certain conditions,
  if the number of references to a single shared extent grows too
@@ -127,7 +128,7 @@ Workarounds for known kernel bugs
 Unfixed kernel bugs
 -------------------

-As of 5.18.18:
+As of 5.12.3:

 * **The kernel does not permit `btrfs send` and dedupe to run at the
  same time**.  Recent kernels no longer crash, but now refuse one
@@ -150,3 +151,22 @@ As of 5.18.18:
  still saves some IO.

  `btrfs receive` is not affected by this issue.
+
+* **Spurious warnings in `fs/fs-writeback.c`** on kernel 4.15 and later
+  when the filesystem is mounted with `flushoncommit`.  These
+  seem to be harmless (there are other locks which prevent
+  concurrent umount of the filesystem), but the underlying
+  problems that trigger the `WARN_ON` are [not trivial to
+  fix](https://www.spinics.net/lists/linux-btrfs/msg87752.html).
+
+  The warnings can be especially voluminous when bees is running.
+
+  Workarounds:
+
+  1. mount with `-o noflushoncommit`
+  2. patch kernel to remove warning in `fs/fs-writeback.c`.
+
+  Note that using kernels 4.14 and earlier is *not* a viable workaround
+  for this issue, because kernels 4.14 and earlier will eventually
+  deadlock when a filesystem is mounted with `-o flushoncommit` (a single
+  commit fixes one bug and introduces the other).
--- a/docs/config.md
+++ b/docs/config.md
@@ -94,75 +94,38 @@ every time a new client machine's data is added to the server.
 Scanning modes for multiple subvols
 -----------------------------------

-The `--scan-mode` option affects how bees schedules worker threads
-between subvolumes.  Scan modes are an experimental feature and will
-likely be deprecated in favor of a better solution.
+The `--scan-mode` option affects how bees divides resources between
+subvolumes.  This is particularly relevant when there are snapshots,
+as there are tradeoffs to be made depending on how snapshots are used
+on the filesystem.

-Scan mode can be changed at any time by restarting bees with a different
-mode option.  Scan state tracking is the same for all of the currently
-implemented modes.  The difference between the modes is the order in
-which subvols are selected.
+Note that if a filesystem has only one subvolume (i.e. the root,
+subvol ID 5) then the `--scan-mode` option has no effect, as there is
+only one subvolume to scan.

-If a filesystem has only one subvolume with data in it, then the
-`--scan-mode` option has no effect.  In this case, there is only one
-subvolume to scan, so worker threads will all scan that one.
+The default mode is mode 0, "lockstep".  In this mode, each inode of each
+subvol is scanned at the same time, before moving to the next inode in
+each subvol.  This maximizes the likelihood that all of the references to
+a snapshot of a file are scanned at the same time, which takes advantage
+of VFS caching in the Linux kernel.  If snapshots are created very often,
+bees will not make very good progress as it constantly restarts the
+filesystem scan from the beginning each time a new snapshot is created.

-Within a subvol, there is a single optimal scan order:  files are scanned
-in ascending numerical inode order.  Each worker will scan a different
-inode to avoid having the threads contend with each other for locks.
-File data is read sequentially and in order, but old blocks from earlier
-scans are skipped.
+Scan mode 1, "independent", simply scans every subvol independently
+in parallel.  Each subvol's scanner shares time equally with all other
+subvol scanners.  Whenever a new subvol appears, a new scanner is
+created and the new subvol scanner doesn't affect the behavior of any
+existing subvol scanner.

-Between subvols, there are several scheduling algorithms with different
-trade-offs:
-
-Scan mode 0, "lockstep", scans the same inode number in each subvol at
-close to the same time.  This is useful if the subvols are snapshots
-with a common ancestor, since the same inode number in each subvol will
-have similar or identical contents.  This maximizes the likelihood
-that all of the references to a snapshot of a file are scanned at
-close to the same time, improving dedupe hit rate and possibly taking
-advantage of VFS caching in the Linux kernel.  If the subvols are
-unrelated (i.e. not snapshots of a single subvol) then this mode does
-not provide significant benefit over random selection.  This mode uses
-smaller amounts of temporary space for shorter periods of time when most
-subvols are snapshots.  When a new snapshot is created, this mode will
-stop scanning other subvols and scan the new snapshot until the same
-inode number is reached in each subvol, which will effectively stop
-dedupe temporarily as this data has already been scanned and deduped
-in the other snapshots.
-
-Scan mode 1, "independent", scans the next inode with new data in each
-subvol.  Each subvol's scanner shares inodes uniformly with all other
-subvol scanners until the subvol has no new inodes left.  This mode makes
-continuous forward progress across the filesystem and provides average
-performance across a variety of workloads, but is slow to respond to new
-data, and may spend a lot of time deduping short-lived subvols that will
-soon be deleted when it is preferable to dedupe long-lived subvols that
-will be the origin of future snapshots.  When a new snapshot is created,
-previous subvol scans continue as before, but the time is now divided
-among one more subvol.
-
-Scan mode 2, "sequential", scans one subvol at a time, in numerical subvol
-ID order, processing each subvol completely before proceeding to the
-next subvol.  This avoids spending time scanning short-lived snapshots
-that will be deleted before they can be fully deduped (e.g. those used
-for `btrfs send`).  Scanning is concentrated on older subvols that are
-more likely to be origin subvols for future snapshots, eliminating the
-need to dedupe future snapshots separately.  This mode uses the largest
-amount of temporary space for the longest time, and typically requires
-a larger hash table to maintain dedupe hit rate.
-
-Scan mode 3, "recent", scans the subvols with the highest `min_transid`
-value first (i.e. the ones that were most recently completely scanned),
-then falls back to "independent" mode to break ties.  This interrupts
-long scans of old subvols to give a rapid dedupe response to new data,
-then returns to the old subvols after the new data is scanned.  It is
-useful for large filesystems with multiple active subvols and rotating
-snapshots, where the first-pass scan can take months, but new duplicate
-data appears every day.
-
-The default scan mode is 1, "independent".
+Scan mode 2, "sequential", processes each subvol completely before
+proceeding to the next subvol.  This is a good mode when using bees for
+the first time on a filesystem that already has many existing snapshots
+and a high rate of new snapshot creation.  Short-lived snapshots
+(e.g. those used for `btrfs send`) are effectively ignored, and bees
+directs its efforts toward older subvols that are more likely to be
+origin subvols for snapshots.  By deduping origin subvols first, bees
+ensures that future snapshots will already be deduplicated and do not
+need to be deduplicated again.

 If you are using bees for the first time on a filesystem with many
 existing snapshots, you should read about [snapshot gotchas](gotchas.md).
--- a/docs/event-counters.md
+++ b/docs/event-counters.md
@@ -67,12 +67,11 @@ The `adjust` event group consists of operations related to translating stored vi
 * `adjust_exact`: A block address from the hash table corresponding to an uncompressed data block was processed to find its `(root, inode, offset)` references.
 * `adjust_exact_correct`: A block address corresponding to an uncompressed block was retrieved from the hash table and resolved to a physical block containing data that matches another block bees has already read.
 * `adjust_exact_wrong`: A block address corresponding to an uncompressed block was retrieved from the hash table and resolved to a physical block containing data that matches the hash but not the data from another block bees has already read (i.e. there was a hash collision).
- * `adjust_hit`: A block address was retrieved from the hash table and resolved to a physical block in an uncompressed extent containing data that matches the data from another block bees has already read (i.e. a duplicate match was found).
+ * `adjust_hit`: A block address was retrieved from the hash table and resolved to a physical block containing data that matches the data from another block bees has already read (i.e. a duplicate match was found).
 * `adjust_miss`: A block address was retrieved from the hash table and resolved to a physical block containing a hash that does not match the hash from another block bees has already read (i.e. the hash table contained a stale entry and the data it referred to has since been overwritten in the filesystem).
 * `adjust_needle_too_long`: A block address was retrieved from the hash table, but when the corresponding extent item was retrieved, its offset or length were out of range to be a match (i.e. the hash table contained a stale entry and the data it referred to has since been overwritten in the filesystem).
 * `adjust_no_match`: A hash collision occurred (i.e. a block on disk was located with the same hash as the hash table entry but different data) .  Effectively an alias for `hash_collision` as it is not possible to have one event without the other.
 * `adjust_offset_high`: The `LOGICAL_INO` ioctl gave an extent item that does not overlap with the desired block because the extent item ends before the desired block in the extent data.
- * `adjust_offset_hit`: A block address was retrieved from the hash table and resolved to a physical block in a compressed extent containing data that matches the data from another block bees has already read (i.e. a duplicate match was found).
 * `adjust_offset_low`: The `LOGICAL_INO` ioctl gave an extent item that does not overlap with the desired block because the extent item begins after the desired block in the extent data.
 * `adjust_try`: A block address and extent item candidate were passed to `BeesResolver::adjust_offset` for processing.

@@ -118,7 +117,6 @@ crawl

 The `crawl` event group consists of operations related to scanning btrfs trees to find new extent refs to scan for dedupe.

- * `crawl_again`: An inode crawl was restarted because the extent was already locked by another running crawl.
 * `crawl_blacklisted`: An extent was not scanned because it belongs to a blacklisted file.
 * `crawl_create`: A new subvol crawler was created.
 * `crawl_done`: One pass over all subvols on the filesystem was completed.
@@ -134,6 +132,7 @@ The `crawl` event group consists of operations related to scanning btrfs trees t
 * `crawl_nondata`: An item in the search results is not data.
 * `crawl_prealloc`: An extent item in the search results refers to a `PREALLOC` extent.
 * `crawl_push`: An extent item in the search results is suitable for scanning and deduplication.
+ * `crawl_restart`: A subvol crawl was restarted with a new `min_transid..max_transid` range.
 * `crawl_scan`: An extent item in the search results is submitted to `BeesContext::scan_forward` for scanning and deduplication.
 * `crawl_search`: A `TREE_SEARCH_V2` ioctl call was successful.
 * `crawl_unknown`: An extent item in the search results has an unrecognized type.
@@ -300,7 +299,6 @@ The `resolve` event group consists of operations related to translating a btrfs
 * `resolve_large`: The `LOGICAL_INO` ioctl returned more than 2730 results (the limit of the v1 ioctl).
 * `resolve_ms`: Total time spent in the `LOGICAL_INO` ioctl (i.e. wallclock time, not kernel CPU time).
 * `resolve_ok`: The `LOGICAL_INO` ioctl returned success.
- * `resolve_overflow`: The `LOGICAL_INO` ioctl returned more than 655050 extents (the limit of the v2 ioctl).
 * `resolve_toxic`: The `LOGICAL_INO` ioctl took more than 0.1 seconds of kernel CPU time.

 root
@@ -335,7 +333,6 @@ The `scan` event group consists of operations related to scanning incoming data.
 * `scan_eof`: Scan past EOF was attempted.
 * `scan_erase_redundant`: Blocks in the hash table were removed because they were removed from the filesystem by dedupe.
 * `scan_extent`: An extent was scanned (`scan_one_extent`).
- * `scan_extent_tiny`: An extent below 128K that was not the beginning or end of a file was scanned.  No action is currently taken for these--they are merely counted.
 * `scan_forward`: A logical byte range was scanned (`scan_forward`).
 * `scan_found`: An entry was found in the hash table matching a scanned block from the filesystem.
 * `scan_hash_hit`: A block was found on the filesystem corresponding to a block found in the hash table.
@@ -363,8 +360,6 @@ scanf

 The `scanf` event group consists of operations related to `BeesContext::scan_forward`.  This is the entry point where `crawl` schedules new data for scanning.

- * `scanf_deferred_extent`: Two tasks attempted to scan the same extent at the same time, so one was deferred.
- * `scanf_deferred_inode`: Two tasks attempted to scan the same inode at the same time, so one was deferred.
 * `scanf_extent`: A btrfs extent item was scanned.
 * `scanf_extent_ms`: Total thread-seconds spent scanning btrfs extent items.
 * `scanf_total`: A logical byte range of a file was scanned.
--- a/docs/gotchas.md
+++ b/docs/gotchas.md
@@ -45,7 +45,7 @@ bees will loop billions of times considering all possibilities.  This is
 a waste of time, so an exception is currently used to break out of such
 loops early.  The exception text in this case is:

-	`FIXME: too many duplicate candidates, bailing out here`
+	`FIXME: bailing out here, need to fix this further up the call stack`


 Terminating bees with SIGTERM
--- a/docs/index.md
+++ b/docs/index.md
@@ -17,7 +17,7 @@ Strengths
 * Space-efficient hash table and matching algorithms - can use as little as 1 GB hash table per 10 TB unique data (0.1GB/TB)
 * Daemon incrementally dedupes new data using btrfs tree search
 * Works with btrfs compression - dedupe any combination of compressed and uncompressed files
- * **NEW** [Works around `btrfs send` problems with dedupe and incremental parent snapshots](options.md)
+ * **NEW** [Works around `btrfs send` problems with dedupe and incremental parent snapshots](options.md)
 * Works around btrfs filesystem structure to free more disk space
 * Persistent hash table for rapid restart after shutdown
 * Whole-filesystem dedupe - including snapshots
@@ -70,6 +70,6 @@ You can also use Github:
 Copyright & License
 -------------------

-Copyright 2015-2022 Zygo Blaxell <bees@furryterror.org>.
+Copyright 2015-2018 Zygo Blaxell <bees@furryterror.org>.

 GPL (version 3 or later).
--- a/docs/install.md
+++ b/docs/install.md
@@ -80,7 +80,7 @@ within a temporary runtime directory.
 Packaging
 ---------

-See 'Dependencies' above. Package maintainers can pick ideas for building and
+See 'Dependencies' below. Package maintainers can pick ideas for building and
 configuring the source package from the Gentoo ebuild:

 <https://github.com/gentoo/gentoo/tree/master/sys-fs/bees>
--- a/docs/options.md
+++ b/docs/options.md
@@ -40,16 +40,16 @@

 * `--scan-mode MODE` or `-m`

- Specify extent scanning algorithm.
+ Specify extent scanning algorithm.  Default `MODE` is 0.
 **EXPERIMENTAL** feature that may go away.

-  * Mode 0: lockstep
-  * Mode 1: independent
-  * Mode 2: sequential
-  * Mode 3: recent
-
- For details of the different scanning modes and the default value of
- this option, see [bees configuration](config.md).
+  * Mode 0: scan extents in ascending order of (inode, subvol, offset).
+  Keeps shared extents between snapshots together.  Reads files sequentially.
+  Minimizes temporary space usage.
+  * Mode 1: scan extents from all subvols in parallel.  Good performance
+  on non-spinning media when subvols are unrelated.
+  * Mode 2: scan all extents from one subvol at a time.  Good sequential
+  read performance for spinning media.  Maximizes temporary space usage.

 ## Workarounds

--- a/docs/wrong.md
+++ b/docs/wrong.md
@@ -134,7 +134,7 @@ ulimit -c 0

 # If there were core files, generate reports for them
 for x in core*; do
-	if [ -e "$x" ]; then
+	if [ -e "$x" ]; then 
 		gdb --core="$x" \
 		--eval-command='set pagination off' \
 		--eval-command='info shared' \
--- a/include/crucible/btrfs-tree.h
+++ b/include/crucible/btrfs-tree.h
@@ -1,204 +0,0 @@
-#ifndef CRUCIBLE_BTRFS_TREE_H
-#define CRUCIBLE_BTRFS_TREE_H
-
-#include "crucible/fd.h"
-#include "crucible/fs.h"
-#include "crucible/bytevector.h"
-
-namespace crucible {
-	using namespace std;
-
-	class BtrfsTreeItem {
-		uint64_t m_objectid = 0;
-		uint64_t m_offset = 0;
-		uint64_t m_transid = 0;
-		ByteVector m_data;
-		uint8_t m_type = 0;
-	public:
-		uint64_t objectid() const { return m_objectid; }
-		uint64_t offset() const { return m_offset; }
-		uint64_t transid() const { return m_transid; }
-		uint8_t type() const { return m_type; }
-		const ByteVector data() const { return m_data; }
-		BtrfsTreeItem() = default;
-		BtrfsTreeItem(const BtrfsIoctlSearchHeader &bish);
-		BtrfsTreeItem& operator=(const BtrfsIoctlSearchHeader &bish);
-		bool operator!() const;
-
-		/// Member access methods.  Invoking a method on the
-		/// wrong type of item will throw an exception.
-
-		/// @{ Block group items
-		uint64_t block_group_flags() const;
-		uint64_t block_group_used() const;
-		/// @}
-
-		/// @{ Chunk items
-		uint64_t chunk_length() const;
-		uint64_t chunk_type() const;
-		/// @}
-
-		/// @{ Dev extent items (physical byte ranges)
-		uint64_t dev_extent_chunk_offset() const;
-		uint64_t dev_extent_length() const;
-		/// @}
-
-		/// @{ Dev items (devices)
-		uint64_t dev_item_total_bytes() const;
-		uint64_t dev_item_bytes_used() const;
-		/// @}
-
-		/// @{ Inode items
-		uint64_t inode_size() const;
-		/// @}
-
-		/// @{ Extent refs (EXTENT_DATA)
-		uint64_t file_extent_logical_bytes() const;
-		uint64_t file_extent_generation() const;
-		uint64_t file_extent_offset() const;
-		uint64_t file_extent_bytenr() const;
-		uint8_t file_extent_type() const;
-		btrfs_compression_type file_extent_compression() const;
-		/// @}
-
-		/// @{ Extent items (EXTENT_ITEM)
-		uint64_t extent_begin() const;
-		uint64_t extent_end() const;
-		uint64_t extent_generation() const;
-		/// @}
-
-		/// @{ Root items
-		uint64_t root_flags() const;
-		/// @}
-
-		/// @{ Root backref items.
-		uint64_t root_ref_dirid() const;
-		string root_ref_name() const;
-		uint64_t root_ref_parent_rootid() const;
-		/// @}
-	};
-
-	ostream &operator<<(ostream &os, const BtrfsTreeItem &bti);
-
-	class BtrfsTreeFetcher {
-	protected:
-		Fd m_fd;
-		BtrfsIoctlSearchKey m_sk;
-		uint64_t m_tree = 0;
-		uint64_t m_min_transid = 0;
-		uint64_t m_max_transid = numeric_limits<uint64_t>::max();
-		uint64_t m_block_size = 0;
-		uint64_t m_lookbehind_size = 0;
-		uint64_t m_scale_size = 0;
-		uint8_t m_type = 0;
-
-		uint64_t scale_logical(uint64_t logical) const;
-		uint64_t unscale_logical(uint64_t logical) const;
-		const static uint64_t s_max_logical = numeric_limits<uint64_t>::max();
-		uint64_t scaled_max_logical() const;
-
-		virtual void fill_sk(BtrfsIoctlSearchKey &key, uint64_t object);
-		virtual void next_sk(BtrfsIoctlSearchKey &key, const BtrfsIoctlSearchHeader &hdr);
-		virtual uint64_t hdr_logical(const BtrfsIoctlSearchHeader &hdr) = 0;
-		virtual bool hdr_match(const BtrfsIoctlSearchHeader &hdr) = 0;
-		virtual bool hdr_stop(const BtrfsIoctlSearchHeader &hdr) = 0;
-		Fd fd() const;
-		void fd(Fd fd);
-	public:
-		virtual ~BtrfsTreeFetcher() = default;
-		BtrfsTreeFetcher(Fd new_fd);
-		void type(uint8_t type);
-		void tree(uint64_t tree);
-		void transid(uint64_t min_transid, uint64_t max_transid = numeric_limits<uint64_t>::max());
-		/// Block size (sectorsize) of filesystem
-		uint64_t block_size() const;
-		/// Fetch last object < logical, null if not found
-		BtrfsTreeItem prev(uint64_t logical);
-		/// Fetch first object > logical, null if not found
-		BtrfsTreeItem next(uint64_t logical);
-		/// Fetch object at exactly logical, null if not found
-		BtrfsTreeItem at(uint64_t);
-		/// Fetch first object >= logical
-		BtrfsTreeItem lower_bound(uint64_t logical);
-		/// Fetch last object <= logical
-		BtrfsTreeItem rlower_bound(uint64_t logical);
-
-		/// Estimated distance between objects
-		virtual uint64_t lookbehind_size() const;
-		virtual void lookbehind_size(uint64_t);
-
-		/// Scale size (normally block size but must be set to 1 for fs trees)
-		uint64_t scale_size() const;
-		void scale_size(uint64_t);
-	};
-
-	class BtrfsTreeObjectFetcher : public BtrfsTreeFetcher {
-	protected:
-		virtual void fill_sk(BtrfsIoctlSearchKey &key, uint64_t logical) override;
-		virtual uint64_t hdr_logical(const BtrfsIoctlSearchHeader &hdr) override;
-		virtual bool hdr_match(const BtrfsIoctlSearchHeader &hdr) override;
-		virtual bool hdr_stop(const BtrfsIoctlSearchHeader &hdr) override;
-	public:
-		using BtrfsTreeFetcher::BtrfsTreeFetcher;
-	};
-
-	class BtrfsTreeOffsetFetcher : public BtrfsTreeFetcher {
-	protected:
-		uint64_t m_objectid = 0;
-		virtual void fill_sk(BtrfsIoctlSearchKey &key, uint64_t offset) override;
-		virtual uint64_t hdr_logical(const BtrfsIoctlSearchHeader &hdr) override;
-		virtual bool hdr_match(const BtrfsIoctlSearchHeader &hdr) override;
-		virtual bool hdr_stop(const BtrfsIoctlSearchHeader &hdr) override;
-	public:
-		using BtrfsTreeFetcher::BtrfsTreeFetcher;
-		void objectid(uint64_t objectid);
-		uint64_t objectid() const;
-	};
-
-	class BtrfsCsumTreeFetcher : public BtrfsTreeOffsetFetcher {
-	public:
-		const uint32_t BTRFS_CSUM_TYPE_UNKNOWN = uint32_t(1) << 16;
-	private:
-		size_t		m_sum_size = 0;
-		uint32_t	m_sum_type = BTRFS_CSUM_TYPE_UNKNOWN;
-	public:
-		BtrfsCsumTreeFetcher(const Fd &fd);
-
-		uint32_t sum_type() const;
-		size_t sum_size() const;
-		void get_sums(uint64_t logical, size_t count, function<void(uint64_t logical, const uint8_t *buf, size_t count)> output);
-	};
-
-	/// Fetch extent items from extent tree
-	class BtrfsExtentItemFetcher : public BtrfsTreeObjectFetcher {
-	public:
-		BtrfsExtentItemFetcher(const Fd &fd);
-	};
-
-	/// Fetch extent refs from an inode
-	class BtrfsExtentDataFetcher : public BtrfsTreeOffsetFetcher {
-	public:
-		BtrfsExtentDataFetcher(const Fd &fd);
-	};
-
-	/// Fetch inodes from a subvol
-	class BtrfsFsTreeFetcher : public BtrfsTreeObjectFetcher {
-	public:
-		BtrfsFsTreeFetcher(const Fd &fd, uint64_t subvol);
-	};
-
-	class BtrfsInodeFetcher : public BtrfsTreeObjectFetcher {
-	public:
-		BtrfsInodeFetcher(const Fd &fd);
-		BtrfsTreeItem stat(uint64_t subvol, uint64_t inode);
-	};
-
-	class BtrfsRootFetcher : public BtrfsTreeObjectFetcher {
-	public:
-		BtrfsRootFetcher(const Fd &fd);
-		BtrfsTreeItem root(uint64_t subvol);
-	};
-
-}
-
-#endif
--- a/include/crucible/btrfs.h
+++ b/include/crucible/btrfs.h
@@ -216,28 +216,7 @@ enum btrfs_compression_type {
 	#define BTRFS_FS_INFO_FLAG_CSUM_INFO                    (1 << 0)
 #endif

-#ifndef BTRFS_FS_INFO_FLAG_GENERATION
-/* Request information about filesystem generation */
-#define BTRFS_FS_INFO_FLAG_GENERATION                   (1 << 1)
-#endif
-
-#ifndef BTRFS_FS_INFO_FLAG_METADATA_UUID
-/* Request information about filesystem metadata UUID */
-#define BTRFS_FS_INFO_FLAG_METADATA_UUID                (1 << 2)
-#endif
-
-// BTRFS_CSUM_TYPE_CRC32 was a #define from 2008 to 2019.
-// After that, it's an enum with the other 3 types.
-// So if we do _not_ have CRC32 defined, it means we have the other 3;
-// if we _do_ have CRC32 defined, it means we need the other 3.
-// This seems likely to break some day.
-#ifdef BTRFS_CSUM_TYPE_CRC32
-	#define BTRFS_CSUM_TYPE_XXHASH 1
-	#define BTRFS_CSUM_TYPE_SHA256 2
-	#define BTRFS_CSUM_TYPE_BLAKE2 3
-#endif
-
-struct btrfs_ioctl_fs_info_args_v3 {
+struct btrfs_ioctl_fs_info_args_v2 {
 	__u64 max_id;                           /* out */
 	__u64 num_devices;                      /* out */
 	__u8 fsid[BTRFS_FSID_SIZE];             /* out */
@@ -248,9 +227,7 @@ struct btrfs_ioctl_fs_info_args_v3 {
 	__u16 csum_type;                        /* out */
 	__u16 csum_size;                        /* out */
 	__u64 flags;                            /* in/out */
-	__u64 generation;                       /* out */
-	__u8 metadata_uuid[BTRFS_FSID_SIZE];    /* out */
-	__u8 reserved[944];                     /* pad to 1k */
+	__u8 reserved[968];                     /* pad to 1k */
 };

 #endif // CRUCIBLE_BTRFS_H
--- a/include/crucible/bytevector.h
+++ b/include/crucible/bytevector.h
@@ -1,79 +0,0 @@
-#ifndef _CRUCIBLE_BYTEVECTOR_H_
-#define _CRUCIBLE_BYTEVECTOR_H_
-
-#include <crucible/error.h>
-
-#include <memory>
-#include <mutex>
-#include <ostream>
-
-#include <cstdint>
-#include <cstdlib>
-
-namespace crucible {
-	using namespace std;
-	// new[] is a little slower than malloc
-	// shared_ptr is about 2x slower than unique_ptr
-	// vector<uint8_t> is ~160x slower
-	// so we won't bother with unique_ptr because we can't do shared copies with it
-
-	class ByteVector {
-	public:
-		using Pointer = shared_ptr<uint8_t>;
-		using value_type = Pointer::element_type;
-		using iterator = value_type*;
-
-		ByteVector() = default;
-		ByteVector(const ByteVector &that);
-		ByteVector& operator=(const ByteVector &that);
-		ByteVector(size_t size);
-		ByteVector(const ByteVector &that, size_t start, size_t length);
-		ByteVector(iterator begin, iterator end, size_t min_size = 0);
-
-		ByteVector at(size_t start, size_t length) const;
-
-		value_type& at(size_t) const;
-		iterator begin() const;
-		void clear();
-		value_type* data() const;
-		bool empty() const;
-		iterator end() const;
-		value_type& operator[](size_t) const;
-		size_t size() const;
-		bool operator==(const ByteVector &that) const;
-
-		// this version of erase only works at the beginning or end of the buffer, else throws exception
-		void erase(iterator first);
-		void erase(iterator first, iterator last);
-
-		// An important use case is ioctls that have a fixed-size header struct
-		// followed by a buffer for further arguments.  These templates avoid
-		// doing reinterpret_casts every time.
-		template <class T> ByteVector(const T& object, size_t min_size);
-		template <class T> T* get() const;
-	private:
-		Pointer m_ptr;
-		size_t m_size = 0;
-		mutable mutex m_mutex;
-	friend ostream & operator<<(ostream &os, const ByteVector &bv);
-	};
-
-	template <class T>
-	ByteVector::ByteVector(const T& object, size_t min_size)
-	{
-		const auto size = max(min_size, sizeof(T));
-		m_ptr = Pointer(static_cast<value_type*>(malloc(size)), free);
-		memcpy(m_ptr.get(), &object, sizeof(T));
-		m_size = size;
-	}
-
-	template <class T>
-	T*
-	ByteVector::get() const
-	{
-		THROW_CHECK2(out_of_range, size(), sizeof(T), size() >= sizeof(T));
-		return reinterpret_cast<T*>(data());
-	}
-}
-
-#endif // _CRUCIBLE_BYTEVECTOR_H_
--- a/include/crucible/cache.h
+++ b/include/crucible/cache.h
@@ -30,7 +30,7 @@ namespace crucible {
 		map<Key, ListIter>	m_map;
 		LockSet<Key>		m_lockset;
 		size_t			m_max_size;
-		mutable mutex		m_mutex;
+		mutex			m_mutex;

 		void check_overflow();
 		void recent_use(ListIter vp);
@@ -48,7 +48,6 @@ namespace crucible {
 		void expire(Arguments... args);
 		void insert(const Return &r, Arguments... args);
 		void clear();
-		size_t size() const;
 	};

 	template <class Return, class... Arguments>
@@ -191,14 +190,6 @@ namespace crucible {
 		lock.unlock();
 	}

-	template <class Return, class... Arguments>
-	size_t
-	LRUCache<Return, Arguments...>::size() const
-	{
-		unique_lock<mutex> lock(m_mutex);
-		return m_map.size();
-	}
-
 	template<class Return, class... Arguments>
 	Return
 	LRUCache<Return, Arguments...>::operator()(Arguments... args)
--- a/include/crucible/endian.h
+++ b/include/crucible/endian.h
@@ -28,7 +28,7 @@ namespace crucible {
 	};

 	template<> struct le_to_cpu_helper<uint16_t> {
-		uint16_t operator()(const uint16_t v) { return le16toh(v); }
+		uint16_t operator()(const uint16_t v) { return le16toh(v); } // 16-bit swap: le64toh would swap the widened value and truncate to 0 on big-endian hosts
 	};

 	template<> struct le_to_cpu_helper<uint8_t> {
--- a/include/crucible/error.h
+++ b/include/crucible/error.h
@@ -126,13 +126,6 @@ namespace crucible {
 	} \
 } while(0)

-#define THROW_CHECK4(type, value1, value2, value3, value4, expr) do { \
-	if (!(expr)) { \
-		THROW_ERROR(type, #value1 << " = " << (value1) << ", " #value2 << " = " << (value2) << ", " #value3 << " = " << (value3) << ", " #value4 << " = " << (value4) \
-			<< " failed constraint check (" << #expr << ")"); \
-	} \
-} while(0)
-
 #define THROW_CHECK_BIN_OP(type, value1, op, value2) do { \
 	if (!((value1) op (value2))) { \
 		THROW_ERROR(type, "failed constraint check " << #value1 << " (" << (value1) << ") " << #op << " " << #value2 << " (" << (value2) << ")"); \
--- a/include/crucible/extentwalker.h
+++ b/include/crucible/extentwalker.h
@@ -42,6 +42,9 @@ namespace crucible {
 		uint64_t bytenr() const;
 		bool operator==(const Extent &that) const;
 		bool operator!=(const Extent &that) const { return !(*this == that); }
+
+		Extent() = default;
+		Extent(const Extent &e) = default;
 	};

 	class ExtentWalker {
--- a/include/crucible/fd.h
+++ b/include/crucible/fd.h
@@ -1,7 +1,6 @@
 #ifndef CRUCIBLE_FD_H
 #define CRUCIBLE_FD_H

-#include "crucible/bytevector.h"
 #include "crucible/namedptr.h"

 #include <cstring>
@@ -27,9 +26,9 @@
 namespace crucible {
 	using namespace std;

-	/// File descriptor owner object.  It closes them when destroyed.
-	/// Most of the functions here don't use it because these functions don't own FDs.
-	/// All good names for such objects are taken.
+	// IOHandle is a file descriptor owner object.  It closes them when destroyed.
+	// Most of the functions here don't use it because these functions don't own FDs.
+	// All good names for such objects are taken.
 	class IOHandle {
 		IOHandle(const IOHandle &) = delete;
 		IOHandle(IOHandle &&) = delete;
@@ -43,7 +42,6 @@ namespace crucible {
 		int get_fd() const;
 	};

-	/// Copyable file descriptor.
 	class Fd {
 		static NamedPtr<IOHandle, int> s_named_ptr;
 		shared_ptr<IOHandle> m_handle;
@@ -63,29 +61,24 @@ namespace crucible {

 	// Functions named "foo_or_die" throw exceptions on failure.

-	/// Attempt to open the file with the given mode, throw exception on failure.
+	// Attempt to open the file with the given mode
 	int open_or_die(const string &file, int flags = O_RDONLY, mode_t mode = 0777);
-	/// Attempt to open the file with the given mode, throw exception on failure.
 	int openat_or_die(int dir_fd, const string &file, int flags = O_RDONLY, mode_t mode = 0777);

-	/// Decode open flags
+	// Decode open parameters
 	string o_flags_ntoa(int flags);
-	/// Decode open mode
 	string o_mode_ntoa(mode_t mode);

-	/// mmap with its one weird error case
+	// mmap with its one weird error case
 	void *mmap_or_die(void *addr, size_t length, int prot, int flags, int fd, off_t offset);
-	/// Decode mmap prot
+	// Decode mmap parameters
 	string mmap_prot_ntoa(int prot);
-	/// Decode mmap flags
 	string mmap_flags_ntoa(int flags);

-	/// Rename, throw exception on failure.
+	// Rename, throw exception on failure
 	void rename_or_die(const string &from, const string &to);
-	/// Rename, throw exception on failure.
 	void renameat_or_die(int fromfd, const string &frompath, int tofd, const string &topath);

-	/// Truncate, throw exception on failure.
 	void ftruncate_or_die(int fd, off_t size);

 	// Read or write structs:
@@ -93,25 +86,19 @@ namespace crucible {
 	// Three-arg version of read_or_die/write_or_die throws an error on incomplete read/writes
 	// Four-arg version returns number of bytes read/written through reference arg

-	/// Attempt read by pointer and length, throw exception on IO error or short read.
 	void read_or_die(int fd, void *buf, size_t size);
-	/// Attempt read of a POD struct, throw exception on IO error or short read.
 	template <class T> void read_or_die(int fd, T& buf)
 	{
 		return read_or_die(fd, static_cast<void *>(&buf), sizeof(buf));
 	}

-	/// Attempt read by pointer and length, throw exception on IO error but not short read.
 	void read_partial_or_die(int fd, void *buf, size_t size_wanted, size_t &size_read);
-	/// Attempt read of a POD struct, throw exception on IO error but not short read.
 	template <class T> void read_partial_or_die(int fd, T& buf, size_t &size_read)
 	{
 		return read_partial_or_die(fd, static_cast<void *>(&buf), sizeof(buf), size_read);
 	}

-	/// Attempt read at position by pointer and length, throw exception on IO error but not short read.
 	void pread_or_die(int fd, void *buf, size_t size, off_t offset);
-	/// Attempt read at position of a POD struct, throw exception on IO error but not short read.
 	template <class T> void pread_or_die(int fd, T& buf, off_t offset)
 	{
 		return pread_or_die(fd, static_cast<void *>(&buf), sizeof(buf), offset);
@@ -138,23 +125,20 @@ namespace crucible {
 	// Specialization for strings which reads/writes the string content, not the struct string
 	template<> void write_or_die<string>(int fd, const string& str);
 	template<> void pread_or_die<string>(int fd, string& str, off_t offset);
+	template<> void pread_or_die<vector<char>>(int fd, vector<char>& str, off_t offset);
+	template<> void pread_or_die<vector<uint8_t>>(int fd, vector<uint8_t>& str, off_t offset);
 	template<> void pwrite_or_die<string>(int fd, const string& str, off_t offset);
-	template<> void pread_or_die<ByteVector>(int fd, ByteVector& str, off_t offset);
-	template<> void pwrite_or_die<ByteVector>(int fd, const ByteVector& str, off_t offset);
-	// Deprecated
-	template<> void pread_or_die<vector<uint8_t>>(int fd, vector<uint8_t>& str, off_t offset) = delete;
-	template<> void pwrite_or_die<vector<uint8_t>>(int fd, const vector<uint8_t>& str, off_t offset) = delete;
-	template<> void pread_or_die<vector<char>>(int fd, vector<char>& str, off_t offset) = delete;
-	template<> void pwrite_or_die<vector<char>>(int fd, const vector<char>& str, off_t offset) = delete;
+	template<> void pwrite_or_die<vector<char>>(int fd, const vector<char>& str, off_t offset);
+	template<> void pwrite_or_die<vector<uint8_t>>(int fd, const vector<uint8_t>& str, off_t offset);

-	/// Read a simple string.
+	// A different approach to reading a simple string
 	string read_string(int fd, size_t size);

-	/// A lot of Unix API wants you to initialize a struct and call
-	/// one function to fill it, another function to throw it away,
-	/// and has some unknown third thing you have to do when there's
-	/// an error.  That's also a C++ object with an exception-throwing
-	/// constructor.
+	// A lot of Unix API wants you to initialize a struct and call
+	// one function to fill it, another function to throw it away,
+	// and has some unknown third thing you have to do when there's
+	// an error.  That's also a C++ object with an exception-throwing
+	// constructor.
 	struct Stat : public stat {
 		Stat();
 		Stat(int f);
@@ -168,17 +152,17 @@ namespace crucible {

 	string st_mode_ntoa(mode_t mode);

-	/// Because it's not trivial to do correctly
+	// Because it's not trivial to do correctly
 	string readlink_or_die(const string &path);

-	/// Determine the name of a FD by readlink through /proc/self/fd/
+	// Determine the name of a FD by readlink through /proc/self/fd/
 	string name_fd(int fd);

-	/// Returns Fd objects because it does own them.
+	// Returns Fd objects because it does own them.
 	pair<Fd, Fd> socketpair_or_die(int domain = AF_UNIX, int type = SOCK_STREAM, int protocol = 0);

-	/// like unique_lock but for flock instead of mutexes...and not trying
-	/// to hide the many and subtle differences between those two things *at all*.
+	// like unique_lock but for flock instead of mutexes...and not trying
+	// to hide the many and subtle differences between those two things *at all*.
 	class Flock {
 		int	m_fd;
 		bool	m_locked;
@@ -199,7 +183,7 @@ namespace crucible {
 		int fd();
 	};

-	/// Doesn't use Fd objects because it's usually just used to replace stdin/stdout/stderr.
+	// Doesn't use Fd objects because it's usually just used to replace stdin/stdout/stderr.
 	void dup2_or_die(int fd_in, int fd_out);

 }
--- a/include/crucible/fs.h
+++ b/include/crucible/fs.h
@@ -1,9 +1,9 @@
 #ifndef CRUCIBLE_FS_H
 #define CRUCIBLE_FS_H

-#include "crucible/bytevector.h"
 #include "crucible/endian.h"
 #include "crucible/error.h"
+#include "crucible/spanner.h"

 // Terribly Linux-specific FS-wrangling functions

@@ -27,16 +27,18 @@ namespace crucible {
 	// wrapper around fallocate(...FALLOC_FL_PUNCH_HOLE...)
 	void punch_hole(int fd, off_t offset, off_t len);

-	struct BtrfsExtentSame {
+	struct BtrfsExtentInfo : public btrfs_ioctl_same_extent_info {
+		BtrfsExtentInfo(int dst_fd, off_t dst_offset);
+	};
+
+	struct BtrfsExtentSame : public btrfs_ioctl_same_args {
 		virtual ~BtrfsExtentSame();
 		BtrfsExtentSame(int src_fd, off_t src_offset, off_t src_length);
-		void add(int fd, uint64_t offset);
+		void add(int fd, off_t offset);
 		virtual void do_ioctl();

-		uint64_t m_logical_offset = 0;
-		uint64_t m_length = 0;
 		int m_fd;
-		vector<btrfs_ioctl_same_extent_info> m_info;
+		vector<BtrfsExtentInfo> m_info;
 	};

 	ostream & operator<<(ostream &os, const btrfs_ioctl_same_extent_info *info);
@@ -51,20 +53,20 @@ namespace crucible {

 	ostream & operator<<(ostream &os, const BtrfsInodeOffsetRoot &p);

-	struct BtrfsDataContainer {
+	struct BtrfsDataContainer : public btrfs_data_container {
 		BtrfsDataContainer(size_t size = 64 * 1024);
 		void *prepare(size_t size);

 		size_t get_size() const;
-		decltype(btrfs_data_container::bytes_left) get_bytes_left() const;
-		decltype(btrfs_data_container::bytes_missing) get_bytes_missing() const;
-		decltype(btrfs_data_container::elem_cnt) get_elem_cnt() const;
-		decltype(btrfs_data_container::elem_missed) get_elem_missed() const;
+		decltype(bytes_left) get_bytes_left() const;
+		decltype(bytes_missing) get_bytes_missing() const;
+		decltype(elem_cnt) get_elem_cnt() const;
+		decltype(elem_missed) get_elem_missed() const;

-		ByteVector m_data;
+		vector<uint8_t> m_data;
 	};

-	struct BtrfsIoctlLogicalInoArgs {
+	struct BtrfsIoctlLogicalInoArgs : public btrfs_ioctl_logical_ino_args {
 		BtrfsIoctlLogicalInoArgs(uint64_t logical, size_t buf_size = 16 * 1024 * 1024);

 		uint64_t get_flags() const;
@@ -73,6 +75,7 @@ namespace crucible {
 		virtual void do_ioctl(int fd);
 		virtual bool do_ioctl_nothrow(int fd);

+		size_t m_container_size;
 		struct BtrfsInodeOffsetRootSpan {
 			using iterator = BtrfsInodeOffsetRoot*;
 			using const_iterator = const BtrfsInodeOffsetRoot*;
@@ -83,17 +86,13 @@ namespace crucible {
 			const_iterator cend() const;
 			iterator data() const;
 			void clear();
+			operator vector<BtrfsInodeOffsetRoot>() const;
 		private:
 			iterator m_begin = nullptr;
 			iterator m_end = nullptr;
 		friend struct BtrfsIoctlLogicalInoArgs;
 		} m_iors;
-	private:
-		size_t m_container_size;
 		BtrfsDataContainer m_container;
-		uint64_t m_logical;
-		uint64_t m_flags = 0;
-	friend ostream & operator<<(ostream &os, const BtrfsIoctlLogicalInoArgs *p);
 	};

 	ostream & operator<<(ostream &os, const BtrfsIoctlLogicalInoArgs &p);
@@ -125,6 +124,15 @@ namespace crucible {

 	ostream & operator<<(ostream &os, const BtrfsIoctlDefragRangeArgs *p);

+	// in btrfs/ctree.h, but that's a nightmare to #include here
+	typedef enum {
+		BTRFS_COMPRESS_NONE  = 0,
+		BTRFS_COMPRESS_ZLIB  = 1,
+		BTRFS_COMPRESS_LZO   = 2,
+		BTRFS_COMPRESS_ZSTD  = 3,
+		BTRFS_COMPRESS_TYPES = 3
+	} btrfs_compression_type;
+
 	struct FiemapExtent : public fiemap_extent {
 		FiemapExtent();
 		FiemapExtent(const fiemap_extent &that);
@@ -133,26 +141,16 @@ namespace crucible {
 		off_t end() const;
 	};

-	struct Fiemap {
-
-		// because fiemap.h insists on giving FIEMAP_MAX_OFFSET
-		// a different type from the struct fiemap members
-		static const uint64_t s_fiemap_max_offset = FIEMAP_MAX_OFFSET;
+	struct Fiemap : public fiemap {

 		// Get entire file
-		Fiemap(uint64_t start = 0, uint64_t length = s_fiemap_max_offset);
+		Fiemap(uint64_t start = 0, uint64_t length = FIEMAP_MAX_OFFSET);

 		void do_ioctl(int fd);

 		vector<FiemapExtent> m_extents;
-		decltype(fiemap::fm_extent_count) m_min_count = (4096 - sizeof(fiemap)) / sizeof(fiemap_extent);
-		decltype(fiemap::fm_extent_count) m_max_count = 16 * 1024 * 1024 / sizeof(fiemap_extent);
-		uint64_t m_start;
-		uint64_t m_length;
-		// FIEMAP is slow and full of lies.
-		// This makes FIEMAP even slower, but reduces the lies a little.
-		decltype(fiemap::fm_flags) m_flags = FIEMAP_FLAG_SYNC;
-	friend ostream &operator<<(ostream &, const Fiemap &);
+		uint64_t m_min_count = (4096 - sizeof(fiemap)) / sizeof(fiemap_extent);
+		uint64_t m_max_count = 16 * 1024 * 1024 / sizeof(fiemap_extent);
 	};

 	ostream & operator<<(ostream &os, const fiemap_extent *info);
@@ -168,8 +166,8 @@ namespace crucible {

 	struct BtrfsIoctlSearchHeader : public btrfs_ioctl_search_header {
 		BtrfsIoctlSearchHeader();
-		ByteVector m_data;
-		size_t set_data(const ByteVector &v, size_t offset);
+		Spanner<const uint8_t> m_data;
+		size_t set_data(const vector<uint8_t> &v, size_t offset);
 		bool operator<(const BtrfsIoctlSearchHeader &that) const;
 	};

@@ -183,18 +181,17 @@ namespace crucible {
 	ostream & operator<<(ostream &os, const BtrfsIoctlSearchHeader &hdr);

 	struct BtrfsIoctlSearchKey : public btrfs_ioctl_search_key {
-		BtrfsIoctlSearchKey(size_t buf_size = 1024);
-		bool do_ioctl_nothrow(int fd);
-		void do_ioctl(int fd);
+		BtrfsIoctlSearchKey(size_t buf_size = 4096);
+		virtual bool do_ioctl_nothrow(int fd);
+		virtual void do_ioctl(int fd);

 		// Copy objectid/type/offset so we move forward
 		void next_min(const BtrfsIoctlSearchHeader& ref);

-		// move forward to next object of a single type
-		void next_min(const BtrfsIoctlSearchHeader& ref, const uint8_t type);
-
 		size_t m_buf_size;
+		vector<uint8_t> m_ioctl_arg;
 		set<BtrfsIoctlSearchHeader> m_result;
+
 	};

 	ostream & operator<<(ostream &os, const btrfs_ioctl_search_key &key);
@@ -202,7 +199,6 @@ namespace crucible {

 	string btrfs_search_type_ntoa(unsigned type);
 	string btrfs_search_objectid_ntoa(uint64_t objectid);
-	string btrfs_compress_type_ntoa(uint8_t type);

 	uint64_t btrfs_get_root_id(int fd);
 	uint64_t btrfs_get_root_transid(int fd);
@@ -239,12 +235,11 @@ namespace crucible {

 	template<class V> ostream &hexdump(ostream &os, const V &v);

-	struct BtrfsIoctlFsInfoArgs : public btrfs_ioctl_fs_info_args_v3 {
+	struct BtrfsIoctlFsInfoArgs : public btrfs_ioctl_fs_info_args_v2 {
 		BtrfsIoctlFsInfoArgs();
 		void do_ioctl(int fd);
 		uint16_t csum_type() const;
 		uint16_t csum_size() const;
-		uint64_t generation() const;
 	};

 	ostream & operator<<(ostream &os, const BtrfsIoctlFsInfoArgs &a);
--- a/include/crucible/hexdump.h
+++ b/include/crucible/hexdump.h
@@ -1,36 +0,0 @@
-#ifndef CRUCIBLE_HEXDUMP_H
-#define CRUCIBLE_HEXDUMP_H
-
-#include "crucible/string.h"
-
-#include <ostream>
-
-namespace crucible {
-	using namespace std;
-
-	template <class V>
-	ostream &
-	hexdump(ostream &os, const V &v)
-	{
-		os << "V { size = " << v.size() << ", data:\n";
-		for (size_t i = 0; i < v.size(); i += 8) {
-			string hex, ascii;
-			for (size_t j = i; j < i + 8; ++j) {
-				if (j < v.size()) {
-					uint8_t c = v[j];
-					char buf[8];
-					sprintf(buf, "%02x ", c);
-					hex += buf;
-					ascii += (c < 32 || c > 126) ? '.' : c;
-				} else {
-					hex += "   ";
-					ascii += ' ';
-				}
-			}
-			os << astringprintf("\t%08x %s %s\n", i, hex.c_str(), ascii.c_str());
-		}
-		return os << "}";
-	}
-};
-
-#endif // CRUCIBLE_HEXDUMP_H
--- a/include/crucible/multilock.h
+++ b/include/crucible/multilock.h
@@ -1,40 +0,0 @@
-#ifndef CRUCIBLE_MULTILOCK_H
-#define CRUCIBLE_MULTILOCK_H
-
-#include <condition_variable>
-#include <map>
-#include <memory>
-#include <mutex>
-#include <string>
-
-namespace crucible {
-        using namespace std;
-
-	class MultiLocker {
-		mutex m_mutex;
-		condition_variable m_cv;
-		map<string, size_t> m_counters;
-
-		class LockHandle {
-			const string m_type;
-			MultiLocker &m_parent;
-			bool m_locked = false;
-			void set_locked(bool state);
-		public:
-			~LockHandle();
-			LockHandle(const string &type, MultiLocker &parent);
-		friend class MultiLocker;
-		};
-
-		friend class LockHandle;
-
-		bool is_lock_available(const string &type);
-		void put_lock(const string &type);
-		shared_ptr<LockHandle> get_lock_private(const string &type);
-	public:
-		static shared_ptr<LockHandle> get_lock(const string &type);
-	};
-
-}
-
-#endif // CRUCIBLE_MULTILOCK_H
--- a/include/crucible/namedptr.h
+++ b/include/crucible/namedptr.h
@@ -12,18 +12,13 @@
 namespace crucible {
 	using namespace std;

-	/// A thread-safe container for RAII of shared resources with unique names.
+	/// Storage for objects with unique names

 	template <class Return, class... Arguments>
 	class NamedPtr {
 	public:
-		/// The name in "NamedPtr"
 		using Key = tuple<Arguments...>;
-		/// A shared pointer to the named object with ownership
-		/// tracking that erases the object's stored name when
-		/// the last shared pointer is destroyed.
 		using Ptr = shared_ptr<Return>;
-		/// A function that translates a name into a shared pointer to an object.
 		using Func = function<Ptr(Arguments...)>;
 	private:
 		struct Value;
@@ -34,7 +29,6 @@ namespace crucible {
 			mutex		m_mutex;
 		};
 		using MapPtr = shared_ptr<MapRep>;
-		/// Container for Return pointers.  Destructor removes entry from map.
 		struct Value {
 			Ptr	m_ret_ptr;
 			MapPtr	m_map_rep;
@@ -56,21 +50,15 @@ namespace crucible {
 		void func(Func f);

 		Ptr operator()(Arguments... args);
-
 		Ptr insert(const Ptr &r, Arguments... args);
 	};

-	/// Construct NamedPtr map and define a function to turn a name into a pointer.
 	template <class Return, class... Arguments>
 	NamedPtr<Return, Arguments...>::NamedPtr(Func f) :
 		m_fn(f)
 	{
 	}

-	/// Construct a Value wrapper: the value to store, the argument key to store the value under,
-	/// and a pointer to the map.  Everything needed to remove the key from the map when the
-	/// last NamedPtr is deleted.  NamedPtr then releases its own pointer to the value, which
-	/// may or may not trigger deletion there.
 	template <class Return, class... Arguments>
 	NamedPtr<Return, Arguments...>::Value::Value(Ptr&& ret_ptr, const Key &key, const MapPtr &map_rep) :
 		m_ret_ptr(ret_ptr),
@@ -79,8 +67,6 @@ namespace crucible {
 	{
 	}

-	/// Destroy a Value wrapper: remove a dead Key from the map, then let the member destructors
-	/// do the rest.  The Key might be in the map and not dead, so leave it alone in that case.
 	template <class Return, class... Arguments>
 	NamedPtr<Return, Arguments...>::Value::~Value()
 	{
@@ -96,23 +82,21 @@ namespace crucible {
 		// "our" map entry if it exists and is expired.  The other
 		// thread would have done the same for us if the race had
 		// a different winner.
-		const auto found = m_map_rep->m_map.find(m_ret_key);
+		auto found = m_map_rep->m_map.find(m_ret_key);
 		if (found != m_map_rep->m_map.end() && found->second.expired()) {
 			m_map_rep->m_map.erase(found);
 		}
 	}

-	/// Find a Return by key and fetch a strong Return pointer.
-	/// Ignore Keys that have expired weak pointers.
 	template <class Return, class... Arguments>
 	typename NamedPtr<Return, Arguments...>::Ptr
 	NamedPtr<Return, Arguments...>::lookup_item(const Key &k)
 	{
 		// Must be called with lock held
-		const auto found = m_map_rep->m_map.find(k);
+		auto found = m_map_rep->m_map.find(k);
 		if (found != m_map_rep->m_map.end()) {
 			// Get the strong pointer back
-			const auto rv = found->second.lock();
+			auto rv = found->second.lock();
 			if (rv) {
 				// Have strong pointer.  Return value that shares map entry.
 				return shared_ptr<Return>(rv, rv->m_ret_ptr.get());
@@ -125,11 +109,6 @@ namespace crucible {
 		return Ptr();
 	}

-	/// Insert the Return value of calling Func(Arguments...).
-	/// If the value already exists in the map, return the existing value.
-	/// If another thread is already running Func(Arguments...) then this thread
-	/// will block until the other thread finishes inserting the Return in the
-	/// map, and both threads will return the same Return value.
 	template <class Return, class... Arguments>
 	typename NamedPtr<Return, Arguments...>::Ptr
 	NamedPtr<Return, Arguments...>::insert_item(Func fn, Arguments... args)
@@ -137,36 +116,34 @@ namespace crucible {
 		Key k(args...);

 		// Is it already in the map?
-		unique_lock<mutex> lock_lookup(m_map_rep->m_mutex);
+		unique_lock<mutex> lock(m_map_rep->m_mutex);
 		auto rv = lookup_item(k);
 		if (rv) {
 			return rv;
 		}

 		// Release map lock and acquire key lock
-		lock_lookup.unlock();
-		const auto key_lock = m_lockset.make_lock(k);
+		lock.unlock();
+		auto key_lock = m_lockset.make_lock(k);

 		// Did item appear in map while we were waiting for key?
-		lock_lookup.lock();
+		lock.lock();
 		rv = lookup_item(k);
 		if (rv) {
 			return rv;
 		}

 		// We now hold key and index locks, but item not in map (or expired).
-		// Release map lock so other threads can use the map
-		lock_lookup.unlock();
-
-		// Call the function and create a new Value outside of the map
-		const auto new_value_ptr = make_shared<Value>(fn(args...), k, m_map_rep);
+		// Release map lock
+		lock.unlock();

+		// Call the function and create a new Value
+		auto new_value_ptr = make_shared<Value>(fn(args...), k, m_map_rep);
 		// Function must return a non-null pointer
 		THROW_CHECK0(runtime_error, new_value_ptr->m_ret_ptr);

-		// Reacquire index lock for map insertion.  We still hold the key lock.
-		// Use a different lock object to make exceptions unlock in the right order
-		unique_lock<mutex> lock_insert(m_map_rep->m_mutex);
+		// Reacquire index lock for map insertion
+		lock.lock();

 		// Insert return value in map or overwrite existing
 		// empty or expired weak_ptr value.
@@ -181,16 +158,16 @@ namespace crucible {
 		// to find and fix.
 		assert(new_item_ref.expired());

-		// Update the map slot we are sure is empty
+		// Update the empty map slot
 		new_item_ref = new_value_ptr;

+		// Drop lock so we don't deadlock in constructor exceptions
+		lock.unlock();
+
 		// Return shared_ptr to Return using strong pointer's reference counter
 		return shared_ptr<Return>(new_value_ptr, new_value_ptr->m_ret_ptr.get());
-
-		// Release map lock, then key lock
 	}

-	/// (Re)define a function to turn a name into a pointer.
 	template <class Return, class... Arguments>
 	void
 	NamedPtr<Return, Arguments...>::func(Func func)
@@ -199,7 +176,6 @@ namespace crucible {
 		m_fn = func;
 	}

-	/// Convert a name into a pointer using the configured function.
 	template<class Return, class... Arguments>
 	typename NamedPtr<Return, Arguments...>::Ptr
 	NamedPtr<Return, Arguments...>::operator()(Arguments... args)
@@ -207,19 +183,14 @@ namespace crucible {
 		return insert_item(m_fn, args...);
 	}

-	/// Insert a pointer that has already been created under the
-	/// given name.  Useful for inserting a pointer to a derived
-	/// class when the name doesn't contain all of the information
-	/// required for the object, or when the Return is already known by
-	/// some cheaper method than calling the function.
 	template<class Return, class... Arguments>
 	typename NamedPtr<Return, Arguments...>::Ptr
 	NamedPtr<Return, Arguments...>::insert(const Ptr &r, Arguments... args)
 	{
 		THROW_CHECK0(invalid_argument, r);
-		return insert_item([&](Arguments...) { return r; }, args...);
+		return insert_item([&](Arguments...) -> Ptr { return r; }, args...);
 	}

 }

-#endif // CRUCIBLE_NAMEDPTR_H
+#endif // NAMEDPTR_H
--- a/include/crucible/ntoa.h
+++ b/include/crucible/ntoa.h
@@ -20,7 +20,7 @@ namespace crucible {
 #define NTOA_TABLE_ENTRY_BITS(x) { .n = (x), .mask = (x), .a = (#x) }

 // Enumerations (entire value matches all bits)
-#define NTOA_TABLE_ENTRY_ENUM(x) { .n = (x), .mask = ~0ULL,  .a = (#x) }
+#define NTOA_TABLE_ENTRY_ENUM(x) { .n = (x), .mask = ~0UL,  .a = (#x) }

 // End of table (sorry, C++ didn't get C99's compound literals, so we have to write out all the member names)
 #define NTOA_TABLE_ENTRY_END() { .n = 0, .mask = 0, .a = nullptr }
--- a/include/crucible/progress.h
+++ b/include/crucible/progress.h
@@ -20,8 +20,8 @@ namespace crucible {
 		using ProgressHolder = shared_ptr<ProgressHolderState>;

 		ProgressTracker(const value_type &v);
-		value_type begin() const;
-		value_type end() const;
+		value_type begin();
+		value_type end();

 		ProgressHolder hold(const value_type &v);

@@ -51,7 +51,7 @@ namespace crucible {

 	template <class T>
 	typename ProgressTracker<T>::value_type
-	ProgressTracker<T>::begin() const
+	ProgressTracker<T>::begin()
 	{
 		unique_lock<mutex> lock(m_state->m_mutex);
 		return m_state->m_begin;
@@ -59,7 +59,7 @@ namespace crucible {

 	template <class T>
 	typename ProgressTracker<T>::value_type
-	ProgressTracker<T>::end() const
+	ProgressTracker<T>::end()
 	{
 		unique_lock<mutex> lock(m_state->m_mutex);
 		return m_state->m_end;
--- a/include/crucible/seeker.h
+++ b/include/crucible/seeker.h
@@ -1,163 +0,0 @@
-#ifndef _CRUCIBLE_SEEKER_H_
-#define _CRUCIBLE_SEEKER_H_
-
-#include "crucible/error.h"
-
-#include <algorithm>
-#include <limits>
-
-#include <cstdint>
-
-#if 1
-#include <iostream>
-#include <sstream>
-#define DINIT(__x) __x
-#define DLOG(__x) do { logs << __x << std::endl; } while (false)
-#define DOUT(__err) do { __err << logs.str(); } while (false)
-#else
-#define DINIT(__x) do {} while (false)
-#define DLOG(__x) do {} while (false)
-#define DOUT(__x) do {} while (false)
-#endif
-
-namespace crucible {
-	using namespace std;
-
-	// Requirements for Container<Pos> Fetch(Pos lower, Pos upper):
-	// - fetches objects in Pos order, starting from lower (must be >= lower)
-	// - must return upper if present, may or may not return objects after that
-	// - returns a container of Pos objects with begin(), end(), rbegin(), rend()
-	// - container must iterate over objects in Pos order
-	// - uniqueness of Pos objects not required
-	// - should store the underlying data as a side effect
-	//
-	// Requirements for Pos:
-	// - should behave like an unsigned integer type
-	// - must have specializations in numeric_limits<T> for digits, max(), min()
-	// - must support +, -, -=, and related operators
-	// - must support <, <=, ==, and related operators
-	// - must support Pos / 2 (only)
-	//
-	// Requirements for seek_backward:
-	// - calls Fetch to search Pos space near target_pos
-	// - if no key exists with value <= target_pos, returns the minimum Pos value
-	// - returns the highest key value <= target_pos
-	// - returned key value may not be part of most recent Fetch result
-	// - 1 loop iteration when target_pos exists
-
-	template <class Fetch, class Pos = uint64_t>
-	Pos
-	seek_backward(Pos const target_pos, Fetch fetch, Pos min_step = 1, size_t max_loops = numeric_limits<size_t>::max())
-	{
-		DINIT(ostringstream logs);
-		try {
-			static const Pos end_pos = numeric_limits<Pos>::max();
-			// TBH this probably won't work if begin_pos != 0, i.e. any signed type
-			static const Pos begin_pos = numeric_limits<Pos>::min();
-			// Run a binary search looking for the highest key below target_pos.
-			// Initial upper bound of the search is target_pos.
-			// Find initial lower bound by doubling the size of the range until a key below target_pos
-			// is found, or the lower bound reaches the beginning of the search space.
-			// If the lower bound search reaches the beginning of the search space without finding a key,
-			// return the beginning of the search space; otherwise, perform a binary search between
-			// the bounds now established.
-			Pos lower_bound = 0;
-			Pos upper_bound = target_pos;
-			bool found_low = false;
-			Pos probe_pos = target_pos;
-			// We need one loop for each bit of the search space to find the lower bound,
-			// one loop for each bit of the search space to find the upper bound,
-			// and one extra loop to confirm the boundary is correct.
-			for (size_t loop_count = min(numeric_limits<Pos>::digits * size_t(2) + 1, max_loops); loop_count; --loop_count) {
-				DLOG("fetch(probe_pos = " << probe_pos << ", target_pos = " << target_pos << ")");
-				auto result = fetch(probe_pos, target_pos);
-				const Pos low_pos = result.empty() ? end_pos : *result.begin();
-				const Pos high_pos = result.empty() ? end_pos : *result.rbegin();
-				DLOG(" = " << low_pos << ".." << high_pos);
-				// check for correct behavior of the fetch function
-				THROW_CHECK2(out_of_range, high_pos, probe_pos, probe_pos <= high_pos);
-				THROW_CHECK2(out_of_range, low_pos, probe_pos, probe_pos <= low_pos);
-				THROW_CHECK2(out_of_range, low_pos, high_pos, low_pos <= high_pos);
-				if (!found_low) {
-					// if target_pos == end_pos then we will find it in every empty result set,
-					// so in that case we force the lower bound to be lower than end_pos
-					if ((target_pos == end_pos) ? (low_pos < target_pos) : (low_pos <= target_pos)) {
-						// found a lower bound, set the low bound there and switch to binary search
-						found_low = true;
-						lower_bound = low_pos;
-						DLOG("found_low = true, lower_bound = " << lower_bound);
-					} else {
-						// still looking for lower bound
-						// if probe_pos was begin_pos then we can stop with no result
-						if (probe_pos == begin_pos) {
-							DLOG("return: probe_pos == begin_pos " << begin_pos);
-							return begin_pos;
-						}
-						// double the range size, or use the distance between objects found so far
-						THROW_CHECK2(out_of_range, upper_bound, probe_pos, probe_pos <= upper_bound);
-						// already checked low_pos <= high_pos above
-						const Pos want_delta = max(upper_bound - probe_pos, min_step);
-						// avoid underflowing the beginning of the search space
-						const Pos have_delta = min(want_delta, probe_pos - begin_pos);
-						THROW_CHECK2(out_of_range, want_delta, have_delta, have_delta <= want_delta);
-						// move probe and try again
-						probe_pos = probe_pos - have_delta;
-						DLOG("probe_pos " << probe_pos << " = probe_pos - have_delta " << have_delta << " (want_delta " << want_delta << ")");
-						continue;
-					}
-				}
-				if (low_pos <= target_pos && target_pos <= high_pos) {
-					// have keys on either side of target_pos in result
-					// search from the high end until we find the highest key below target
-					for (auto i = result.rbegin(); i != result.rend(); ++i) {
-						// more correctness checking for fetch
-						THROW_CHECK2(out_of_range, *i, probe_pos, probe_pos <= *i);
-						if (*i <= target_pos) {
-							DLOG("return: *i " << *i << " <= target_pos " << target_pos);
-							return *i;
-						}
-					}
-					// if the list is empty then low_pos = high_pos = end_pos
-					// if target_pos = end_pos also, then we will execute the loop
-					// above but not find any matching entries.
-					THROW_CHECK0(runtime_error, result.empty());
-				}
-				if (target_pos <= low_pos) {
-					// results are all too high, so probe_pos..low_pos is too high
-					// lower the high bound to the probe pos
-					upper_bound = probe_pos;
-					DLOG("upper_bound = probe_pos " << probe_pos);
-				}
-				if (high_pos < target_pos) {
-					// results are all too low, so probe_pos..high_pos is too low
-					// raise the low bound to the high_pos
-					DLOG("lower_bound = high_pos " << high_pos);
-					lower_bound = high_pos;
-				}
-				// compute a new probe pos at the middle of the range and try again
-				// we can't have a zero-size range here because we would not have set found_low yet
-				THROW_CHECK2(out_of_range, lower_bound, upper_bound, lower_bound <= upper_bound);
-				const Pos delta = (upper_bound - lower_bound) / 2;
-				probe_pos = lower_bound + delta;
-				if (delta < 1) {
-					// nothing can exist in the range (lower_bound, upper_bound)
-					// and an object is known to exist at lower_bound
-					DLOG("return: probe_pos == lower_bound " << lower_bound);
-					return lower_bound;
-				}
-				THROW_CHECK2(out_of_range, lower_bound, probe_pos, lower_bound <= probe_pos);
-				THROW_CHECK2(out_of_range, upper_bound, probe_pos, probe_pos <= upper_bound);
-				DLOG("loop: lower_bound " << lower_bound << ", probe_pos " << probe_pos << ", upper_bound " << upper_bound);
-			}
-			THROW_ERROR(runtime_error, "FIXME: should not reach this line: "
-				"lower_bound..upper_bound " << lower_bound << ".." << upper_bound << ", "
-				"found_low " << found_low);
-		} catch (...) {
-			DOUT(cerr);
-			throw;
-		}
-	}
-}
-
-#endif // _CRUCIBLE_SEEKER_H_
-
--- a/include/crucible/spanner.h
+++ b/include/crucible/spanner.h
@@ -0,0 +1,209 @@
+#ifndef CRUCIBLE_SPANNER_H
+#define CRUCIBLE_SPANNER_H
+
+#include "crucible/error.h"
+
+#include <memory>
+
+namespace crucible {
+
+	using namespace std;
+
+	// C++20 is already using the name "span" for something similar.
+	//
+	// Spanner is a view of m_size consecutive value_type objects starting
+	// at the object m_begin points to.  Head is the type that holds the
+	// start of the view (a plain pointer by default, or a shared_ptr as
+	// built by make_spanner(shared_ptr<Container> &)).  Iter is the type
+	// returned by begin() and end().
+	template <class T, class Head = T*, class Iter = Head>
+	class Spanner {
+	public:
+		using iterator = Iter;
+		using head_pointer = Head;
+		using value_type = T;
+
+		// View the elements of any object providing data() and size().
+		template <class Container>
+		Spanner(Container& container);
+
+		// Copies must share the head pointer.  The non-const overload
+		// exists because the Container& template above would otherwise
+		// be the better match for a non-const Spanner lvalue, and it
+		// rebuilds the head from data(), which for a shared_ptr head
+		// would start a second, unrelated ownership of the same memory.
+		Spanner(const Spanner &that) = default;
+		Spanner(Spanner &that) : Spanner(static_cast<const Spanner &>(that)) {}
+
+		// View [begin, end).
+		Spanner(head_pointer begin, iterator end);
+		// View size elements starting at begin.
+		Spanner(size_t size, head_pointer begin);
+		// Empty view (see the member initializers below).
+		Spanner() = default;
+		Spanner &operator=(const Spanner &that) = default;
+		iterator begin() const;
+		iterator end() const;
+		value_type *data() const;
+		// Element n, checked against size() with THROW_CHECK2.
+		value_type &at(size_t n) const;
+		size_t size() const;
+		bool empty() const;
+		// Reset to an empty view.
+		void clear();
+		// Same as at(n), including the range check.
+		value_type &operator[](size_t n) const;
+		// Narrow the view by dropping [first, last), which must be a
+		// prefix or a suffix of the view.  Only the view changes.
+		iterator erase(iterator first, iterator last);
+		// Same as erase(first, first + 1).
+		iterator erase(iterator first);
+	private:
+		// Initialized here so that Spanner() is a well-defined empty
+		// view rather than holding indeterminate values.
+		head_pointer	m_begin = head_pointer();
+		size_t		m_size = 0;
+	};
+
+	// Build a Spanner over all elements of container.
+	template <class Container, class Head = typename Container::value_type *, class Iter = Head>
+	Spanner<typename Container::value_type, Head, Iter> make_spanner(Container &container)
+	{
+		return Spanner<typename Container::value_type, Head, Iter>(container);
+	}
+
+	// This template is an attempt to turn a shared_ptr to a container
+	// into a range view that can be cheaply passed around.
+	// It probably doesn't quite work in the general case.
+	template <class Container, class Head = shared_ptr<typename Container::value_type>, class Iter = typename Container::value_type *>
+	Spanner<typename Container::value_type, Head, Iter> make_spanner(shared_ptr<Container> &cont_ptr)
+	{
+		// Aliasing constructor: head shares ownership of the container
+		// but points at the container's first element.
+		shared_ptr<typename Container::value_type> head(cont_ptr, cont_ptr->data());
+		size_t const size = cont_ptr->size();
+		return Spanner<typename Container::value_type, Head, Iter>(size, head);
+	}
+
+	template <class T, class Head, class Iter>
+	template <class Container>
+	Spanner<T, Head, Iter>::Spanner(Container &container) :
+		m_begin(container.data()),
+		m_size(container.size())
+	{
+	}
+
+	template <class T, class Head, class Iter>
+	Spanner<T, Head, Iter>::Spanner(head_pointer begin, iterator end) :
+		m_begin(begin),
+		m_size(end - begin)
+	{
+	}
+
+	template <class T, class Head, class Iter>
+	Spanner<T, Head, Iter>::Spanner(size_t size, head_pointer begin) :
+		m_begin(begin),
+		m_size(size)
+	{
+	}
+
+	// NOTE(review): erase() does pointer arithmetic on m_begin and assigns
+	// an iterator to it, so it presumably only compiles when head_pointer
+	// is a plain pointer (not for the shared_ptr head) - confirm.
+	template <class T, class Head, class Iter>
+	typename Spanner<T, Head, Iter>::iterator
+	Spanner<T, Head, Iter>::erase(iterator first, iterator last)
+	{
+		auto end = m_begin + m_size;
+		// A reversed range would move a boundary outside of the view.
+		THROW_CHECK0(invalid_argument, first <= last);
+		if (first == m_begin) {
+			THROW_CHECK0(invalid_argument, last <= end);
+			// Dropping a prefix moves the start forward, so the
+			// element count has to shrink by the same amount or
+			// end() would move past the end of the original view.
+			m_size -= last - first;
+			m_begin = last;
+			return last;
+		}
+		if (last == end) {
+			THROW_CHECK0(invalid_argument, m_begin <= first);
+			// Dropping a suffix only shortens the view.
+			m_size = first - m_begin;
+			return first;
+		}
+		THROW_ERROR(invalid_argument, "first != begin() and last != end()");
+	}
+
+	template <class T, class Head, class Iter>
+	typename Spanner<T, Head, Iter>::iterator
+	Spanner<T, Head, Iter>::erase(iterator first)
+	{
+		return erase(first, first + 1);
+	}
+
+	template <class T, class Head, class Iter>
+	typename Spanner<T, Head, Iter>::value_type &
+	Spanner<T, Head, Iter>::operator[](size_t n) const
+	{
+		return at(n);
+	}
+
+	template <class T, class Head, class Iter>
+	void
+	Spanner<T, Head, Iter>::clear()
+	{
+		m_begin = head_pointer();
+		m_size = 0;
+	}
+
+	template <class T, class Head, class Iter>
+	bool
+	Spanner<T, Head, Iter>::empty() const
+	{
+		return m_size == 0;
+	}
+
+	template <class T, class Head, class Iter>
+	size_t
+	Spanner<T, Head, Iter>::size() const
+	{
+		return m_size;
+	}
+
+	// NOTE(review): this dereferences m_begin even for an empty view,
+	// e.g. after clear() - confirm callers never ask an empty Spanner
+	// for data(), begin() or end().
+	template <class T, class Head, class Iter>
+	typename Spanner<T, Head, Iter>::value_type *
+	Spanner<T, Head, Iter>::data() const
+	{
+		return &(*m_begin);
+	}
+
+	template <class T, class Head, class Iter>
+	typename Spanner<T, Head, Iter>::iterator
+	Spanner<T, Head, Iter>::begin() const
+	{
+		return data();
+	}
+
+	template <class T, class Head, class Iter>
+	typename Spanner<T, Head, Iter>::iterator
+	Spanner<T, Head, Iter>::end() const
+	{
+		return data() + m_size;
+	}
+
+	template <class T, class Head, class Iter>
+	typename Spanner<T, Head, Iter>::value_type &
+	Spanner<T, Head, Iter>::at(size_t n) const
+	{
+		THROW_CHECK2(out_of_range, n, size(), n < size());
+		return *(data() + n);
+	}
+
+}
+
+
+#endif // CRUCIBLE_SPANNER_H
--- a/include/crucible/string.h
+++ b/include/crucible/string.h
@@ -11,6 +11,23 @@
 namespace crucible {
 	using namespace std;

+	// Overwrite all sizeof(Base) bytes of *that with zero (meant for a base class object, usually a C struct)
+	template <class Base>
+	void
+	memset_zero(Base *that)
+	{
+		memset(that, 0, sizeof(Base));
+	}
+
+	// Return a vector<uint8_t> holding a copy of the sizeof(Base) bytes of *that (a base class object, usually a C struct)
+	template <class Base>
+	vector<uint8_t>
+	vector_copy_struct(Base *that)
+	{
+		const uint8_t *begin_that = reinterpret_cast<const uint8_t *>(static_cast<const Base *>(that));
+		return vector<uint8_t>(begin_that, begin_that + sizeof(Base));
+	}
+
 	// int->hex conversion with sprintf
 	string to_hex(uint64_t i);

--- a/include/crucible/task.h
+++ b/include/crucible/task.h
@@ -3,7 +3,6 @@

 #include <functional>
 #include <memory>
-#include <mutex>
 #include <ostream>
 #include <string>

@@ -93,92 +92,92 @@ namespace crucible {
 		/// Gets the current number of active workers
 		static size_t get_thread_count();

-		/// Gets the current load tracking statistics
-		struct LoadStats {
-			/// Current load extracted from last two 5-second load average samples
-			double current_load;
-			/// Target thread count computed from previous thread count and current load
-			double thread_target;
-			/// Load average for last 60 seconds
-			double loadavg;
-		};
-		static LoadStats get_current_load();
-
 		/// Drop the current queue and discard new Tasks without
 		/// running them.  Currently executing tasks are not
 		/// affected (use set_thread_count(0) to wait for those
 		/// to complete).
 		static void cancel();
-
-		/// Stop running any new Tasks.  All existing
-		/// Consumer threads will exit.  Does not affect queue.
-		/// Does not wait for threads to exit.  Reversible.
-		static void pause(bool paused = true);
 	};

+	// Barrier executes waiting Tasks once the last BarrierLock
+	// is released.  Multiple unique Tasks may be scheduled while
+	// BarrierLocks exist and all will be run() at once upon
+	// release.  If no BarrierLocks exist, Tasks are executed
+	// immediately upon insertion.
+
 	class BarrierState;

-	/// Barrier delays the execution of one or more Tasks.
-	/// The Tasks are executed when the last shared reference to the
-	/// BarrierState is released.  Copies of Barrier objects refer
-	/// to the same Barrier state.
-	class Barrier {
+	class BarrierLock {
 		shared_ptr<BarrierState> m_barrier_state;
-
+		BarrierLock(shared_ptr<BarrierState> pbs);
+	friend class Barrier;
 	public:
-		Barrier();
-
-		/// Schedule a task for execution when last Barrier is released.
-		void insert_task(Task t);
-
-		/// Release this reference to the barrier state.
-		/// Last released reference executes the task.
-		/// Barrier can only be released once, after which the
-		/// object can no longer be used.
+		// Release this Lock immediately and permanently
 		void release();
 	};

+	class Barrier {
+		shared_ptr<BarrierState> m_barrier_state;
+
+		Barrier(shared_ptr<BarrierState> pbs);
+	public:
+		Barrier();
+
+		// Prevent execution of tasks behind barrier until
+		// BarrierLock destructor or release() method is called.
+		BarrierLock lock();
+
+		// Schedule a task for execution when no Locks exist
+		void insert_task(Task t);
+	};
+
+	// Exclusion provides exclusive access to an ExclusionLock.
+	// One Task will be able to obtain the ExclusionLock; other Tasks
+	// may schedule themselves for re-execution after the ExclusionLock
+	// is released.
+
+	class ExclusionState;
+	class Exclusion;
+
 	class ExclusionLock {
-		shared_ptr<Task> m_owner;
-		ExclusionLock(shared_ptr<Task> owner);
+		shared_ptr<ExclusionState> m_exclusion_state;
+		ExclusionLock(shared_ptr<ExclusionState> pes);
+		ExclusionLock() = default;
 	friend class Exclusion;
 	public:
-		/// Explicit default constructor because we have other kinds
-		ExclusionLock() = default;
+		// Calls release()
+		~ExclusionLock();

-		/// Release this Lock immediately and permanently
+		// Release this Lock immediately and permanently
 		void release();

-		/// Test for locked state
+		// Test for locked state
 		operator bool() const;
 	};

 	class Exclusion {
-		mutex m_mutex;
-		weak_ptr<Task> m_owner;
+		shared_ptr<ExclusionState> m_exclusion_state;

+		Exclusion(shared_ptr<ExclusionState> pes);
 	public:
-		/// Attempt to obtain a Lock.  If successful, current Task
-		/// owns the Lock until the ExclusionLock is released
-		/// (it is the ExclusionLock that owns the lock, so it can
-		/// be passed to other Tasks or threads, but this is not
-		/// recommended practice).
-		/// If not successful, current Task is appended to the
-		/// task that currently holds the lock.  Current task is
-		/// expected to release any other ExclusionLock
-		/// objects it holds, and exit its Task function.
-		ExclusionLock try_lock(const Task &task);
+		Exclusion(const string &title);

-		/// Execute Task when Exclusion is unlocked (possibly
-		/// immediately).
-		void insert_task(const Task &t);
+		// Attempt to obtain a Lock.  If successful, current Task
+		// owns the Lock until the ExclusionLock is released
+		// (it is the ExclusionLock that owns the lock, so it can
+		// be passed to other Tasks or threads, but this is not
+		// recommended practice).
+		// If not successful, current Task is expected to call
+		// insert_task(current_task()), release any ExclusionLock
+		// objects it holds, and exit its Task function.
+		ExclusionLock try_lock();
+
+		// Execute Task when Exclusion is unlocked (possibly
+		// immediately).
+		void insert_task(Task t = Task::current_task());
 	};

-	/// Wrapper around pthread_setname_np which handles length limits
-	void pthread_setname(const string &name);

-	/// Wrapper around pthread_getname_np for symmetry
-	string pthread_getname();
 }

 #endif // CRUCIBLE_TASK_H
--- a/include/crucible/uname.h
+++ b/include/crucible/uname.h
@@ -1,14 +0,0 @@
-#ifndef CRUCIBLE_UNAME_H
-#define CRUCIBLE_UNAME_H
-
-#include <sys/utsname.h>
-
-namespace crucible {
-	using namespace std;
-
-	struct Uname : public utsname {
-		Uname();
-	};
-}
-
-#endif
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -1,9 +1,9 @@
+TAG ?= $(shell git describe --always --dirty || echo UNKNOWN)
+
 default: libcrucible.a
 %.a: Makefile

 CRUCIBLE_OBJS = \
-	bytevector.o \
-	btrfs-tree.o \
 	chatter.o \
 	city.o \
 	cleanup.o \
@@ -12,14 +12,12 @@ CRUCIBLE_OBJS = \
 	extentwalker.o \
 	fd.o \
 	fs.o \
-	multilock.o \
 	ntoa.o \
 	path.o \
 	process.o \
 	string.o \
 	task.o \
 	time.o \
-	uname.o \

 include ../makeflags
 -include ../localconf
@@ -30,13 +28,24 @@ BEES_LDFLAGS = $(LDFLAGS)
 configure.h: configure.h.in
 	$(TEMPLATE_COMPILER)

-%.dep: %.cc configure.h Makefile
+.depends:
+	mkdir -p $@
+
+.depends/%.dep: %.cc configure.h Makefile | .depends
 	$(CXX) $(BEES_CXXFLAGS) -M -MF $@ -MT $(<:.cc=.o) $<

-include $(CRUCIBLE_OBJS:%.o=%.dep)
+depends.mk: $(CRUCIBLE_OBJS:%.o=.depends/%.dep)
+	cat $^ > $@.new
+	mv -f $@.new $@
+
+.version.cc: configure.h Makefile ../makeflags $(CRUCIBLE_OBJS:.o=.cc) ../include/crucible/*.h
+	echo "namespace crucible { const char *VERSION = \"$(TAG)\"; }" > $@.new
+	if ! cmp "$@.new" "$@"; then mv -fv $@.new $@; fi
+
+include depends.mk

 %.o: %.cc ../makeflags
 	$(CXX) $(BEES_CXXFLAGS) -o $@ -c $<

-libcrucible.a: $(CRUCIBLE_OBJS)
+libcrucible.a: $(CRUCIBLE_OBJS) .version.o
 	$(AR) rcs $@ $^
--- a/lib/btrfs-tree.cc
+++ b/lib/btrfs-tree.cc
@@ -1,684 +0,0 @@
-#include "crucible/btrfs-tree.h"
-#include "crucible/btrfs.h"
-#include "crucible/error.h"
-#include "crucible/fs.h"
-#include "crucible/hexdump.h"
-#include "crucible/seeker.h"
-
-namespace crucible {
-	using namespace std;
-
-	uint64_t
-	BtrfsTreeItem::extent_begin() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_ITEM_KEY);
-		return m_objectid;
-	}
-
-	uint64_t
-	BtrfsTreeItem::extent_end() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_ITEM_KEY);
-		return m_objectid + m_offset;
-	}
-
-	uint64_t
-	BtrfsTreeItem::extent_generation() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_ITEM_KEY);
-		return btrfs_get_member(&btrfs_extent_item::generation, m_data);
-	}
-
-	uint64_t
-	BtrfsTreeItem::root_ref_dirid() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_ROOT_BACKREF_KEY);
-		return btrfs_get_member(&btrfs_root_ref::dirid, m_data);
-	}
-
-	string
-	BtrfsTreeItem::root_ref_name() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_ROOT_BACKREF_KEY);
-		const auto name_len = btrfs_get_member(&btrfs_root_ref::name_len, m_data);
-		const auto name_start = sizeof(struct btrfs_root_ref);
-		const auto name_end = name_len + name_start;
-		THROW_CHECK2(runtime_error, m_data.size(), name_end, m_data.size() >= name_end);
-		return string(m_data.data() + name_start, m_data.data() + name_end);
-	}
-
-	uint64_t
-	BtrfsTreeItem::root_ref_parent_rootid() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_ROOT_BACKREF_KEY);
-		return offset();
-	}
-
-	uint64_t
-	BtrfsTreeItem::root_flags() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_ROOT_ITEM_KEY);
-		return btrfs_get_member(&btrfs_root_item::flags, m_data);
-	}
-
-	ostream &
-	operator<<(ostream &os, const BtrfsTreeItem &bti)
-	{
-		os << "BtrfsTreeItem {"
-			<< " objectid = " << to_hex(bti.objectid())
-			<< ", type = " << btrfs_search_type_ntoa(bti.type())
-			<< ", offset = " << to_hex(bti.offset())
-			<< ", transid = " << bti.transid()
-			<< ", data = ";
-		hexdump(os, bti.data());
-		return os;
-	}
-
-	uint64_t
-	BtrfsTreeItem::block_group_flags() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_BLOCK_GROUP_ITEM_KEY);
-		return btrfs_get_member(&btrfs_block_group_item::flags, m_data);
-	}
-
-	uint64_t
-	BtrfsTreeItem::block_group_used() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_BLOCK_GROUP_ITEM_KEY);
-		return btrfs_get_member(&btrfs_block_group_item::used, m_data);
-	}
-
-	uint64_t
-	BtrfsTreeItem::chunk_length() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_CHUNK_ITEM_KEY);
-		return btrfs_get_member(&btrfs_chunk::length, m_data);
-	}
-
-	uint64_t
-	BtrfsTreeItem::chunk_type() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_CHUNK_ITEM_KEY);
-		return btrfs_get_member(&btrfs_chunk::type, m_data);
-	}
-
-	uint64_t
-	BtrfsTreeItem::dev_extent_chunk_offset() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_DEV_EXTENT_KEY);
-		return btrfs_get_member(&btrfs_dev_extent::chunk_offset, m_data);
-	}
-
-	uint64_t
-	BtrfsTreeItem::dev_extent_length() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_DEV_EXTENT_KEY);
-		return btrfs_get_member(&btrfs_dev_extent::length, m_data);
-	}
-
-	uint64_t
-	BtrfsTreeItem::dev_item_total_bytes() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_DEV_ITEM_KEY);
-		return btrfs_get_member(&btrfs_dev_item::total_bytes, m_data);
-	}
-
-	uint64_t
-	BtrfsTreeItem::dev_item_bytes_used() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_DEV_ITEM_KEY);
-		return btrfs_get_member(&btrfs_dev_item::bytes_used, m_data);
-	}
-
-	uint64_t
-	BtrfsTreeItem::inode_size() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_INODE_ITEM_KEY);
-		return btrfs_get_member(&btrfs_inode_item::size, m_data);
-	}
-
-	uint64_t
-	BtrfsTreeItem::file_extent_logical_bytes() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_DATA_KEY);
-		const auto file_extent_item_type = btrfs_get_member(&btrfs_file_extent_item::type, m_data);
-		switch (file_extent_item_type) {
-			case BTRFS_FILE_EXTENT_INLINE:
-				return btrfs_get_member(&btrfs_file_extent_item::ram_bytes, m_data);
-			case BTRFS_FILE_EXTENT_PREALLOC:
-			case BTRFS_FILE_EXTENT_REG:
-				return btrfs_get_member(&btrfs_file_extent_item::num_bytes, m_data);
-			default:
-				THROW_ERROR(runtime_error, "unknown btrfs_file_extent_item type " << file_extent_item_type);
-		}
-	}
-
-	uint64_t
-	BtrfsTreeItem::file_extent_offset() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_DATA_KEY);
-		const auto file_extent_item_type = btrfs_get_member(&btrfs_file_extent_item::type, m_data);
-		switch (file_extent_item_type) {
-			case BTRFS_FILE_EXTENT_INLINE:
-				THROW_ERROR(invalid_argument, "extent is inline " << *this);
-			case BTRFS_FILE_EXTENT_PREALLOC:
-			case BTRFS_FILE_EXTENT_REG:
-				return btrfs_get_member(&btrfs_file_extent_item::offset, m_data);
-			default:
-				THROW_ERROR(runtime_error, "unknown btrfs_file_extent_item type " << file_extent_item_type << " in " << *this);
-		}
-	}
-
-	uint64_t
-	BtrfsTreeItem::file_extent_generation() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_DATA_KEY);
-		return btrfs_get_member(&btrfs_file_extent_item::generation, m_data);
-	}
-
-	uint64_t
-	BtrfsTreeItem::file_extent_bytenr() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_DATA_KEY);
-		auto file_extent_item_type = btrfs_get_member(&btrfs_file_extent_item::type, m_data);
-		switch (file_extent_item_type) {
-			case BTRFS_FILE_EXTENT_INLINE:
-				THROW_ERROR(invalid_argument, "extent is inline " << *this);
-			case BTRFS_FILE_EXTENT_PREALLOC:
-			case BTRFS_FILE_EXTENT_REG:
-				return btrfs_get_member(&btrfs_file_extent_item::disk_bytenr, m_data);
-			default:
-				THROW_ERROR(runtime_error, "unknown btrfs_file_extent_item type " << file_extent_item_type << " in " << *this);
-		}
-	}
-
-	uint8_t
-	BtrfsTreeItem::file_extent_type() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_DATA_KEY);
-		return btrfs_get_member(&btrfs_file_extent_item::type, m_data);
-	}
-
-	btrfs_compression_type
-	BtrfsTreeItem::file_extent_compression() const
-	{
-		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_DATA_KEY);
-		return static_cast<btrfs_compression_type>(btrfs_get_member(&btrfs_file_extent_item::compression, m_data));
-	}
-
-	BtrfsTreeItem::BtrfsTreeItem(const BtrfsIoctlSearchHeader &bish) :
-		m_objectid(bish.objectid),
-		m_offset(bish.offset),
-		m_transid(bish.transid),
-		m_data(bish.m_data),
-		m_type(bish.type)
-	{
-	}
-
-	BtrfsTreeItem &
-	BtrfsTreeItem::operator=(const BtrfsIoctlSearchHeader &bish)
-	{
-		m_objectid = bish.objectid;
-		m_offset = bish.offset;
-		m_transid = bish.transid;
-		m_data = bish.m_data;
-		m_type = bish.type;
-		return *this;
-	}
-
-	bool
-	BtrfsTreeItem::operator!() const
-	{
-		return m_transid == 0 && m_objectid == 0 && m_offset == 0 && m_type == 0;
-	}
-
-	uint64_t
-	BtrfsTreeFetcher::block_size() const
-	{
-		return m_block_size;
-	}
-
-	BtrfsTreeFetcher::BtrfsTreeFetcher(Fd new_fd) :
-		m_fd(new_fd)
-	{
-		BtrfsIoctlFsInfoArgs bifia;
-		bifia.do_ioctl(fd());
-		m_block_size = bifia.sectorsize;
-		THROW_CHECK1(runtime_error, m_block_size, m_block_size > 0);
-		// We don't believe sector sizes that aren't multiples of 4K
-		THROW_CHECK1(runtime_error, m_block_size, (m_block_size % 4096) == 0);
-		m_lookbehind_size = 128 * 1024;
-		m_scale_size = m_block_size;
-	}
-
-	Fd
-	BtrfsTreeFetcher::fd() const
-	{
-		return m_fd;
-	}
-
-	void
-	BtrfsTreeFetcher::fd(Fd fd)
-	{
-		m_fd = fd;
-	}
-
-	void
-	BtrfsTreeFetcher::type(uint8_t type)
-	{
-		m_type = type;
-	}
-
-	void
-	BtrfsTreeFetcher::tree(uint64_t tree)
-	{
-		m_tree = tree;
-	}
-
-	void
-	BtrfsTreeFetcher::transid(uint64_t min_transid, uint64_t max_transid)
-	{
-		m_min_transid = min_transid;
-		m_max_transid = max_transid;
-	}
-
-	uint64_t
-	BtrfsTreeFetcher::lookbehind_size() const
-	{
-		return m_lookbehind_size;
-	}
-
-	void
-	BtrfsTreeFetcher::lookbehind_size(uint64_t lookbehind_size)
-	{
-		m_lookbehind_size = lookbehind_size;
-	}
-
-	uint64_t
-	BtrfsTreeFetcher::scale_size() const
-	{
-		return m_scale_size;
-	}
-
-	void
-	BtrfsTreeFetcher::scale_size(uint64_t scale_size)
-	{
-		m_scale_size = scale_size;
-	}
-
-	void
-	BtrfsTreeFetcher::fill_sk(BtrfsIoctlSearchKey &sk, uint64_t object)
-	{
-		(void)object;
-		// btrfs allows tree ID 0 meaning the current tree, but we do not.
-		THROW_CHECK0(invalid_argument, m_tree != 0);
-		sk.tree_id = m_tree;
-		sk.min_type = m_type;
-		sk.max_type = m_type;
-		sk.min_transid = m_min_transid;
-		sk.max_transid = m_max_transid;
-		sk.nr_items = 1;
-	}
-
-	void
-	BtrfsTreeFetcher::next_sk(BtrfsIoctlSearchKey &key, const BtrfsIoctlSearchHeader &hdr)
-	{
-		key.next_min(hdr, m_type);
-	}
-
-	BtrfsTreeItem
-	BtrfsTreeFetcher::at(uint64_t logical)
-	{
-		BtrfsIoctlSearchKey &sk = m_sk;
-		fill_sk(sk, logical);
-		// Exact match, should return 0 or 1 items
-		sk.max_type = sk.min_type;
-		sk.nr_items = 1;
-		sk.do_ioctl(fd());
-		THROW_CHECK1(runtime_error, sk.m_result.size(), sk.m_result.size() < 2);
-		for (const auto &i : sk.m_result) {
-			if (hdr_logical(i) == logical && hdr_match(i)) {
-				return i;
-			}
-		}
-		return BtrfsTreeItem();
-	}
-
-	uint64_t
-	BtrfsTreeFetcher::scale_logical(const uint64_t logical) const
-	{
-		THROW_CHECK1(invalid_argument, logical, (logical % m_scale_size) == 0 || logical == s_max_logical);
-		return logical / m_scale_size;
-	}
-
-	uint64_t
-	BtrfsTreeFetcher::scaled_max_logical() const
-	{
-		return scale_logical(s_max_logical);
-	}
-
-	uint64_t
-	BtrfsTreeFetcher::unscale_logical(const uint64_t logical) const
-	{
-		THROW_CHECK1(invalid_argument, logical, logical <= scaled_max_logical());
-		if (logical == scaled_max_logical()) {
-			return s_max_logical;
-		}
-		return logical * scale_size();
-	}
-
-	BtrfsTreeItem
-	BtrfsTreeFetcher::rlower_bound(uint64_t logical)
-	{
-	#if 0
-	#define BTFRLB_DEBUG(x) do { cerr << x; } while (false)
-	#else
-	#define BTFRLB_DEBUG(x) do { } while (false)
-	#endif
-		BtrfsTreeItem closest_item;
-		uint64_t closest_logical = 0;
-		BtrfsIoctlSearchKey &sk = m_sk;
-		size_t loops = 0;
-		BTFRLB_DEBUG("rlower_bound: " << to_hex(logical) << endl);
-		seek_backward(scale_logical(logical), [&](uint64_t lower_bound, uint64_t upper_bound) {
-			++loops;
-			fill_sk(sk, unscale_logical(min(scaled_max_logical(), lower_bound)));
-			set<uint64_t> rv;
-			do {
-				sk.nr_items = 4;
-				sk.do_ioctl(fd());
-				BTFRLB_DEBUG("fetch: loop " << loops << " lower_bound..upper_bound " << to_hex(lower_bound) << ".." << to_hex(upper_bound));
-				for (auto &i : sk.m_result) {
-					next_sk(sk, i);
-					const auto this_logical = hdr_logical(i);
-					const auto scaled_hdr_logical = scale_logical(this_logical);
-					BTFRLB_DEBUG(" " << to_hex(scaled_hdr_logical));
-					if (hdr_match(i)) {
-						if (this_logical <= logical && this_logical > closest_logical) {
-							closest_logical = this_logical;
-							closest_item = i;
-						}
-						BTFRLB_DEBUG("(match)");
-						rv.insert(scaled_hdr_logical);
-					}
-					if (scaled_hdr_logical > upper_bound || hdr_stop(i)) {
-						if (scaled_hdr_logical >= upper_bound) {
-							BTFRLB_DEBUG("(" << to_hex(scaled_hdr_logical) << " >= " << to_hex(upper_bound) << ")");
-						}
-						if (hdr_stop(i)) {
-							rv.insert(numeric_limits<uint64_t>::max());
-							BTFRLB_DEBUG("(stop)");
-						}
-						break;
-					} else {
-						BTFRLB_DEBUG("(cont'd)");
-					}
-				}
-				BTFRLB_DEBUG(endl);
-				// We might get a search result that contains only non-matching items.
-				// Keep looping until we find any matching item or we run out of tree.
-			} while (rv.empty() && !sk.m_result.empty());
-			return rv;
-		}, scale_logical(lookbehind_size()));
-		return closest_item;
-	#undef BTFRLB_DEBUG
-	}
-
-	BtrfsTreeItem
-	BtrfsTreeFetcher::lower_bound(uint64_t logical)
-	{
-		BtrfsIoctlSearchKey &sk = m_sk;
-		fill_sk(sk, logical);
-		do {
-			assert(sk.max_offset == s_max_logical);
-			sk.do_ioctl(fd());
-			for (const auto &i : sk.m_result) {
-				if (hdr_match(i)) {
-					return i;
-				}
-				if (hdr_stop(i)) {
-					return BtrfsTreeItem();
-				}
-				next_sk(sk, i);
-			}
-		} while (!sk.m_result.empty());
-		return BtrfsTreeItem();
-	}
-
-	BtrfsTreeItem
-	BtrfsTreeFetcher::next(uint64_t logical)
-	{
-		const auto scaled_logical = scale_logical(logical);
-		if (scaled_logical + 1 > scaled_max_logical()) {
-			return BtrfsTreeItem();
-		}
-		return lower_bound(unscale_logical(scaled_logical + 1));
-	}
-
-	BtrfsTreeItem
-	BtrfsTreeFetcher::prev(uint64_t logical)
-	{
-		const auto scaled_logical = scale_logical(logical);
-		if (scaled_logical < 1) {
-			return BtrfsTreeItem();
-		}
-		return rlower_bound(unscale_logical(scaled_logical - 1));
-	}
-
-	void
-	BtrfsTreeObjectFetcher::fill_sk(BtrfsIoctlSearchKey &sk, uint64_t object)
-	{
-		BtrfsTreeFetcher::fill_sk(sk, object);
-		sk.min_offset = 0;
-		sk.max_offset = numeric_limits<decltype(sk.max_offset)>::max();
-		sk.min_objectid = object;
-		sk.max_objectid = numeric_limits<decltype(sk.max_objectid)>::max();
-	}
-
-	uint64_t
-	BtrfsTreeObjectFetcher::hdr_logical(const BtrfsIoctlSearchHeader &hdr)
-	{
-		return hdr.objectid;
-	}
-
-	bool
-	BtrfsTreeObjectFetcher::hdr_match(const BtrfsIoctlSearchHeader &hdr)
-	{
-		// If you're calling this method without overriding it, you should have set type first
-		assert(m_type);
-		return hdr.type == m_type;
-	}
-
-	bool
-	BtrfsTreeObjectFetcher::hdr_stop(const BtrfsIoctlSearchHeader &hdr)
-	{
-		return false;
-		(void)hdr;
-	}
-
-	uint64_t
-	BtrfsTreeOffsetFetcher::hdr_logical(const BtrfsIoctlSearchHeader &hdr)
-	{
-		return hdr.offset;
-	}
-
-	bool
-	BtrfsTreeOffsetFetcher::hdr_match(const BtrfsIoctlSearchHeader &hdr)
-	{
-		assert(m_type);
-		return hdr.type == m_type && hdr.objectid == m_objectid;
-	}
-
-	bool
-	BtrfsTreeOffsetFetcher::hdr_stop(const BtrfsIoctlSearchHeader &hdr)
-	{
-		assert(m_type);
-		return hdr.objectid > m_objectid || hdr.type > m_type;
-	}
-
-	void
-	BtrfsTreeOffsetFetcher::objectid(uint64_t objectid)
-	{
-		m_objectid = objectid;
-	}
-
-	uint64_t
-	BtrfsTreeOffsetFetcher::objectid() const
-	{
-		return m_objectid;
-	}
-
-	void
-	BtrfsTreeOffsetFetcher::fill_sk(BtrfsIoctlSearchKey &sk, uint64_t offset)
-	{
-		BtrfsTreeFetcher::fill_sk(sk, offset);
-		sk.min_offset = offset;
-		sk.max_offset = numeric_limits<decltype(sk.max_offset)>::max();
-		sk.min_objectid = m_objectid;
-		sk.max_objectid = m_objectid;
-	}
-
-	void
-	BtrfsCsumTreeFetcher::get_sums(uint64_t const logical, size_t count, function<void(uint64_t logical, const uint8_t *buf, size_t bytes)> output)
-	{
-	#if 0
-	#define BCTFGS_DEBUG(x) do { cerr << x; } while (false)
-	#else
-	#define BCTFGS_DEBUG(x) do { } while (false)
-	#endif
-		const uint64_t logical_end = logical + count * block_size();
-		BtrfsTreeItem bti = rlower_bound(logical);
-		size_t loops = 0;
-		BCTFGS_DEBUG("get_sums " << to_hex(logical) << ".." << to_hex(logical_end) << endl);
-		while (!!bti) {
-			BCTFGS_DEBUG("get_sums[" << loops << "]: " << bti << endl);
-			++loops;
-			// Reject wrong type or objectid
-			THROW_CHECK1(runtime_error, bti.type(), bti.type() == BTRFS_EXTENT_CSUM_KEY);
-			THROW_CHECK1(runtime_error, bti.objectid(), bti.objectid() == BTRFS_EXTENT_CSUM_OBJECTID);
-			// Is this object in range?
-			const uint64_t data_logical = bti.offset();
-			if (data_logical >= logical_end) {
-				// csum object is past end of range, we are done
-				return;
-			}
-			// Figure out how long this csum item is in various units
-			const size_t csum_byte_count = bti.data().size();
-			THROW_CHECK1(runtime_error, csum_byte_count, (csum_byte_count % m_sum_size) == 0);
-			THROW_CHECK1(runtime_error, csum_byte_count, csum_byte_count > 0);
-			const size_t csum_count = csum_byte_count / m_sum_size;
-			const uint64_t data_byte_count = csum_count * block_size();
-			const uint64_t data_logical_end = data_logical + data_byte_count;
-			if (data_logical_end <= logical) {
-				// too low, look at next item
-				bti = lower_bound(logical);
-				continue;
-			}
-			// There is some overlap?
-			const uint64_t overlap_begin = max(logical, data_logical);
-			const uint64_t overlap_end = min(logical_end, data_logical_end);
-			THROW_CHECK2(runtime_error, overlap_begin, overlap_end, overlap_begin < overlap_end);
-			const uint64_t overlap_offset = overlap_begin - data_logical;
-			THROW_CHECK1(runtime_error, overlap_offset, (overlap_offset % block_size()) == 0);
-			const uint64_t overlap_index = overlap_offset * m_sum_size / block_size();
-			const uint64_t overlap_byte_count = overlap_end - overlap_begin;
-			const uint64_t overlap_csum_byte_count = overlap_byte_count * m_sum_size / block_size();
-			// Can't be bigger than a btrfs item
-			THROW_CHECK1(runtime_error, overlap_index, overlap_index < 65536);
-			THROW_CHECK1(runtime_error, overlap_csum_byte_count, overlap_csum_byte_count < 65536);
-			// Yes, process the overlap
-			output(overlap_begin, bti.data().data() + overlap_index, overlap_csum_byte_count);
-			// Advance
-			bti = lower_bound(overlap_end);
-		}
-	#undef BCTFGS_DEBUG
-	}
-
-	uint32_t
-	BtrfsCsumTreeFetcher::sum_type() const
-	{
-		return m_sum_type;
-	}
-
-	size_t
-	BtrfsCsumTreeFetcher::sum_size() const
-	{
-		return m_sum_size;
-	}
-
-	BtrfsCsumTreeFetcher::BtrfsCsumTreeFetcher(const Fd &new_fd) :
-		BtrfsTreeOffsetFetcher(new_fd)
-	{
-		type(BTRFS_EXTENT_CSUM_KEY);
-		tree(BTRFS_CSUM_TREE_OBJECTID);
-		objectid(BTRFS_EXTENT_CSUM_OBJECTID);
-		BtrfsIoctlFsInfoArgs bifia;
-		bifia.do_ioctl(fd());
-		m_sum_type = static_cast<btrfs_compression_type>(bifia.csum_type());
-		m_sum_size = bifia.csum_size();
-		if (m_sum_type == BTRFS_CSUM_TYPE_CRC32 && m_sum_size == 0) {
-			// Older kernel versions don't fill in this field
-			m_sum_size = 4;
-		}
-		THROW_CHECK1(runtime_error, m_sum_size, m_sum_size > 0);
-	}
-
-	BtrfsExtentItemFetcher::BtrfsExtentItemFetcher(const Fd &new_fd) :
-		BtrfsTreeObjectFetcher(new_fd)
-	{
-		tree(BTRFS_EXTENT_TREE_OBJECTID);
-		type(BTRFS_EXTENT_ITEM_KEY);
-	}
-
-	BtrfsExtentDataFetcher::BtrfsExtentDataFetcher(const Fd &new_fd) :
-		BtrfsTreeOffsetFetcher(new_fd)
-	{
-		type(BTRFS_EXTENT_DATA_KEY);
-	}
-
-	BtrfsFsTreeFetcher::BtrfsFsTreeFetcher(const Fd &new_fd, uint64_t subvol) :
-		BtrfsTreeObjectFetcher(new_fd)
-	{
-		tree(subvol);
-		type(BTRFS_EXTENT_DATA_KEY);
-		scale_size(1);
-	}
-
-	BtrfsInodeFetcher::BtrfsInodeFetcher(const Fd &fd) :
-		BtrfsTreeObjectFetcher(fd)
-	{
-		type(BTRFS_INODE_ITEM_KEY);
-		scale_size(1);
-	}
-
-	BtrfsTreeItem
-	BtrfsInodeFetcher::stat(uint64_t subvol, uint64_t inode)
-	{
-		tree(subvol);
-		const auto item = at(inode);
-		if (!!item) {
-			THROW_CHECK2(runtime_error, item.objectid(), inode, inode == item.objectid());
-			THROW_CHECK2(runtime_error, item.type(), BTRFS_INODE_ITEM_KEY, item.type() == BTRFS_INODE_ITEM_KEY);
-		}
-		return item;
-	}
-
-	BtrfsRootFetcher::BtrfsRootFetcher(const Fd &fd) :
-		BtrfsTreeObjectFetcher(fd)
-	{
-		tree(BTRFS_ROOT_TREE_OBJECTID);
-		type(BTRFS_ROOT_ITEM_KEY);
-		scale_size(1);
-	}
-
-	BtrfsTreeItem
-	BtrfsRootFetcher::root(uint64_t subvol)
-	{
-		const auto item = at(subvol);
-		if (!!item) {
-			THROW_CHECK2(runtime_error, item.objectid(), subvol, subvol == item.objectid());
-			THROW_CHECK2(runtime_error, item.type(), BTRFS_ROOT_ITEM_KEY, item.type() == BTRFS_ROOT_ITEM_KEY);
-		}
-		return item;
-	}
-}
--- a/lib/bytevector.cc
+++ b/lib/bytevector.cc
@@ -1,190 +0,0 @@
-#include "crucible/bytevector.h"
-
-#include "crucible/error.h"
-#include "crucible/hexdump.h"
-#include "crucible/string.h"
-
-#include <cassert>
-
-namespace crucible {
-	using namespace std;
-
-	ByteVector::iterator
-	ByteVector::begin() const
-	{
-		unique_lock<mutex> lock(m_mutex);
-		return m_ptr.get();
-	}
-
-	ByteVector::iterator
-	ByteVector::end() const
-	{
-		unique_lock<mutex> lock(m_mutex);
-		return m_ptr.get() + m_size;
-	}
-
-	size_t
-	ByteVector::size() const
-	{
-		return m_size;
-	}
-
-	bool
-	ByteVector::empty() const
-	{
-		return !m_ptr || !m_size;
-	}
-
-	void
-	ByteVector::clear()
-	{
-		unique_lock<mutex> lock(m_mutex);
-		m_ptr.reset();
-		m_size = 0;
-	}
-
-	ByteVector::value_type&
-	ByteVector::operator[](size_t size) const
-	{
-		unique_lock<mutex> lock(m_mutex);
-		return m_ptr.get()[size];
-	}
-
-	ByteVector::ByteVector(const ByteVector &that)
-	{
-		unique_lock<mutex> lock(that.m_mutex);
-		m_ptr = that.m_ptr;
-		m_size = that.m_size;
-	}
-
-	ByteVector&
-	ByteVector::operator=(const ByteVector &that)
-	{
-		// If &that == this, there's no need to do anything, but
-		// especially don't try to lock the same mutex twice.
-		if (&m_mutex != &that.m_mutex) {
-			unique_lock<mutex> lock_this(m_mutex, defer_lock);
-			unique_lock<mutex> lock_that(that.m_mutex, defer_lock);
-			lock(lock_this, lock_that);
-			m_ptr = that.m_ptr;
-			m_size = that.m_size;
-		}
-		return *this;
-	}
-
-	ByteVector::ByteVector(const ByteVector &that, size_t start, size_t length)
-	{
-		THROW_CHECK0(out_of_range, that.m_ptr);
-		THROW_CHECK2(out_of_range, start, that.m_size, start <= that.m_size);
-		THROW_CHECK2(out_of_range, start + length, that.m_size + length, start + length <= that.m_size + length);
-		m_ptr = Pointer(that.m_ptr, that.m_ptr.get() + start);
-		m_size = length;
-	}
-
-	ByteVector
-	ByteVector::at(size_t start, size_t length) const
-	{
-		return ByteVector(*this, start, length);
-	}
-
-	ByteVector::value_type&
-	ByteVector::at(size_t size) const
-	{
-		unique_lock<mutex> lock(m_mutex);
-		THROW_CHECK0(out_of_range, m_ptr);
-		THROW_CHECK2(out_of_range, size, m_size, size < m_size);
-		return m_ptr.get()[size];
-	}
-
-	static
-	void *
-	bv_allocate(size_t size)
-	{
-#ifdef BEES_VALGRIND
-		// XXX: only do this to shut up valgrind
-		return calloc(1, size);
-#else
-		return malloc(size);
-#endif
-	}
-
-	ByteVector::ByteVector(size_t size)
-	{
-		m_ptr = Pointer(static_cast<value_type*>(bv_allocate(size)), free);
-		// bad_alloc doesn't fit THROW_CHECK's template
-		THROW_CHECK0(runtime_error, m_ptr);
-		m_size = size;
-	}
-
-	ByteVector::ByteVector(iterator begin, iterator end, size_t min_size)
-	{
-		const size_t size = end - begin;
-		const size_t alloc_size = max(size, min_size);
-		m_ptr = Pointer(static_cast<value_type*>(bv_allocate(alloc_size)), free);
-		THROW_CHECK0(runtime_error, m_ptr);
-		m_size = alloc_size;
-		memcpy(m_ptr.get(), begin, size);
-	}
-
-	bool
-	ByteVector::operator==(const ByteVector &that) const
-	{
-		unique_lock<mutex> lock_this(m_mutex, defer_lock);
-		unique_lock<mutex> lock_that(that.m_mutex, defer_lock);
-		lock(lock_this, lock_that);
-		if (!m_ptr) {
-			return !that.m_ptr;
-		}
-		if (!that.m_ptr) {
-			return false;
-		}
-		if (m_size != that.m_size) {
-			return false;
-		}
-		if (m_ptr.get() == that.m_ptr.get()) {
-			return true;
-		}
-		return !memcmp(m_ptr.get(), that.m_ptr.get(), m_size);
-	}
-
-	void
-	ByteVector::erase(iterator begin, iterator end)
-	{
-		unique_lock<mutex> lock(m_mutex);
-		const size_t size = end - begin;
-		if (!size) return;
-		THROW_CHECK0(out_of_range, m_ptr);
-		const iterator my_begin = m_ptr.get();
-		const iterator my_end = my_begin + m_size;
-		THROW_CHECK4(out_of_range, my_begin, begin, my_end, end, my_begin == begin || my_end == end);
-		if (begin == my_begin) {
-			if (end == my_end) {
-				m_size = 0;
-				m_ptr.reset();
-				return;
-			}
-			m_ptr = Pointer(m_ptr, end);
-		}
-		m_size -= size;
-	}
-
-	void
-	ByteVector::erase(iterator begin)
-	{
-		erase(begin, begin + 1);
-	}
-
-	ByteVector::value_type*
-	ByteVector::data() const
-	{
-		unique_lock<mutex> lock(m_mutex);
-		return m_ptr.get();
-	}
-
-	ostream&
-	operator<<(ostream &os, const ByteVector &bv) {
-		unique_lock<mutex> lock(bv.m_mutex);
-		hexdump(os, bv);
-		return os;
-	}
-}
--- a/lib/extentwalker.cc
+++ b/lib/extentwalker.cc
@@ -496,7 +496,7 @@ namespace crucible {
 	BtrfsExtentWalker::Vec
 	BtrfsExtentWalker::get_extent_map(off_t pos)
 	{
-		BtrfsIoctlSearchKey sk;
+		BtrfsIoctlSearchKey sk(65536);
 		if (!m_root_fd) {
 			m_root_fd = m_fd;
 		}
@@ -640,7 +640,9 @@ namespace crucible {
 	ExtentWalker::get_extent_map(off_t pos)
 	{
 		EWLOG("get_extent_map(" << to_hex(pos) << ")");
-		Fiemap fm(ranged_cast<uint64_t>(pos), ranged_cast<uint64_t>(numeric_limits<off_t>::max() - pos));
+		Fiemap fm;
+		fm.fm_start = ranged_cast<uint64_t>(pos);
+		fm.fm_length = ranged_cast<uint64_t>(numeric_limits<off_t>::max() - pos);
 		fm.m_max_count = fm.m_min_count = sc_extent_fetch_max;
 		fm.do_ioctl(m_fd);
 		Vec rv;
--- a/lib/fd.cc
+++ b/lib/fd.cc
@@ -361,11 +361,8 @@ namespace crucible {
                        THROW_ERROR(invalid_argument, "pwrite: trying to write on a closed file descriptor");
                }
 		int rv = ::pwrite(fd, buf, size, offset);
-		if (rv < 0) {
-			THROW_ERRNO("pwrite: could not write " << size << " bytes at fd " << name_fd(fd) << " offset " << offset);
-		}
-		if (rv != static_cast<ssize_t>(size)) {
-			THROW_ERROR(runtime_error, "pwrite: only " << rv << " of " << size << " bytes written at fd " << name_fd(fd) << " offset " << offset);
+		if (rv != static_cast<int>(size)) {
+			THROW_ERROR(runtime_error, "pwrite: only " << rv << " of " << size << " bytes written at offset " << offset);
 		}
 	}

@@ -395,7 +392,7 @@ namespace crucible {
 				}
 				THROW_ERRNO("read: " << size << " bytes");
 			}
-			if (rv > static_cast<ssize_t>(size)) {
+			if (rv > static_cast<int>(size)) {
 				THROW_ERROR(runtime_error, "read: somehow read more bytes (" << rv << ") than requested (" << size << ")");
 			}
 			if (rv == 0) break;
@@ -444,8 +441,8 @@ namespace crucible {
 					}
 					THROW_ERRNO("pread: " << size << " bytes");
 				}
-				if (rv != static_cast<ssize_t>(size)) {
-					THROW_ERROR(runtime_error, "pread: " << size << " bytes at fd " << name_fd(fd) << " offset " << offset << " returned " << rv);
+				if (rv != static_cast<int>(size)) {
+					THROW_ERROR(runtime_error, "pread: " << size << " bytes at offset " << offset << " returned " << rv);
 				}
 				break;
 			}
@@ -461,14 +458,28 @@ namespace crucible {

 	template<>
 	void
-	pread_or_die<ByteVector>(int fd, ByteVector &text, off_t offset)
+	pread_or_die<vector<char>>(int fd, vector<char> &text, off_t offset)
 	{
 		return pread_or_die(fd, text.data(), text.size(), offset);
 	}

 	template<>
 	void
-	pwrite_or_die<ByteVector>(int fd, const ByteVector &text, off_t offset)
+	pread_or_die<vector<uint8_t>>(int fd, vector<uint8_t> &text, off_t offset)
+	{
+		return pread_or_die(fd, text.data(), text.size(), offset);
+	}
+
+	template<>
+	void
+	pwrite_or_die<vector<uint8_t>>(int fd, const vector<uint8_t> &text, off_t offset)
+	{
+		return pwrite_or_die(fd, text.data(), text.size(), offset);
+	}
+
+	template<>
+	void
+	pwrite_or_die<vector<char>>(int fd, const vector<char> &text, off_t offset)
 	{
 		return pwrite_or_die(fd, text.data(), text.size(), offset);
 	}
@@ -480,9 +491,9 @@ namespace crucible {
 		return pwrite_or_die(fd, text.data(), text.size(), offset);
 	}

-	Stat::Stat() :
-		stat( (stat) { } )
+	Stat::Stat()
 	{
+		memset_zero<stat>(this);
 	}

 	Stat &
@@ -501,15 +512,15 @@ namespace crucible {
 		return *this;
 	}

-	Stat::Stat(int fd) :
-		stat( (stat) { } )
+	Stat::Stat(int fd)
 	{
+		memset_zero<stat>(this);
 		fstat(fd);
 	}

-	Stat::Stat(const string &filename) :
-		stat( (stat) { } )
+	Stat::Stat(const string &filename)
 	{
+		memset_zero<stat>(this);
 		lstat(filename);
 	}

@@ -524,14 +535,7 @@ namespace crucible {
 	void
 	ioctl_iflags_set(int fd, int attr)
 	{
-		// This bit of nonsense brought to you by Valgrind.
-		union {
-			int attr;
-			long zero;
-		} u;
-		u.zero = 0;
-		u.attr = attr;
-		DIE_IF_MINUS_ONE(ioctl(fd, FS_IOC_SETFLAGS, &u.attr));
+		DIE_IF_MINUS_ONE(ioctl(fd, FS_IOC_SETFLAGS, &attr));
 	}

 	string
--- a/lib/fs.cc
+++ b/lib/fs.cc
@@ -2,7 +2,6 @@

 #include "crucible/error.h"
 #include "crucible/fd.h"
-#include "crucible/hexdump.h"
 #include "crucible/limits.h"
 #include "crucible/ntoa.h"
 #include "crucible/string.h"
@@ -33,11 +32,19 @@ namespace crucible {
 #endif
 	}

+	BtrfsExtentInfo::BtrfsExtentInfo(int dst_fd, off_t dst_offset)
+	{
+		memset_zero<btrfs_ioctl_same_extent_info>(this);
+		fd = dst_fd;
+		logical_offset = dst_offset;
+	}
+
 	BtrfsExtentSame::BtrfsExtentSame(int src_fd, off_t src_offset, off_t src_length) :
-		m_logical_offset(src_offset),
-		m_length(src_length),
 		m_fd(src_fd)
 	{
+		memset_zero<btrfs_ioctl_same_args>(this);
+		logical_offset = src_offset;
+		length = src_length;
 	}

 	BtrfsExtentSame::~BtrfsExtentSame()
@@ -45,12 +52,9 @@ namespace crucible {
 	}

 	void
-	BtrfsExtentSame::add(int const fd, uint64_t const offset)
+	BtrfsExtentSame::add(int fd, off_t offset)
 	{
-		m_info.push_back( (btrfs_ioctl_same_extent_info) {
-			.fd = fd,
-			.logical_offset = offset,
-		});
+		m_info.push_back(BtrfsExtentInfo(fd, offset));
 	}

 	ostream &
@@ -107,8 +111,11 @@ namespace crucible {
 				os << " '" << fd_name << "'";
 			});
 		}
-		os << ", .logical_offset = " << to_hex(bes.m_logical_offset);
-		os << ", .length = " << to_hex(bes.m_length);
+		os << ", .logical_offset = " << to_hex(bes.logical_offset);
+		os << ", .length = " << to_hex(bes.length);
+		os << ", .dest_count = " << bes.dest_count;
+		os << ", .reserved1 = " << bes.reserved1;
+		os << ", .reserved2 = " << bes.reserved2;
 		os << ", .info[] = {";
 		for (size_t i = 0; i < bes.m_info.size(); ++i) {
 			os << " [" << i << "] = " << &(bes.m_info[i]) << ",";
@@ -119,25 +126,22 @@ namespace crucible {
 	void
 	btrfs_clone_range(int src_fd, off_t src_offset, off_t src_length, int dst_fd, off_t dst_offset)
 	{
-		btrfs_ioctl_clone_range_args args ( (btrfs_ioctl_clone_range_args) {
-			.src_fd = src_fd,
-			.src_offset = ranged_cast<uint64_t, off_t>(src_offset),
-			.src_length = ranged_cast<uint64_t, off_t>(src_length),
-			.dest_offset = ranged_cast<uint64_t, off_t>(dst_offset),
-		} );
+		struct btrfs_ioctl_clone_range_args args;
+		memset_zero(&args);
+		args.src_fd = src_fd;
+		args.src_offset = src_offset;
+		args.src_length = src_length;
+		args.dest_offset = dst_offset;
 		DIE_IF_MINUS_ONE(ioctl(dst_fd, BTRFS_IOC_CLONE_RANGE, &args));
 	}

 	void
 	BtrfsExtentSame::do_ioctl()
 	{
-		const size_t buf_size = sizeof(btrfs_ioctl_same_args) + m_info.size() * sizeof(btrfs_ioctl_same_extent_info);
-		ByteVector ioctl_arg( (btrfs_ioctl_same_args) {
-			.logical_offset = m_logical_offset,
-			.length = m_length,
-			.dest_count = ranged_cast<decltype(btrfs_ioctl_same_args::dest_count)>(m_info.size()),
-		}, buf_size);
-		btrfs_ioctl_same_args *const ioctl_ptr = ioctl_arg.get<btrfs_ioctl_same_args>();
+		dest_count = m_info.size();
+		vector<uint8_t> ioctl_arg = vector_copy_struct<btrfs_ioctl_same_args>(this);
+		ioctl_arg.resize(sizeof(btrfs_ioctl_same_args) + dest_count * sizeof(btrfs_ioctl_same_extent_info), 0);
+		btrfs_ioctl_same_args *ioctl_ptr = reinterpret_cast<btrfs_ioctl_same_args *>(ioctl_arg.data());
 		size_t count = 0;
 		for (auto i = m_info.cbegin(); i != m_info.cend(); ++i) {
 			ioctl_ptr->info[count] = static_cast<const btrfs_ioctl_same_extent_info &>(m_info[count]);
@@ -190,15 +194,18 @@ namespace crucible {
 	void *
 	BtrfsDataContainer::prepare(size_t container_size)
 	{
+		if (m_data.size() < container_size) {
+			m_data.resize(container_size);
+		}
+		btrfs_data_container *p = reinterpret_cast<btrfs_data_container *>(m_data.data());
 		const size_t min_size = offsetof(btrfs_data_container, val);
 		if (container_size < min_size) {
 			THROW_ERROR(out_of_range, "container size " << container_size << " smaller than minimum " << min_size);
 		}
-		if (m_data.size() < container_size) {
-			m_data = ByteVector(container_size);
-		}
-		const auto p = m_data.get<btrfs_data_container>();
-		*p = (btrfs_data_container) { };
+		p->bytes_left = 0;
+		p->bytes_missing = 0;
+		p->elem_cnt = 0;
+		p->elem_missed = 0;
 		return p;
 	}

@@ -211,29 +218,25 @@ namespace crucible {
 	decltype(btrfs_data_container::bytes_left)
 	BtrfsDataContainer::get_bytes_left() const
 	{
-		const auto p = m_data.get<btrfs_data_container>();
-		return p->bytes_left;
+		return bytes_left;
 	}

 	decltype(btrfs_data_container::bytes_missing)
 	BtrfsDataContainer::get_bytes_missing() const
 	{
-		const auto p = m_data.get<btrfs_data_container>();
-		return p->bytes_missing;
+		return bytes_missing;
 	}

 	decltype(btrfs_data_container::elem_cnt)
 	BtrfsDataContainer::get_elem_cnt() const
 	{
-		const auto p = m_data.get<btrfs_data_container>();
-		return p->elem_cnt;
+		return elem_cnt;
 	}

 	decltype(btrfs_data_container::elem_missed)
 	BtrfsDataContainer::get_elem_missed() const
 	{
-		const auto p = m_data.get<btrfs_data_container>();
-		return p->elem_missed;
+		return elem_missed;
 	}

 	ostream &
@@ -243,7 +246,7 @@ namespace crucible {
 			return os << "BtrfsIoctlLogicalInoArgs NULL";
 		}
 		os << "BtrfsIoctlLogicalInoArgs {";
-		os << " .m_logical = " << to_hex(p->m_logical);
+		os << " .logical = " << to_hex(p->logical);
 		os << " .inodes[] = {\n";
 		unsigned count = 0;
 		for (auto i = p->m_iors.cbegin(); i != p->m_iors.cend(); ++i) {
@@ -255,9 +258,10 @@ namespace crucible {

 	BtrfsIoctlLogicalInoArgs::BtrfsIoctlLogicalInoArgs(uint64_t new_logical, size_t new_size) :
 		m_container_size(new_size),
-		m_container(new_size),
-		m_logical(new_logical)
+		m_container(new_size)
 	{
+		memset_zero<btrfs_ioctl_logical_ino_args>(this);
+		logical = new_logical;
 	}

 	size_t
@@ -296,6 +300,11 @@ namespace crucible {
 		return m_begin;
 	}

+	BtrfsIoctlLogicalInoArgs::BtrfsInodeOffsetRootSpan::operator vector<BtrfsInodeOffsetRoot>() const
+	{
+		return vector<BtrfsInodeOffsetRoot>(m_begin, m_end);
+	}
+
 	void
 	BtrfsIoctlLogicalInoArgs::BtrfsInodeOffsetRootSpan::clear()
 	{
@@ -305,28 +314,23 @@ namespace crucible {
 	void
 	BtrfsIoctlLogicalInoArgs::set_flags(uint64_t new_flags)
 	{
-		m_flags = new_flags;
+		// We are still supporting building with old headers that don't have .flags yet
+		*(&reserved[0] + 3) = new_flags;
 	}

 	uint64_t
 	BtrfsIoctlLogicalInoArgs::get_flags() const
 	{
 		// We are still supporting building with old headers that don't have .flags yet
-		return m_flags;
+		return *(&reserved[0] + 3);
 	}

 	bool
 	BtrfsIoctlLogicalInoArgs::do_ioctl_nothrow(int fd)
 	{
-		btrfs_ioctl_logical_ino_args args = (btrfs_ioctl_logical_ino_args) {
-			.logical = m_logical,
-			.size = m_container_size,
-			.inodes = reinterpret_cast<uint64_t>(m_container.prepare(m_container_size)),
-		};
-		// We are still supporting building with old headers that don't have .flags yet
-		*(&args.reserved[0] + 3) = m_flags;
-
-		btrfs_ioctl_logical_ino_args *const p = &args;
+		btrfs_ioctl_logical_ino_args *p = static_cast<btrfs_ioctl_logical_ino_args *>(this);
+		inodes = reinterpret_cast<uint64_t>(m_container.prepare(m_container_size));
+		size = m_container.get_size();

 		m_iors.clear();

@@ -363,13 +367,13 @@ namespace crucible {
 			bili_version = BTRFS_IOC_LOGICAL_INO_V2;
 		}

-		btrfs_data_container *const bdc = reinterpret_cast<btrfs_data_container *>(p->inodes);
-		BtrfsInodeOffsetRoot *const ior_iter = reinterpret_cast<BtrfsInodeOffsetRoot *>(bdc->val);
+		btrfs_data_container *bdc = reinterpret_cast<btrfs_data_container *>(p->inodes);
+		BtrfsInodeOffsetRoot *input_iter = reinterpret_cast<BtrfsInodeOffsetRoot *>(bdc->val);

 		// elem_cnt counts uint64_t, but BtrfsInodeOffsetRoot is 3x uint64_t
 		THROW_CHECK1(runtime_error, bdc->elem_cnt, bdc->elem_cnt % 3 == 0);
-		m_iors.m_begin = ior_iter;
-		m_iors.m_end = ior_iter + bdc->elem_cnt / 3;
+		m_iors.m_begin = input_iter;
+		m_iors.m_end = input_iter + bdc->elem_cnt / 3;
 		return true;
 	}

@@ -392,10 +396,9 @@ namespace crucible {
 	}

 	BtrfsIoctlInoPathArgs::BtrfsIoctlInoPathArgs(uint64_t inode, size_t new_size) :
-		btrfs_ioctl_ino_path_args( (btrfs_ioctl_ino_path_args) { } ),
 		m_container_size(new_size)
 	{
-		assert(inum == 0);
+		memset_zero<btrfs_ioctl_ino_path_args>(this);
 		inum = inode;
 	}

@@ -413,14 +416,14 @@ namespace crucible {
 			return false;
 		}

-		btrfs_data_container *const bdc = reinterpret_cast<btrfs_data_container *>(p->fspath);
+		btrfs_data_container *bdc = reinterpret_cast<btrfs_data_container *>(p->fspath);
 		m_paths.reserve(bdc->elem_cnt);

 		const uint64_t *up = reinterpret_cast<const uint64_t *>(bdc->val);
-		const char *const cp = reinterpret_cast<const char *>(bdc->val);
+		const char *cp = reinterpret_cast<const char *>(bdc->val);

 		for (auto count = bdc->elem_cnt; count > 0; --count) {
-			const char *const path = cp + *up++;
+			const char *path = cp + *up++;
 			if (static_cast<size_t>(path - cp) > container.get_size()) {
 				THROW_ERROR(out_of_range, "offset " << (path - cp) << " > size " << container.get_size() << " in " << __PRETTY_FUNCTION__);
 			}
@@ -455,10 +458,9 @@ namespace crucible {
 		return os;
 	}

-	BtrfsIoctlInoLookupArgs::BtrfsIoctlInoLookupArgs(uint64_t new_objectid) :
-		btrfs_ioctl_ino_lookup_args( (btrfs_ioctl_ino_lookup_args) { } )
+	BtrfsIoctlInoLookupArgs::BtrfsIoctlInoLookupArgs(uint64_t new_objectid)
 	{
-		assert(objectid == 0);
+		memset_zero<btrfs_ioctl_ino_lookup_args>(this);
 		objectid = new_objectid;
 	}

@@ -476,9 +478,9 @@ namespace crucible {
 		}
 	}

-	BtrfsIoctlDefragRangeArgs::BtrfsIoctlDefragRangeArgs() :
-		btrfs_ioctl_defrag_range_args( (btrfs_ioctl_defrag_range_args) { } )
+	BtrfsIoctlDefragRangeArgs::BtrfsIoctlDefragRangeArgs()
 	{
+		memset_zero<btrfs_ioctl_defrag_range_args>(this);
 	}

 	bool
@@ -508,10 +510,9 @@ namespace crucible {
 	}

 	string
-	btrfs_compress_type_ntoa(uint8_t compress_type)
+	btrfs_ioctl_defrag_range_compress_type_ntoa(uint32_t compress_type)
 	{
 		static const bits_ntoa_table table[] = {
-			NTOA_TABLE_ENTRY_ENUM(BTRFS_COMPRESS_NONE),
 			NTOA_TABLE_ENTRY_ENUM(BTRFS_COMPRESS_ZLIB),
 			NTOA_TABLE_ENTRY_ENUM(BTRFS_COMPRESS_LZO),
 			NTOA_TABLE_ENTRY_ENUM(BTRFS_COMPRESS_ZSTD),
@@ -531,14 +532,14 @@ namespace crucible {
 		os << " .len = " << p->len;
 		os << " .flags = " << btrfs_ioctl_defrag_range_flags_ntoa(p->flags);
 		os << " .extent_thresh = " << p->extent_thresh;
-		os << " .compress_type = " << btrfs_compress_type_ntoa(p->compress_type);
+		os << " .compress_type = " << btrfs_ioctl_defrag_range_compress_type_ntoa(p->compress_type);
 		os << " .unused[4] = { " << p->unused[0] << ", " << p->unused[1] << ", " << p->unused[2] << ", " << p->unused[3] << "} }";
 		return os;
 	}

-	FiemapExtent::FiemapExtent() :
-		fiemap_extent( (fiemap_extent) { } )
+	FiemapExtent::FiemapExtent()
 	{
+		memset_zero<fiemap_extent>(this);
 	}

 	FiemapExtent::FiemapExtent(const fiemap_extent &that)
@@ -645,10 +646,13 @@ namespace crucible {
 	operator<<(ostream &os, const Fiemap &args)
 	{
 		os << "Fiemap {";
-		os << " .m_start = " << to_hex(args.m_start) << ".." << to_hex(args.m_start + args.m_length);
-		os << ", .m_length = " << to_hex(args.m_length);
-		os << ", .m_flags = " << fiemap_flags_ntoa(args.m_flags);
-		os << ", .fm_extents[" << args.m_extents.size() << "] = {";
+		os << " .fm_start = " << to_hex(args.fm_start) << ".." << to_hex(args.fm_start + args.fm_length);
+		os << ", .fm_length = " << to_hex(args.fm_length);
+		if (args.fm_flags) os << ", .fm_flags = " << fiemap_flags_ntoa(args.fm_flags);
+		os << ", .fm_mapped_extents = " << args.fm_mapped_extents;
+		os << ", .fm_extent_count = " << args.fm_extent_count;
+		if (args.fm_reserved) os << ", .fm_reserved = " << args.fm_reserved;
+		os << ", .fm_extents[] = {";
 		size_t count = 0;
 		for (auto i = args.m_extents.cbegin(); i != args.m_extents.cend(); ++i) {
 			os << "\n\t[" << count++ << "] = " << &(*i) << ",";
@@ -656,35 +660,41 @@ namespace crucible {
 		return os << "\n}";
 	}

-	Fiemap::Fiemap(uint64_t start, uint64_t length) :
-		m_start(start),
-		m_length(length)
+	Fiemap::Fiemap(uint64_t start, uint64_t length)
 	{
+		memset_zero<fiemap>(this);
+		fm_start = start;
+		fm_length = length;
+		// FIEMAP is slow and full of lies.
+		// This makes FIEMAP even slower, but reduces the lies a little.
+		fm_flags = FIEMAP_FLAG_SYNC;
 	}

 	void
 	Fiemap::do_ioctl(int fd)
 	{
 		THROW_CHECK1(out_of_range, m_min_count, m_min_count <= m_max_count);
-		THROW_CHECK1(out_of_range, m_min_count, m_min_count > 0);

-		const auto extent_count = m_min_count;
-		ByteVector ioctl_arg(sizeof(fiemap) + extent_count * sizeof(fiemap_extent));
+		auto extent_count = m_min_count;
+		vector<uint8_t> ioctl_arg = vector_copy_struct<fiemap>(this);

-		fiemap *const ioctl_ptr = ioctl_arg.get<fiemap>();
+		ioctl_arg.resize(sizeof(fiemap) + extent_count * sizeof(fiemap_extent), 0);

-		auto start = m_start;
-		const auto end = m_start + m_length;
+		fiemap *ioctl_ptr = reinterpret_cast<fiemap *>(ioctl_arg.data());
+
+		auto start = fm_start;
+		auto end = fm_start + fm_length;
+
+		auto orig_start = fm_start;
+		auto orig_length = fm_length;

 		vector<FiemapExtent> extents;

 		while (start < end && extents.size() < m_max_count) {
-			*ioctl_ptr = (fiemap) {
-				.fm_start = start,
-				.fm_length = end - start,
-				.fm_flags = m_flags,
-				.fm_extent_count = extent_count,
-			};
+			ioctl_ptr->fm_start = start;
+			ioctl_ptr->fm_length = end - start;
+			ioctl_ptr->fm_extent_count = extent_count;
+			ioctl_ptr->fm_mapped_extents = 0;

 			// cerr << "Before (fd = " << fd << ") : " << ioctl_ptr << endl;
 			DIE_IF_MINUS_ONE(ioctl(fd, FS_IOC_FIEMAP, ioctl_ptr));
@@ -710,89 +720,74 @@ namespace crucible {
 			}
 		}

+		fiemap *this_ptr = static_cast<fiemap *>(this);
+		*this_ptr = *ioctl_ptr;
+		fm_start = orig_start;
+		fm_length = orig_length;
+		fm_extent_count = extents.size();
 		m_extents = extents;
 	}

 	BtrfsIoctlSearchKey::BtrfsIoctlSearchKey(size_t buf_size) :
-		btrfs_ioctl_search_key( (btrfs_ioctl_search_key) {
-			.max_objectid = numeric_limits<decltype(max_objectid)>::max(),
-			.max_offset = numeric_limits<decltype(max_offset)>::max(),
-			.max_transid = numeric_limits<decltype(max_transid)>::max(),
-			.max_type = numeric_limits<decltype(max_type)>::max(),
-			.nr_items = 1,
-		}),
 		m_buf_size(buf_size)
 	{
+		memset_zero<btrfs_ioctl_search_key>(this);
+		max_objectid = numeric_limits<decltype(max_objectid)>::max();
+		max_offset = numeric_limits<decltype(max_offset)>::max();
+		max_transid = numeric_limits<decltype(max_transid)>::max();
+		max_type = numeric_limits<decltype(max_type)>::max();
+		nr_items = numeric_limits<decltype(nr_items)>::max();
 	}

-	BtrfsIoctlSearchHeader::BtrfsIoctlSearchHeader() :
-		btrfs_ioctl_search_header( (btrfs_ioctl_search_header) { } )
+	BtrfsIoctlSearchHeader::BtrfsIoctlSearchHeader()
 	{
+		memset_zero<btrfs_ioctl_search_header>(this);
 	}

 	size_t
-	BtrfsIoctlSearchHeader::set_data(const ByteVector &v, size_t offset)
+	BtrfsIoctlSearchHeader::set_data(const vector<uint8_t> &v, size_t offset)
 	{
 		THROW_CHECK2(invalid_argument, offset, v.size(), offset + sizeof(btrfs_ioctl_search_header) <= v.size());
 		memcpy(static_cast<btrfs_ioctl_search_header *>(this), &v[offset], sizeof(btrfs_ioctl_search_header));
 		offset += sizeof(btrfs_ioctl_search_header);
 		THROW_CHECK2(invalid_argument, offset + len, v.size(), offset + len <= v.size());
-		m_data = ByteVector(v, offset, len);
+		m_data = Spanner<const uint8_t>(v.data() + offset, v.data() + offset + len); // data(): v[size()] is UB when the item ends at the buffer end
 		return offset + len;
 	}

 	bool
 	BtrfsIoctlSearchKey::do_ioctl_nothrow(int fd)
 	{
-		// It would be really nice if the kernel tells us whether our
-		// buffer overflowed or how big the overflowing object
-		// was; instead, we have to guess.
+		// Normally we like to be paranoid and fill empty bytes with zero,
+		// but these buffers can be huge.  80% of a 4GHz CPU huge.
+
+		// Keep the ioctl buffer from one run to the next to save on malloc costs
+		size_t target_buf_size = sizeof(btrfs_ioctl_search_args_v2) + m_buf_size;
+
+		m_ioctl_arg = vector_copy_struct<btrfs_ioctl_search_key>(this);
+		m_ioctl_arg.resize(target_buf_size);

 		m_result.clear();
-		// Make sure there is space for at least the search key and one (empty) header
-		size_t buf_size = max(m_buf_size, sizeof(btrfs_ioctl_search_args_v2) + sizeof(btrfs_ioctl_search_header));
-		ByteVector ioctl_arg;
-		btrfs_ioctl_search_args_v2 *ioctl_ptr;
-		do {
-			// ioctl buffer size does not include search key header or buffer size
-			ioctl_arg = ByteVector(buf_size + sizeof(btrfs_ioctl_search_args_v2));
-			ioctl_ptr = ioctl_arg.get<btrfs_ioctl_search_args_v2>();
-			ioctl_ptr->key = static_cast<const btrfs_ioctl_search_key&>(*this);
-			ioctl_ptr->buf_size = buf_size;
-			// Don't bother supporting V1.  Kernels that old have other problems.
-			int rv = ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, ioctl_arg.data());
-			if (rv != 0 && errno != EOVERFLOW) {
-				return false;
-			}
-			if (rv == 0 && nr_items <= ioctl_ptr->key.nr_items) {
-				// got all the items we wanted, thanks
-				m_buf_size = max(m_buf_size, buf_size);
-				break;
-			}
-			// Didn't get all the items we wanted.  Increase the buf size and try again.
-			// These sizes are very common on default-formatted btrfs, so use these
-			// instead of naive doubling.
-			if (buf_size < 4096) {
-				buf_size = 4096;
-			} else if (buf_size < 16384) {
-				buf_size = 16384;
-			} else if (buf_size < 65536) {
-				buf_size = 65536;
-			} else {
-				buf_size *= 2;
-			}
-			// don't automatically raise the buf size higher than 64K, the largest possible btrfs item
-		} while (buf_size < 65536);

-		// ioctl changes nr_items, this has to be copied back
+		btrfs_ioctl_search_args_v2 *ioctl_ptr = reinterpret_cast<btrfs_ioctl_search_args_v2 *>(m_ioctl_arg.data());
+
+		ioctl_ptr->buf_size = m_buf_size;
+
+		// Don't bother supporting V1.  Kernels that old have other problems.
+		int rv = ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, ioctl_ptr);
+		if (rv != 0) {
+			return false;
+		}
+
 		static_cast<btrfs_ioctl_search_key&>(*this) = ioctl_ptr->key;

 		size_t offset = pointer_distance(ioctl_ptr->buf, ioctl_ptr);
 		for (decltype(nr_items) i = 0; i < nr_items; ++i) {
 			BtrfsIoctlSearchHeader item;
-			offset = item.set_data(ioctl_arg, offset);
+			offset = item.set_data(m_ioctl_arg, offset);
 			m_result.insert(item);
 		}
+
 		return true;
 	}

@@ -800,7 +795,7 @@ namespace crucible {
 	BtrfsIoctlSearchKey::do_ioctl(int fd)
 	{
 		if (!do_ioctl_nothrow(fd)) {
-			THROW_ERRNO("BTRFS_IOC_TREE_SEARCH_V2: " << name_fd(fd) << ": " << *this);
+			THROW_ERRNO("BTRFS_IOC_TREE_SEARCH_V2: " << name_fd(fd));
 		}
 	}

@@ -811,47 +806,33 @@ namespace crucible {
 		min_type = ref.type;
 		min_offset = ref.offset + 1;
 		if (min_offset < ref.offset) {
-			// We wrapped, try the next type
-			++min_type;
-			assert(min_offset == 0);
-			if (min_type < ref.type) {
-				assert(min_type == 0);
-				// We wrapped, try the next objectid
-				++min_objectid;
-				// no advancement possible at end
-				THROW_CHECK1(runtime_error, min_type, min_type == 0);
-			}
+			// We wrapped, try the next objectid
+			++min_objectid;
 		}
 	}

-	void
-	BtrfsIoctlSearchKey::next_min(const BtrfsIoctlSearchHeader &ref, const uint8_t type)
+	template <class V>
+	ostream &
+	hexdump(ostream &os, const V &v)
 	{
-		if (ref.type < type) {
-			// forward to type in same object with zero offset
-			min_objectid = ref.objectid;
-			min_type = type;
-			min_offset = 0;
-		} else if (ref.type > type) {
-			// skip directly to start of next objectid with target type
-			min_objectid = ref.objectid + 1;
-			// no advancement possible at end
-			THROW_CHECK2(out_of_range, min_objectid, ref.objectid, min_objectid > ref.objectid);
-			min_type = type;
-			min_offset = 0;
-		} else {
-			// advance within this type
-			min_objectid = ref.objectid;
-			min_type = ref.type;
-			min_offset = ref.offset + 1;
-			if (min_offset < ref.offset) {
-				// We wrapped, try the next objectid, same type
-				++min_objectid;
-				THROW_CHECK2(out_of_range, min_objectid, ref.objectid, min_objectid > ref.objectid);
-				min_type = type;
-				assert(min_offset == 0);
+		os << "vector<uint8_t> { size = " << v.size() << ", data:\n";
+		for (size_t i = 0; i < v.size(); i += 8) {
+			string hex, ascii;
+			for (size_t j = i; j < i + 8; ++j) {
+				if (j < v.size()) {
+					uint8_t c = v[j];
+					char buf[8];
+					sprintf(buf, "%02x ", c);
+					hex += buf;
+					ascii += (c < 32 || c > 126) ? '.' : c;
+				} else {
+					hex += "   ";
+					ascii += ' ';
+				}
 			}
+			os << astringprintf("\t%08zx %s %s\n", i, hex.c_str(), ascii.c_str()); // %zx: i is size_t
 		}
+		return os << "}";
 	}

 	string
@@ -1048,9 +1029,9 @@ namespace crucible {
 		return rv;
 	}

-	Statvfs::Statvfs() :
-		statvfs( (statvfs) { } )
+	Statvfs::Statvfs()
 	{
+		memset_zero<statvfs>(this);
 	}

 	Statvfs::Statvfs(int fd) :
@@ -1101,20 +1082,16 @@ namespace crucible {
 		return os << " }";
 	};

-	BtrfsIoctlFsInfoArgs::BtrfsIoctlFsInfoArgs() :
-		btrfs_ioctl_fs_info_args_v3( (btrfs_ioctl_fs_info_args_v3) {
-			.flags = 0
-				| BTRFS_FS_INFO_FLAG_CSUM_INFO
-				| BTRFS_FS_INFO_FLAG_GENERATION
-			,
-		})
+	BtrfsIoctlFsInfoArgs::BtrfsIoctlFsInfoArgs()
 	{
+		memset_zero<btrfs_ioctl_fs_info_args_v2>(this);
+		flags = BTRFS_FS_INFO_FLAG_CSUM_INFO;
 	}

 	void
 	BtrfsIoctlFsInfoArgs::do_ioctl(int fd)
 	{
-		btrfs_ioctl_fs_info_args_v3 *p = static_cast<btrfs_ioctl_fs_info_args_v3 *>(this);
+		btrfs_ioctl_fs_info_args_v2 *p = static_cast<btrfs_ioctl_fs_info_args_v2 *>(this);
 		if (ioctl(fd, BTRFS_IOC_FS_INFO, p)) {
 			THROW_ERRNO("BTRFS_IOC_FS_INFO: fd " << fd);
 		}
@@ -1123,19 +1100,13 @@ namespace crucible {
 	uint16_t
 	BtrfsIoctlFsInfoArgs::csum_type() const
 	{
-		return this->btrfs_ioctl_fs_info_args_v3::csum_type;
+		return this->btrfs_ioctl_fs_info_args_v2::csum_type;
 	}

 	uint16_t
 	BtrfsIoctlFsInfoArgs::csum_size() const
 	{
-		return this->btrfs_ioctl_fs_info_args_v3::csum_size;
-	}
-
-	uint64_t
-	BtrfsIoctlFsInfoArgs::generation() const
-	{
-		return this->btrfs_ioctl_fs_info_args_v3::generation;
+		return this->btrfs_ioctl_fs_info_args_v2::csum_size;
 	}

 };
--- a/lib/multilock.cc
+++ b/lib/multilock.cc
@@ -1,72 +0,0 @@
-#include "crucible/multilock.h"
-
-#include "crucible/error.h"
-
-namespace crucible {
-	using namespace std;
-
-	MultiLocker::LockHandle::LockHandle(const string &type, MultiLocker &parent) :
-		m_type(type),
-		m_parent(parent)
-	{
-	}
-
-	void
-	MultiLocker::LockHandle::set_locked(const bool state)
-	{
-		m_locked = state;
-	}
-
-	MultiLocker::LockHandle::~LockHandle()
-	{
-		if (m_locked) {
-			m_parent.put_lock(m_type);
-			m_locked = false;
-		}
-	}
-
-	bool
-	MultiLocker::is_lock_available(const string &type)
-	{
-		for (const auto &i : m_counters) {
-			if (i.second != 0 && i.first != type) {
-				return false;
-			}
-		}
-		return true;
-	}
-
-	void
-	MultiLocker::put_lock(const string &type)
-	{
-		unique_lock<mutex> lock(m_mutex);
-		auto &counter = m_counters[type];
-		THROW_CHECK2(runtime_error, type, counter, counter > 0);
-		--counter;
-		if (counter == 0) {
-			m_cv.notify_all();
-		}
-	}
-
-	shared_ptr<MultiLocker::LockHandle>
-	MultiLocker::get_lock_private(const string &type)
-	{
-		unique_lock<mutex> lock(m_mutex);
-		m_counters.insert(make_pair(type, size_t(0)));
-		while (!is_lock_available(type)) {
-			m_cv.wait(lock);
-		}
-		const auto rv = make_shared<LockHandle>(type, *this);
-		++m_counters[type];
-		rv->set_locked(true);
-		return rv;
-	}
-
-	shared_ptr<MultiLocker::LockHandle>
-	MultiLocker::get_lock(const string &type)
-	{
-		static MultiLocker s_process_instance;
-		return s_process_instance.get_lock_private(type);
-	}
-
-}
--- a/lib/task.cc
+++ b/lib/task.cc
@@ -18,27 +18,6 @@
 namespace crucible {
 	using namespace std;

-	static const size_t thread_name_length = 15; // TASK_COMM_LEN on Linux
-
-	void
-	pthread_setname(const string &name)
-	{
-		auto name_copy = name.substr(0, thread_name_length);
-		// Don't care if a debugging facility fails
-		pthread_setname_np(pthread_self(), name_copy.c_str());
-	}
-
-	string
-	pthread_getname()
-	{
-		char buf[thread_name_length + 1] = { 0 };
-		// We'll get an empty name if this fails...
-		pthread_getname_np(pthread_self(), buf, sizeof(buf));
-		// ...or at least null-terminated garbage
-		buf[thread_name_length] = '\0';
-		return buf;
-	}
-
 	class TaskState;
 	using TaskStatePtr = shared_ptr<TaskState>;
 	using TaskStateWeak = weak_ptr<TaskState>;
@@ -51,8 +30,7 @@ namespace crucible {

 	static thread_local TaskStatePtr tl_current_task;

-	/// because we don't want to bump -std=c++-17 just to get scoped_lock.
-	/// Also we don't want to self-deadlock if both mutexes are the same mutex.
+	/// because we don't want to bump -std=c++-17 just to get scoped_lock
 	class PairLock {
 		unique_lock<mutex>	m_lock1, m_lock2;
 	public:
@@ -76,8 +54,8 @@ namespace crucible {
 		/// Tasks to be executed after the current task is executed
 		list<TaskStatePtr>			m_post_exec_queue;

-		/// Set by run() and append().  Cleared by exec().
-		bool					m_run_now = false;
+		/// Incremented by run() and append().  Decremented by exec().
+		size_t					m_run_count = 0;

 		/// Set when task starts execution by exec().
 		/// Cleared when exec() ends.
@@ -111,7 +89,6 @@ namespace crucible {

 		TaskState &operator=(const TaskState &) = delete;
 		TaskState(const TaskState &) = delete;
-		TaskState(TaskState &&) = delete;

 	public:
 		~TaskState();
@@ -159,8 +136,6 @@ namespace crucible {
 		size_t					m_configured_thread_max;
 		double					m_thread_target;
 		bool					m_cancelled = false;
-		bool					m_paused = false;
-		TaskMaster::LoadStats			m_load_stats;

 	friend class TaskConsumer;
 	friend class TaskMaster;
@@ -174,7 +149,6 @@ namespace crucible {
 		void set_loadavg_target(double target);
 		void loadavg_thread_fn();
 		void cancel();
-		void pause(bool paused = true);

 		TaskMasterState &operator=(const TaskMasterState &) = delete;
 		TaskMasterState(const TaskMasterState &) = delete;
@@ -187,7 +161,6 @@ namespace crucible {
 		static void push_front(TaskQueue &queue);
 		size_t get_queue_count();
 		size_t get_thread_count();
-		static TaskMaster::LoadStats get_current_load();
 	};

 	class TaskConsumer : public enable_shared_from_this<TaskConsumer> {
@@ -219,34 +192,25 @@ namespace crucible {
 		if (queue.empty()) {
 			return;
 		}
-		const auto tlcc = tl_current_consumer;
+		auto tlcc = tl_current_consumer;
 		if (tlcc) {
 			// We are executing under a TaskConsumer, splice our post-exec queue at front.
 			// No locks needed because we are using only thread-local objects.
 			tlcc->m_local_queue.splice(tlcc->m_local_queue.begin(), queue);
 		} else {
 			// We are not executing under a TaskConsumer.
-			// If there is only one task, then just insert it at the front of the queue.
-			if (queue.size() == 1) {
-				TaskMasterState::push_front(queue);
-			} else {
-				// If there are multiple tasks, create a new task to wrap our post-exec queue,
-				// then push it to the front of the global queue using normal locking methods.
-				TaskStatePtr rescue_task(make_shared<TaskState>("rescue_task", [](){}));
-				swap(rescue_task->m_post_exec_queue, queue);
-				TaskQueue tq_one { rescue_task };
-				TaskMasterState::push_front(tq_one);
-			}
+			// Create a new task to wrap our post-exec queue,
+			// then push it to the front of the global queue using normal locking methods.
+			TaskStatePtr rescue_task(make_shared<TaskState>("rescue_task", [](){}));
+			swap(rescue_task->m_post_exec_queue, queue);
+			TaskQueue tq_one { rescue_task };
+			TaskMasterState::push_front(tq_one);
 		}
-		assert(queue.empty());
 	}

 	TaskState::~TaskState()
 	{
 		--s_instance_count;
-		unique_lock<mutex> lock(m_mutex);
-		// If any dependent Tasks were appended since the last exec, run them now
-		TaskState::rescue_queue(m_post_exec_queue);
 	}

 	TaskState::TaskState(string title, function<void()> exec_fn) :
@@ -283,10 +247,11 @@ namespace crucible {
 	void
 	TaskState::clear_queue(TaskQueue &tq)
 	{
-		for (auto &i : tq) {
+		while (!tq.empty()) {
+			auto i = *tq.begin();
+			tq.pop_front();
 			i->clear();
 		}
-		tq.clear();
 	}

 	void
@@ -301,8 +266,8 @@ namespace crucible {
 	{
 		THROW_CHECK0(invalid_argument, task);
 		PairLock lock(m_mutex, task->m_mutex);
-		if (!task->m_run_now) {
-			task->m_run_now = true;
+		if (!task->m_run_count) {
+			++task->m_run_count;
 			append_nolock(task);
 		}
 	}
@@ -318,25 +283,26 @@ namespace crucible {
 			append_nolock(shared_from_this());
 			return;
 		} else {
-			m_run_now = false;
+			--m_run_count;
 			m_is_running = true;
 		}
+		lock.unlock();
+
+		char buf[24] = { 0 };
+		DIE_IF_MINUS_ERRNO(pthread_getname_np(pthread_self(), buf, sizeof(buf)));
+		DIE_IF_MINUS_ERRNO(pthread_setname_np(pthread_self(), m_title.c_str()));

 		TaskStatePtr this_task = shared_from_this();
 		swap(this_task, tl_current_task);
-		lock.unlock();
-
-		const auto old_thread_name = pthread_getname();
-		pthread_setname(m_title);

 		catch_all([&]() {
 			m_exec_fn();
 		});

-		pthread_setname(old_thread_name);
+		swap(this_task, tl_current_task);
+		pthread_setname_np(pthread_self(), buf);

 		lock.lock();
-		swap(this_task, tl_current_task);
 		m_is_running = false;

 		// Splice task post_exec queue at front of local queue
@@ -360,25 +326,24 @@ namespace crucible {
 	TaskState::run()
 	{
 		unique_lock<mutex> lock(m_mutex);
-		if (m_run_now) {
+		if (m_run_count) {
 			return;
 		}
-		m_run_now = true;
+		++m_run_count;
 		TaskMasterState::push_back(shared_from_this());
 	}

 	TaskMasterState::TaskMasterState(size_t thread_max) :
 		m_thread_max(thread_max),
 		m_configured_thread_max(thread_max),
-		m_thread_target(thread_max),
-		m_load_stats(TaskMaster::LoadStats { 0 })
+		m_thread_target(thread_max)
 	{
 	}

 	void
 	TaskMasterState::start_threads_nolock()
 	{
-		while (m_threads.size() < m_thread_max && !m_paused) {
+		while (m_threads.size() < m_thread_max) {
 			m_threads.insert(make_shared<TaskConsumer>(shared_from_this()));
 		}
 	}
@@ -445,13 +410,6 @@ namespace crucible {
 		return s_tms->m_threads.size();
 	}

-	TaskMaster::LoadStats
-	TaskMaster::get_current_load()
-	{
-		unique_lock<mutex> lock(s_tms->m_mutex);
-		return s_tms->m_load_stats;
-	}
-
 	ostream &
 	TaskMaster::print_queue(ostream &os)
 	{
@@ -486,8 +444,8 @@ namespace crucible {
 	size_t
 	TaskMasterState::calculate_thread_count_nolock()
 	{
-		if (m_paused) {
-			// No threads running while paused or cancelled
+		if (m_cancelled) {
+			// No threads running while cancelled
 			return 0;
 		}

@@ -519,20 +477,18 @@ namespace crucible {

 		m_prev_loadavg = loadavg;

-		const double load_deficit = m_load_target - loadavg;
-		if (load_deficit > 0) {
-			// Load is too low, solve by adding another worker
-			m_thread_target += load_deficit / 3;
-		} else if (load_deficit < 0) {
-			// Load is too high, solve by removing all known excess tasks
-			m_thread_target += load_deficit;
-		}
+		// Change the thread target based on the
+		// difference between current and desired load
+		// but don't get too close all at once due to rounding and sample error.
+		// If m_load_target < 1.0 then we are just doing PWM with one thread.

-		m_load_stats = TaskMaster::LoadStats {
-			.current_load = current_load,
-			.thread_target = m_thread_target,
-			.loadavg = loadavg,
-		};
+		if (m_load_target <= 1.0) {
+			m_thread_target = 1.0;
+		} else if (m_load_target - current_load >= 1.0) {
+			m_thread_target += (m_load_target - current_load - 1.0) / 2.0;
+		} else if (m_load_target < current_load) {
+			m_thread_target += m_load_target - current_load;
+		}

 		// Cannot exceed configured maximum thread count or less than zero
 		m_thread_target = min(max(0.0, m_thread_target), double(m_configured_thread_max));
@@ -563,6 +519,12 @@ namespace crucible {
 	TaskMasterState::set_thread_count(size_t thread_max)
 	{
 		unique_lock<mutex> lock(m_mutex);
+		// XXX: someday we might want to uncancel, and this would be the place to do it;
+		// however, when we cancel we destroy the entire Task queue, and that might be
+		// non-trivial to recover from
+		if (m_cancelled) {
+			return;
+		}
 		m_configured_thread_max = thread_max;
 		lock.unlock();
 		adjust_thread_count();
@@ -579,7 +541,6 @@ namespace crucible {
 	TaskMasterState::cancel()
 	{
 		unique_lock<mutex> lock(m_mutex);
-		m_paused = true;
 		m_cancelled = true;
 		decltype(m_queue) empty_queue;
 		m_queue.swap(empty_queue);
@@ -594,25 +555,14 @@ namespace crucible {
 		s_tms->cancel();
 	}

-	void
-	TaskMasterState::pause(const bool paused)
-	{
-		unique_lock<mutex> lock(m_mutex);
-		m_paused = paused;
-		m_condvar.notify_all();
-		lock.unlock();
-	}
-
-	void
-	TaskMaster::pause(const bool paused)
-	{
-		s_tms->pause(paused);
-	}
-
 	void
 	TaskMasterState::set_thread_min_count(size_t thread_min)
 	{
 		unique_lock<mutex> lock(m_mutex);
+		// XXX: someday we might want to uncancel, and this would be the place to do it
+		if (m_cancelled) {
+			return;
+		}
 		m_thread_min = thread_min;
 		lock.unlock();
 		adjust_thread_count();
@@ -628,7 +578,7 @@ namespace crucible {
 	void
 	TaskMasterState::loadavg_thread_fn()
 	{
-		pthread_setname("load_tracker");
+		pthread_setname_np(pthread_self(), "load_tracker");
 		while (!m_cancelled) {
 			adjust_thread_count();
 			nanosleep(5.0);
@@ -744,7 +694,7 @@ namespace crucible {
 	TaskConsumer::consumer_thread()
 	{
 		// Keep a copy because we will be destroying *this later
-		const auto master_copy = m_master;
+		auto master_copy = m_master;

 		// Constructor is running with master locked.
 		// Wait until that is done before trying to do anything.
@@ -754,13 +704,13 @@ namespace crucible {
 		m_thread->detach();

 		// Set thread name so it isn't empty or the name of some other thread
-		pthread_setname("task_consumer");
+		DIE_IF_MINUS_ERRNO(pthread_setname_np(pthread_self(), "task_consumer"));

 		// It is now safe to access our own shared_ptr
 		TaskConsumerPtr this_consumer = shared_from_this();
 		swap(this_consumer, tl_current_consumer);

-		while (!master_copy->m_paused) {
+		while (!master_copy->m_cancelled) {
 			if (master_copy->m_thread_max < master_copy->m_threads.size()) {
 				// We are one of too many threads, exit now
 				break;
@@ -799,7 +749,6 @@ namespace crucible {
 		// There is no longer a current consumer, but hold our own shared
 		// state so it's still there in the destructor
 		swap(this_consumer, tl_current_consumer);
-		assert(!tl_current_consumer);

 		// Release lock to rescue queue (may attempt to queue a new task at TaskMaster).
 		// rescue_queue normally sends tasks to the local queue of the current TaskConsumer thread,
@@ -831,16 +780,24 @@ namespace crucible {
 		void insert_task(Task t);
 	};

+	Barrier::Barrier(shared_ptr<BarrierState> pbs) :
+		m_barrier_state(pbs)
+	{
+	}
+
+	Barrier::Barrier() :
+		m_barrier_state(make_shared<BarrierState>())
+	{
+	}
+
 	void
 	BarrierState::release()
 	{
-		set<Task> tasks_local;
 		unique_lock<mutex> lock(m_mutex);
-		swap(tasks_local, m_tasks);
-		lock.unlock();
-		for (const auto &i : tasks_local) {
+		for (auto i : m_tasks) {
 			i.run();
 		}
+		m_tasks.clear();
 	}

 	BarrierState::~BarrierState()
@@ -848,6 +805,17 @@ namespace crucible {
 		release();
 	}

+	BarrierLock::BarrierLock(shared_ptr<BarrierState> pbs) :
+		m_barrier_state(pbs)
+	{
+	}
+
+	void
+	BarrierLock::release()
+	{
+		m_barrier_state.reset();
+	}
+
 	void
 	BarrierState::insert_task(Task t)
 	{
@@ -855,69 +823,122 @@ namespace crucible {
 		m_tasks.insert(t);
 	}

-	Barrier::Barrier() :
-		m_barrier_state(make_shared<BarrierState>())
-	{
-	}
-
 	void
 	Barrier::insert_task(Task t)
 	{
 		m_barrier_state->insert_task(t);
 	}

-	void
-	Barrier::release()
+	BarrierLock
+	Barrier::lock()
 	{
-		m_barrier_state.reset();
+		return BarrierLock(m_barrier_state);
 	}

-	ExclusionLock::ExclusionLock(shared_ptr<Task> owner) :
-		m_owner(owner)
+	class ExclusionState {
+		mutex		m_mutex;
+		bool		m_locked = false;
+		Task		m_task;
+
+	public:
+		ExclusionState(const string &title);
+		~ExclusionState();
+		void release();
+		bool try_lock();
+		void insert_task(Task t);
+	};
+
+	Exclusion::Exclusion(shared_ptr<ExclusionState> pbs) :
+		m_exclusion_state(pbs)
+	{
+	}
+
+	Exclusion::Exclusion(const string &title) :
+		m_exclusion_state(make_shared<ExclusionState>(title))
+	{
+	}
+
+	ExclusionState::ExclusionState(const string &title) :
+		m_task(title, [](){})
+	{
+	}
+
+	void
+	ExclusionState::release()
+	{
+		unique_lock<mutex> lock(m_mutex);
+		m_locked = false;
+		m_task.run();
+	}
+
+	ExclusionState::~ExclusionState()
+	{
+		release();
+	}
+
+	ExclusionLock::ExclusionLock(shared_ptr<ExclusionState> pbs) :
+		m_exclusion_state(pbs)
 	{
 	}

 	void
 	ExclusionLock::release()
 	{
-		m_owner.reset();
+		if (m_exclusion_state) {
+			m_exclusion_state->release();
+			m_exclusion_state.reset();
+		}
+	}
+
+	ExclusionLock::~ExclusionLock()
+	{
+		release();
 	}

 	void
-	Exclusion::insert_task(const Task &task)
+	ExclusionState::insert_task(Task task)
 	{
 		unique_lock<mutex> lock(m_mutex);
-		const auto sp = m_owner.lock();
-		lock.unlock();
-		if (sp) {
+		if (m_locked) {
 			// If Exclusion is locked then queue task for release;
-			sp->append(task);
+			m_task.append(task);
 		} else {
 			// otherwise, run the inserted task immediately
 			task.run();
 		}
 	}

-	ExclusionLock
-	Exclusion::try_lock(const Task &task)
+	bool
+	ExclusionState::try_lock()
 	{
 		unique_lock<mutex> lock(m_mutex);
-		const auto sp = m_owner.lock();
-		if (sp) {
-			if (task) {
-				sp->append(task);
-			}
-			return ExclusionLock();
+		if (m_locked) {
+			return false;
 		} else {
-			const auto rv = make_shared<Task>(task);
-			m_owner = rv;
-			return ExclusionLock(rv);
+			m_locked = true;
+			return true;
 		}
 	}

+	void
+	Exclusion::insert_task(Task t)
+	{
+		m_exclusion_state->insert_task(t);
+	}
+
 	ExclusionLock::operator bool() const
 	{
-		return !!m_owner;
+		return !!m_exclusion_state;
 	}

+	ExclusionLock
+	Exclusion::try_lock()
+	{
+		THROW_CHECK0(runtime_error, m_exclusion_state);
+		if (m_exclusion_state->try_lock()) {
+			return ExclusionLock(m_exclusion_state);
+		} else {
+			return ExclusionLock();
+		}
+	}
 }
--- a/lib/uname.cc
+++ b/lib/uname.cc
@@ -1,11 +0,0 @@
-#include "crucible/error.h"
-#include "crucible/uname.h"
-
-namespace crucible {
-	using namespace std;
-
-	Uname::Uname()
-	{
-		DIE_IF_NON_ZERO(uname(static_cast<utsname*>(this)));
-	}
-}
--- a/2
+++ b/2
@@ -10,4 +10,4 @@ CCFLAGS = -Wall -Wextra -Werror -O3
 CCFLAGS += -I../include -D_FILE_OFFSET_BITS=64

 BEES_CFLAGS   = $(CCFLAGS) -std=c99 $(CFLAGS)
-BEES_CXXFLAGS = $(CCFLAGS) -std=c++11 -Wold-style-cast -Wno-missing-field-initializers $(CXXFLAGS)
+BEES_CXXFLAGS = $(CCFLAGS) -std=c++11 -Wold-style-cast $(CXXFLAGS)
--- a/scripts/beesd.in
+++ b/scripts/beesd.in
@@ -15,7 +15,7 @@ readonly AL128K="$((128*1024))"
 readonly AL16M="$((16*1024*1024))"
 readonly CONFIG_DIR=@ETC_PREFIX@/bees/

-readonly bees_bin=$(realpath @DESTDIR@/@LIBEXEC_PREFIX@/bees)
+readonly bees_bin=$(realpath @LIBEXEC_PREFIX@/bees)

 command -v "$bees_bin" &> /dev/null || ERRO "Missing 'bees' agent"

@@ -128,7 +128,7 @@ fi
    fi
    if (( "$OLD_SIZE" != "$NEW_SIZE" )); then
        INFO "Resize db: $OLD_SIZE -> $NEW_SIZE"
-        rm -f "$BEESHOME/beescrawl.dat"
+        [ -f "$BEESHOME/beescrawl.$UUID.dat" ] && rm "$BEESHOME/beescrawl.$UUID.dat"
        truncate -s $NEW_SIZE $DB_PATH
    fi
    chmod 700 "$DB_PATH"
--- a/scripts/beesd@.service.in
+++ b/scripts/beesd@.service.in
@@ -17,7 +17,6 @@ KillSignal=SIGTERM
 MemoryAccounting=true
 Nice=19
 Restart=on-abnormal
-RuntimeDirectory=bees
 StartupCPUWeight=25
 StartupIOWeight=25

--- a/src/Makefile
+++ b/src/Makefile
@@ -1,6 +1,11 @@
 BEES = ../bin/bees
+PROGRAMS = \
+	../bin/fiemap \
+	../bin/fiewalk \

-all: $(BEES)
+PROGRAM_OBJS = $(foreach b,$(PROGRAMS),$(patsubst ../bin/%,%.o,$(b)))
+
+all: $(BEES) $(PROGRAMS)

 include ../makeflags
 -include ../localconf
@@ -20,18 +25,25 @@ BEES_OBJS = \

 ALL_OBJS = $(BEES_OBJS) $(PROGRAM_OBJS)

-bees-version.c: bees.h $(BEES_OBJS:.o=.cc) Makefile ../lib/libcrucible.a
-	echo "const char *BEES_VERSION = \"$(BEES_VERSION)\";" > bees-version.c.new
-	if ! [ -e "$@" ] || ! cmp -s "$@.new" "$@"; then mv -fv $@.new $@; fi
+bees-version.c: bees.h $(BEES_OBJS:.o=.cc) Makefile
+	echo "const char *BEES_VERSION = \"$(BEES_VERSION)\";" > bees-version.new.c
+	mv -f bees-version.new.c bees-version.c

 bees-usage.c: bees-usage.txt Makefile
 	(echo 'const char *BEES_USAGE = '; sed -r 's/^(.*)$$/"\1\\n"/' < bees-usage.txt; echo ';') > bees-usage.new.c
 	mv -f bees-usage.new.c bees-usage.c

-%.dep: %.cc Makefile
+.depends:
+	mkdir -p $@
+
+.depends/%.dep: %.cc Makefile | .depends
 	$(CXX) $(BEES_CXXFLAGS) -M -MF $@ -MT $(<:.cc=.o) $<

-include $(ALL_OBJS:%.o=%.dep)
+depends.mk: $(ALL_OBJS:%.o=.depends/%.dep)
+	cat $^ > $@.new
+	mv -f $@.new $@
+
+include depends.mk

 %.o: %.c ../makeflags
 	$(CC) $(BEES_CFLAGS) -o $@ -c $<
@@ -39,6 +51,11 @@ include $(ALL_OBJS:%.o=%.dep)
 %.o: %.cc ../makeflags
 	$(CXX) $(BEES_CXXFLAGS) -o $@ -c $<

+$(PROGRAMS): ../bin/%: %.o
+	$(CXX) $(BEES_CXXFLAGS) $(BEES_LDFLAGS) -o $@ $< $(LIBS)
+
+$(PROGRAMS): ../lib/libcrucible.a
+
 $(BEES): $(BEES_OBJS) bees-version.o bees-usage.o ../lib/libcrucible.a
 	$(CXX) $(BEES_CXXFLAGS) $(BEES_LDFLAGS) -o $@ $^ $(LIBS)

--- a/src/bees-context.cc
+++ b/src/bees-context.cc
@@ -43,13 +43,12 @@ BeesFdCache::BeesFdCache(shared_ptr<BeesContext> ctx) :
 void
 BeesFdCache::clear()
 {
-	BEESLOGDEBUG("Clearing root FD cache with size " << m_root_cache.size() << " to enable subvol delete");
-	BEESNOTE("Clearing root FD cache with size " << m_root_cache.size());
+	BEESNOTE("Clearing root FD cache to enable subvol delete");
+	BEESLOGDEBUG("Clearing root FD cache to enable subvol delete");
 	m_root_cache.clear();
 	BEESCOUNT(root_clear);
-
-	BEESLOGDEBUG("Clearing open FD cache with size " << m_file_cache.size() << " to enable file delete");
-	BEESNOTE("Clearing open FD cache with size " << m_file_cache.size());
+	BEESLOGDEBUG("Clearing open FD cache to enable file delete");
+	BEESNOTE("Clearing open FD cache to enable file delete");
 	m_file_cache.clear();
 	BEESCOUNT(open_clear);
 }
@@ -85,11 +84,11 @@ BeesContext::dump_status()
 		ofs << "RATES:\n";
 		ofs << "\t" << avg_rates << "\n";

-		const auto load_stats = TaskMaster::get_current_load();
-		ofs << "THREADS (work queue " << TaskMaster::get_queue_count() << " of " << Task::instance_count() << " tasks, " << TaskMaster::get_thread_count() << " workers, load: current " << load_stats.current_load << " target " << load_stats.thread_target << " average " << load_stats.loadavg << "):\n";
+		ofs << "THREADS (work queue " << TaskMaster::get_queue_count() << " of " << Task::instance_count() << " tasks, " << TaskMaster::get_thread_count() << " workers):\n";
 		for (auto t : BeesNote::get_status()) {
 			ofs << "\ttid " << t.first << ": " << t.second << "\n";
 		}
+
 #if 0
 		// Huge amount of data, not a lot of information (yet)
 		ofs << "WORKERS:\n";
@@ -153,8 +152,8 @@ BeesContext::show_progress()
 		BEESLOGINFO("\t" << deltaRates);

 		BEESNOTE("logging current thread status");
-		const auto load_stats = TaskMaster::get_current_load();
-		BEESLOGINFO("THREADS (work queue " << TaskMaster::get_queue_count() << " of " << Task::instance_count() << " tasks, " << TaskMaster::get_thread_count() << " workers, load: current " << load_stats.current_load << " target " << load_stats.thread_target << " average " << load_stats.loadavg << "):");
+		BEESLOGINFO("THREADS:");
+
 		for (auto t : BeesNote::get_status()) {
 			BEESLOGINFO("\ttid " << t.first << ": " << t.second);
 		}
@@ -188,26 +187,29 @@ BeesContext::is_root_ro(uint64_t root)
 }

 bool
-BeesContext::dedup(const BeesRangePair &brp_in)
+BeesContext::dedup(const BeesRangePair &brp)
 {
 	// TOOLONG and NOTE can retroactively fill in the filename details, but LOG can't
-	BEESNOTE("dedup " << brp_in);
+	BEESNOTE("dedup " << brp);

-	if (is_root_ro(brp_in.second.fid().root())) {
-		// BEESLOGDEBUG("WORKAROUND: dst root " << (brp_in.second.fid().root()) << " is read-only);
+	brp.second.fd(shared_from_this());
+
+	if (is_root_ro(brp.second.fid().root())) {
+		// BEESLOGDEBUG("WORKAROUND: dst root is read-only in " << name_fd(brp.second.fd()));
 		BEESCOUNT(dedup_workaround_btrfs_send);
 		return false;
 	}

-	auto brp = brp_in;
 	brp.first.fd(shared_from_this());
-	brp.second.fd(shared_from_this());

 	BEESTOOLONG("dedup " << brp);

 	BeesAddress first_addr(brp.first.fd(), brp.first.begin());
 	BeesAddress second_addr(brp.second.fd(), brp.second.begin());

+	BEESLOGINFO("dedup: src " << pretty(brp.first.size())  << " [" << to_hex(brp.first.begin())  << ".." << to_hex(brp.first.end())  << "] {" << first_addr  << "} " << name_fd(brp.first.fd()) << "\n"
+		 << "       dst " << pretty(brp.second.size()) << " [" << to_hex(brp.second.begin()) << ".." << to_hex(brp.second.end()) << "] {" << second_addr << "} " << name_fd(brp.second.fd()));
+
 	if (first_addr.get_physical_or_zero() == second_addr.get_physical_or_zero()) {
 		BEESLOGTRACE("equal physical addresses in dedup");
 		BEESCOUNT(bug_dedup_same_physical);
@@ -217,18 +219,8 @@ BeesContext::dedup(const BeesRangePair &brp_in)
 	THROW_CHECK1(invalid_argument, brp, brp.first.size() == brp.second.size());

 	BEESCOUNT(dedup_try);
-
-	BEESNOTE("waiting to dedup " << brp);
-	const auto lock = MultiLocker::get_lock("dedupe");
-
 	Timer dedup_timer;
-
-	BEESLOGINFO("dedup: src " << pretty(brp.first.size())  << " [" << to_hex(brp.first.begin())  << ".." << to_hex(brp.first.end())  << "] {" << first_addr  << "} " << name_fd(brp.first.fd()) << "\n"
-		 << "       dst " << pretty(brp.second.size()) << " [" << to_hex(brp.second.begin()) << ".." << to_hex(brp.second.end()) << "] {" << second_addr << "} " << name_fd(brp.second.fd()));
-	BEESNOTE("dedup: src " << pretty(brp.first.size())  << " [" << to_hex(brp.first.begin())  << ".." << to_hex(brp.first.end())  << "] {" << first_addr  << "} " << name_fd(brp.first.fd()) << "\n"
-		 << "       dst " << pretty(brp.second.size()) << " [" << to_hex(brp.second.begin()) << ".." << to_hex(brp.second.end()) << "] {" << second_addr << "} " << name_fd(brp.second.fd()));
-
-	const bool rv = btrfs_extent_same(brp.first.fd(), brp.first.begin(), brp.first.size(), brp.second.fd(), brp.second.begin());
+	bool rv = btrfs_extent_same(brp.first.fd(), brp.first.begin(), brp.first.size(), brp.second.fd(), brp.second.begin());
 	BEESCOUNTADD(dedup_ms, dedup_timer.age() * 1000);

 	if (rv) {
@@ -300,15 +292,6 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 	BEESTRACE("scan extent " << e);
 	BEESCOUNT(scan_extent);

-	// EXPERIMENT:  Don't bother with tiny extents unless they are the entire file.
-	// We'll take a tiny extent at BOF or EOF but not in between.
-	if (e.begin() && e.size() < 128 * 1024 && e.end() != Stat(bfr.fd()).st_size) {
-		BEESCOUNT(scan_extent_tiny);
-		// This doesn't work properly with the current architecture,
-		// so we don't do an early return here.
-		// return bfr;
-	}
-
 	// We keep moving this method around
 	auto m_ctx = shared_from_this();

@@ -334,23 +317,29 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 	if (e.flags() & Extent::PREALLOC) {
 		// Prealloc is all zero and we replace it with a hole.
 		// No special handling is required here.  Nuke it and move on.
-		BEESLOGINFO("prealloc extent " << e);
-		// Must not extend past EOF
-		auto extent_size = min(e.end(), bfr.file_size()) - e.begin();
-		// Must hold tmpfile until dedupe is done
-		const auto tmpfile = m_ctx->tmpfile();
-		BeesFileRange prealloc_bfr(tmpfile->make_hole(extent_size));
-		// Apparently they can both extend past EOF
-		BeesFileRange copy_bfr(bfr.fd(), e.begin(), e.begin() + extent_size);
-		BeesRangePair brp(prealloc_bfr, copy_bfr);
-		// Raw dedupe here - nothing else to do with this extent, nothing to merge with
-		if (m_ctx->dedup(brp)) {
-			BEESCOUNT(dedup_prealloc_hit);
-			BEESCOUNTADD(dedup_prealloc_bytes, e.size());
-			return bfr;
-		} else {
-			BEESCOUNT(dedup_prealloc_miss);
-		}
+		Task(
+			"dedup_prealloc",
+			[m_ctx, bfr, e]() {
+				BEESLOGINFO("prealloc extent " << e);
+				// Must not extend past EOF
+				auto extent_size = min(e.end(), bfr.file_size()) - e.begin();
+				// Must hold tmpfile until dedupe is done
+				auto tmpfile = m_ctx->tmpfile();
+				BeesFileRange prealloc_bfr(tmpfile->make_hole(extent_size));
+				// Apparently they can both extend past EOF
+				BeesFileRange copy_bfr(bfr.fd(), e.begin(), e.begin() + extent_size);
+				BeesRangePair brp(prealloc_bfr, copy_bfr);
+				// Raw dedupe here - nothing else to do with this extent, nothing to merge with
+				if (m_ctx->dedup(brp)) {
+					BEESCOUNT(dedup_prealloc_hit);
+					BEESCOUNTADD(dedup_prealloc_bytes, e.size());
+					// return bfr;
+				} else {
+					BEESCOUNT(dedup_prealloc_miss);
+				}
+			}
+		).run();
+		return bfr; // if dedupe success, which we now blindly assume
 	}

 	// OK we need to read extent now
@@ -602,6 +591,57 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 		BEESCOUNT(scan_zero_compressed);
 	}

+	// Turning this off because it's a waste of time on small extents
+	// and it's incorrect for large extents.
+#if 0
+	// If the extent contains obscured blocks, and we can find no
+	// other refs to the extent that reveal those blocks, nuke the incoming extent.
+	// Don't rewrite extents that are bigger than the maximum FILE_EXTENT_SAME size
+	// because we can't make extents that large with dedupe.
+	// Don't rewrite small extents because it is a waste of time without being
+	// able to combine them into bigger extents.
+	if (!rewrite_extent && (e.flags() & Extent::OBSCURED) && (e.physical_len() > BLOCK_SIZE_MAX_COMPRESSED_EXTENT) && (e.physical_len() < BLOCK_SIZE_MAX_EXTENT_SAME)) {
+		BEESCOUNT(scan_obscured);
+		BEESNOTE("obscured extent " << e);
+		// We have to map all the source blocks to see if any of them
+		// (or all of them aggregated) provide a path through the FS to the blocks
+		BeesResolver br(m_ctx, BeesAddress(e, e.begin()));
+		BeesBlockData ref_bbd(bfr.fd(), bfr.begin(), min(BLOCK_SIZE_SUMS, bfr.size()));
+		// BEESLOG("ref_bbd " << ref_bbd);
+		auto bfr_set = br.find_all_matches(ref_bbd);
+		bool non_obscured_extent_found = false;
+		set<off_t> blocks_to_find;
+		for (off_t j = 0; j < e.physical_len(); j += BLOCK_SIZE_CLONE) {
+			blocks_to_find.insert(j);
+		}
+		// Don't bother if saving less than 1%
+		auto maximum_hidden_count = blocks_to_find.size() / 100;
+		for (auto i : bfr_set) {
+			BtrfsExtentWalker ref_ew(bfr.fd(), bfr.begin(), m_ctx->root_fd());
+			Extent ref_e = ref_ew.current();
+			// BEESLOG("\tref_e " << ref_e);
+			THROW_CHECK2(out_of_range, ref_e, e, ref_e.offset() + ref_e.logical_len() <= e.physical_len());
+			for (off_t j = ref_e.offset(); j < ref_e.offset() + ref_e.logical_len(); j += BLOCK_SIZE_CLONE) {
+				blocks_to_find.erase(j);
+			}
+			if (blocks_to_find.size() <= maximum_hidden_count) {
+				BEESCOUNT(scan_obscured_miss);
+				BEESLOG("Found references to all but " << blocks_to_find.size() << " blocks");
+				non_obscured_extent_found = true;
+				break;
+			} else {
+				BEESCOUNT(scan_obscured_hit);
+				// BEESLOG("blocks_to_find: " << blocks_to_find.size() << " from " << *blocks_to_find.begin() << ".." << *blocks_to_find.rbegin());
+			}
+		}
+		if (!non_obscured_extent_found) {
+			// BEESLOG("No non-obscured extents found");
+			rewrite_extent = true;
+			BEESCOUNT(scan_obscured_rewrite);
+		}
+	}
+#endif
+
 	// If we deduped any blocks then we must rewrite the remainder of the extent
 	if (!noinsert_set.empty()) {
 		rewrite_extent = true;
@@ -668,34 +708,27 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 		BEESLOGINFO("scan: " << pretty(e.size()) << " " << to_hex(e.begin()) << " [" << bar << "] " << to_hex(e.end()) << ' ' << name_fd(bfr.fd()));
 	}

-	// Costs 10% on benchmarks
-	// bees_unreadahead(bfr.fd(), bfr.begin(), bfr.size());
 	return bfr;
 }

-shared_ptr<Exclusion>
-BeesContext::get_inode_mutex(const uint64_t inode)
+BeesFileRange
+BeesContext::scan_forward(const BeesFileRange &bfr)
 {
-	return m_inode_locks(inode);
-}
-
-bool
-BeesContext::scan_forward(const BeesFileRange &bfr_in)
-{
-	BEESTRACE("scan_forward " << bfr_in);
+	// What are we doing here?
+	BEESTRACE("scan_forward " << bfr);
 	BEESCOUNT(scan_forward);

 	Timer scan_timer;

 	// Silently filter out blacklisted files
-	if (is_blacklisted(bfr_in.fid())) {
+	if (is_blacklisted(bfr.fid())) {
 		BEESCOUNT(scan_blacklisted);
-		return false;
+		return bfr;
 	}

+	BEESNOTE("scan open " << bfr);
+
 	// Reconstitute FD
-	BEESNOTE("scan open " << bfr_in);
-	auto bfr = bfr_in;
 	bfr.fd(shared_from_this());

 	BEESNOTE("scan extent " << bfr);
@@ -704,35 +737,31 @@ BeesContext::scan_forward(const BeesFileRange &bfr_in)
 	if (!bfr.fd()) {
 		// BEESLOGINFO("No FD in " << root_path() << " for " << bfr);
 		BEESCOUNT(scan_no_fd);
-		return false;
+		return bfr;
 	}

 	// Sanity check
 	if (bfr.begin() >= bfr.file_size()) {
 		BEESLOGWARN("past EOF: " << bfr);
 		BEESCOUNT(scan_eof);
-		return false;
+		return bfr;
 	}

 	BtrfsExtentWalker ew(bfr.fd(), bfr.begin(), root_fd());

+	BeesFileRange return_bfr(bfr);
+
 	Extent e;
-	bool start_over = false;
 	catch_all([&]() {
-		while (!stop_requested() && !start_over) {
+		while (!stop_requested()) {
 			e = ew.current();

 			catch_all([&]() {
 				uint64_t extent_bytenr = e.bytenr();
-				auto extent_mutex = m_extent_locks(extent_bytenr);
-				const auto extent_lock = extent_mutex->try_lock(Task::current_task());
-				if (!extent_lock) {
-					// BEESLOGDEBUG("Deferring extent bytenr " << to_hex(extent_bytenr) << " from " << bfr);
-					BEESCOUNT(scanf_deferred_extent);
-					start_over = true;
-				}
+				BEESNOTE("waiting for extent bytenr " << to_hex(extent_bytenr));
+				auto extent_lock = m_extent_lock_set.make_lock(extent_bytenr);
 				Timer one_extent_timer;
-				scan_one_extent(bfr, e);
+				return_bfr = scan_one_extent(bfr, e);
 				BEESCOUNTADD(scanf_extent_ms, one_extent_timer.age() * 1000);
 				BEESCOUNT(scanf_extent);
 			});
@@ -750,13 +779,51 @@ BeesContext::scan_forward(const BeesFileRange &bfr_in)
 	BEESCOUNTADD(scanf_total_ms, scan_timer.age() * 1000);
 	BEESCOUNT(scanf_total);

-	return start_over;
+	return return_bfr;
 }

 BeesResolveAddrResult::BeesResolveAddrResult()
 {
 }

+void
+BeesContext::wait_for_balance()
+{
+	if (!BEES_SERIALIZE_BALANCE) {
+		return;
+	}
+
+	Timer balance_timer;
+	BEESNOTE("WORKAROUND: waiting for balance to stop");
+	while (true) {
+		btrfs_ioctl_balance_args args;
+		memset_zero<btrfs_ioctl_balance_args>(&args);
+		const int ret = ioctl(root_fd(), BTRFS_IOC_BALANCE_PROGRESS, &args);
+		if (ret < 0) {
+			// Either can't get balance status or not running, exit either way
+			break;
+		}
+
+		if (!(args.state & BTRFS_BALANCE_STATE_RUNNING)) {
+			// Balance not running, doesn't matter if paused or cancelled
+			break;
+		}
+
+		BEESLOGDEBUG("WORKAROUND: Waiting " << balance_timer << "s for balance to stop");
+		unique_lock<mutex> lock(m_abort_mutex);
+		if (m_abort_requested) {
+			// Force the calling function to stop.	We cannot
+			// proceed to LOGICAL_INO while balance is running
+			// until the bugs are fixed, and it's probably
+			// not going to be particularly fast to have
+			// both bees and balance banging the disk anyway.
+			BeesTracer::set_silent();
+			throw std::runtime_error("Stop requested while balance running");
+		}
+		m_abort_condvar.wait_for(lock, chrono::duration<double>(BEES_BALANCE_POLL_INTERVAL));
+	}
+}
+
 BeesResolveAddrResult
 BeesContext::resolve_addr_uncached(BeesAddress addr)
 {
@@ -768,21 +835,37 @@ BeesContext::resolve_addr_uncached(BeesAddress addr)
 	// transaction latency, competing threads, and freeze/SIGSTOP
 	// pausing the bees process.

-        BtrfsIoctlLogicalInoArgs log_ino(addr.get_physical_or_zero());
+	// There can be only one of these running at a time, or some lingering
+	// backref bug will kill the whole system.  Also it looks like there
+	// are so many locks held while LOGICAL_INO runs that there is no
+	// point in trying to run two of them on the same filesystem.
+	// ...but it works most of the time, and the performance hit from
+	// not running resolve in multiple threads is significant.
+	// But "most of the time" really just means "between forced reboots",
+	// and with recent improvements in kernel uptime, this is now in the
+	// top 3 crash causes.
+	static mutex s_resolve_mutex;
+	unique_lock<mutex> lock(s_resolve_mutex, defer_lock);
+	if (BEES_SERIALIZE_RESOLVE) {
+		BEESNOTE("waiting to resolve addr " << addr);
+		lock.lock();
+	}
+
+	// Is there a bug where resolve and balance cause a crash (BUG_ON at fs/btrfs/ctree.c:1227)?
+	// Apparently yes, and more than one.
+	// Wait for the balance to finish before we run LOGICAL_INO
+	wait_for_balance();

 	// Time how long this takes
 	Timer resolve_timer;

+        BtrfsIoctlLogicalInoArgs log_ino(addr.get_physical_or_zero());
+
+	// Get this thread's system CPU usage
 	struct rusage usage_before;
+	DIE_IF_MINUS_ONE(getrusage(RUSAGE_THREAD, &usage_before));
+
 	{
-		BEESNOTE("waiting to resolve addr " << addr << " with LOGICAL_INO");
-		const auto lock = MultiLocker::get_lock("logical_ino");
-
-		// Get this thread's system CPU usage
-		DIE_IF_MINUS_ONE(getrusage(RUSAGE_THREAD, &usage_before));
-
-		// Restart timer now that we're no longer waiting for lock
-		resolve_timer.reset();
 		BEESTOOLONG("Resolving addr " << addr << " in " << root_path() << " refs " << log_ino.m_iors.size());
 		BEESNOTE("resolving addr " << addr << " with LOGICAL_INO");
 		if (log_ino.do_ioctl_nothrow(root_fd())) {
@@ -797,22 +880,22 @@ BeesContext::resolve_addr_uncached(BeesAddress addr)
 	struct rusage usage_after;
 	DIE_IF_MINUS_ONE(getrusage(RUSAGE_THREAD, &usage_after));

-	const double sys_usage_delta =
+	double sys_usage_delta =
 		(usage_after.ru_stime.tv_sec + usage_after.ru_stime.tv_usec / 1000000.0) -
 		(usage_before.ru_stime.tv_sec + usage_before.ru_stime.tv_usec / 1000000.0);

-	const double user_usage_delta =
+	double user_usage_delta =
 		(usage_after.ru_utime.tv_sec + usage_after.ru_utime.tv_usec / 1000000.0) -
 		(usage_before.ru_utime.tv_sec + usage_before.ru_utime.tv_usec / 1000000.0);

-	const auto rt_age = resolve_timer.age();
+	auto rt_age = resolve_timer.age();

 	BeesResolveAddrResult rv;

 	// Avoid performance problems - pretend resolve failed if there are too many refs
 	const size_t rv_count = log_ino.m_iors.size();
 	if (rv_count < BEES_MAX_EXTENT_REF_COUNT) {
-		rv.m_biors = vector<BtrfsInodeOffsetRoot>(log_ino.m_iors.begin(), log_ino.m_iors.end());
+		rv.m_biors = log_ino.m_iors;
 	} else {
 		BEESLOGINFO("addr " << addr << " refs " << rv_count << " overflows configured ref limit " << BEES_MAX_EXTENT_REF_COUNT);
 		BEESCOUNT(resolve_overflow);
@@ -829,13 +912,12 @@ BeesContext::resolve_addr_uncached(BeesAddress addr)

 	// Count how many times this happens so we can figure out how
 	// important this case is
-	static const size_t max_logical_ino_v1_refs = 2730; // (65536 - header_len) / (sizeof(uint64_t) * 3)
-	static size_t most_refs_ever = max_logical_ino_v1_refs;
+	static size_t most_refs_ever = 2730;
 	if (rv_count > most_refs_ever) {
 		BEESLOGINFO("addr " << addr << " refs " << rv_count << " beats previous record " << most_refs_ever);
 		most_refs_ever = rv_count;
 	}
-	if (rv_count > max_logical_ino_v1_refs) {
+	if (rv_count > 2730) {
 		BEESCOUNT(resolve_large);
 	}

@@ -855,14 +937,6 @@ BeesContext::invalidate_addr(BeesAddress addr)
 	return m_resolve_cache.expire(addr.get_physical_or_zero());
 }

-void
-BeesContext::resolve_cache_clear()
-{
-	BEESNOTE("clearing resolve cache with size " << m_resolve_cache.size());
-	BEESLOGDEBUG("Clearing resolve cache with size " << m_resolve_cache.size());
-	return m_resolve_cache.clear();
-}
-
 void
 BeesContext::set_root_fd(Fd fd)
 {
@@ -882,21 +956,18 @@ BeesContext::set_root_fd(Fd fd)
 	});
 }

+const char *
+BeesHalt::what() const noexcept
+{
+	return "bees stop requested";
+}
+
 void
 BeesContext::start()
 {
 	BEESLOGNOTICE("Starting bees main loop...");
 	BEESNOTE("starting BeesContext");

-	m_extent_locks.func([](uint64_t bytenr) {
-		return make_shared<Exclusion>();
-		(void)bytenr;
-	});
-	m_inode_locks.func([](const uint64_t fid) {
-		return make_shared<Exclusion>();
-		(void)fid;
-	});
-	m_progress_thread = make_shared<BeesThread>("progress_report");
 	m_progress_thread = make_shared<BeesThread>("progress_report");
 	m_status_thread = make_shared<BeesThread>("status_report");
 	m_progress_thread->exec([=]() {
@@ -931,37 +1002,17 @@ BeesContext::stop()
 	Timer stop_timer;
 	BEESLOGNOTICE("Stopping bees...");

-	// Stop TaskConsumers without hurting the Task objects that carry the Crawl state
+	BEESNOTE("aborting blocked tasks");
+	BEESLOGDEBUG("Aborting blocked tasks");
+	unique_lock<mutex> abort_lock(m_abort_mutex);
+	m_abort_requested = true;
+	m_abort_condvar.notify_all();
+	abort_lock.unlock();
+
 	BEESNOTE("pausing work queue");
 	BEESLOGDEBUG("Pausing work queue");
-	TaskMaster::pause();
+	TaskMaster::set_thread_count(0);

-	// Stop crawlers first so we get good progress persisted on disk
-	BEESNOTE("stopping crawlers and flushing crawl state");
-	BEESLOGDEBUG("Stopping crawlers and flushing crawl state");
-	if (m_roots) {
-		m_roots->stop_request();
-	} else {
-		BEESLOGDEBUG("Crawlers not running");
-	}
-
-	BEESNOTE("stopping and flushing hash table");
-	BEESLOGDEBUG("Stopping and flushing hash table");
-	if (m_hash_table) {
-		m_hash_table->stop_request();
-	} else {
-		BEESLOGDEBUG("Hash table not running");
-	}
-
-	// Wait for crawler writeback to finish
-	BEESNOTE("waiting for crawlers to stop");
-	BEESLOGDEBUG("Waiting for crawlers to stop");
-	if (m_roots) {
-		m_roots->stop_wait();
-	}
-
-	// It is now no longer possible to update progress in $BEESHOME,
-	// so we can destroy Tasks with reckless abandon.
 	BEESNOTE("setting stop_request flag");
 	BEESLOGDEBUG("Setting stop_request flag");
 	unique_lock<mutex> lock(m_stop_mutex);
@@ -969,20 +1020,49 @@ BeesContext::stop()
 	m_stop_condvar.notify_all();
 	lock.unlock();

-	// Wait for hash table flush to complete
-	BEESNOTE("waiting for hash table flush to stop");
-	BEESLOGDEBUG("waiting for hash table flush to stop");
-	if (m_hash_table) {
-		m_hash_table->stop_wait();
+	// Stop crawlers first so we get good progress persisted on disk
+	BEESNOTE("stopping crawlers");
+	BEESLOGDEBUG("Stopping crawlers");
+	if (m_roots) {
+		m_roots->stop();
+		m_roots.reset();
+	} else {
+		BEESLOGDEBUG("Crawlers not running");
 	}

-	// Write status once with this message...
-	BEESNOTE("stopping status thread at " << stop_timer << " sec");
-	lock.lock();
-	m_stop_condvar.notify_all();
-	lock.unlock();
+	BEESNOTE("cancelling work queue");
+	BEESLOGDEBUG("Cancelling work queue");
+	TaskMaster::cancel();

-	// then wake the thread up one more time to exit the while loop
+	BEESNOTE("stopping hash table");
+	BEESLOGDEBUG("Stopping hash table");
+	if (m_hash_table) {
+		m_hash_table->stop();
+		m_hash_table.reset();
+	} else {
+		BEESLOGDEBUG("Hash table not running");
+	}
+
+	BEESNOTE("closing tmpfiles");
+	BEESLOGDEBUG("Closing tmpfiles");
+	m_tmpfile_pool.clear();
+
+	BEESNOTE("closing FD caches");
+	BEESLOGDEBUG("Closing FD caches");
+	if (m_fd_cache) {
+		m_fd_cache->clear();
+		BEESNOTE("destroying FD caches");
+		BEESLOGDEBUG("Destroying FD caches");
+		m_fd_cache.reset();
+	}
+
+	BEESNOTE("waiting for progress thread");
+	BEESLOGDEBUG("Waiting for progress thread");
+	m_progress_thread->join();
+
+	// XXX: nobody can see this BEESNOTE because we are killing the
+	// thread that publishes it
+	BEESNOTE("waiting for status thread");
 	BEESLOGDEBUG("Waiting for status thread");
 	lock.lock();
 	m_stop_status = true;
@@ -991,9 +1071,6 @@ BeesContext::stop()
 	m_status_thread->join();

 	BEESLOGNOTICE("bees stopped in " << stop_timer << " sec");
-
-	// Skip all destructors, do not pass GO, do not collect atexit() functions
-	_exit(EXIT_SUCCESS);
 }

 bool
@@ -1034,7 +1111,13 @@ shared_ptr<BeesTempFile>
 BeesContext::tmpfile()
 {
 	unique_lock<mutex> lock(m_stop_mutex);
+
+	if (m_stop_requested) {
+		throw BeesHalt();
+	}
+
 	lock.unlock();
+
 	return m_tmpfile_pool();
 }

@@ -1042,6 +1125,9 @@ shared_ptr<BeesFdCache>
 BeesContext::fd_cache()
 {
 	unique_lock<mutex> lock(m_stop_mutex);
+	if (m_stop_requested) {
+		throw BeesHalt();
+	}
 	if (!m_fd_cache) {
 		m_fd_cache = make_shared<BeesFdCache>(shared_from_this());
 	}
@@ -1052,6 +1138,9 @@ shared_ptr<BeesRoots>
 BeesContext::roots()
 {
 	unique_lock<mutex> lock(m_stop_mutex);
+	if (m_stop_requested) {
+		throw BeesHalt();
+	}
 	if (!m_roots) {
 		m_roots = make_shared<BeesRoots>(shared_from_this());
 	}
@@ -1062,6 +1151,9 @@ shared_ptr<BeesHashTable>
 BeesContext::hash_table()
 {
 	unique_lock<mutex> lock(m_stop_mutex);
+	if (m_stop_requested) {
+		throw BeesHalt();
+	}
 	if (!m_hash_table) {
 		m_hash_table = make_shared<BeesHashTable>(shared_from_this(), "beeshash.dat");
 	}
--- a/src/bees-hash.cc
+++ b/src/bees-hash.cc
@@ -3,9 +3,9 @@
 #include "crucible/city.h"
 #include "crucible/crc64.h"
 #include "crucible/string.h"
-#include "crucible/uname.h"

 #include <algorithm>
+#include <random>

 #include <sys/mman.h>

@@ -106,6 +106,12 @@ BeesHashTable::flush_dirty_extent(uint64_t extent_index)
 	BEESNOTE("flushing extent #" << extent_index << " of " << m_extents << " extents");

 	auto lock = lock_extent_by_index(extent_index);
+
+	// Not dirty, nothing to do
+	if (!m_extent_metadata.at(extent_index).m_dirty) {
+		return false;
+	}
+
 	bool wrote_extent = false;

 	catch_all([&]() {
@@ -117,7 +123,10 @@ BeesHashTable::flush_dirty_extent(uint64_t extent_index)
 		THROW_CHECK2(out_of_range, dirty_extent_end, dirty_extent, dirty_extent_end - dirty_extent == BLOCK_SIZE_HASHTAB_EXTENT);
 		BEESTOOLONG("pwrite(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
 		// Copy the extent because we might be stuck writing for a while
-		ByteVector extent_copy(dirty_extent, dirty_extent_end);
+		vector<uint8_t> extent_copy(dirty_extent, dirty_extent_end);
+
+		// Mark extent non-dirty while we still hold the lock
+		m_extent_metadata.at(extent_index).m_dirty = false;

 		// Release the lock
 		lock.unlock();
@@ -130,10 +139,6 @@ BeesHashTable::flush_dirty_extent(uint64_t extent_index)
 		// const size_t dirty_extent_size   = dirty_extent_end - dirty_extent;
 		// bees_unreadahead(m_fd, dirty_extent_offset, dirty_extent_size);

-		// Mark extent clean if write was successful
-		lock.lock();
-		m_extent_metadata.at(extent_index).m_dirty = false;
-
 		wrote_extent = true;
 	});

@@ -147,28 +152,25 @@ BeesHashTable::flush_dirty_extents(bool slowly)

 	uint64_t wrote_extents = 0;
 	for (size_t extent_index = 0; extent_index < m_extents; ++extent_index) {
-		// Skip the clean ones
-		auto lock = lock_extent_by_index(extent_index);
-		if (!m_extent_metadata.at(extent_index).m_dirty) {
-			continue;
-		}
-		lock.unlock();
-
 		if (flush_dirty_extent(extent_index)) {
 			++wrote_extents;
 			if (slowly) {
-				if (m_stop_requested) {
-					slowly = false;
-					continue;
-				}
 				BEESNOTE("flush rate limited after extent #" << extent_index << " of " << m_extents << " extents");
 				chrono::duration<double> sleep_time(m_flush_rate_limit.sleep_time(BLOCK_SIZE_HASHTAB_EXTENT));
 				unique_lock<mutex> lock(m_stop_mutex);
+				if (m_stop_requested) {
+					BEESLOGDEBUG("Stop requested in hash table flush_dirty_extents");
+					// This function is called by another thread with !slowly,
+					// so we just get out of the way here.
+					break;
+				}
 				m_stop_condvar.wait_for(lock, sleep_time);
 			}
 		}
 	}
-	BEESLOGINFO("Flushed " << wrote_extents << " of " << m_extents << " hash table extents");
+	if (!slowly) {
+		BEESLOGINFO("Flushed " << wrote_extents << " of " << m_extents << " extents");
+	}
 	return wrote_extents;
 }

@@ -202,28 +204,10 @@ BeesHashTable::writeback_loop()
 			m_dirty_condvar.wait(lock);
 		}
 	}
-
-	// The normal loop exits at the end of one iteration when stop requested,
-	// but stop request will be in the middle of the loop, and some extents
-	// will still be dirty.  Run the flush loop again to get those.
-	BEESNOTE("flushing hash table, round 2");
-	BEESLOGDEBUG("Flushing hash table");
-	flush_dirty_extents(false);
-
-	// If there were any Tasks still running, they may have updated
-	// some hash table pages during the second flush.  These updates
-	// will be lost.  The Tasks will be repeated on the next run because
-	// they were not completed prior to the stop request, and the
-	// Crawl progress was already flushed out before the Hash table
-	// started writing, so nothing is really lost here.
-
 	catch_all([&]() {
 		// trigger writeback on our way out
-#if 0
-		// seems to trigger huge latency spikes
-		BEESTOOLONG("unreadahead hash table size " <<
-		pretty(m_size)); bees_unreadahead(m_fd, 0, m_size);
-#endif
+		BEESTOOLONG("unreadahead hash table size " << pretty(m_size));
+		bees_unreadahead(m_fd, 0, m_size);
 	});
 	BEESLOGDEBUG("Exited hash table writeback_loop");
 }
@@ -242,7 +226,6 @@ percent(size_t num, size_t den)
 void
 BeesHashTable::prefetch_loop()
 {
-	Uname uname;
 	bool not_locked = true;
 	while (!m_stop_requested) {
 		size_t width = 64;
@@ -336,7 +319,6 @@ BeesHashTable::prefetch_loop()
 		graph_blob << "Now:     " << format_time(time(NULL)) << "\n";
 		graph_blob << "Uptime:  " << m_ctx->total_timer().age() << " seconds\n";
 		graph_blob << "Version: " << BEES_VERSION << "\n";
-		graph_blob << "Kernel:  " << uname.sysname << " " << uname.release << " " << uname.machine << " " << uname.version << "\n";

 		graph_blob
 			<< "\nHash table page occupancy histogram (" << occupied_count << "/" << total_count << " cells occupied, " << (occupied_count * 100 / total_count) << "%)\n"
@@ -556,8 +538,6 @@ BeesHashTable::push_front_hash_addr(HashType hash, AddrType addr)
 	return found;
 }

-thread_local uniform_int_distribution<size_t> BeesHashTable::tl_distribution(0, c_cells_per_bucket - 1);
-
 /// Insert a hash entry at some unspecified point in the list.
 /// If entry is already present in list, returns true and does not
 /// modify list.  If entry is not present in list, returns false and
@@ -575,7 +555,9 @@ BeesHashTable::push_random_hash_addr(HashType hash, AddrType addr)
 	Cell *ip = find(er.first, er.second, mv);
 	bool found = (ip < er.second);

-	const auto pos = tl_distribution(bees_generator);
+	thread_local default_random_engine generator;
+	thread_local uniform_int_distribution<int> distribution(0, c_cells_per_bucket - 1);
+	auto pos = distribution(generator);

 	int case_cond = 0;
 #if 0
@@ -807,7 +789,7 @@ BeesHashTable::~BeesHashTable()
 }

 void
-BeesHashTable::stop_request()
+BeesHashTable::stop()
 {
 	BEESNOTE("stopping BeesHashTable threads");
 	BEESLOGDEBUG("Stopping BeesHashTable threads");
@@ -821,11 +803,7 @@ BeesHashTable::stop_request()
 	unique_lock<mutex> dirty_lock(m_dirty_mutex);
 	m_dirty_condvar.notify_all();
 	dirty_lock.unlock();
-}

-void
-BeesHashTable::stop_wait()
-{
 	BEESNOTE("waiting for hash_prefetch thread");
 	BEESLOGDEBUG("Waiting for hash_prefetch thread");
 	m_prefetch_thread.join();
@@ -834,5 +812,11 @@ BeesHashTable::stop_wait()
 	BEESLOGDEBUG("Waiting for hash_writeback thread");
 	m_writeback_thread.join();

+	if (m_cell_ptr && m_size) {
+		BEESLOGDEBUG("Flushing hash table");
+		BEESNOTE("flushing hash table");
+		flush_dirty_extents(false);
+	}
+
 	BEESLOGDEBUG("BeesHashTable stopped");
 }
--- a/src/bees-resolve.cc
+++ b/src/bees-resolve.cc
@@ -385,15 +385,14 @@ BeesResolver::for_each_extent_ref(BeesBlockData bbd, function<bool(const BeesFil
 }

 BeesFileRange
-BeesResolver::replace_dst(const BeesFileRange &dst_bfr_in)
+BeesResolver::replace_dst(const BeesFileRange &dst_bfr)
 {
-	BEESTRACE("replace_dst dst_bfr " << dst_bfr_in);
+	BEESTRACE("replace_dst dst_bfr " << dst_bfr);
 	BEESCOUNT(replacedst_try);

 	// Open dst, reuse it for all src
-	BEESNOTE("Opening dst bfr " << dst_bfr_in);
-	BEESTRACE("Opening dst bfr " << dst_bfr_in);
-	auto dst_bfr = dst_bfr_in;
+	BEESNOTE("Opening dst bfr " << dst_bfr);
+	BEESTRACE("Opening dst bfr " << dst_bfr);
 	dst_bfr.fd(m_ctx);

 	BeesFileRange overlap_bfr;
@@ -401,11 +400,10 @@ BeesResolver::replace_dst(const BeesFileRange &dst_bfr_in)

 	BeesBlockData bbd(dst_bfr);

-	for_each_extent_ref(bbd, [&](const BeesFileRange &src_bfr_in) -> bool {
+	for_each_extent_ref(bbd, [&](const BeesFileRange &src_bfr) -> bool {
 		// Open src
-		BEESNOTE("Opening src bfr " << src_bfr_in);
-		BEESTRACE("Opening src bfr " << src_bfr_in);
-		auto src_bfr = src_bfr_in;
+		BEESNOTE("Opening src bfr " << src_bfr);
+		BEESTRACE("Opening src bfr " << src_bfr);
 		src_bfr.fd(m_ctx);

 		if (dst_bfr.overlaps(src_bfr)) {
@@ -420,7 +418,7 @@ BeesResolver::replace_dst(const BeesFileRange &dst_bfr_in)
 			BEESCOUNT(replacedst_same);
 			// stop looping here, all the other srcs will probably fail this test too
 			BeesTracer::set_silent();
-			throw runtime_error("FIXME: too many duplicate candidates, bailing out here");
+			throw runtime_error("FIXME: bailing out here, need to fix this further up the call stack");
 		}

 		// Make pair(src, dst)
--- a/src/bees-roots.cc
+++ b/src/bees-roots.cc
--- a/src/bees-trace.cc
+++ b/src/bees-trace.cc
@@ -111,7 +111,9 @@ void
 BeesNote::set_name(const string &name)
 {
 	tl_name = name;
-	pthread_setname(name);
+	catch_all([&]() {
+		DIE_IF_MINUS_ERRNO(pthread_setname_np(pthread_self(), name.c_str()));
+	});
 }

 string
@@ -132,12 +134,19 @@ BeesNote::get_name()
 	}

 	// OK try the pthread name next.
+	char buf[24];
+	memset(buf, '\0', sizeof(buf));
+	int err = pthread_getname_np(pthread_self(), buf, sizeof(buf));
+	if (err) {
+		return string("pthread_getname_np: ") + strerror(err);
+	}
+	buf[sizeof(buf) - 1] = '\0';

 	// thread_getname_np returns process name
 	// ...by default?  ...for the main thread?
 	// ...except during exception handling?
 	// ...randomly?
-	return pthread_getname();
+	return buf;
 }

 BeesNote::ThreadStatusMap
--- a/src/bees-types.cc
+++ b/src/bees-types.cc
@@ -238,6 +238,42 @@ BeesFileRange::overlaps(const BeesFileRange &that) const
 	return false;
 }

+bool
+BeesFileRange::coalesce(const BeesFileRange &that)
+{
+	// Let's define coalesce-with-null as identity,
+	// and coalesce-null-with-null as coalesced
+	if (!*this) {
+		operator=(that);
+		return true;
+	}
+	if (!that) {
+		return true;
+	}
+
+	// Can't coalesce different files
+	if (!is_same_file(that)) return false;
+
+	pair<uint64_t, uint64_t> a(m_begin, m_end);
+	pair<uint64_t, uint64_t> b(that.m_begin, that.m_end);
+
+	// range a starts lower than or equal b
+	if (b.first < a.first) {
+		swap(a, b);
+	}
+
+	// if b starts within a, they overlap
+	// (and the intersecting region is b.first..min(a.second, b.second))
+	// (and the union region is a.first..max(a.second, b.second))
+	if (b.first >= a.first && b.first < a.second) {
+		m_begin = a.first;
+		m_end = max(a.second, b.second);
+		return true;
+	}
+
+	return false;
+}
+
 BeesFileRange::operator BeesBlockData() const
 {
 	BEESTRACE("operator BeesBlockData " << *this);
@@ -251,7 +287,7 @@ BeesFileRange::fd() const
 }

 Fd
-BeesFileRange::fd(const shared_ptr<BeesContext> &ctx)
+BeesFileRange::fd(const shared_ptr<BeesContext> &ctx) const
 {
 	// If we don't have a fid we can't do much here
 	if (m_fid) {
--- a/src/bees.cc
+++ b/src/bees.cc
@@ -215,35 +215,38 @@ BeesTooLong::operator=(const func_type &f)
 }

 void
-bees_readahead(int const fd, const off_t offset, const size_t size)
+bees_sync(int fd)
+{
+	Timer sync_timer;
+	BEESNOTE("syncing " << name_fd(fd));
+	BEESTOOLONG("syncing " << name_fd(fd));
+	DIE_IF_NON_ZERO(fsync(fd));
+	BEESCOUNT(sync_count);
+	BEESCOUNTADD(sync_ms, sync_timer.age() * 1000);
+}
+
+void
+bees_readahead(int const fd, off_t offset, size_t size)
 {
 	Timer readahead_timer;
 	BEESNOTE("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
 	BEESTOOLONG("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
-#if 0
 	// In the kernel, readahead() is identical to posix_fadvise(..., POSIX_FADV_DONTNEED)
 	DIE_IF_NON_ZERO(readahead(fd, offset, size));
-#else
+#if 0
 	// Make sure this data is in page cache by brute force
-	// This isn't necessary and it might even be slower,
-	// but the btrfs kernel code does readahead with lower ioprio
-	// and might discard the readahead request entirely,
-	// so it's maybe, *maybe*, worth doing both.
+	// This isn't necessary and it might even be slower
 	BEESNOTE("emulating readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
-	auto working_size = size;
-	auto working_offset = offset;
-	while (working_size) {
-		// don't care about multithreaded writes to this buffer--it is garbage anyway
+	while (size) {
 		static uint8_t dummy[BEES_READAHEAD_SIZE];
-		const size_t this_read_size = min(working_size, sizeof(dummy));
-		// Ignore errors and short reads.  It turns out our size
-		// parameter isn't all that accurate, so we can't use
-		// the pread_or_die template.
-		(void)!pread(fd, dummy, this_read_size, working_offset);
+		size_t this_read_size = min(size, sizeof(dummy));
+		// Ignore errors and short reads.
+		// It turns out our size parameter isn't all that accurate.
+		(void)!pread(fd, dummy, this_read_size, offset);
 		BEESCOUNT(readahead_count);
 		BEESCOUNTADD(readahead_bytes, this_read_size);
-		working_offset += this_read_size;
-		working_size -= this_read_size;
+		offset += this_read_size;
+		size -= this_read_size;
 	}
 #endif
 	BEESCOUNTADD(readahead_ms, readahead_timer.age() * 1000);
@@ -259,13 +262,6 @@ bees_unreadahead(int const fd, off_t offset, size_t size)
 	BEESCOUNTADD(readahead_unread_ms, unreadahead_timer.age() * 1000);
 }

-thread_local random_device bees_random_device;
-thread_local uniform_int_distribution<default_random_engine::result_type> bees_random_seed_dist(
-	numeric_limits<default_random_engine::result_type>::min(),
-	numeric_limits<default_random_engine::result_type>::max()
-);
-thread_local default_random_engine bees_generator(bees_random_seed_dist(bees_random_device));
-
 BeesStringFile::BeesStringFile(Fd dir_fd, string name, size_t limit) :
 	m_dir_fd(dir_fd),
 	m_name(name),
@@ -472,6 +468,7 @@ BeesTempFile::make_copy(const BeesFileRange &src)
 	auto src_p = src.begin();
 	auto dst_p = begin;

+	bool did_block_write = false;
 	while (dst_p < end) {
 		auto len = min(BLOCK_SIZE_CLONE, end - dst_p);
 		BeesBlockData bbd(src.fd(), src_p, len);
@@ -482,6 +479,7 @@ BeesTempFile::make_copy(const BeesFileRange &src)
 			BEESNOTE("copying " << src << " to " << rv << "\n"
 				"\tpwrite " << bbd << " to " << name_fd(m_fd) << " offset " << to_hex(dst_p) << " len " << len);
 			pwrite_or_die(m_fd, bbd.data().data(), len, dst_p);
+			did_block_write = true;
 			BEESCOUNT(tmp_block);
 			BEESCOUNTADD(tmp_bytes, len);
 		}
@@ -490,6 +488,16 @@ BeesTempFile::make_copy(const BeesFileRange &src)
 	}
 	BEESCOUNTADD(tmp_copy_ms, copy_timer.age() * 1000);

+	if (did_block_write) {
+#if 0
+		// There were a lot of kernel bugs leading to lockups.
+		// Most of them are fixed now.
+		// Unnecessary sync makes us slow, but maybe it has some robustness utility.
+		// TODO:  make this configurable.
+		bees_sync(m_fd);
+#endif
+	}
+
 	BEESCOUNT(tmp_copy);
 	return rv;
 }
@@ -603,7 +611,7 @@ bees_main(int argc, char *argv[])
 	unsigned thread_min = 0;
 	double load_target = 0;
 	bool workaround_btrfs_send = false;
-	BeesRoots::ScanMode root_scan_mode = BeesRoots::SCAN_MODE_INDEPENDENT;
+	BeesRoots::ScanMode root_scan_mode = BeesRoots::SCAN_MODE_ZERO;

 	// Configure getopt_long
 	static const struct option long_options[] = {
@@ -774,8 +782,8 @@ main(int argc, char *argv[])
 		return EXIT_FAILURE;
 	}

-	int rv = EXIT_FAILURE;
-	catch_all([&]() {
+	int rv = 1;
+	catch_and_explain([&]() {
 		rv = bees_main(argc, argv);
 	});
 	BEESLOGNOTICE("Exiting with status " << rv << " " << (rv ? "(failure)" : "(success)"));
--- a/src/bees.h
+++ b/src/bees.h
@@ -1,7 +1,6 @@
 #ifndef BEES_H
 #define BEES_H

-#include "crucible/btrfs-tree.h"
 #include "crucible/cache.h"
 #include "crucible/chatter.h"
 #include "crucible/error.h"
@@ -9,21 +8,20 @@
 #include "crucible/fd.h"
 #include "crucible/fs.h"
 #include "crucible/lockset.h"
-#include "crucible/multilock.h"
 #include "crucible/pool.h"
 #include "crucible/progress.h"
 #include "crucible/time.h"
 #include "crucible/task.h"

+#include <atomic>
 #include <functional>
 #include <list>
 #include <mutex>
 #include <string>
-#include <random>
 #include <thread>

-#include <endian.h>
 #include <syslog.h>
+#include <endian.h>

 using namespace crucible;
 using namespace std;
@@ -61,9 +59,8 @@ const off_t BLOCK_SIZE_HASHTAB_BUCKET = BLOCK_SIZE_MMAP;
 // Extent size for hash table (since the nocow file attribute does not seem to be working today)
 const off_t BLOCK_SIZE_HASHTAB_EXTENT = BLOCK_SIZE_MAX_COMPRESSED_EXTENT;

-// Bytes per second we want to flush from hash table
-// Optimistic sustained write rate for SD cards
-const double BEES_FLUSH_RATE = 128 * 1024;
+// Bytes per second we want to flush (8GB every two hours)
+const double BEES_FLUSH_RATE = 8.0 * 1024 * 1024 * 1024 / 7200.0;

 // Interval between writing crawl state to disk
 const int BEES_WRITEBACK_INTERVAL = 900;
@@ -101,8 +98,29 @@ const size_t BEES_MAX_EXTENT_REF_COUNT = (16 * 1024 * 1024 / 24) - 1;
 // How long between hash table histograms
 const double BEES_HASH_TABLE_ANALYZE_INTERVAL = BEES_STATS_INTERVAL;

-// Wait at least this long for a new transid
-const double BEES_TRANSID_POLL_INTERVAL = 30.0;
+// Stop growing the work queue after we have this many tasks queued
+const size_t BEES_MAX_QUEUE_SIZE = 128;
+
+// Read this many items at a time in SEARCHv2
+const size_t BEES_MAX_CRAWL_ITEMS = 8;
+
+// Read this many bytes at a time in SEARCHv2 (one maximum-sized metadata page)
+const size_t BEES_MAX_CRAWL_BYTES = 64 * 1024;
+
+// Insert this many items before switching to a new subvol
+const size_t BEES_MAX_CRAWL_BATCH = 128;
+
+// Wait this many transids between crawls
+const size_t BEES_TRANSID_FACTOR = 10;
+
+// Wait this long for a balance to stop
+const double BEES_BALANCE_POLL_INTERVAL = 60.0;
+
+// Workaround for backref bugs
+const bool BEES_SERIALIZE_RESOLVE = false;
+
+// Workaround for tree mod log bugs
+const bool BEES_SERIALIZE_BALANCE = false;

 // Workaround for silly dedupe / ineffective readahead behavior
 const size_t BEES_READAHEAD_SIZE = 1024 * 1024;
@@ -251,7 +269,7 @@ ostream& operator<<(ostream &os, const BeesFileId &bfi);

 class BeesFileRange {
 protected:
-	Fd			m_fd;
+	mutable Fd		m_fd;
 	mutable BeesFileId	m_fid;
 	off_t			m_begin = 0, m_end = 0;
 	mutable off_t		m_file_size = -1;
@@ -273,31 +291,35 @@ public:
 	bool is_same_file(const BeesFileRange &that) const;
 	bool overlaps(const BeesFileRange &that) const;

+	// If file ranges overlap, extends this to include that.
+	// Coalesce with empty bfr = non-empty bfr
+	bool coalesce(const BeesFileRange &that);
+
+	// Remove that from this, creating 0, 1, or 2 new objects
+	pair<BeesFileRange, BeesFileRange> subtract(const BeesFileRange &that) const;
+
 	off_t begin() const { return m_begin; }
 	off_t end() const { return m_end; }
 	off_t size() const;

-	/// @{ Lazy accessors
+	// Lazy accessors
 	off_t file_size() const;
 	BeesFileId fid() const;
-	/// @}

-	/// Get the fd if there is one
+	// Get the fd if there is one
 	Fd fd() const;

-	/// Get the fd, opening it if necessary
-	Fd fd(const shared_ptr<BeesContext> &ctx);
+	// Get the fd, opening it if necessary
+	Fd fd(const shared_ptr<BeesContext> &ctx) const;

-	/// Copy the BeesFileId but not the Fd
 	BeesFileRange copy_closed() const;

-	/// Is it defined?
+	// Is it defined?
 	operator bool() const { return !!m_fd || m_fid; }

-	/// @{ Make range larger
+	// Make range larger
 	off_t grow_end(off_t delta);
 	off_t grow_begin(off_t delta);
-	/// @}

 friend ostream & operator<<(ostream &os, const BeesFileRange &bfr);
 };
@@ -323,7 +345,6 @@ public:
 	BeesAddress(Type addr = ZERO) : m_addr(addr) {}
 	BeesAddress(MagicValue addr) : m_addr(addr) {}
 	BeesAddress& operator=(const BeesAddress &that) = default;
-	BeesAddress(const BeesAddress &that) = default;
 	operator Type() const { return m_addr; }
 	bool operator==(const BeesAddress &that) const;
 	bool operator==(const MagicValue that) const { return *this == BeesAddress(that); }
@@ -384,7 +405,6 @@ public:
 		HashType	e_hash;
 		AddrType	e_addr;
 		Cell(const Cell &) = default;
-		Cell &operator=(const Cell &) = default;
 		Cell(HashType hash, AddrType addr) : e_hash(hash), e_addr(addr) { }
 		bool operator==(const Cell &e) const { return tie(e_hash, e_addr) == tie(e.e_hash, e.e_addr); }
 		bool operator!=(const Cell &e) const { return tie(e_hash, e_addr) != tie(e.e_hash, e.e_addr); }
@@ -409,14 +429,12 @@ public:
 	BeesHashTable(shared_ptr<BeesContext> ctx, string filename, off_t size = BLOCK_SIZE_HASHTAB_EXTENT);
 	~BeesHashTable();

-	void stop_request();
-	void stop_wait();
+	void stop();

 	vector<Cell>	find_cell(HashType hash);
 	bool		push_random_hash_addr(HashType hash, AddrType addr);
 	void		erase_hash_addr(HashType hash, AddrType addr);
 	bool		push_front_hash_addr(HashType hash, AddrType addr);
-	bool            flush_dirty_extent(uint64_t extent_index);

 private:
 	string		m_filename;
@@ -450,7 +468,7 @@ private:
 	// Mutex/condvar for the writeback thread
 	mutex			m_dirty_mutex;
 	condition_variable	m_dirty_condvar;
-	bool			m_dirty = false;
+	bool			m_dirty;

 	// Mutex/condvar to stop
 	mutex			m_stop_mutex;
@@ -476,6 +494,7 @@ private:
 	void fetch_missing_extent_by_index(uint64_t extent_index);
 	void set_extent_dirty_locked(uint64_t extent_index);
 	size_t flush_dirty_extents(bool slowly);
+	bool flush_dirty_extent(uint64_t extent_index);

 	size_t			hash_to_extent_index(HashType ht);
 	unique_lock<mutex>	lock_extent_by_hash(HashType ht);
@@ -483,8 +502,6 @@ private:

 	BeesHashTable(const BeesHashTable &) = delete;
 	BeesHashTable &operator=(const BeesHashTable &) = delete;
-
-	static thread_local uniform_int_distribution<size_t> tl_distribution;
 };

 ostream &operator<<(ostream &os, const BeesHashTable::Cell &bhte);
@@ -504,48 +521,43 @@ class BeesCrawl {
 	shared_ptr<BeesContext>			m_ctx;

 	mutex					m_mutex;
-	BtrfsTreeItem				m_next_extent_data;
+	set<BeesFileRange>			m_extents;
 	bool					m_deferred = false;
 	bool					m_finished = false;

 	mutex					m_state_mutex;
 	ProgressTracker<BeesCrawlState>		m_state;

-	BtrfsTreeObjectFetcher			m_btof;
-
 	bool fetch_extents();
 	void fetch_extents_harder();
 	bool next_transid();
-	BeesFileRange bti_to_bfr(const BtrfsTreeItem &bti) const;

 public:
 	BeesCrawl(shared_ptr<BeesContext> ctx, BeesCrawlState initial_state);
 	BeesFileRange peek_front();
 	BeesFileRange pop_front();
-	ProgressTracker<BeesCrawlState>::ProgressHolder hold_state(const BeesCrawlState &bcs);
+	ProgressTracker<BeesCrawlState>::ProgressHolder hold_state(const BeesFileRange &bfr);
 	BeesCrawlState get_state_begin();
-	BeesCrawlState get_state_end() const;
+	BeesCrawlState get_state_end();
 	void set_state(const BeesCrawlState &bcs);
 	void deferred(bool def_setting);
 };

-class BeesScanMode;
-
 class BeesRoots : public enable_shared_from_this<BeesRoots> {
 	shared_ptr<BeesContext>			m_ctx;

 	BeesStringFile				m_crawl_state_file;
 	map<uint64_t, shared_ptr<BeesCrawl>>	m_root_crawl_map;
 	mutex					m_mutex;
-	uint64_t				m_crawl_dirty = 0;
-	uint64_t				m_crawl_clean = 0;
+	bool					m_crawl_dirty = false;
 	Timer					m_crawl_timer;
 	BeesThread				m_crawl_thread;
 	BeesThread				m_writeback_thread;
 	RateEstimator				m_transid_re;
+	size_t					m_transid_factor = BEES_TRANSID_FACTOR;
+	Task					m_crawl_task;
 	bool					m_workaround_btrfs_send = false;
-
-	shared_ptr<BeesScanMode>		m_scanner;
+	LRUCache<bool, uint64_t>		m_root_ro_cache;

 	mutex					m_tmpfiles_mutex;
 	map<BeesFileId, Fd>			m_tmpfiles;
@@ -558,6 +570,7 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> {
 	void insert_root(const BeesCrawlState &bcs);
 	Fd open_root_nocache(uint64_t root);
 	Fd open_root_ino_nocache(uint64_t root, uint64_t ino);
+	bool is_root_ro_nocache(uint64_t root);
 	uint64_t transid_min();
 	uint64_t transid_max();
 	uint64_t transid_max_nocache();
@@ -573,38 +586,41 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> {
 	uint64_t next_root(uint64_t root = 0);
 	void current_state_set(const BeesCrawlState &bcs);
 	RateEstimator& transid_re();
-	bool crawl_batch(shared_ptr<BeesCrawl> crawl);
+	size_t crawl_batch(shared_ptr<BeesCrawl> crawl);
 	void clear_caches();
+	void insert_tmpfile(Fd fd);
+	void erase_tmpfile(Fd fd);

-friend class BeesCrawl;
 friend class BeesFdCache;
-friend class BeesScanMode;
+friend class BeesCrawl;
+friend class BeesTempFile;

 public:
 	BeesRoots(shared_ptr<BeesContext> ctx);
 	void start();
-	void stop_request();
-	void stop_wait();
-
-	void insert_tmpfile(Fd fd);
-	void erase_tmpfile(Fd fd);
+	void stop();

 	Fd open_root(uint64_t root);
 	Fd open_root_ino(uint64_t root, uint64_t ino);
 	Fd open_root_ino(const BeesFileId &bfi) { return open_root_ino(bfi.root(), bfi.ino()); }
 	bool is_root_ro(uint64_t root);

-	// TODO:  do extent-tree scans instead
+	// TODO:  think of better names for these.
+	// or TODO:  do extent-tree scans instead
 	enum ScanMode {
-		SCAN_MODE_LOCKSTEP,
-		SCAN_MODE_INDEPENDENT,
-		SCAN_MODE_SEQUENTIAL,
-		SCAN_MODE_RECENT,
+		SCAN_MODE_ZERO,
+		SCAN_MODE_ONE,
+		SCAN_MODE_TWO,
 		SCAN_MODE_COUNT, // must be last
 	};

 	void set_scan_mode(ScanMode new_mode);
 	void set_workaround_btrfs_send(bool do_avoid);
+
+private:
+	ScanMode m_scan_mode = SCAN_MODE_ZERO;
+	static string scan_mode_ntoa(ScanMode new_mode);
+
 };

 struct BeesHash {
@@ -623,7 +639,7 @@ private:
 ostream & operator<<(ostream &os, const BeesHash &bh);

 class BeesBlockData {
-	using Blob = ByteVector;
+	using Blob = vector<uint8_t>;

 	mutable Fd		m_fd;
 	off_t			m_offset;
@@ -707,7 +723,13 @@ struct BeesResolveAddrResult {
 	bool is_toxic() const { return m_is_toxic; }
 };

+struct BeesHalt : exception {
+	const char *what() const noexcept override;
+};
+
 class BeesContext : public enable_shared_from_this<BeesContext> {
+	shared_ptr<BeesContext>				m_parent_ctx;
+
 	Fd						m_home_fd;

 	shared_ptr<BeesFdCache>				m_fd_cache;
@@ -725,25 +747,30 @@ class BeesContext : public enable_shared_from_this<BeesContext> {

 	Timer						m_total_timer;

-	NamedPtr<Exclusion, uint64_t>			m_extent_locks;
-	NamedPtr<Exclusion, uint64_t>			m_inode_locks;
+	LockSet<uint64_t>				m_extent_lock_set;

 	mutable mutex					m_stop_mutex;
 	condition_variable				m_stop_condvar;
 	bool						m_stop_requested = false;
 	bool						m_stop_status = false;

+	mutable mutex					m_abort_mutex;
+	condition_variable				m_abort_condvar;
+	bool						m_abort_requested = false;
+
 	shared_ptr<BeesThread>				m_progress_thread;
 	shared_ptr<BeesThread>				m_status_thread;

 	void set_root_fd(Fd fd);

 	BeesResolveAddrResult resolve_addr_uncached(BeesAddress addr);
+	void wait_for_balance();

 	BeesFileRange scan_one_extent(const BeesFileRange &bfr, const Extent &e);
 	void rewrite_file_range(const BeesFileRange &bfr);

 public:
+	BeesContext() = default;

 	void set_root_path(string path);

@@ -751,7 +778,7 @@ public:
 	Fd home_fd();
 	string root_path() const { return m_root_path; }

-	bool scan_forward(const BeesFileRange &bfr);
+	BeesFileRange scan_forward(const BeesFileRange &bfr);

 	bool is_root_ro(uint64_t root);
 	BeesRangePair dup_extent(const BeesFileRange &src, const shared_ptr<BeesTempFile> &tmpfile);
@@ -761,11 +788,8 @@ public:
 	void blacklist_erase(const BeesFileId &fid);
 	bool is_blacklisted(const BeesFileId &fid) const;

-	shared_ptr<Exclusion> get_inode_mutex(uint64_t inode);
-
 	BeesResolveAddrResult resolve_addr(BeesAddress addr);
 	void invalidate_addr(BeesAddress addr);
-	void resolve_cache_clear();

 	void dump_status();
 	void show_progress();
@@ -780,6 +804,7 @@ public:
 	shared_ptr<BeesTempFile> tmpfile();

 	const Timer &total_timer() const { return m_total_timer; }
+	LockSet<uint64_t> &extent_lock_set() { return m_extent_lock_set; }
 };

 class BeesResolver {
@@ -787,7 +812,7 @@ class BeesResolver {
 	BeesAddress				m_addr;
 	vector<BtrfsInodeOffsetRoot>		m_biors;
 	set<BeesFileRange>			m_ranges;
-	size_t					m_bior_count;
+	unsigned				m_bior_count;

 	// We found matching data, so we can dedupe
 	bool					m_found_data = false;
@@ -862,8 +887,8 @@ public:
 extern int bees_log_level;
 extern const char *BEES_USAGE;
 extern const char *BEES_VERSION;
-extern thread_local default_random_engine bees_generator;
 string pretty(double d);
+void bees_sync(int fd);
 void bees_readahead(int fd, off_t offset, size_t size);
 void bees_unreadahead(int fd, off_t offset, size_t size);
 string format_time(time_t t);
--- a/src/fiemap.cc
+++ b/src/fiemap.cc
@@ -0,0 +1,71 @@
+#include "crucible/fd.h"
+#include "crucible/fs.h"
+#include "crucible/error.h"
+#include "crucible/string.h"
+
+#include <iostream>
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+using namespace crucible;
+using namespace std;
+
+// Print the extent map of a file as reported by the FIEMAP ioctl.
+//
+// Usage: fiemap FILE [START [LENGTH [FLAGS]]]
+//
+// START, LENGTH and FLAGS are parsed by stoull with base 0, so decimal,
+// 0x-prefixed hex and 0-prefixed octal are all accepted.
+// Gaps between consecutive extents are reported as holes.
+// The exit status is EXIT_SUCCESS only if the whole map was printed.
+int
+main(int argc, char **argv)
+{
+	// Assume failure until the lambda runs to completion:  catch_all
+	// handles exceptions itself, so reaching the end of the lambda is
+	// the only evidence that nothing went wrong.
+	int rv = EXIT_FAILURE;
+	catch_all([&]() {
+		THROW_CHECK1(invalid_argument, argc, argc > 1);
+		string filename = argv[1];
+
+		cout << "File: " << filename << endl;
+		Fd fd = open_or_die(filename, O_RDONLY);
+		Fiemap fm;
+		// Don't ask the kernel to sync the file before mapping it
+		fm.fm_flags &= ~(FIEMAP_FLAG_SYNC);
+		// NOTE(review): presumably the number of extents requested per ioctl -- confirm in crucible/fs.h
+		fm.m_max_count = 100;
+		if (argc > 2) { fm.fm_start = stoull(argv[2], nullptr, 0); }
+		if (argc > 3) { fm.fm_length = stoull(argv[3], nullptr, 0); }
+		if (argc > 4) { fm.fm_flags = stoull(argv[4], nullptr, 0); }
+		// Clamp the length so that fm_start + fm_length cannot go past FIEMAP_MAX_OFFSET
+		fm.fm_length = min(fm.fm_length, FIEMAP_MAX_OFFSET - fm.fm_start);
+		uint64_t stop_at = fm.fm_start + fm.fm_length;
+		// End of the last extent printed so far, used to detect holes
+		uint64_t last_byte = fm.fm_start;
+		do {
+			fm.do_ioctl(fd);
+			// cerr << fm;
+			// If no extents come back, jumping to FIEMAP_MAX_OFFSET ends the loop
+			uint64_t last_logical = FIEMAP_MAX_OFFSET;
+			for (auto &extent : fm.m_extents) {
+				if (extent.fe_logical > last_byte) {
+					cout << "Log " << to_hex(last_byte) << ".." << to_hex(extent.fe_logical) << " Hole" << endl;
+				}
+				cout << "Log " << to_hex(extent.fe_logical) << ".." << to_hex(extent.fe_logical + extent.fe_length)
+					<< " Phy " << to_hex(extent.fe_physical) << ".." << to_hex(extent.fe_physical + extent.fe_length)
+					<< " Flags " << fiemap_extent_flags_ntoa(extent.fe_flags) << endl;
+				last_logical = extent.fe_logical + extent.fe_length;
+				last_byte = last_logical;
+			}
+			// Resume the next ioctl where this batch ended
+			fm.fm_start = last_logical;
+		} while (fm.fm_start < stop_at);
+		rv = EXIT_SUCCESS;
+	});
+	return rv;
+}
+
--- a/src/fiewalk.cc
+++ b/src/fiewalk.cc
@@ -0,0 +1,52 @@
+#include "crucible/extentwalker.h"
+#include "crucible/error.h"
+#include "crucible/string.h"
+
+#include <iostream>
+
+#include <fcntl.h>
+#include <unistd.h>
+
+using namespace crucible;
+using namespace std;
+
+// Print the extents of a file as seen by BtrfsExtentWalker.
+//
+// Usage: fiewalk FILE [START]
+//
+// START is parsed by stoull with base 0 and passed to seek() before walking;
+// it defaults to 0.  Every extent returned by current() is printed until
+// next() returns false.
+// The exit status is EXIT_SUCCESS only if the walk ran to completion.
+int
+main(int argc, char **argv)
+{
+	// Assume failure until the lambda runs to completion:  catch_all
+	// handles exceptions itself, so reaching the end of the lambda is
+	// the only evidence that nothing went wrong.
+	int rv = EXIT_FAILURE;
+	catch_all([&]() {
+		THROW_CHECK1(invalid_argument, argc, argc > 1);
+		string filename = argv[1];
+
+		cout << "File: " << filename << endl;
+		Fd fd = open_or_die(filename, O_RDONLY);
+		BtrfsExtentWalker ew(fd);
+		off_t pos = 0;
+		if (argc > 2) { pos = stoull(argv[2], nullptr, 0); }
+		ew.seek(pos);
+		do {
+			// cout << "\n\n>>>" << ew.current() << "<<<\n\n" << endl;
+			cout << ew.current() << endl;
+		} while (ew.next());
+#if 0
+		cout << "\n\n\nAnd now, backwards...\n\n\n" << endl;
+		do {
+			cout << "\n\n>>>" << ew.current() << "<<<\n\n" << endl;
+		} while (ew.prev());
+		cout << "\n\n\nDone!\n\n\n" << endl;
+#endif
+		rv = EXIT_SUCCESS;
+	});
+	return rv;
+}
+
--- a/test/Makefile
+++ b/test/Makefile
@@ -7,7 +7,6 @@ PROGRAMS = \
 	path \
 	process \
 	progress \
-	seeker \
 	task \

 all: test
@@ -21,10 +20,17 @@ include ../makeflags
 LIBS = -lcrucible -lpthread
 BEES_LDFLAGS = -L../lib $(LDFLAGS)

-%.dep: %.cc tests.h Makefile
+.depends:
+	mkdir -p $@
+
+.depends/%.dep: %.cc tests.h Makefile | .depends
 	$(CXX) $(BEES_CXXFLAGS) -M -MF $@ -MT $(<:.cc=.o) $<

-include $(PROGRAMS:%=%.dep)
+depends.mk: $(PROGRAMS:%=.depends/%.dep)
+	cat $^ > $@.new
+	mv -f $@.new $@
+
+include depends.mk

 $(PROGRAMS:%=%.o): %.o: %.cc ../makeflags Makefile
 	$(CXX) $(BEES_CXXFLAGS) -o $@ -c $<
--- a/test/seeker.cc
+++ b/test/seeker.cc
@@ -1,101 +0,0 @@
-#include "tests.h"
-
-#include "crucible/seeker.h"
-
-#include <set>
-#include <vector>
-
-#include <unistd.h>
-
-using namespace crucible;
-
-static
-set<uint64_t>
-seeker_finder(const vector<uint64_t> &vec, uint64_t lower, uint64_t upper)
-{
-	set<uint64_t> s(vec.begin(), vec.end());
-	auto lb = s.lower_bound(lower);
-	auto ub = lb;
-	if (ub != s.end()) ++ub;
-	if (ub != s.end()) ++ub;
-	for (; ub != s.end(); ++ub) {
-		if (*ub > upper) break;
-	}
-	return set<uint64_t>(lb, ub);
-}
-
-static bool test_fails = false;
-
-static
-void
-seeker_test(const vector<uint64_t> &vec, uint64_t const target)
-{
-	cerr << "Find " << target << " in {";
-	for (auto i : vec) {
-		cerr << " " << i;
-	}
-	cerr << " } = ";
-	size_t loops = 0;
-	bool excepted = catch_all([&]() {
-		auto found = seek_backward(target, [&](uint64_t lower, uint64_t upper) {
-			++loops;
-			return seeker_finder(vec, lower, upper);
-		});
-		cerr << found;
-		uint64_t my_found = 0;
-		for (auto i : vec) {
-			if (i <= target) {
-				my_found = i;
-			}
-		}
-		if (found == my_found) {
-			cerr << " (correct)";
-		} else {
-			cerr << " (INCORRECT - right answer is " << my_found << ")";
-			test_fails = true;
-		}
-	});
-	cerr << " (" << loops << " loops)" << endl;
-	if (excepted) {
-		test_fails = true;
-	}
-}
-
-static
-void
-test_seeker()
-{
-	seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 3);
-	seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 5);
-	seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 0);
-	seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 1);
-	seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 4);
-	seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 2);
-
-	seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 2);
-	seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 25);
-	seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 52);
-	seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 99);
-	seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55, 56 }, 99);
-	seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 1);
-	seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 55);
-	seeker_test(vector<uint64_t> { 11 }, 55);
-	seeker_test(vector<uint64_t> { 11 }, 10);
-	seeker_test(vector<uint64_t> { 55 }, 55);
-	seeker_test(vector<uint64_t> { }, 55);
-	seeker_test(vector<uint64_t> { 55 }, numeric_limits<uint64_t>::max());
-	seeker_test(vector<uint64_t> { 55 }, numeric_limits<uint64_t>::max() - 1);
-	seeker_test(vector<uint64_t> { }, numeric_limits<uint64_t>::max());
-	seeker_test(vector<uint64_t> { 0, numeric_limits<uint64_t>::max() }, numeric_limits<uint64_t>::max());
-	seeker_test(vector<uint64_t> { 0, numeric_limits<uint64_t>::max() }, numeric_limits<uint64_t>::max() - 1);
-	seeker_test(vector<uint64_t> { 0, numeric_limits<uint64_t>::max() - 1 }, numeric_limits<uint64_t>::max());
-}
-
-
-int main(int, const char **)
-{
-
-	RUN_A_TEST(test_seeker());
-
-	return test_fails ? EXIT_FAILURE : EXIT_SUCCESS;
-}
--- a/test/task.cc
+++ b/test/task.cc
@@ -90,51 +90,47 @@ test_barrier(size_t count)

 	mutex mtx;
 	condition_variable cv;
-	bool done_flag = false;

 	unique_lock<mutex> lock(mtx);

-	Barrier b;
+	auto b = make_shared<Barrier>();

 	// Run several tasks in parallel
 	for (size_t c = 0; c < count; ++c) {
+		auto bl = b->lock();
 		ostringstream oss;
 		oss << "task #" << c;
-		auto b_hold = b;
 		Task t(
 			oss.str(),
-			[c, &task_done, &mtx, b_hold]() mutable {
-				// ostringstream oss;
-				// oss << "Task #" << c << endl;
+			[c, &task_done, &mtx, bl]() mutable {
+				// cerr << "Task #" << c << endl;
 				unique_lock<mutex> lock(mtx);
-				// cerr << oss.str();
 				task_done.at(c) = true;
-				b_hold.release();
+				bl.release();
 			}
 		);
 		t.run();
 	}

-	// Need completed to go out of local scope so it will release b
-	{
-		Task completed(
-			"Waiting for Barrier",
-			[&mtx, &cv, &done_flag]() {
-				unique_lock<mutex> lock(mtx);
-				// cerr << "Running cv notify" << endl;
-				done_flag = true;
-				cv.notify_all();
-			}
-		);
-		b.insert_task(completed);
-	}
-
 	// Get current status
-	// TaskMaster::print_queue(cerr);
-	// TaskMaster::print_workers(cerr);
+	ostringstream oss;
+	TaskMaster::print_queue(oss);
+	TaskMaster::print_workers(oss);

-	// Release our b
-	b.release();
+	bool done_flag = false;
+
+	Task completed(
+		"Waiting for Barrier",
+		[&mtx, &cv, &done_flag]() {
+			unique_lock<mutex> lock(mtx);
+			// cerr << "Running cv notify" << endl;
+			done_flag = true;
+			cv.notify_all();
+		}
+	);
+	b->insert_task(completed);
+
+	b.reset();

 	while (true) {
 		size_t tasks_done = 0;
@@ -143,7 +139,7 @@ test_barrier(size_t count)
 				++tasks_done;
 			}
 		}
-		cerr << "Tasks done: " << tasks_done << " done_flag " << done_flag << endl;
+		// cerr << "Tasks done: " << tasks_done << " done_flag " << done_flag << endl;
 		if (tasks_done == count && done_flag) {
 			break;
 		}
@@ -157,7 +153,7 @@ void
 test_exclusion(size_t count)
 {
 	mutex only_one;
-	auto excl = make_shared<Exclusion>();
+	auto excl = make_shared<Exclusion>("test_excl");

 	mutex mtx;
 	condition_variable cv;
@@ -178,8 +174,9 @@ test_exclusion(size_t count)
 			[c, &only_one, excl, &lock_success_count, &lock_failure_count, &pings, &tasks_running, &cv, &mtx]() mutable {
 				// cerr << "Task #" << c << endl;
 				(void)c;
-				auto lock = excl->try_lock(Task::current_task());
+				auto lock = excl->try_lock();
 				if (!lock) {
+					excl->insert_task(Task::current_task());
 					++lock_failure_count;
 					return;
 				}
@@ -199,7 +196,7 @@ test_exclusion(size_t count)
 		t.run();
 	}

-	excl.reset();
+	// excl.reset();

 	unique_lock<mutex> lock(mtx);
 	while (tasks_running) {
Author	SHA1	Message	Date
KhalilSantana	27857406f5	Fixes a bad grep pattern caused by `dffd6e0` Fixes #233	2022-10-13 16:32:48 -04:00
Khalil Santana	b44ed287dd	Get rid of errors by using grep -E "egrep: warning: egrep is obsolescent; using grep -E"	2022-10-05 22:36:33 -03:00
Ayla Ounce	20c469245c	Fix beesd script arg parsing to respect PREFIX Without this, if you install to a different PREFIX such as /usr/local it will fail to recognize any arguments and if you use the systemd unit, that makes --no-timestamps the first NOT_SUPPORTED_ARG which will get passed to uuidparse, which doesn't recognize it and errors.	2022-10-05 22:36:33 -03:00
Javi Vilarroig	77cf2d794e	Minimal changes in beesd script to make it functional in my system	2022-10-05 22:36:33 -03:00