mirror of https://github.com/Zygo/bees.git

bees: remove local cruft, throw at github

Author: Zygo Blaxell
Date:   2016-11-15 23:32:44 -05:00
Commit: cca0ee26a8

66 changed files with 12785 additions and 0 deletions

src/Makefile (new file, 39 lines)

@@ -0,0 +1,39 @@
PROGRAMS = \
	../bin/bees \
	../bin/fiemap \
	../bin/fiewalk \

all: $(PROGRAMS) depends.mk

include ../makeflags

LIBS = -lcrucible -lpthread
LDFLAGS = -L../lib -Wl,-rpath=$(shell realpath ../lib)
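# Header dependencies are tracked automatically: depends.mk is rebuilt
# from the compiler's -M output whenever the Makefile or any .cc file changes.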
depends.mk: Makefile *.cc
	for x in *.cc; do $(CXX) $(CXXFLAGS) -M "$$x"; done > depends.mk.new
	mv -fv depends.mk.new depends.mk

-include depends.mk

%.o: %.cc %.h
	$(CXX) $(CXXFLAGS) -o "$@" -c "$<"

../bin/%: %.o
	@echo Implicit bin rule "$<" '->' "$@"
	$(CXX) $(CXXFLAGS) -o "$@" "$<" $(LDFLAGS) $(LIBS)

BEES_OBJS = \
	bees.o \
	bees-context.o \
	bees-hash.o \
	bees-resolve.o \
	bees-roots.o \
	bees-thread.o \
	bees-types.o \

../bin/bees: $(BEES_OBJS)
	$(CXX) $(CXXFLAGS) -o "$@" $(BEES_OBJS) $(LDFLAGS) $(LIBS)

clean:
	-rm -fv *.o

src/bees-context.cc (new file, 1009 lines)

File diff suppressed because it is too large.

src/bees-hash.cc (new file, 682 lines)

@@ -0,0 +1,682 @@
#include "bees.h"
#include "crucible/crc64.h"
#include "crucible/string.h"
#include <algorithm>
#include <random>
#include <sys/mman.h>
using namespace crucible;
using namespace std;
static inline
bool
using_any_madvise()
{
return true;
}
ostream &
operator<<(ostream &os, const BeesHash &bh)
{
return os << to_hex(BeesHash::Type(bh));
}
ostream &
operator<<(ostream &os, const BeesHashTable::Cell &bhte)
{
return os << "BeesHashTable::Cell { hash = " << BeesHash(bhte.e_hash) << ", addr = "
<< BeesAddress(bhte.e_addr) << " }";
}
void
dump_bucket(BeesHashTable::Cell *p, BeesHashTable::Cell *q)
{
// Must be called while holding m_bucket_mutex
for (auto i = p; i < q; ++i) {
BEESLOG("Entry " << i - p << " " << *i);
}
}
const bool VERIFY_CLEARS_BUGS = false;
bool
verify_cell_range(BeesHashTable::Cell *p, BeesHashTable::Cell *q, bool clear_bugs = VERIFY_CLEARS_BUGS)
{
// Must be called while holding m_bucket_mutex
bool bugs_found = false;
set<BeesHashTable::Cell> seen_it;
for (BeesHashTable::Cell *cell = p; cell < q; ++cell) {
if (cell->e_addr && cell->e_addr < 0x1000) {
BEESCOUNT(bug_hash_magic_addr);
BEESINFO("Bad hash table address hash " << to_hex(cell->e_hash) << " addr " << to_hex(cell->e_addr));
if (clear_bugs) {
cell->e_addr = 0;
cell->e_hash = 0;
}
bugs_found = true;
}
if (cell->e_addr && !seen_it.insert(*cell).second) {
BEESCOUNT(bug_hash_duplicate_cell);
// BEESLOG("Duplicate hash table entry:\nthis = " << *cell << "\nold = " << *seen_it.find(*cell));
BEESINFO("Duplicate hash table entry: " << *cell);
if (clear_bugs) {
cell->e_addr = 0;
cell->e_hash = 0;
}
bugs_found = true;
}
}
return bugs_found;
}
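// Hash table layout: a flat array of Cells grouped into fixed-size
// Buckets, which are in turn grouped into fixed-size Extents (the unit
// of disk I/O and writeback).  A hash selects one bucket;
// get_cell_range() returns the [begin, end) Cell pointers for that
// bucket, and get_extent_range() returns the byte range of the extent
// that contains it.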
pair<BeesHashTable::Cell *, BeesHashTable::Cell *>
BeesHashTable::get_cell_range(HashType hash)
{
THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
THROW_CHECK1(runtime_error, m_bucket_ptr, m_bucket_ptr != nullptr);
Bucket *pp = &m_bucket_ptr[hash % m_buckets];
Cell *bp = pp[0].p_cells;
Cell *ep = pp[1].p_cells;
THROW_CHECK2(out_of_range, m_cell_ptr, bp, bp >= m_cell_ptr);
THROW_CHECK2(out_of_range, m_cell_ptr_end, ep, ep <= m_cell_ptr_end);
return make_pair(bp, ep);
}
pair<uint8_t *, uint8_t *>
BeesHashTable::get_extent_range(HashType hash)
{
THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
THROW_CHECK1(runtime_error, m_bucket_ptr, m_bucket_ptr != nullptr);
Extent *iop = &m_extent_ptr[ (hash % m_buckets) / c_buckets_per_extent ];
uint8_t *bp = iop[0].p_byte;
uint8_t *ep = iop[1].p_byte;
THROW_CHECK2(out_of_range, m_byte_ptr, bp, bp >= m_byte_ptr);
THROW_CHECK2(out_of_range, m_byte_ptr_end, ep, ep <= m_byte_ptr_end);
return make_pair(bp, ep);
}
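// Write back hash table extents modified since the last flush.  The
// dirty-extent set is swapped out under m_extent_mutex, then each
// extent is copied to a private buffer and written with pwrite() so the
// mapped pages stay unlocked during the write.  Output is paced by
// m_flush_rate_limit.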
void
BeesHashTable::flush_dirty_extents()
{
if (using_shared_map()) return;
THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
unique_lock<mutex> lock(m_extent_mutex);
auto dirty_extent_copy = m_buckets_dirty;
m_buckets_dirty.clear();
if (dirty_extent_copy.empty()) {
BEESNOTE("idle");
m_condvar.wait(lock);
return; // please call later, i.e. immediately
}
lock.unlock();
size_t extent_counter = 0;
for (auto extent_number : dirty_extent_copy) {
++extent_counter;
BEESNOTE("flush extent #" << extent_number << " (" << extent_counter << " of " << dirty_extent_copy.size() << ")");
catch_all([&]() {
uint8_t *dirty_extent = m_extent_ptr[extent_number].p_byte;
uint8_t *dirty_extent_end = m_extent_ptr[extent_number + 1].p_byte;
THROW_CHECK1(out_of_range, dirty_extent, dirty_extent >= m_byte_ptr);
THROW_CHECK1(out_of_range, dirty_extent_end, dirty_extent_end <= m_byte_ptr_end);
if (using_shared_map()) {
BEESTOOLONG("flush extent " << extent_number);
copy(dirty_extent, dirty_extent_end, dirty_extent);
} else {
BEESTOOLONG("pwrite(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
// Page locks slow us down more than copying the data does
vector<uint8_t> extent_copy(dirty_extent, dirty_extent_end);
pwrite_or_die(m_fd, extent_copy, dirty_extent - m_byte_ptr);
BEESCOUNT(hash_extent_out);
}
});
BEESNOTE("flush rate limited at extent #" << extent_number << " (" << extent_counter << " of " << dirty_extent_copy.size() << ")");
m_flush_rate_limit.sleep_for(BLOCK_SIZE_HASHTAB_EXTENT);
}
}
void
BeesHashTable::set_extent_dirty(HashType hash)
{
if (using_shared_map()) return;
THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
auto pr = get_extent_range(hash);
uint64_t extent_number = reinterpret_cast<Extent *>(pr.first) - m_extent_ptr;
THROW_CHECK1(runtime_error, extent_number, extent_number < m_extents);
unique_lock<mutex> lock(m_extent_mutex);
m_buckets_dirty.insert(extent_number);
m_condvar.notify_one();
}
void
BeesHashTable::writeback_loop()
{
if (!using_shared_map()) {
while (1) {
flush_dirty_extents();
}
}
}
static
string
percent(size_t num, size_t den)
{
if (den) {
return astringprintf("%u%%", num * 100 / den);
} else {
return "--%";
}
}
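// Background analysis loop: mlock the whole table, then repeatedly read
// every extent into memory, verify the cells, and log an occupancy
// histogram plus global counters, sleeping
// BEES_HASH_TABLE_ANALYZE_INTERVAL seconds between passes.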
void
BeesHashTable::prefetch_loop()
{
// Always do the mlock, whether shared or not
THROW_CHECK1(runtime_error, m_size, m_size > 0);
catch_all([&]() {
BEESNOTE("mlock " << pretty(m_size));
DIE_IF_NON_ZERO(mlock(m_byte_ptr, m_size));
});
while (1) {
size_t width = 64;
vector<size_t> occupancy(width, 0);
size_t occupied_count = 0;
size_t total_count = 0;
size_t compressed_count = 0;
size_t compressed_offset_count = 0;
size_t toxic_count = 0;
size_t unaligned_eof_count = 0;
for (uint64_t ext = 0; ext < m_extents; ++ext) {
BEESNOTE("prefetching hash table extent " << ext << " of " << m_extent_ptr_end - m_extent_ptr);
catch_all([&]() {
fetch_missing_extent(ext * c_buckets_per_extent);
BEESNOTE("analyzing hash table extent " << ext << " of " << m_extent_ptr_end - m_extent_ptr);
bool duplicate_bugs_found = false;
unique_lock<mutex> lock(m_bucket_mutex);
for (Bucket *bucket = m_extent_ptr[ext].p_buckets; bucket < m_extent_ptr[ext + 1].p_buckets; ++bucket) {
if (verify_cell_range(bucket[0].p_cells, bucket[1].p_cells)) {
duplicate_bugs_found = true;
}
size_t this_bucket_occupied_count = 0;
for (Cell *cell = bucket[0].p_cells; cell < bucket[1].p_cells; ++cell) {
if (cell->e_addr) {
++this_bucket_occupied_count;
BeesAddress a(cell->e_addr);
if (a.is_compressed()) {
++compressed_count;
if (a.has_compressed_offset()) {
++compressed_offset_count;
}
}
if (a.is_toxic()) {
++toxic_count;
}
if (a.is_unaligned_eof()) {
++unaligned_eof_count;
}
}
++total_count;
}
++occupancy.at(this_bucket_occupied_count * width / (1 + c_cells_per_bucket) );
// Count these instead of calculating the number so we get better stats in case of exceptions
occupied_count += this_bucket_occupied_count;
}
lock.unlock();
if (duplicate_bugs_found) {
set_extent_dirty(ext);
}
});
}
BEESNOTE("calculating hash table statistics");
vector<string> histogram;
vector<size_t> thresholds;
size_t threshold = 1;
bool threshold_exceeded = false;
do {
threshold_exceeded = false;
histogram.push_back(string(width, ' '));
thresholds.push_back(threshold);
for (size_t x = 0; x < width; ++x) {
if (occupancy.at(x) >= threshold) {
histogram.back().at(x) = '#';
threshold_exceeded = true;
}
}
threshold *= 2;
} while (threshold_exceeded);
ostringstream out;
size_t count = histogram.size();
bool first_line = true;
for (auto it = histogram.rbegin(); it != histogram.rend(); ++it) {
out << *it << " " << thresholds.at(--count);
if (first_line) {
first_line = false;
out << " pages";
}
out << "\n";
}
size_t uncompressed_count = occupied_count - compressed_count;
size_t legacy_count = compressed_count - compressed_offset_count;
ostringstream graph_blob;
graph_blob << "Now: " << format_time(time(NULL)) << "\n";
graph_blob << "Uptime: " << m_ctx->total_timer().age() << " seconds\n";
graph_blob
<< "\nHash table page occupancy histogram (" << occupied_count << "/" << total_count << " cells occupied, " << (occupied_count * 100 / total_count) << "%)\n"
<< out.str() << "0% | 25% | 50% | 75% | 100% page fill\n"
<< "compressed " << compressed_count << " (" << percent(compressed_count, occupied_count) << ")"
<< " new-style " << compressed_offset_count << " (" << percent(compressed_offset_count, occupied_count) << ")"
<< " old-style " << legacy_count << " (" << percent(legacy_count, occupied_count) << ")\n"
<< "uncompressed " << uncompressed_count << " (" << percent(uncompressed_count, occupied_count) << ")"
<< " unaligned_eof " << unaligned_eof_count << " (" << percent(unaligned_eof_count, occupied_count) << ")"
<< " toxic " << toxic_count << " (" << percent(toxic_count, occupied_count) << ")";
graph_blob << "\n\n";
graph_blob << "TOTAL:\n";
auto thisStats = BeesStats::s_global;
graph_blob << "\t" << thisStats << "\n";
graph_blob << "\nRATES:\n";
auto avg_rates = thisStats / m_ctx->total_timer().age();
graph_blob << "\t" << avg_rates << "\n";
BEESLOG(graph_blob.str());
catch_all([&]() {
m_stats_file.write(graph_blob.str());
});
BEESNOTE("idle " << BEES_HASH_TABLE_ANALYZE_INTERVAL << "s");
nanosleep(BEES_HASH_TABLE_ANALYZE_INTERVAL);
}
}
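// Read a hash table extent from the backing file the first time it is
// needed.  m_buckets_missing is checked before and after taking the
// per-extent lock, so two threads racing to fetch the same extent only
// read it once (the loser counts hash_extent_in_twice).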
void
BeesHashTable::fetch_missing_extent(HashType hash)
{
BEESTOOLONG("fetch_missing_extent for hash " << to_hex(hash));
if (using_shared_map()) return;
THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
auto pr = get_extent_range(hash);
uint64_t extent_number = reinterpret_cast<Extent *>(pr.first) - m_extent_ptr;
THROW_CHECK1(runtime_error, extent_number, extent_number < m_extents);
unique_lock<mutex> lock(m_extent_mutex);
if (!m_buckets_missing.count(extent_number)) {
return;
}
size_t missing_buckets = m_buckets_missing.size();
lock.unlock();
BEESNOTE("fetch waiting for hash extent #" << extent_number << ", " << missing_buckets << " left to fetch");
// Acquire blocking lock on this extent only
LockSet<uint64_t>::Lock extent_lock(m_extent_lock_set, extent_number);
// Check missing again because someone else might have fetched this
// extent for us while we didn't hold any locks
lock.lock();
if (!m_buckets_missing.count(extent_number)) {
BEESCOUNT(hash_extent_in_twice);
return;
}
lock.unlock();
// OK we have to read this extent
BEESNOTE("fetching hash extent #" << extent_number << ", " << missing_buckets << " left to fetch");
BEESTRACE("Fetching missing hash extent " << extent_number);
uint8_t *dirty_extent = m_extent_ptr[extent_number].p_byte;
uint8_t *dirty_extent_end = m_extent_ptr[extent_number + 1].p_byte;
{
BEESTOOLONG("pread(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
pread_or_die(m_fd, dirty_extent, dirty_extent_end - dirty_extent, dirty_extent - m_byte_ptr);
}
BEESCOUNT(hash_extent_in);
// We don't block when fetching an extent but we do slow down the
// prefetch thread.
m_prefetch_rate_limit.borrow(BLOCK_SIZE_HASHTAB_EXTENT);
lock.lock();
m_buckets_missing.erase(extent_number);
}
bool
BeesHashTable::is_toxic_hash(BeesHashTable::HashType hash) const
{
return m_toxic_hashes.find(hash) != m_toxic_hashes.end();
}
vector<BeesHashTable::Cell>
BeesHashTable::find_cell(HashType hash)
{
// This saves a lot of time prefilling the hash table, and there's no risk of eviction
if (is_toxic_hash(hash)) {
BEESCOUNT(hash_toxic);
BeesAddress toxic_addr(0x1000);
toxic_addr.set_toxic();
Cell toxic_cell(hash, toxic_addr);
vector<Cell> rv;
rv.push_back(toxic_cell);
return rv;
}
fetch_missing_extent(hash);
BEESTOOLONG("find_cell hash " << BeesHash(hash));
vector<Cell> rv;
unique_lock<mutex> lock(m_bucket_mutex);
auto er = get_cell_range(hash);
// FIXME: Weed out zero addresses in the table due to earlier bugs
copy_if(er.first, er.second, back_inserter(rv), [=](const Cell &ip) { return ip.e_hash == hash && ip.e_addr >= 0x1000; });
BEESCOUNT(hash_lookup);
return rv;
}
// Erase an entry from the hash table.  Used after an attempt to resolve
// an address in the hash table fails.  Shared hash tables never erase
// anything, since there is no way to tell if an entry is out of date or
// just belongs to the wrong filesystem.
void
BeesHashTable::erase_hash_addr(HashType hash, AddrType addr)
{
// if (m_shared) return;
fetch_missing_extent(hash);
BEESTOOLONG("erase hash " << to_hex(hash) << " addr " << addr);
unique_lock<mutex> lock(m_bucket_mutex);
auto er = get_cell_range(hash);
Cell mv(hash, addr);
Cell *ip = find(er.first, er.second, mv);
bool found = (ip < er.second);
if (found) {
// Lookups on invalid addresses really hurt us. Kill it with fire!
*ip = Cell(0, 0);
set_extent_dirty(hash);
BEESCOUNT(hash_erase);
#if 0
if (verify_cell_range(er.first, er.second)) {
BEESINFO("while erasing hash " << hash << " addr " << addr);
}
#endif
}
}
// If entry is already present in list, move it to the front of the
// list without dropping any entries, and return true. If entry is not
// present in list, insert it at the front of the list, possibly dropping
// the last entry in the list, and return false. Used to move duplicate
// hash blocks to the front of the list.
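// Worked example with a hypothetical 4-cell bucket:
//   [B, C, A, D]  push_front(A)  ->  [A, B, C, D]  (returns true)
//   [B, C, D, E]  push_front(A)  ->  [A, B, C, D]  (E evicted, returns false)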
bool
BeesHashTable::push_front_hash_addr(HashType hash, AddrType addr)
{
fetch_missing_extent(hash);
BEESTOOLONG("push_front_hash_addr hash " << BeesHash(hash) <<" addr " << BeesAddress(addr));
unique_lock<mutex> lock(m_bucket_mutex);
auto er = get_cell_range(hash);
Cell mv(hash, addr);
Cell *ip = find(er.first, er.second, mv);
bool found = (ip < er.second);
if (!found) {
// If no match found, get rid of an empty space instead
// If no empty spaces, ip will point to end
ip = find(er.first, er.second, Cell(0, 0));
}
if (ip > er.first) {
// Delete matching entry, first empty entry,
// or last entry whether empty or not
// move_backward(er.first, ip - 1, ip);
auto sp = ip;
auto dp = ip;
--sp;
// If we are deleting the last entry then don't copy it
if (ip == er.second) {
--sp;
--dp;
BEESCOUNT(hash_evict);
}
while (dp > er.first) {
*dp-- = *sp--;
}
}
// There is now a space at the front, insert there if different
if (er.first[0] != mv) {
er.first[0] = mv;
set_extent_dirty(hash);
BEESCOUNT(hash_front);
}
#if 0
if (verify_cell_range(er.first, er.second)) {
BEESINFO("while push_fronting hash " << hash << " addr " << addr);
}
#endif
return found;
}
// If the entry is already present in the list, return true.  The entry
// may be moved up to the randomly chosen position, but nothing is
// dropped.  If the entry is not present, return false and insert it at
// a random position in the list, possibly evicting the entry at the end
// of the list.  Used to insert new unique (not-yet-duplicate) blocks in
// random order.
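// Insertion strategy: pick a random position pos within the bucket.  If
// the entry already exists at or before pos, leave it alone; if it
// exists after pos, move it to pos, shifting the intervening entries
// back by one.  Otherwise use the first empty cell at or after pos,
// then the nearest empty cell before pos, and finally (bucket full)
// evict the last entry and insert at pos.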
bool
BeesHashTable::push_random_hash_addr(HashType hash, AddrType addr)
{
fetch_missing_extent(hash);
BEESTOOLONG("push_random_hash_addr hash " << BeesHash(hash) << " addr " << BeesAddress(addr));
unique_lock<mutex> lock(m_bucket_mutex);
auto er = get_cell_range(hash);
Cell mv(hash, addr);
Cell *ip = find(er.first, er.second, mv);
bool found = (ip < er.second);
thread_local default_random_engine generator;
thread_local uniform_int_distribution<int> distribution(0, c_cells_per_bucket - 1);
auto pos = distribution(generator);
int case_cond = 0;
vector<Cell> saved(er.first, er.second);
if (found) {
// If hash already exists after pos, swap with pos
if (ip > er.first + pos) {
// move_backward(er.first + pos, ip - 1, ip);
auto sp = ip;
auto dp = ip;
--sp;
while (dp > er.first + pos) {
*dp-- = *sp--;
}
*dp = mv;
BEESCOUNT(hash_bump);
case_cond = 1;
goto ret_dirty;
}
// Hash already exists before (or at) pos, leave it there
BEESCOUNT(hash_already);
case_cond = 2;
goto ret;
}
// Find an empty space to back of pos
for (ip = er.first + pos; ip < er.second; ++ip) {
if (*ip == Cell(0, 0)) {
*ip = mv;
case_cond = 3;
goto ret_dirty;
}
}
// Find an empty space to front of pos
// if there is anything to front of pos
if (pos > 0) {
for (ip = er.first + pos - 1; ip >= er.first; --ip) {
if (*ip == Cell(0, 0)) {
*ip = mv;
case_cond = 4;
goto ret_dirty;
}
}
}
// Evict something and insert at pos
move_backward(er.first + pos, er.second - 1, er.second);
er.first[pos] = mv;
BEESCOUNT(hash_evict);
case_cond = 5;
ret_dirty:
BEESCOUNT(hash_insert);
set_extent_dirty(hash);
ret:
#if 0
if (verify_cell_range(er.first, er.second, false)) {
BEESLOG("while push_randoming (case " << case_cond << ") pos " << pos
<< " ip " << (ip - er.first) << " " << mv);
// dump_bucket(saved.data(), saved.data() + saved.size());
// dump_bucket(er.first, er.second);
}
#else
(void)case_cond;
#endif
return found;
}
void
BeesHashTable::try_mmap_flags(int flags)
{
if (!m_cell_ptr) {
THROW_CHECK1(out_of_range, m_size, m_size > 0);
Timer map_time;
catch_all([&]() {
BEESLOG("mapping hash table size " << m_size << " with flags " << mmap_flags_ntoa(flags));
void *ptr = mmap_or_die(nullptr, m_size, PROT_READ | PROT_WRITE, flags, flags & MAP_ANONYMOUS ? -1 : int(m_fd), 0);
BEESLOG("mmap done in " << map_time << " sec");
m_cell_ptr = static_cast<Cell *>(ptr);
void *ptr_end = static_cast<uint8_t *>(ptr) + m_size;
m_cell_ptr_end = static_cast<Cell *>(ptr_end);
});
}
}
void
BeesHashTable::set_shared(bool shared)
{
m_shared = shared;
}
BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename) :
m_ctx(ctx),
m_size(0),
m_void_ptr(nullptr),
m_void_ptr_end(nullptr),
m_buckets(0),
m_cells(0),
m_writeback_thread("hash_writeback"),
m_prefetch_thread("hash_prefetch " + m_ctx->root_path()),
m_flush_rate_limit(BEES_FLUSH_RATE),
m_prefetch_rate_limit(BEES_FLUSH_RATE),
m_stats_file(m_ctx->home_fd(), "beesstats.txt")
{
BEESNOTE("opening hash table " << filename);
m_fd = openat_or_die(m_ctx->home_fd(), filename, FLAGS_OPEN_FILE_RW, 0700);
Stat st(m_fd);
m_size = st.st_size;
BEESTRACE("hash table size " << m_size);
BEESTRACE("hash table bucket size " << BLOCK_SIZE_HASHTAB_BUCKET);
BEESTRACE("hash table extent size " << BLOCK_SIZE_HASHTAB_EXTENT);
THROW_CHECK2(invalid_argument, BLOCK_SIZE_HASHTAB_BUCKET, BLOCK_SIZE_HASHTAB_EXTENT, (BLOCK_SIZE_HASHTAB_EXTENT % BLOCK_SIZE_HASHTAB_BUCKET) == 0);
// Does the union work?
THROW_CHECK2(runtime_error, m_void_ptr, m_cell_ptr, m_void_ptr == m_cell_ptr);
THROW_CHECK2(runtime_error, m_void_ptr, m_byte_ptr, m_void_ptr == m_byte_ptr);
THROW_CHECK2(runtime_error, m_void_ptr, m_bucket_ptr, m_void_ptr == m_bucket_ptr);
THROW_CHECK2(runtime_error, m_void_ptr, m_extent_ptr, m_void_ptr == m_extent_ptr);
// There's more than one union
THROW_CHECK2(runtime_error, sizeof(Bucket), BLOCK_SIZE_HASHTAB_BUCKET, BLOCK_SIZE_HASHTAB_BUCKET == sizeof(Bucket));
THROW_CHECK2(runtime_error, sizeof(Bucket::p_byte), BLOCK_SIZE_HASHTAB_BUCKET, BLOCK_SIZE_HASHTAB_BUCKET == sizeof(Bucket::p_byte));
THROW_CHECK2(runtime_error, sizeof(Extent), BLOCK_SIZE_HASHTAB_EXTENT, BLOCK_SIZE_HASHTAB_EXTENT == sizeof(Extent));
THROW_CHECK2(runtime_error, sizeof(Extent::p_byte), BLOCK_SIZE_HASHTAB_EXTENT, BLOCK_SIZE_HASHTAB_EXTENT == sizeof(Extent::p_byte));
BEESLOG("opened hash table filename '" << filename << "' length " << m_size);
m_buckets = m_size / BLOCK_SIZE_HASHTAB_BUCKET;
m_cells = m_buckets * c_cells_per_bucket;
m_extents = (m_size + BLOCK_SIZE_HASHTAB_EXTENT - 1) / BLOCK_SIZE_HASHTAB_EXTENT;
BEESLOG("\tcells " << m_cells << ", buckets " << m_buckets << ", extents " << m_extents);
BEESLOG("\tflush rate limit " << BEES_FLUSH_RATE);
if (using_shared_map()) {
try_mmap_flags(MAP_SHARED);
} else {
try_mmap_flags(MAP_PRIVATE | MAP_ANONYMOUS);
}
if (!m_cell_ptr) {
THROW_ERROR(runtime_error, "unable to mmap " << filename);
}
if (!using_shared_map()) {
// madvise fails if MAP_SHARED
if (using_any_madvise()) {
// DONTFORK because we sometimes do fork,
// but the child doesn't touch any of the many, many pages
BEESTOOLONG("madvise(MADV_HUGEPAGE | MADV_DONTFORK)");
DIE_IF_NON_ZERO(madvise(m_byte_ptr, m_size, MADV_HUGEPAGE | MADV_DONTFORK));
}
for (uint64_t i = 0; i < m_size / sizeof(Extent); ++i) {
m_buckets_missing.insert(i);
}
}
m_writeback_thread.exec([&]() {
writeback_loop();
});
m_prefetch_thread.exec([&]() {
prefetch_loop();
});
// Blacklist might fail if the hash table is not stored on a btrfs
catch_all([&]() {
m_ctx->blacklist_add(BeesFileId(m_fd));
});
// Skip zero because we already weed that out before it gets near a hash function
for (unsigned i = 1; i < 256; ++i) {
vector<uint8_t> v(BLOCK_SIZE_SUMS, i);
HashType hash = Digest::CRC::crc64(v.data(), v.size());
m_toxic_hashes.insert(hash);
}
}
BeesHashTable::~BeesHashTable()
{
if (m_cell_ptr && m_size) {
flush_dirty_extents();
catch_all([&]() {
DIE_IF_NON_ZERO(munmap(m_cell_ptr, m_size));
m_cell_ptr = nullptr;
m_size = 0;
});
}
}

src/bees-resolve.cc (new file, 487 lines)

@@ -0,0 +1,487 @@
#include "bees.h"
#include "crucible/limits.h"
#include "crucible/string.h"
using namespace crucible;
using namespace std;
BeesAddress
BeesResolver::addr(BeesAddress new_addr)
{
THROW_CHECK1(invalid_argument, new_addr, !new_addr.is_magic());
m_found_data = false;
m_found_dup = false;
m_found_hash = false;
m_wrong_data = false;
m_biors.clear();
m_ranges.clear();
m_addr = new_addr;
m_bior_count = 0;
auto rv = m_ctx->resolve_addr(m_addr);
m_biors = rv.m_biors;
m_is_toxic = rv.m_is_toxic;
m_bior_count = m_biors.size();
return m_addr;
}
BeesResolver::BeesResolver(shared_ptr<BeesContext> ctx, BeesAddress new_addr) :
m_ctx(ctx),
m_bior_count(0)
{
addr(new_addr);
}
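// Given a haystack file range (from LOGICAL_INO) and a needle block,
// locate the offset within the haystack that actually contains the
// needle's data.  This is needed because compressed extents without a
// saved offset, and legacy addresses, only identify the containing
// extent rather than the exact block, and unaligned-EOF needles can
// only match at the end of the file.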
BeesBlockData
BeesResolver::adjust_offset(const BeesFileRange &haystack, const BeesBlockData &needle)
{
BEESTRACE("Searching for needle " << needle << "\n\tin haystack " << haystack);
BEESCOUNT(adjust_try);
// Constraint checks
THROW_CHECK1(invalid_argument, needle.begin(), (needle.begin() & BLOCK_MASK_CLONE) == 0);
THROW_CHECK1(invalid_argument, haystack.begin(), (haystack.begin() & BLOCK_MASK_CLONE) == 0);
// Need to know the precise dimensions of the haystack and needle
off_t haystack_size = haystack.file_size();
// If the needle is not a full block then it can only match at EOF
off_t needle_len = needle.size();
bool is_unaligned_eof = needle_len & BLOCK_MASK_CLONE;
BEESTRACE("is_unaligned_eof = " << is_unaligned_eof << ", needle_len = " << to_hex(needle_len) << ", haystack_size = " << to_hex(haystack_size));
// Unaligned EOF can only match at EOF, so only check there
if (is_unaligned_eof) {
BEESTRACE("Construct needle_bfr from " << needle);
BeesFileRange needle_bfr(needle);
// Census
if (haystack_size & BLOCK_MASK_CLONE) {
BEESCOUNT(adjust_eof_haystack);
}
if (needle_bfr.end() & BLOCK_MASK_CLONE) {
BEESCOUNT(adjust_eof_needle);
}
// Non-aligned part of the lengths must be the same
if ( (haystack_size & BLOCK_MASK_CLONE) != (needle_bfr.end() & BLOCK_MASK_CLONE) ) {
BEESCOUNT(adjust_eof_fail);
return BeesBlockData();
}
// Read the haystack block
BEESTRACE("Reading haystack (haystack_size = " << to_hex(haystack_size) << ")");
BeesBlockData straw(haystack.fd(), haystack_size & ~BLOCK_MASK_CLONE, haystack_size & BLOCK_MASK_CLONE);
// It either matches or it doesn't
BEESTRACE("Verifying haystack " << straw);
if (straw.is_data_equal(needle)) {
BEESCOUNT(adjust_eof_hit);
m_found_data = true;
m_found_hash = true;
return straw;
}
// Check for matching hash
BEESTRACE("Verifying haystack hash");
if (straw.hash() == needle.hash()) {
// OK at least the hash is still valid
m_found_hash = true;
}
BEESCOUNT(adjust_eof_miss);
// BEESLOG("adjust_eof_miss " << straw);
return BeesBlockData();
}
off_t lower_offset = haystack.begin();
off_t upper_offset = haystack.end();
bool is_compressed_offset = false;
bool is_exact = false;
bool is_legacy = false;
if (m_addr.is_compressed()) {
BtrfsExtentWalker ew(haystack.fd(), haystack.begin(), m_ctx->root_fd());
BEESTRACE("haystack extent data " << ew);
Extent e = ew.current();
if (m_addr.has_compressed_offset()) {
off_t coff = m_addr.get_compressed_offset();
if (e.offset() > coff) {
// this extent begins after the target block
BEESCOUNT(adjust_offset_low);
return BeesBlockData();
}
coff -= e.offset();
if (e.size() <= coff) {
// this extent ends before the target block
BEESCOUNT(adjust_offset_high);
return BeesBlockData();
}
lower_offset = e.begin() + coff;
upper_offset = lower_offset + BLOCK_SIZE_CLONE;
BEESCOUNT(adjust_offset_hit);
is_compressed_offset = true;
} else {
lower_offset = e.begin();
upper_offset = e.end();
BEESCOUNT(adjust_legacy);
is_legacy = true;
}
} else {
BEESCOUNT(adjust_exact);
is_exact = true;
}
BEESTRACE("Checking haystack " << haystack << " offsets " << to_hex(lower_offset) << ".." << to_hex(upper_offset));
// Check all the blocks in the list
for (off_t haystack_offset = lower_offset; haystack_offset < upper_offset; haystack_offset += BLOCK_SIZE_CLONE) {
THROW_CHECK1(out_of_range, haystack_offset, (haystack_offset & BLOCK_MASK_CLONE) == 0);
// Straw cannot extend beyond end of haystack
if (haystack_offset + needle.size() > haystack_size) {
BEESCOUNT(adjust_needle_too_long);
break;
}
// Read the haystack
BEESTRACE("straw " << name_fd(haystack.fd()) << ", offset " << to_hex(haystack_offset) << ", length " << needle.size());
BeesBlockData straw(haystack.fd(), haystack_offset, needle.size());
BEESTRACE("straw = " << straw);
// Stop if we find a match
if (straw.is_data_equal(needle)) {
BEESCOUNT(adjust_hit);
m_found_data = true;
m_found_hash = true;
if (is_compressed_offset) BEESCOUNT(adjust_compressed_offset_correct);
if (is_legacy) BEESCOUNT(adjust_legacy_correct);
if (is_exact) BEESCOUNT(adjust_exact_correct);
return straw;
}
if (straw.hash() != needle.hash()) {
// Not the same hash or data, try next block
BEESCOUNT(adjust_miss);
continue;
}
// Found the hash but not the data. Yay!
m_found_hash = true;
BEESLOG("HASH COLLISION\n"
<< "\tneedle " << needle << "\n"
<< "\tstraw " << straw);
BEESCOUNT(hash_collision);
}
// Ran out of offsets to try
BEESCOUNT(adjust_no_match);
if (is_compressed_offset) BEESCOUNT(adjust_compressed_offset_wrong);
if (is_legacy) BEESCOUNT(adjust_legacy_wrong);
if (is_exact) BEESCOUNT(adjust_exact_wrong);
m_wrong_data = true;
return BeesBlockData();
}
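// Follow one (root, inode, offset) reference returned by LOGICAL_INO:
// open the file, check that the block at that offset still resolves to
// the physical address we started from, then use adjust_offset() to
// confirm (and if necessary correct) the matching block.  Returns an
// empty range if the reference is stale or unreadable.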
BeesFileRange
BeesResolver::chase_extent_ref(const BtrfsInodeOffsetRoot &bior, BeesBlockData &needle_bbd)
{
BEESTRACE("chase_extent_ref bior " << bior << " needle_bbd " << needle_bbd);
BEESNOTE("chase_extent_ref bior " << bior << " needle_bbd " << needle_bbd);
BEESCOUNT(chase_try);
Fd file_fd = m_ctx->roots()->open_root_ino(bior.m_root, bior.m_inum);
if (!file_fd) {
// Deleting snapshots generates craptons of these
// BEESINFO("No FD in chase_extent_ref " << bior);
BEESCOUNT(chase_no_fd);
return BeesFileRange();
}
BEESNOTE("searching at offset " << to_hex(bior.m_offset) << " in file " << name_fd(file_fd) << "\n\tfor " << needle_bbd);
BEESTRACE("bior file " << name_fd(file_fd));
BEESTRACE("get file_addr " << bior);
BeesAddress file_addr(file_fd, bior.m_offset, m_ctx);
BEESTRACE("file_addr " << file_addr);
// ...or are we?
if (file_addr.is_magic()) {
BEESINFO("file_addr is magic: file_addr = " << file_addr << " bior = " << bior << " needle_bbd = " << needle_bbd);
BEESCOUNT(chase_wrong_magic);
return BeesFileRange();
}
THROW_CHECK1(invalid_argument, m_addr, !m_addr.is_magic());
// Did we get the physical block we asked for? The magic bits have to match too,
// but the compressed offset bits do not.
if (file_addr.get_physical_or_zero() != m_addr.get_physical_or_zero()) {
// BEESINFO("found addr " << file_addr << " at " << name_fd(file_fd) << " offset " << to_hex(bior.m_offset) << " but looking for " << m_addr);
// FIEMAP/resolve are working, but the data is old.
BEESCOUNT(chase_wrong_addr);
return BeesFileRange();
}
// Calculate end of range, which is a sum block or less
// It's a sum block because we have to compare content now
off_t file_size = Stat(file_fd).st_size;
off_t bior_offset = ranged_cast<off_t>(bior.m_offset);
off_t end_offset = min(file_size, bior_offset + needle_bbd.size());
BeesBlockData haystack_bbd(file_fd, bior_offset, end_offset - bior_offset);
BEESTRACE("matched haystack_bbd " << haystack_bbd << " file_addr " << file_addr);
// If the data was compressed and no offset was captured then
// we won't get an exact address from resolve.
// Search near the resolved address for a matching data block.
// ...even if it's not compressed, we should do this sanity
// check before considering the block as a duplicate candidate.
auto new_bbd = adjust_offset(haystack_bbd, needle_bbd);
if (new_bbd.empty()) {
// matching offset search failed
BEESCOUNT(chase_wrong_data);
return BeesFileRange();
}
if (new_bbd.begin() == haystack_bbd.begin()) {
BEESCOUNT(chase_uncorrected);
} else {
// corrected the bfr
BEESCOUNT(chase_corrected);
haystack_bbd = new_bbd;
}
// We have found at least one duplicate block, so resolve was a success
BEESCOUNT(chase_hit);
// Matching block
BEESTRACE("Constructing dst_bfr { " << BeesFileId(haystack_bbd.fd()) << ", " << to_hex(haystack_bbd.begin()) << ".." << to_hex(haystack_bbd.end()) << " }");
BeesFileRange dst_bfr(BeesFileId(haystack_bbd.fd()), haystack_bbd.begin(), haystack_bbd.end());
return dst_bfr;
}
void
BeesResolver::replace_src(const BeesFileRange &src_bfr)
{
BEESTRACE("replace_src src_bfr " << src_bfr);
THROW_CHECK0(runtime_error, !m_is_toxic);
BEESCOUNT(replacesrc_try);
// Open src, reuse it for all dst
auto i_bfr = src_bfr;
BEESNOTE("Opening src bfr " << i_bfr);
BEESTRACE("Opening src bfr " << i_bfr);
i_bfr.fd(m_ctx);
BeesBlockData bbd(i_bfr);
for_each_extent_ref(bbd, [&](const BeesFileRange &j) -> bool {
// Open dst
auto j_bfr = j;
BEESNOTE("Opening dst bfr " << j_bfr);
BEESTRACE("Opening dst bfr " << j_bfr);
j_bfr.fd(m_ctx);
if (i_bfr.overlaps(j_bfr)) {
BEESCOUNT(replacesrc_overlaps);
return false; // i.e. continue
}
// Make pair(src, dst)
BEESTRACE("creating brp (" << i_bfr << ", " << j_bfr << ")");
BeesRangePair brp(i_bfr, j_bfr);
BEESTRACE("Found matching range: " << brp);
// Extend range at beginning
BEESNOTE("Extending matching range: " << brp);
// No particular reason to be constrained?
if (brp.grow(m_ctx, true)) {
BEESCOUNT(replacesrc_grown);
}
// Dedup
BEESNOTE("dedup " << brp);
if (m_ctx->dedup(brp)) {
BEESCOUNT(replacesrc_dedup_hit);
m_found_dup = true;
} else {
BEESCOUNT(replacesrc_dedup_miss);
}
return false; // i.e. continue
});
}
void
BeesResolver::find_matches(bool just_one, BeesBlockData &bbd)
{
// Walk through the (ino, offset, root) tuples until we find a match.
BEESTRACE("finding all matches for " << bbd << " at " << m_addr << ": " << m_biors.size() << " found");
THROW_CHECK0(runtime_error, !m_is_toxic);
bool stop_now = false;
for (auto ino_off_root : m_biors) {
if (m_wrong_data) {
return;
}
BEESTRACE("ino_off_root " << ino_off_root);
BeesFileId this_fid(ino_off_root.m_root, ino_off_root.m_inum);
// Silently ignore blacklisted files, e.g. BeesTempFile files
if (m_ctx->is_blacklisted(this_fid)) {
continue;
}
// Look at the old data
catch_all([&]() {
BEESTRACE("chase_extent_ref ino " << ino_off_root << " bbd " << bbd);
auto new_range = chase_extent_ref(ino_off_root, bbd);
if (new_range) {
m_ranges.insert(new_range.copy_closed());
stop_now = true;
}
});
if (just_one && stop_now) {
break;
}
}
}
bool
BeesResolver::for_each_extent_ref(BeesBlockData bbd, function<bool(const BeesFileRange &bfr)> visitor)
{
// Walk through the (ino, offset, root) tuples until we are told to stop
BEESTRACE("for_each_extent_ref " << bbd << " at " << m_addr << ": " << m_biors.size() << " found");
THROW_CHECK0(runtime_error, !m_is_toxic);
bool stop_now = false;
for (auto ino_off_root : m_biors) {
BEESTRACE("ino_off_root " << ino_off_root);
BeesFileId this_fid(ino_off_root.m_root, ino_off_root.m_inum);
// Silently ignore blacklisted files, e.g. BeesTempFile files
if (m_ctx->is_blacklisted(this_fid)) {
continue;
}
// Look at the old data
catch_all([&]() {
BEESTRACE("chase_extent_ref ino " << ino_off_root << " bbd " << bbd);
auto new_range = chase_extent_ref(ino_off_root, bbd);
// XXX: should we catch visitor's exceptions here?
if (new_range) {
stop_now = visitor(new_range);
} else {
// We have reliable block addresses now, so we guarantee we can hit the desired block.
// Failure in chase_extent_ref means we are done, and don't need to look up all the
// other references.
stop_now = true;
}
});
if (stop_now) {
break;
}
}
return stop_now;
}
BeesFileRange
BeesResolver::replace_dst(const BeesFileRange &dst_bfr)
{
BEESTRACE("replace_dst dst_bfr " << dst_bfr);
BEESCOUNT(replacedst_try);
// Open dst, reuse it for all src
BEESNOTE("Opening dst bfr " << dst_bfr);
BEESTRACE("Opening dst bfr " << dst_bfr);
dst_bfr.fd(m_ctx);
BeesFileRange overlap_bfr;
BEESTRACE("overlap_bfr " << overlap_bfr);
BeesBlockData bbd(dst_bfr);
for_each_extent_ref(bbd, [&](const BeesFileRange &src_bfr) -> bool {
// Open src
BEESNOTE("Opening src bfr " << src_bfr);
BEESTRACE("Opening src bfr " << src_bfr);
src_bfr.fd(m_ctx);
if (dst_bfr.overlaps(src_bfr)) {
BEESCOUNT(replacedst_overlaps);
return false; // i.e. continue
}
// If dst is already occupying src, skip.
// FIXME: BeesContext::scan_one_extent should be weeding these out, but does not.
BeesBlockData src_bbd(src_bfr.fd(), src_bfr.begin(), min(BLOCK_SIZE_SUMS, src_bfr.size()));
if (bbd.addr().get_physical_or_zero() == src_bbd.addr().get_physical_or_zero()) {
BEESCOUNT(replacedst_same);
return false; // i.e. continue
}
// Make pair(src, dst)
BEESTRACE("creating brp (" << src_bfr << ", " << dst_bfr << ")");
BeesRangePair brp(src_bfr, dst_bfr);
BEESTRACE("Found matching range: " << brp);
// Extend range at beginning
BEESNOTE("Extending matching range: " << brp);
// 'false' Has nasty loops, and may not be faster.
// 'true' At best, keeps fragmentation constant...but can also make it worse
if (brp.grow(m_ctx, true)) {
BEESCOUNT(replacedst_grown);
}
// Dedup
BEESNOTE("dedup " << brp);
if (m_ctx->dedup(brp)) {
BEESCOUNT(replacedst_dedup_hit);
m_found_dup = true;
overlap_bfr = brp.second;
// FIXME: find best range first, then dedup that
return true; // i.e. break
} else {
BEESCOUNT(replacedst_dedup_miss);
return false; // i.e. continue
}
});
// BEESLOG("overlap_bfr after " << overlap_bfr);
return overlap_bfr.copy_closed();
}
BeesFileRange
BeesResolver::find_one_match(BeesBlockData &bbd)
{
THROW_CHECK0(runtime_error, !m_is_toxic);
find_matches(true, bbd);
if (m_ranges.empty()) {
return BeesFileRange();
} else {
return *m_ranges.begin();
}
}
set<BeesFileRange>
BeesResolver::find_all_matches(BeesBlockData &bbd)
{
THROW_CHECK0(runtime_error, !m_is_toxic);
find_matches(false, bbd);
return m_ranges;
}
bool
BeesResolver::operator<(const BeesResolver &that) const
{
if (that.m_bior_count < m_bior_count) {
return true;
} else if (m_bior_count < that.m_bior_count) {
return false;
}
return m_addr < that.m_addr;
}

src/bees-roots.cc (new file, 823 lines)

@@ -0,0 +1,823 @@
#include "bees.h"
#include "crucible/cache.h"
#include "crucible/string.h"
#include <fstream>
#include <tuple>
using namespace crucible;
using namespace std;
string
format_time(time_t t)
{
struct tm *tmp = localtime(&t);
char buf[1024];
strftime(buf, sizeof(buf), "%Y-%m-%d-%H-%M-%S", tmp);
return buf;
}
ostream &
operator<<(ostream &os, const BeesCrawlState &bcs)
{
time_t now = time(NULL);
auto age = now - bcs.m_started;
return os << "BeesCrawlState "
<< bcs.m_root << ":" << bcs.m_objectid << " offset " << to_hex(bcs.m_offset)
<< " transid " << bcs.m_min_transid << ".." << bcs.m_max_transid
<< " started " << format_time(bcs.m_started) << " (" << age << "s ago)";
}
BeesCrawlState::BeesCrawlState() :
m_root(0),
m_objectid(0),
m_offset(0),
m_min_transid(0),
m_max_transid(0),
m_started(time(NULL))
{
}
bool
BeesCrawlState::operator<(const BeesCrawlState &that) const
{
return tie(m_root, m_objectid, m_offset, m_min_transid, m_max_transid)
< tie(that.m_root, that.m_objectid, that.m_offset, that.m_min_transid, that.m_max_transid);
}
string
BeesRoots::crawl_state_filename() const
{
string rv;
rv += "beescrawl.";
rv += m_ctx->root_uuid();
rv += ".dat";
return rv;
}
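// Crawl state is persisted as one key/value text line per subvol, e.g.
// (hypothetical values):
//   root 257 objectid 261 offset 0 min_transid 1234 max_transid 5678 started 1479272400 start_ts 2016-11-16-00-00-00
// state_load() also accepts the older gen_current/gen_next keys as
// aliases for min_transid/max_transid.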
void
BeesRoots::state_save()
{
// Make sure we have a full complement of crawlers
insert_new_crawl();
BEESNOTE("saving crawl state");
BEESLOG("Saving crawl state");
BEESTOOLONG("Saving crawl state");
Timer save_time;
unique_lock<mutex> lock(m_mutex);
// We don't have ofstreamat or ofdstream in C++11, so we're building a string and writing it with raw syscalls.
ostringstream ofs;
if (!m_crawl_dirty) {
BEESLOG("Nothing to save");
return;
}
for (auto i : m_root_crawl_map) {
auto ibcs = i.second->get_state();
if (ibcs.m_max_transid) {
ofs << "root " << ibcs.m_root << " ";
ofs << "objectid " << ibcs.m_objectid << " ";
ofs << "offset " << ibcs.m_offset << " ";
ofs << "min_transid " << ibcs.m_min_transid << " ";
ofs << "max_transid " << ibcs.m_max_transid << " ";
ofs << "started " << ibcs.m_started << " ";
ofs << "start_ts " << format_time(ibcs.m_started) << "\n";
}
}
if (ofs.str().empty()) {
BEESLOG("Crawl state empty!");
m_crawl_dirty = false;
return;
}
lock.unlock();
m_crawl_state_file.write(ofs.str());
BEESNOTE("relocking crawl state");
lock.lock();
// Not really correct but probably close enough
m_crawl_dirty = false;
BEESLOG("Saved crawl state in " << save_time << "s");
}
BeesCrawlState
BeesRoots::crawl_state_get(uint64_t rootid)
{
unique_lock<mutex> lock(m_mutex);
auto rv = m_root_crawl_map.at(rootid)->get_state();
THROW_CHECK2(runtime_error, rv.m_root, rootid, rv.m_root == rootid);
return rv;
}
void
BeesRoots::crawl_state_set_dirty()
{
unique_lock<mutex> lock(m_mutex);
m_crawl_dirty = true;
}
void
BeesRoots::crawl_state_erase(const BeesCrawlState &bcs)
{
unique_lock<mutex> lock(m_mutex);
// Do not delete the last entry, it holds our max_transid
if (m_root_crawl_map.size() < 2) {
BEESCOUNT(crawl_no_empty);
return;
}
if (m_root_crawl_map.count(bcs.m_root)) {
m_root_crawl_map.erase(bcs.m_root);
m_crawl_dirty = true;
}
}
uint64_t
BeesRoots::transid_min()
{
BEESNOTE("Calculating transid_min");
unique_lock<mutex> lock(m_mutex);
if (m_root_crawl_map.empty()) {
return 0;
}
uint64_t rv = numeric_limits<uint64_t>::max();
for (auto i : m_root_crawl_map) {
rv = min(rv, i.second->get_state().m_min_transid);
}
return rv;
}
uint64_t
BeesRoots::transid_max()
{
BEESNOTE("Calculating transid_max");
uint64_t rv = 0;
uint64_t root = 0;
BEESTRACE("Calculating transid_max...");
do {
root = next_root(root);
if (root) {
catch_all([&]() {
auto transid = btrfs_get_root_transid(open_root(root));
rv = max(rv, transid);
// BEESLOG("\troot " << root << " transid " << transid << " max " << rv);
});
}
} while (root);
return rv;
}
void
BeesRoots::crawl_roots()
{
BEESNOTE("Crawling roots");
unique_lock<mutex> lock(m_mutex);
if (m_root_crawl_map.empty()) {
BEESNOTE("idle, crawl map is empty");
m_condvar.wait(lock);
// Don't count the time we were waiting as part of the crawl time
m_crawl_timer.reset();
}
// Work from a copy because BeesCrawl might change the world under us
auto crawl_map_copy = m_root_crawl_map;
lock.unlock();
BeesFileRange first_range;
shared_ptr<BeesCrawl> first_crawl;
for (auto i : crawl_map_copy) {
auto this_crawl = i.second;
auto this_range = this_crawl->peek_front();
if (this_range) {
auto tuple_this = make_tuple(this_range.fid().ino(), this_range.fid().root(), this_range.begin());
auto tuple_first = make_tuple(first_range.fid().ino(), first_range.fid().root(), first_range.begin());
if (!first_range || tuple_this < tuple_first) {
first_crawl = this_crawl;
first_range = this_range;
}
}
}
if (first_range) {
catch_all([&]() {
// BEESINFO("scan_forward " << first_range);
m_ctx->scan_forward(first_range);
});
BEESCOUNT(crawl_scan);
m_crawl_current = first_crawl->get_state();
auto first_range_popped = first_crawl->pop_front();
THROW_CHECK2(runtime_error, first_range, first_range_popped, first_range == first_range_popped);
return;
}
BEESLOG("Crawl ran out of data after " << m_crawl_timer.lap() << "s, waiting for more...");
BEESCOUNT(crawl_done);
BEESNOTE("idle, waiting for more data");
lock.lock();
m_condvar.wait(lock);
// Don't count the time we were waiting as part of the crawl time
m_crawl_timer.reset();
}
void
BeesRoots::crawl_thread()
{
BEESNOTE("crawling");
while (1) {
catch_all([&]() {
crawl_roots();
});
}
}
void
BeesRoots::writeback_thread()
{
while (1) {
BEESNOTE(m_crawl_current << (m_crawl_dirty ? " (dirty)" : ""));
catch_all([&]() {
BEESNOTE("saving crawler state");
state_save();
});
nanosleep(BEES_WRITEBACK_INTERVAL);
}
}
void
BeesRoots::insert_root(const BeesCrawlState &new_bcs)
{
unique_lock<mutex> lock(m_mutex);
if (!m_root_crawl_map.count(new_bcs.m_root)) {
auto new_bcp = make_shared<BeesCrawl>(m_ctx, new_bcs);
auto new_pair = make_pair(new_bcs.m_root, new_bcp);
m_root_crawl_map.insert(new_pair);
m_crawl_dirty = true;
}
}
void
BeesRoots::insert_new_crawl()
{
BEESNOTE("adding crawlers for new subvols and removing crawlers for removed subvols");
BeesCrawlState new_bcs;
// Avoid a wasted loop iteration by starting from root 5
new_bcs.m_root = BTRFS_FS_TREE_OBJECTID;
new_bcs.m_min_transid = transid_min();
new_bcs.m_max_transid = transid_max();
unique_lock<mutex> lock(m_mutex);
set<uint64_t> excess_roots;
for (auto i : m_root_crawl_map) {
excess_roots.insert(i.first);
}
lock.unlock();
while (new_bcs.m_root) {
excess_roots.erase(new_bcs.m_root);
insert_root(new_bcs);
BEESCOUNT(crawl_create);
new_bcs.m_root = next_root(new_bcs.m_root);
}
for (auto i : excess_roots) {
new_bcs.m_root = i;
crawl_state_erase(new_bcs);
}
// Wake up crawl_roots if sleeping
lock.lock();
m_condvar.notify_all();
}
void
BeesRoots::state_load()
{
BEESNOTE("loading crawl state");
BEESLOG("loading crawl state");
string crawl_data = m_crawl_state_file.read();
for (auto line : split("\n", crawl_data)) {
BEESLOG("Read line: " << line);
map<string, uint64_t> d;
auto words = split(" ", line);
for (auto it = words.begin(); it < words.end(); ++it) {
auto it1 = it;
++it;
THROW_CHECK1(out_of_range, words.size(), it < words.end());
string key = *it1;
uint64_t val = from_hex(*it);
BEESTRACE("key " << key << " val " << val);
auto result = d.insert(make_pair(key, val));
THROW_CHECK0(runtime_error, result.second);
}
BeesCrawlState loaded_state;
loaded_state.m_root = d.at("root");
loaded_state.m_objectid = d.at("objectid");
loaded_state.m_offset = d.at("offset");
loaded_state.m_min_transid = d.count("gen_current") ? d.at("gen_current") : d.at("min_transid");
loaded_state.m_max_transid = d.count("gen_next") ? d.at("gen_next") : d.at("max_transid");
if (d.count("started")) {
loaded_state.m_started = d.at("started");
}
BEESLOG("loaded_state " << loaded_state);
insert_root(loaded_state);
}
}
BeesRoots::BeesRoots(shared_ptr<BeesContext> ctx) :
m_ctx(ctx),
m_crawl_state_file(ctx->home_fd(), crawl_state_filename()),
m_crawl_thread("crawl " + ctx->root_path()),
m_writeback_thread("crawl_writeback " + ctx->root_path())
{
m_crawl_thread.exec([&]() {
catch_all([&]() {
state_load();
});
m_writeback_thread.exec([&]() {
writeback_thread();
});
crawl_thread();
});
}
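// Resolve a subvol id to an open directory by walking ROOT_BACKREF
// items up to the top-level subvol, opening each path component with
// openat(), and verifying the resulting root id and inode.  open_root()
// is the cached front end.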
Fd
BeesRoots::open_root_nocache(uint64_t rootid)
{
BEESTRACE("open_root_nocache " << rootid);
BEESNOTE("open_root_nocache " << rootid);
// Stop recursion at the root of the filesystem tree
if (rootid == BTRFS_FS_TREE_OBJECTID) {
return m_ctx->root_fd();
}
// Find backrefs for this rootid and follow up to root
BtrfsIoctlSearchKey sk;
sk.tree_id = BTRFS_ROOT_TREE_OBJECTID;
sk.min_objectid = sk.max_objectid = rootid;
sk.min_type = sk.max_type = BTRFS_ROOT_BACKREF_KEY;
BEESTRACE("sk " << sk);
while (sk.min_objectid <= rootid) {
sk.nr_items = 1024;
sk.do_ioctl(m_ctx->root_fd());
if (sk.m_result.empty()) {
break;
}
for (auto i : sk.m_result) {
sk.next_min(i);
if (i.type == BTRFS_ROOT_BACKREF_KEY && i.objectid == rootid) {
auto dirid = call_btrfs_get(btrfs_stack_root_ref_dirid, i.m_data);
auto name_len = call_btrfs_get(btrfs_stack_root_ref_name_len, i.m_data);
auto name_start = sizeof(struct btrfs_root_ref);
auto name_end = name_len + name_start;
THROW_CHECK2(runtime_error, i.m_data.size(), name_end, i.m_data.size() >= name_end);
string name(i.m_data.data() + name_start, i.m_data.data() + name_end);
auto parent_rootid = i.offset;
// BEESLOG("parent_rootid " << parent_rootid << " dirid " << dirid << " name " << name);
BEESTRACE("parent_rootid " << parent_rootid << " dirid " << dirid << " name " << name);
Fd parent_fd = open_root(parent_rootid);
if (!parent_fd) {
BEESLOGTRACE("no parent_fd");
continue;
}
if (dirid != BTRFS_FIRST_FREE_OBJECTID) {
BEESTRACE("dirid " << dirid << " root " << rootid << " INO_PATH");
BtrfsIoctlInoPathArgs ino(dirid);
if (!ino.do_ioctl_nothrow(parent_fd)) {
BEESINFO("dirid " << dirid << " inode path lookup failed in parent_fd " << name_fd(parent_fd));
continue;
}
if (ino.m_paths.empty()) {
BEESINFO("dirid " << dirid << " inode has no paths in parent_fd " << name_fd(parent_fd));
continue;
}
BEESTRACE("dirid " << dirid << " path " << ino.m_paths.at(0));
parent_fd = openat(parent_fd, ino.m_paths.at(0).c_str(), FLAGS_OPEN_DIR);
if (!parent_fd) {
BEESLOGTRACE("no parent_fd from dirid");
continue;
}
}
// BEESLOG("openat(" << name_fd(parent_fd) << ", " << name << ")");
BEESTRACE("openat(" << name_fd(parent_fd) << ", " << name << ")");
Fd rv = openat(parent_fd, name.c_str(), FLAGS_OPEN_DIR);
if (!rv) {
BEESLOGTRACE("open failed for name " << name);
continue;
}
BEESCOUNT(root_found);
// Verify correct root ID
auto new_root_id = btrfs_get_root_id(rv);
THROW_CHECK2(runtime_error, new_root_id, rootid, new_root_id == rootid);
Stat st(rv);
THROW_CHECK1(runtime_error, st.st_ino, st.st_ino == BTRFS_FIRST_FREE_OBJECTID);
BEESINFO("open_root_nocache " << rootid << ": " << name_fd(rv));
return rv;
}
}
}
BEESINFO("No path for rootid " << rootid);
BEESCOUNT(root_notfound);
return Fd();
}
Fd
BeesRoots::open_root(uint64_t rootid)
{
// Ignore some of the crap that comes out of LOGICAL_INO
if (rootid == BTRFS_ROOT_TREE_OBJECTID) {
return Fd();
}
return m_ctx->fd_cache()->open_root(m_ctx, rootid);
}
uint64_t
BeesRoots::next_root(uint64_t root)
{
BEESNOTE("Next root from " << root);
BEESTRACE("Next root from " << root);
// BTRFS_FS_TREE_OBJECTID has no backref keys so we can't find it that way
if (root < BTRFS_FS_TREE_OBJECTID) {
// BEESLOG("First root is BTRFS_FS_TREE_OBJECTID = " << BTRFS_FS_TREE_OBJECTID);
return BTRFS_FS_TREE_OBJECTID;
}
BtrfsIoctlSearchKey sk;
sk.tree_id = BTRFS_ROOT_TREE_OBJECTID;
sk.min_type = sk.max_type = BTRFS_ROOT_BACKREF_KEY;
sk.min_objectid = root + 1;
while (true) {
sk.nr_items = 1024;
sk.do_ioctl(m_ctx->root_fd());
if (sk.m_result.empty()) {
return 0;
}
for (auto i : sk.m_result) {
sk.next_min(i);
if (i.type == BTRFS_ROOT_BACKREF_KEY) {
// BEESLOG("Found root " << i.objectid << " parent " << i.offset);
return i.objectid;
}
}
}
}
Fd
BeesRoots::open_root_ino_nocache(uint64_t root, uint64_t ino)
{
BEESTRACE("opening root " << root << " ino " << ino);
Fd root_fd = open_root(root);
if (!root_fd) {
return root_fd;
}
BEESTOOLONG("open_root_ino(root " << root << ", ino " << ino << ")");
BEESTRACE("looking up ino " << ino);
BtrfsIoctlInoPathArgs ipa(ino);
if (!ipa.do_ioctl_nothrow(root_fd)) {
BEESINFO("Lookup root " << root << " ino " << ino << " failed: " << strerror(errno));
return Fd();
}
BEESTRACE("searching paths for root " << root << " ino " << ino);
Fd rv;
if (ipa.m_paths.empty()) {
BEESLOG("No paths for root " << root << " ino " << ino);
}
for (auto file_path : ipa.m_paths) {
BEESTRACE("Looking up root " << root << " ino " << ino << " in dir " << name_fd(root_fd) << " path " << file_path);
BEESCOUNT(open_file);
// Try to open file RW, fall back to RO
const char *fp_cstr = file_path.c_str();
rv = openat(root_fd, fp_cstr, FLAGS_OPEN_FILE);
if (!rv) {
BEESCOUNT(open_fail);
// errno == ENOENT is common during snapshot delete, ignore it
if (errno != ENOENT) {
BEESLOG("Could not open path '" << file_path << "' at root " << root << " " << name_fd(root_fd) << ": " << strerror(errno));
BEESNOTE("ipa" << ipa);
}
continue;
}
// Correct inode?
Stat file_stat(rv);
if (file_stat.st_ino != ino) {
BEESLOG("Opening " << name_fd(root_fd) << "/" << file_path << " found wrong inode " << file_stat.st_ino << " instead of " << ino);
rv = Fd();
BEESCOUNT(open_wrong_ino);
break;
}
// Correct root?
auto file_root = btrfs_get_root_id(rv);
if (file_root != root) {
BEESLOG("Opening " << name_fd(root_fd) << "/" << file_path << " found wrong root " << file_root << " instead of " << root);
rv = Fd();
BEESCOUNT(open_wrong_root);
break;
}
// Same filesystem?
Stat root_stat(root_fd);
if (root_stat.st_dev != file_stat.st_dev) {
BEESLOG("Opening root " << name_fd(root_fd) << " path " << file_path << " found path st_dev " << file_stat.st_dev << " but root st_dev is " << root_stat.st_dev);
rv = Fd();
BEESCOUNT(open_wrong_dev);
break;
}
BEESTRACE("mapped " << BeesFileId(root, ino));
BEESTRACE("\tto " << name_fd(rv));
BEESCOUNT(open_hit);
return rv;
}
// Odd, we didn't find a path.
return Fd();
}
Fd
BeesRoots::open_root_ino(uint64_t root, uint64_t ino)
{
return m_ctx->fd_cache()->open_root_ino(m_ctx, root, ino);
}
BeesCrawl::BeesCrawl(shared_ptr<BeesContext> ctx, BeesCrawlState initial_state) :
m_ctx(ctx),
m_state(initial_state)
{
}
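// Each BeesCrawl walks the EXTENT_DATA items of one subvol between
// m_min_transid and m_max_transid using TREE_SEARCH.  When the search
// runs dry, next_transid() advances the window (the old max becomes the
// new min) and restarts from the beginning of the subvol, deferring if
// the current pass started less than BEES_COMMIT_INTERVAL seconds ago.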
bool
BeesCrawl::next_transid()
{
// If this crawl is recently empty, quickly and _silently_ bail out
auto current_time = time(NULL);
auto crawl_state = get_state();
auto elapsed_time = current_time - crawl_state.m_started;
if (elapsed_time < BEES_COMMIT_INTERVAL) {
if (!m_deferred) {
BEESLOG("Deferring next transid in " << get_state());
}
m_deferred = true;
BEESCOUNT(crawl_defer);
return false;
}
// Log performance stats from the old crawl
BEESLOG("Next transid in " << get_state());
// Start new crawl
m_deferred = false;
auto roots = m_ctx->roots();
crawl_state.m_min_transid = crawl_state.m_max_transid;
crawl_state.m_max_transid = roots->transid_max();
crawl_state.m_objectid = 0;
crawl_state.m_offset = 0;
crawl_state.m_started = current_time;
BEESLOG("Restarting crawl " << get_state());
BEESCOUNT(crawl_restart);
set_state(crawl_state);
return true;
}
bool
BeesCrawl::fetch_extents()
{
THROW_CHECK1(runtime_error, m_extents.size(), m_extents.empty());
auto old_state = get_state();
if (m_deferred || old_state.m_max_transid <= old_state.m_min_transid) {
BEESTRACE("Nothing to crawl in " << get_state());
return next_transid();
}
BEESNOTE("crawling " << get_state());
BEESLOG("Crawling " << get_state());
Timer crawl_timer;
BtrfsIoctlSearchKey sk;
sk.tree_id = old_state.m_root;
sk.min_objectid = old_state.m_objectid;
sk.min_type = sk.max_type = BTRFS_EXTENT_DATA_KEY;
sk.min_offset = old_state.m_offset;
sk.min_transid = old_state.m_min_transid;
sk.max_transid = old_state.m_max_transid;
sk.nr_items = BEES_MAX_CRAWL_SIZE;
// Lock in the old state
set_state(old_state);
BEESTRACE("Searching crawl sk " << static_cast<btrfs_ioctl_search_key&>(sk));
bool ioctl_ok = false;
{
BEESNOTE("searching crawl sk " << static_cast<btrfs_ioctl_search_key&>(sk));
BEESTOOLONG("Searching crawl sk " << static_cast<btrfs_ioctl_search_key&>(sk));
ioctl_ok = sk.do_ioctl_nothrow(m_ctx->root_fd());
}
if (ioctl_ok) {
BEESCOUNT(crawl_search);
} else {
BEESLOG("Search ioctl failed: " << strerror(errno));
BEESCOUNT(crawl_fail);
}
if (!ioctl_ok || sk.m_result.empty()) {
BEESCOUNT(crawl_empty);
BEESLOG("Crawl empty " << get_state());
return next_transid();
}
BEESLOG("Crawling " << sk.m_result.size() << " results from " << get_state());
auto results_left = sk.m_result.size();
BEESNOTE("crawling " << results_left << " results from " << get_state());
size_t count_other = 0;
size_t count_inline = 0;
size_t count_unknown = 0;
size_t count_data = 0;
size_t count_low = 0;
size_t count_high = 0;
BeesFileRange last_bfr;
for (auto i : sk.m_result) {
sk.next_min(i);
--results_left;
BEESCOUNT(crawl_items);
BEESTRACE("i = " << i);
#if 1
// We need the "+ 1" and objectid rollover that next_min does.
auto new_state = get_state();
new_state.m_objectid = sk.min_objectid;
new_state.m_offset = sk.min_offset;
// Saving state here means we can skip a search result
// if we are interrupted. Not saving state here means we
// can fail to make forward progress in cases where there
// is a lot of metadata we can't process. Favor forward
// progress over losing search results.
set_state(new_state);
#endif
// Ignore things that aren't EXTENT_DATA_KEY
if (i.type != BTRFS_EXTENT_DATA_KEY) {
++count_other;
BEESCOUNT(crawl_nondata);
continue;
}
auto gen = call_btrfs_get(btrfs_stack_file_extent_generation, i.m_data);
if (gen < get_state().m_min_transid) {
BEESCOUNT(crawl_gen_low);
++count_low;
// We probably want (need?) to scan these anyway.
// continue;
}
if (gen > get_state().m_max_transid) {
BEESCOUNT(crawl_gen_high);
++count_high;
// This shouldn't ever happen
// continue;
}
auto type = call_btrfs_get(btrfs_stack_file_extent_type, i.m_data);
switch (type) {
default:
BEESINFO("Unhandled file extent type " << type << " in root " << get_state().m_root << " ino " << i.objectid << " offset " << to_hex(i.offset));
++count_unknown;
BEESCOUNT(crawl_unknown);
break;
case BTRFS_FILE_EXTENT_INLINE:
// Ignore these for now.
// BEESINFO("Ignored file extent type INLINE in root " << get_state().m_root << " ino " << i.objectid << " offset " << to_hex(i.offset));
++count_inline;
// TODO: replace with out-of-line dup extents
BEESCOUNT(crawl_inline);
break;
case BTRFS_FILE_EXTENT_PREALLOC:
BEESCOUNT(crawl_prealloc);
case BTRFS_FILE_EXTENT_REG: {
auto physical = call_btrfs_get(btrfs_stack_file_extent_disk_bytenr, i.m_data);
auto ram = call_btrfs_get(btrfs_stack_file_extent_ram_bytes, i.m_data);
auto len = call_btrfs_get(btrfs_stack_file_extent_num_bytes, i.m_data);
auto offset = call_btrfs_get(btrfs_stack_file_extent_offset, i.m_data);
BEESTRACE("Root " << get_state().m_root << " ino " << i.objectid << " physical " << to_hex(physical)
<< " logical " << to_hex(i.offset) << ".." << to_hex(i.offset + len)
<< " gen " << gen);
++count_data;
if (physical) {
THROW_CHECK1(runtime_error, ram, ram > 0);
THROW_CHECK1(runtime_error, len, len > 0);
THROW_CHECK2(runtime_error, offset, ram, offset < ram);
BeesFileId bfi(get_state().m_root, i.objectid);
if (m_ctx->is_blacklisted(bfi)) {
BEESCOUNT(crawl_blacklisted);
} else {
BeesFileRange bfr(bfi, i.offset, i.offset + len);
// BEESNOTE("pushing bfr " << bfr << " limit " << BEES_MAX_QUEUE_SIZE);
m_extents.insert(bfr);
BEESCOUNT(crawl_push);
}
} else {
BEESCOUNT(crawl_hole);
}
break;
}
}
}
BEESLOG("Crawled inline " << count_inline << " data " << count_data << " other " << count_other << " unknown " << count_unknown << " gen_low " << count_low << " gen_high " << count_high << " " << get_state() << " in " << crawl_timer << "s");
return true;
}
void
BeesCrawl::fetch_extents_harder()
{
BEESNOTE("fetch_extents_harder " << get_state() << " with " << m_extents.size() << " extents");
while (m_extents.empty()) {
bool progress_made = fetch_extents();
if (!progress_made) {
return;
}
}
}
BeesFileRange
BeesCrawl::peek_front()
{
unique_lock<mutex> lock(m_mutex);
fetch_extents_harder();
if (m_extents.empty()) {
return BeesFileRange();
}
return *m_extents.begin();
}
BeesFileRange
BeesCrawl::pop_front()
{
unique_lock<mutex> lock(m_mutex);
fetch_extents_harder();
if (m_extents.empty()) {
return BeesFileRange();
}
auto rv = *m_extents.begin();
m_extents.erase(m_extents.begin());
#if 0
auto state = get_state();
state.m_objectid = rv.fid().ino();
state.m_offset = rv.begin();
set_state(state);
#endif
return rv;
}
BeesCrawlState
BeesCrawl::get_state()
{
unique_lock<mutex> lock(m_state_mutex);
return m_state;
}
void
BeesCrawl::set_state(const BeesCrawlState &bcs)
{
unique_lock<mutex> lock(m_state_mutex);
m_state = bcs;
lock.unlock();
m_ctx->roots()->crawl_state_set_dirty();
}

src/bees-thread.cc (new file, 91 lines)

@@ -0,0 +1,91 @@
#include "bees.h"
using namespace crucible;
using namespace std;
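// BeesThread is a thin wrapper around std::thread: it names the thread
// (for status output and pthread_setname_np), logs start and exit, and
// on destruction cancels and joins any still-running thread.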
BeesThread::BeesThread(string name) :
m_name(name)
{
THROW_CHECK1(invalid_argument, name, !name.empty());
}
void
BeesThread::exec(function<void()> func)
{
m_timer.reset();
BEESLOG("BeesThread exec " << m_name);
m_thread_ptr = make_shared<thread>([=]() {
BEESLOG("Starting thread " << m_name);
BeesNote::set_name(m_name);
BEESNOTE("thread function");
Timer thread_time;
catch_all([&]() {
DIE_IF_MINUS_ERRNO(pthread_setname_np(pthread_self(), m_name.c_str()));
});
catch_all([&]() {
func();
});
BEESLOG("Exiting thread " << m_name << ", " << thread_time << " sec");
});
}
BeesThread::BeesThread(string name, function<void()> func) :
m_name(name)
{
THROW_CHECK1(invalid_argument, name, !name.empty());
BEESLOG("BeesThread construct " << m_name);
exec(func);
}
void
BeesThread::join()
{
if (!m_thread_ptr) {
BEESLOG("Thread " << m_name << " no thread ptr");
return;
}
BEESLOG("BeesThread::join " << m_name);
if (m_thread_ptr->joinable()) {
BEESLOG("Joining thread " << m_name);
Timer thread_time;
m_thread_ptr->join();
BEESLOG("Waited for " << m_name << ", " << thread_time << " sec");
} else if (!m_name.empty()) {
BEESLOG("BeesThread " << m_name << " not joinable");
} else {
BEESLOG("BeesThread else " << m_name);
}
}
void
BeesThread::set_name(const string &name)
{
m_name = name;
}
BeesThread::~BeesThread()
{
if (!m_thread_ptr) {
BEESLOG("Thread " << m_name << " no thread ptr");
return;
}
BEESLOG("BeesThread destructor " << m_name);
if (m_thread_ptr->joinable()) {
BEESLOG("Cancelling thread " << m_name);
int rv = pthread_cancel(m_thread_ptr->native_handle());
if (rv) {
BEESLOG("pthread_cancel returned " << strerror(-rv));
}
BEESLOG("Waiting for thread " << m_name);
Timer thread_time;
m_thread_ptr->join();
BEESLOG("Waited for " << m_name << ", " << thread_time << " sec");
} else if (!m_name.empty()) {
BEESLOG("Thread " << m_name << " not joinable");
} else {
BEESLOG("Thread destroy else " << m_name);
}
}

1006
src/bees-types.cc Normal file

File diff suppressed because it is too large

599
src/bees.cc Normal file
View File

@@ -0,0 +1,599 @@
#include "bees.h"
#include "crucible/interp.h"
#include "crucible/limits.h"
#include "crucible/process.h"
#include "crucible/string.h"
#include <cctype>
#include <cmath>
#include <iostream>
#include <memory>
// PRIx64
#include <inttypes.h>
#include <sched.h>
#include <sys/fanotify.h>
#include <linux/fs.h>
#include <sys/ioctl.h>
using namespace crucible;
using namespace std;
int
do_cmd_help(const ArgList &argv)
{
cerr << "Usage: " << argv[0] << " fs-root-path [fs-root-path-2...]\n"
"Performs best-effort extent-same deduplication on btrfs.\n"
"\n"
"fs-root-path MUST be the root of a btrfs filesystem tree (id 5).\n"
"Other directories will be rejected.\n"
"\n"
"Multiple filesystems can share a single hash table (BEESHOME)\n"
"but this only works well if the content of each filesystem\n"
"is distinct from all the others.\n"
"\n"
"Required environment variables:\n"
"\tBEESHOME\tPath to hash table and configuration files\n"
"\n"
"Optional environment variables:\n"
"\tBEESSTATUS\tFile to write status to (tmpfs recommended, e.g. /run)\n"
"\n"
<< endl;
return 0;
}
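// Illustrative invocation matching the usage text above (paths are hypothetical):
//   export BEESHOME=/var/lib/bees
//   export BEESSTATUS=/run/bees.status
//   ./bees /mnt/btrfs-root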
// tracing ----------------------------------------
RateLimiter bees_info_rate_limit(BEES_INFO_RATE, BEES_INFO_BURST);
thread_local BeesTracer *BeesTracer::s_next_tracer = nullptr;
BeesTracer::~BeesTracer()
{
if (uncaught_exception()) {
m_func();
if (!m_next_tracer) {
BEESLOG("--- END TRACE --- exception ---");
}
}
s_next_tracer = m_next_tracer;
}
BeesTracer::BeesTracer(function<void()> f) :
m_func(f)
{
m_next_tracer = s_next_tracer;
s_next_tracer = this;
}
void
BeesTracer::trace_now()
{
BeesTracer *tp = s_next_tracer;
BEESLOG("--- BEGIN TRACE ---");
while (tp) {
tp->m_func();
tp = tp->m_next_tracer;
}
BEESLOG("--- END TRACE ---");
}
thread_local BeesNote *BeesNote::s_next = nullptr;
mutex BeesNote::s_mutex;
map<pid_t, BeesNote*> BeesNote::s_status;
thread_local string BeesNote::s_name;
BeesNote::~BeesNote()
{
unique_lock<mutex> lock(s_mutex);
s_next = m_prev;
if (s_next) {
s_status[gettid()] = s_next;
} else {
s_status.erase(gettid());
}
}
BeesNote::BeesNote(function<void(ostream &os)> f) :
m_func(f)
{
unique_lock<mutex> lock(s_mutex);
m_name = s_name;
m_prev = s_next;
s_next = this;
s_status[gettid()] = s_next;
}
void
BeesNote::set_name(const string &name)
{
unique_lock<mutex> lock(s_mutex);
s_name = name;
}
string
BeesNote::get_name()
{
unique_lock<mutex> lock(s_mutex);
if (s_name.empty()) {
return "bees";
} else {
return s_name;
}
}
BeesNote::ThreadStatusMap
BeesNote::get_status()
{
unique_lock<mutex> lock(s_mutex);
ThreadStatusMap rv;
for (auto t : s_status) {
ostringstream oss;
if (!t.second->m_name.empty()) {
oss << t.second->m_name << ": ";
}
if (t.second->m_timer.age() > BEES_TOO_LONG) {
oss << "[" << t.second->m_timer << "s] ";
}
t.second->m_func(oss);
rv[t.first] = oss.str();
}
return rv;
}
// static inline helpers ----------------------------------------
static inline
bool
bees_addr_check(uint64_t v)
{
return !(v & (1ULL << 63));
}
static inline
bool
bees_addr_check(int64_t v)
{
return !(v & (1ULL << 63));
}
string
pretty(double d)
{
static const char * units[] = { "", "K", "M", "G", "T", "P", "E" };
static const char * *units_stop = units + sizeof(units) / sizeof(units[0]) - 1;
const char * *unit = units;
while (d >= 1024 && unit < units_stop) {
d /= 1024;
++unit;
}
ostringstream oss;
oss << (round(d * 1000.0) / 1000.0) << *unit;
return oss.str();
}
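// Examples of the rounding above:
//   pretty(512)     -> "512"
//   pretty(4096)    -> "4K"
//   pretty(1048576) -> "1M"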
// ostream operators ----------------------------------------
template <class T>
ostream &
operator<<(ostream &os, const BeesStatTmpl<T> &bs)
{
unique_lock<mutex> lock(bs.m_mutex);
bool first = true;
string last_tag;
for (auto i : bs.m_stats_map) {
if (i.second == 0) {
continue;
}
string tag = i.first.substr(0, i.first.find_first_of("_"));
if (!last_tag.empty() && tag != last_tag) {
os << "\n\t";
} else if (!first) {
os << " ";
}
last_tag = tag;
first = false;
os << i.first << "=" << i.second;
}
return os;
}
// other ----------------------------------------
template <class T>
T&
BeesStatTmpl<T>::at(string idx)
{
unique_lock<mutex> lock(m_mutex);
if (!m_stats_map.count(idx)) {
m_stats_map[idx] = 0;
}
return m_stats_map[idx];
}
template <class T>
T
BeesStatTmpl<T>::at(string idx) const
{
unique_lock<mutex> lock(m_mutex);
return m_stats_map.at(idx);
}
template <class T>
void
BeesStatTmpl<T>::add_count(string idx, size_t amount)
{
unique_lock<mutex> lock(m_mutex);
if (!m_stats_map.count(idx)) {
m_stats_map[idx] = 0;
}
m_stats_map.at(idx) += amount;
}
template <class T>
BeesStatTmpl<T>::BeesStatTmpl(const BeesStatTmpl &that)
{
if (&that == this) return;
unique_lock<mutex> lock(m_mutex);
unique_lock<mutex> lock2(that.m_mutex);
m_stats_map = that.m_stats_map;
}
template <class T>
BeesStatTmpl<T> &
BeesStatTmpl<T>::operator=(const BeesStatTmpl<T> &that)
{
if (&that == this) return *this;
unique_lock<mutex> lock(m_mutex);
unique_lock<mutex> lock2(that.m_mutex);
m_stats_map = that.m_stats_map;
return *this;
}
BeesStats BeesStats::s_global;
BeesStats
BeesStats::operator-(const BeesStats &that) const
{
if (&that == this) return BeesStats();
unique_lock<mutex> this_lock(m_mutex);
BeesStats this_copy;
this_copy.m_stats_map = m_stats_map;
unique_lock<mutex> that_lock(that.m_mutex);
BeesStats that_copy;
that_copy.m_stats_map = that.m_stats_map;
this_lock.unlock();
that_lock.unlock();
for (auto i : that_copy.m_stats_map) {
if (i.second != 0) {
this_copy.at(i.first) -= i.second;
}
}
return this_copy;
}
BeesRates
BeesStats::operator/(double d) const
{
BeesRates rv;
unique_lock<mutex> lock(m_mutex);
for (auto i : m_stats_map) {
rv.m_stats_map[i.first] = ceil(i.second / d * 1000) / 1000;
}
return rv;
}
BeesStats::operator bool() const
{
unique_lock<mutex> lock(m_mutex);
for (auto i : m_stats_map) {
if (i.second != 0) {
return true;
}
}
return false;
}
BeesTooLong::BeesTooLong(const string &s, double limit) :
m_limit(limit),
m_func([s](ostream &os) { os << s; })
{
}
BeesTooLong::BeesTooLong(const func_type &func, double limit) :
m_limit(limit),
m_func(func)
{
}
void
BeesTooLong::check() const
{
if (age() > m_limit) {
ostringstream oss;
m_func(oss);
BEESLOG("PERFORMANCE: " << *this << " sec: " << oss.str());
}
}
BeesTooLong::~BeesTooLong()
{
check();
}
BeesTooLong &
BeesTooLong::operator=(const func_type &f)
{
m_func = f;
return *this;
}
void
bees_sync(int fd)
{
Timer sync_timer;
BEESNOTE("syncing " << name_fd(fd));
BEESTOOLONG("syncing " << name_fd(fd));
DIE_IF_NON_ZERO(fsync(fd));
BEESCOUNT(sync_count);
BEESCOUNTADD(sync_ms, sync_timer.age() * 1000);
}
BeesStringFile::BeesStringFile(Fd dir_fd, string name, size_t limit) :
m_dir_fd(dir_fd),
m_name(name),
m_limit(limit)
{
BEESLOG("BeesStringFile " << name_fd(m_dir_fd) << "/" << m_name << " max size " << pretty(m_limit));
}
string
BeesStringFile::read()
{
BEESNOTE("opening " << m_name << " in " << name_fd(m_dir_fd));
Fd fd(openat(m_dir_fd, m_name.c_str(), FLAGS_OPEN_FILE));
if (!fd) {
return string();
}
BEESNOTE("sizing " << m_name << " in " << name_fd(m_dir_fd));
Stat st(fd);
THROW_CHECK1(out_of_range, st.st_size, st.st_size > 0);
THROW_CHECK1(out_of_range, st.st_size, st.st_size < ranged_cast<off_t>(m_limit));
BEESNOTE("reading " << m_name << " in " << name_fd(m_dir_fd));
return read_string(fd, st.st_size);
}
void
BeesStringFile::write(string contents)
{
THROW_CHECK2(out_of_range, contents.size(), m_limit, contents.size() < m_limit);
auto tmpname = m_name + ".tmp";
BEESNOTE("unlinking " << tmpname << " in " << name_fd(m_dir_fd));
unlinkat(m_dir_fd, tmpname.c_str(), 0);
// ignore error
BEESNOTE("closing " << tmpname << " in " << name_fd(m_dir_fd));
{
Fd ofd = openat_or_die(m_dir_fd, tmpname, FLAGS_CREATE_FILE, S_IRUSR | S_IWUSR);
BEESNOTE("writing " << tmpname << " in " << name_fd(m_dir_fd));
write_or_die(ofd, contents);
BEESNOTE("fsyncing " << tmpname << " in " << name_fd(m_dir_fd));
DIE_IF_NON_ZERO(fsync(ofd));
}
BEESNOTE("renaming " << tmpname << " to " << m_name << " in FD " << name_fd(m_dir_fd));
BEESTRACE("renaming " << tmpname << " to " << m_name << " in FD " << name_fd(m_dir_fd));
renameat_or_die(m_dir_fd, tmpname, m_dir_fd, m_name);
}
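// Note: write() follows the usual atomic-replace pattern (write NAME.tmp, fsync,
// rename over NAME), so readers see either the old or the new contents, never a
// partial file.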
void
BeesTempFile::create()
{
// BEESLOG("creating temporary file in " << m_ctx->root_path());
BEESNOTE("creating temporary file in " << m_ctx->root_path());
BEESTOOLONG("creating temporary file in " << m_ctx->root_path());
DIE_IF_MINUS_ONE(m_fd = openat(m_ctx->root_fd(), ".", FLAGS_OPEN_TMPFILE, S_IRUSR | S_IWUSR));
BEESCOUNT(tmp_create);
// Can't reopen this file, so don't allow any resolves there
// Resolves won't work there anyway. There are lots of tempfiles
// and they're short-lived, so this ends up being just a memory leak
// m_ctx->blacklist_add(BeesFileId(m_fd));
m_ctx->insert_root_ino(m_fd);
// Set compression attribute
int flags = 0;
BEESTRACE("Getting FS_COMPR_FL on m_fd " << name_fd(m_fd) << " flags " << to_hex(flags));
DIE_IF_MINUS_ONE(ioctl(m_fd, FS_IOC_GETFLAGS, &flags));
flags |= FS_COMPR_FL;
BEESTRACE("Setting FS_COMPR_FL on m_fd " << name_fd(m_fd) << " flags " << to_hex(flags));
DIE_IF_MINUS_ONE(ioctl(m_fd, FS_IOC_SETFLAGS, &flags));
// Always leave first block empty to avoid creating a file with an inline extent
m_end_offset = BLOCK_SIZE_CLONE;
}
void
BeesTempFile::resize(off_t offset)
{
BEESTOOLONG("Resizing temporary file to " << to_hex(offset));
BEESNOTE("Resizing temporary file " << name_fd(m_fd) << " to " << to_hex(offset));
BEESTRACE("Resizing temporary file " << name_fd(m_fd) << " to " << to_hex(offset));
// Ensure that file covers m_end_offset..offset
THROW_CHECK2(invalid_argument, m_end_offset, offset, m_end_offset < offset);
// Truncate
DIE_IF_NON_ZERO(ftruncate(m_fd, offset));
BEESCOUNT(tmp_resize);
// Success
m_end_offset = offset;
}
BeesTempFile::BeesTempFile(shared_ptr<BeesContext> ctx) :
m_ctx(ctx),
m_end_offset(0)
{
create();
}
void
BeesTempFile::realign()
{
if (m_end_offset > BLOCK_SIZE_MAX_TEMP_FILE) {
BEESLOG("temporary file size " << to_hex(m_end_offset) << " > max " << BLOCK_SIZE_MAX_TEMP_FILE);
BEESCOUNT(tmp_trunc);
return create();
}
if (m_end_offset & BLOCK_MASK_CLONE) {
// BEESTRACE("temporary file size " << to_hex(m_end_offset) << " not aligned");
BEESCOUNT(tmp_realign);
return create();
}
// OK as is
BEESCOUNT(tmp_aligned);
}
BeesFileRange
BeesTempFile::make_hole(off_t count)
{
THROW_CHECK1(invalid_argument, count, count > 0);
realign();
BEESTRACE("make hole at " << m_end_offset);
auto end = m_end_offset + count;
BeesFileRange rv(m_fd, m_end_offset, end);
resize(end);
BEESTRACE("created temporary hole " << rv);
BEESCOUNT(tmp_hole);
return rv;
}
BeesFileRange
BeesTempFile::make_copy(const BeesFileRange &src)
{
BEESLOG("copy: " << src);
BEESNOTE("Copying " << src);
BEESTRACE("Copying " << src);
THROW_CHECK1(invalid_argument, src, src.size() > 0);
// FIXME: don't know where these come from, but we can't handle them.
// Grab a trace for the log.
THROW_CHECK1(invalid_argument, src, src.size() < BLOCK_SIZE_MAX_TEMP_FILE);
realign();
auto begin = m_end_offset;
auto end = m_end_offset + src.size();
resize(end);
BeesFileRange rv(m_fd, begin, end);
BEESTRACE("copying to: " << rv);
BEESNOTE("copying " << src << " to " << rv);
auto src_p = src.begin();
auto dst_p = begin;
bool did_block_write = false;
while (dst_p < end) {
auto len = min(BLOCK_SIZE_CLONE, end - dst_p);
BeesBlockData bbd(src.fd(), src_p, len);
// Don't fill in holes
if (bbd.is_data_zero()) {
BEESCOUNT(tmp_block_zero);
} else {
BEESNOTE("copying " << src << " to " << rv << "\n"
"\tpwrite " << bbd << " to " << name_fd(m_fd) << " offset " << to_hex(dst_p) << " len " << len);
pwrite_or_die(m_fd, bbd.data().data(), len, dst_p);
did_block_write = true;
BEESCOUNT(tmp_block);
BEESCOUNTADD(tmp_bytes, len);
}
src_p += len;
dst_p += len;
}
// We seem to get lockups without this!
if (did_block_write) {
bees_sync(m_fd);
}
BEESCOUNT(tmp_copy);
return rv;
}
int
bees_main(ArgList args)
{
set_catch_explainer([&](string s) {
BEESLOG("\n\n*** EXCEPTION ***\n\t" << s << "\n***\n");
BEESCOUNT(exception_caught);
});
BEESNOTE("main");
BeesNote::set_name("main");
list<shared_ptr<BeesContext>> all_contexts;
shared_ptr<BeesContext> bc;
// Create a BeesContext for each filesystem root path on the command line
bool did_subscription = false;
for (string arg : args) {
catch_all([&]() {
bc = make_shared<BeesContext>(bc);
bc->set_root_path(arg);
did_subscription = true;
});
}
if (!did_subscription) {
BEESLOG("WARNING: no filesystems added");
}
BeesThread status_thread("status", [&]() {
bc->dump_status();
});
// Now we just wait forever
bc->show_progress();
// That is all.
return 0;
}
int
main(int argc, const char **argv)
{
if (argc < 2) {
do_cmd_help(argv);
return 2;
}
ArgList args(argv + 1);
int rv = 1;
catch_and_explain([&]() {
rv = bees_main(args);
});
return rv;
}
// instantiate templates for linkage ----------------------------------------
template class BeesStatTmpl<uint64_t>;
template ostream & operator<<(ostream &os, const BeesStatTmpl<uint64_t> &bs);
template class BeesStatTmpl<double>;
template ostream & operator<<(ostream &os, const BeesStatTmpl<double> &bs);

828
src/bees.h Normal file
View File

@@ -0,0 +1,828 @@
#ifndef BEES_H
#define BEES_H
#include "crucible/bool.h"
#include "crucible/cache.h"
#include "crucible/chatter.h"
#include "crucible/error.h"
#include "crucible/extentwalker.h"
#include "crucible/fd.h"
#include "crucible/fs.h"
#include "crucible/lockset.h"
#include "crucible/time.h"
#include "crucible/timequeue.h"
#include "crucible/workqueue.h"
#include <array>
#include <functional>
#include <list>
#include <mutex>
#include <string>
#include <thread>
#include <endian.h>
using namespace crucible;
using namespace std;
// Block size for clone alignment (FIXME: should read this from /sys/fs/btrfs/<FS-UUID>/clone_alignment)
const off_t BLOCK_SIZE_CLONE = 4096;
// Block size for dedup checksums (arbitrary, but must be a multiple of clone alignment)
const off_t BLOCK_SIZE_SUMS = 4096;
// Block size for memory allocations and file mappings (FIXME: should be CPU page size)
const off_t BLOCK_SIZE_MMAP = 4096;
// Maximum length parameter to extent-same ioctl (FIXME: hardcoded in kernel)
const off_t BLOCK_SIZE_MAX_EXTENT_SAME = 4096 * 4096;
// Maximum length of a compressed extent in bytes
const off_t BLOCK_SIZE_MAX_COMPRESSED_EXTENT = 128 * 1024;
// Try to combine smaller extents into larger ones
const off_t BLOCK_SIZE_MIN_EXTENT_DEFRAG = BLOCK_SIZE_MAX_COMPRESSED_EXTENT;
// Avoid splitting extents that are already too small
const off_t BLOCK_SIZE_MIN_EXTENT_SPLIT = BLOCK_SIZE_MAX_COMPRESSED_EXTENT;
// const off_t BLOCK_SIZE_MIN_EXTENT_SPLIT = 1024LL * 1024 * 1024 * 1024;
// Maximum length of any extent in bytes
// except we've seen 1.03G extents...
// ...FIEMAP is slow and full of lies
const off_t BLOCK_SIZE_MAX_EXTENT = 128 * 1024 * 1024;
// Masks, so we don't have to write "(BLOCK_SIZE_CLONE - 1)" everywhere
const off_t BLOCK_MASK_CLONE = BLOCK_SIZE_CLONE - 1;
const off_t BLOCK_MASK_SUMS = BLOCK_SIZE_SUMS - 1;
const off_t BLOCK_MASK_MMAP = BLOCK_SIZE_MMAP - 1;
const off_t BLOCK_MASK_MAX_COMPRESSED_EXTENT = BLOCK_SIZE_MAX_COMPRESSED_EXTENT * 2 - 1;
// Maximum temporary file size
const off_t BLOCK_SIZE_MAX_TEMP_FILE = 1024 * 1024 * 1024;
// Bucket size for hash table (size of one hash bucket)
const off_t BLOCK_SIZE_HASHTAB_BUCKET = BLOCK_SIZE_MMAP;
// Extent size for hash table (since the nocow file attribute does not seem to be working today)
const off_t BLOCK_SIZE_HASHTAB_EXTENT = 16 * 1024 * 1024;
// Bytes per second we want to flush (8GB every two hours)
const double BEES_FLUSH_RATE = 8.0 * 1024 * 1024 * 1024 / 7200.0;
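// (8 GiB over 7200 seconds is roughly 1.1 MiB/s)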
// Interval between writing non-hash-table things to disk (15 minutes)
const int BEES_WRITEBACK_INTERVAL = 900;
// Statistics reports while scanning
const int BEES_STATS_INTERVAL = 3600;
// Progress shows instantaneous rates and thread status
const int BEES_PROGRESS_INTERVAL = 3600;
// Status is output every freakin second. Use a ramdisk.
const int BEES_STATUS_INTERVAL = 1;
// Log warnings when an operation takes too long
const double BEES_TOO_LONG = 2.5;
// Avoid any extent where LOGICAL_INO takes this long
const double BEES_TOXIC_DURATION = 9.9;
// How long we should wait for new btrfs transactions
const double BEES_COMMIT_INTERVAL = 900;
// How long between hash table histograms
const double BEES_HASH_TABLE_ANALYZE_INTERVAL = 3600;
// Rate limiting of informational messages
const double BEES_INFO_RATE = 10.0;
const double BEES_INFO_BURST = 1.0;
// After we have this many events queued, wait
const size_t BEES_MAX_QUEUE_SIZE = 1024;
// Read this many items at a time in SEARCHv2
const size_t BEES_MAX_CRAWL_SIZE = 4096;
// If an extent has this many refs, pretend it does not exist
// to avoid a crippling btrfs performance bug
// The actual limit in LOGICAL_INO seems to be 2730, but let's leave a little headroom
const size_t BEES_MAX_EXTENT_REF_COUNT = 2560;
// Flags
const int FLAGS_OPEN_COMMON = O_NOFOLLOW | O_NONBLOCK | O_CLOEXEC | O_NOATIME | O_LARGEFILE | O_NOCTTY;
const int FLAGS_OPEN_DIR = FLAGS_OPEN_COMMON | O_RDONLY | O_DIRECTORY;
const int FLAGS_OPEN_FILE = FLAGS_OPEN_COMMON | O_RDONLY;
const int FLAGS_OPEN_FILE_RW = FLAGS_OPEN_COMMON | O_RDWR;
const int FLAGS_OPEN_TMPFILE = FLAGS_OPEN_FILE_RW | O_TMPFILE | O_TRUNC | O_EXCL;
const int FLAGS_CREATE_FILE = FLAGS_OPEN_COMMON | O_WRONLY | O_CREAT | O_EXCL;
// Fanotify allows O_APPEND, O_DSYNC, O_NOATIME, O_NONBLOCK, O_CLOEXEC, O_LARGEFILE
const int FLAGS_OPEN_FANOTIFY = O_RDWR | O_NOATIME | O_CLOEXEC | O_LARGEFILE;
// macros ----------------------------------------
#define BEESLOG(x) do { Chatter c(BeesNote::get_name()); c << x; } while (0)
#define BEESLOGTRACE(x) do { BEESLOG(x); BeesTracer::trace_now(); } while (0)
#define BEESTRACE(x) BeesTracer SRSLY_WTF_C(beesTracer_, __LINE__) ([&]() { BEESLOG(x); })
#define BEESTOOLONG(x) BeesTooLong SRSLY_WTF_C(beesTooLong_, __LINE__) ([&](ostream &_btl_os) { _btl_os << x; })
#define BEESNOTE(x) BeesNote SRSLY_WTF_C(beesNote_, __LINE__) ([&](ostream &_btl_os) { _btl_os << x; })
#define BEESINFO(x) do { \
if (bees_info_rate_limit.is_ready()) { \
bees_info_rate_limit.borrow(1); \
Chatter c(BeesNote::get_name()); \
c << x; \
} \
} while (0)
#define BEESCOUNT(stat) do { \
BeesStats::s_global.add_count(#stat); \
} while (0)
#define BEESCOUNTADD(stat, amount) do { \
BeesStats::s_global.add_count(#stat, (amount)); \
} while (0)
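// Illustrative use of the macros above (counter names are hypothetical):
//   BEESNOTE("scanning " << bfr);   // publish this thread's current status line
//   BEESTRACE("extent " << e);      // add a frame to the trace printed on exception
//   BEESCOUNT(scan_extent);         // increment a named global counter by one
//   BEESCOUNTADD(scan_bytes, len);  // increment a named global counter by an amount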
// ----------------------------------------
template <class T> class BeesStatTmpl;
template <class T> ostream& operator<<(ostream &os, const BeesStatTmpl<T> &bs);
template <class T>
class BeesStatTmpl {
map<string, T> m_stats_map;
mutable mutex m_mutex;
public:
BeesStatTmpl() = default;
BeesStatTmpl(const BeesStatTmpl &that);
BeesStatTmpl &operator=(const BeesStatTmpl &that);
void add_count(string idx, size_t amount = 1);
T& at(string idx);
T at(string idx) const;
friend ostream& operator<< <>(ostream &os, const BeesStatTmpl<T> &bs);
friend class BeesStats;
};
using BeesRates = BeesStatTmpl<double>;
struct BeesStats : public BeesStatTmpl<uint64_t> {
static BeesStats s_global;
BeesStats operator-(const BeesStats &that) const;
BeesRates operator/(double d) const;
explicit operator bool() const;
};
class BeesContext;
class BeesBlockData;
class BeesTracer {
function<void()> m_func;
BeesTracer *m_next_tracer = 0;
thread_local static BeesTracer *s_next_tracer;
public:
BeesTracer(function<void()> f);
~BeesTracer();
static void trace_now();
};
class BeesNote {
function<void(ostream &)> m_func;
BeesNote *m_prev;
Timer m_timer;
string m_name;
static mutex s_mutex;
static map<pid_t, BeesNote*> s_status;
thread_local static BeesNote *s_next;
thread_local static string s_name;
public:
BeesNote(function<void(ostream &)> f);
~BeesNote();
using ThreadStatusMap = map<pid_t, string>;
static ThreadStatusMap get_status();
static void set_name(const string &name);
static string get_name();
};
// C++ threads dumbed down even further
class BeesThread {
string m_name;
Timer m_timer;
shared_ptr<thread> m_thread_ptr;
public:
~BeesThread();
BeesThread(string name);
BeesThread(string name, function<void()> args);
void exec(function<void()> args);
void join();
void set_name(const string &name);
};
class BeesFileId {
uint64_t m_root;
uint64_t m_ino;
public:
uint64_t root() const { return m_root; }
uint64_t ino() const { return m_ino; }
bool operator<(const BeesFileId &that) const;
bool operator!=(const BeesFileId &that) const;
bool operator==(const BeesFileId &that) const;
operator bool() const;
BeesFileId(const BtrfsInodeOffsetRoot &bior);
BeesFileId(int fd);
BeesFileId(uint64_t root, uint64_t ino);
BeesFileId();
};
ostream& operator<<(ostream &os, const BeesFileId &bfi);
class BeesFileRange {
protected:
static mutex s_mutex;
mutable Fd m_fd;
mutable BeesFileId m_fid;
off_t m_begin, m_end;
mutable off_t m_file_size;
public:
BeesFileRange();
BeesFileRange(Fd fd, off_t begin, off_t end);
BeesFileRange(const BeesFileId &fid, off_t begin, off_t end);
BeesFileRange(const BeesBlockData &bbd);
operator BeesBlockData() const;
bool operator<(const BeesFileRange &that) const;
bool operator==(const BeesFileRange &that) const;
bool operator!=(const BeesFileRange &that) const;
bool empty() const;
bool is_same_file(const BeesFileRange &that) const;
bool overlaps(const BeesFileRange &that) const;
// If file ranges overlap, extends this to include that.
// Coalesce with empty bfr = non-empty bfr
bool coalesce(const BeesFileRange &that);
// Remove that from this, creating 0, 1, or 2 new objects
pair<BeesFileRange, BeesFileRange> subtract(const BeesFileRange &that) const;
off_t begin() const { return m_begin; }
off_t end() const { return m_end; }
off_t size() const;
// Lazy accessors
off_t file_size() const;
BeesFileId fid() const;
// Get the fd if there is one
Fd fd() const;
// Get the fd, opening it if necessary
Fd fd(const shared_ptr<BeesContext> &ctx) const;
BeesFileRange copy_closed() const;
// Is it defined?
operator bool() const { return !!m_fd || m_fid; }
// Make range larger
off_t grow_end(off_t delta);
off_t grow_begin(off_t delta);
friend ostream & operator<<(ostream &os, const BeesFileRange &bfr);
};
class BeesAddress {
public:
using Type = uint64_t;
private:
Type m_addr = ZERO;
bool magic_check(uint64_t flags);
public:
// Blocks with no physical address (not yet allocated, hole, or "other").
// PREALLOC blocks have a physical address so they're not magic enough to be handled here.
// Compressed blocks have a physical address but it's two-dimensional.
enum MagicValue {
ZERO, // BeesAddress uninitialized
DELALLOC, // delayed allocation
HOLE, // no extent present, no space allocated
UNUSABLE, // inline extent or unrecognized FIEMAP flags
LAST, // all further values are non-magic
};
BeesAddress(Type addr = ZERO) : m_addr(addr) {}
BeesAddress(MagicValue addr) : m_addr(addr) {}
BeesAddress& operator=(const BeesAddress &that) = default;
operator Type() const { return m_addr; }
bool operator==(const BeesAddress &that) const;
bool operator==(const MagicValue that) const { return *this == BeesAddress(that); }
bool operator!=(const BeesAddress &that) const { return !(*this == that); }
bool operator!=(const MagicValue that) const { return *this != BeesAddress(that); }
bool operator<(const BeesAddress &that) const;
static const Type c_offset_min = 1;
static const Type c_offset_max = BLOCK_SIZE_MAX_COMPRESSED_EXTENT / BLOCK_SIZE_CLONE;
// if this isn't 0x3f we will have problems
static const Type c_offset_mask = (c_offset_max - 1) | (c_offset_max);
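// Arithmetic check: c_offset_max = 131072 / 4096 = 32, so
// c_offset_mask = 31 | 32 = 0x3f as the comment above requires.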
static const Type c_compressed_mask = 1 << 11;
static const Type c_eof_mask = 1 << 10;
static const Type c_toxic_mask = 1 << 9;
static const Type c_all_mask = c_compressed_mask | c_eof_mask | c_offset_mask | c_toxic_mask;
bool is_compressed() const { return m_addr >= MagicValue::LAST && (m_addr & c_compressed_mask); }
bool has_compressed_offset() const { return m_addr >= MagicValue::LAST && (m_addr & c_compressed_mask) && (m_addr & c_offset_mask); }
bool is_toxic() const { return m_addr >= MagicValue::LAST && (m_addr & c_toxic_mask); }
bool is_unaligned_eof() const { return m_addr >= MagicValue::LAST && (m_addr & c_eof_mask); }
bool is_magic() const { return m_addr < MagicValue::LAST; }
Type get_compressed_offset() const;
Type get_physical_or_zero() const;
void set_toxic();
BeesAddress(int fd, off_t offset);
BeesAddress(int fd, off_t offset, shared_ptr<BeesContext> ctx);
BeesAddress(const Extent &e, off_t offset);
};
ostream & operator<<(ostream &os, const BeesAddress &ba);
class BeesStringFile {
Fd m_dir_fd;
string m_name;
size_t m_limit;
public:
BeesStringFile(Fd dir_fd, string name, size_t limit = 1024 * 1024);
string read();
void write(string contents);
};
class BeesHashTable {
shared_ptr<BeesContext> m_ctx;
public:
using HashType = uint64_t;
using AddrType = uint64_t;
struct Cell {
HashType e_hash;
AddrType e_addr;
Cell(const Cell &) = default;
Cell(HashType hash, AddrType addr) : e_hash(hash), e_addr(addr) { }
bool operator==(const Cell &e) const { return tie(e_hash, e_addr) == tie(e.e_hash, e.e_addr); }
bool operator!=(const Cell &e) const { return tie(e_hash, e_addr) != tie(e.e_hash, e.e_addr); }
bool operator<(const Cell &e) const { return tie(e_hash, e_addr) < tie(e.e_hash, e.e_addr); }
} __attribute__((packed));
private:
static const uint64_t c_cells_per_bucket = BLOCK_SIZE_HASHTAB_BUCKET / sizeof(Cell);
static const uint64_t c_buckets_per_extent = BLOCK_SIZE_HASHTAB_EXTENT / BLOCK_SIZE_HASHTAB_BUCKET;
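// With the sizes above: sizeof(Cell) == 16 (two packed uint64_t), so
// c_cells_per_bucket == 4096 / 16 == 256 and c_buckets_per_extent == 16 MiB / 4 KiB == 4096.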
public:
union Bucket {
Cell p_cells[c_cells_per_bucket];
uint8_t p_byte[BLOCK_SIZE_HASHTAB_BUCKET];
} __attribute__((packed));
union Extent {
Bucket p_buckets[BLOCK_SIZE_HASHTAB_EXTENT / BLOCK_SIZE_HASHTAB_BUCKET];
uint8_t p_byte[BLOCK_SIZE_HASHTAB_EXTENT];
} __attribute__((packed));
BeesHashTable(shared_ptr<BeesContext> ctx, string filename);
~BeesHashTable();
vector<Cell> find_cell(HashType hash);
bool push_random_hash_addr(HashType hash, AddrType addr);
void erase_hash_addr(HashType hash, AddrType addr);
bool push_front_hash_addr(HashType hash, AddrType addr);
void set_shared(bool shared);
private:
string m_filename;
Fd m_fd;
uint64_t m_size;
union {
void *m_void_ptr; // Save some casting
uint8_t *m_byte_ptr; // for pointer arithmetic
Cell *m_cell_ptr; // pointer to one table cell (entry)
Bucket *m_bucket_ptr; // all cells in one LRU unit
Extent *m_extent_ptr; // all buckets in one I/O unit
};
union {
void *m_void_ptr_end;
uint8_t *m_byte_ptr_end;
Cell *m_cell_ptr_end;
Bucket *m_bucket_ptr_end;
Extent *m_extent_ptr_end;
};
uint64_t m_buckets;
uint64_t m_extents;
uint64_t m_cells;
set<uint64_t> m_buckets_dirty;
set<uint64_t> m_buckets_missing;
BeesThread m_writeback_thread;
BeesThread m_prefetch_thread;
RateLimiter m_flush_rate_limit;
RateLimiter m_prefetch_rate_limit;
mutex m_extent_mutex;
mutex m_bucket_mutex;
condition_variable m_condvar;
set<HashType> m_toxic_hashes;
BeesStringFile m_stats_file;
LockSet<uint64_t> m_extent_lock_set;
DefaultBool m_shared;
void writeback_loop();
void prefetch_loop();
void try_mmap_flags(int flags);
pair<Cell *, Cell *> get_cell_range(HashType hash);
pair<uint8_t *, uint8_t *> get_extent_range(HashType hash);
void fetch_missing_extent(HashType hash);
void set_extent_dirty(HashType hash);
void flush_dirty_extents();
bool is_toxic_hash(HashType h) const;
bool using_shared_map() const { return false; }
BeesHashTable(const BeesHashTable &) = delete;
BeesHashTable &operator=(const BeesHashTable &) = delete;
};
ostream &operator<<(ostream &os, const BeesHashTable::Cell &bhte);
struct BeesCrawlState {
uint64_t m_root;
uint64_t m_objectid;
uint64_t m_offset;
uint64_t m_min_transid;
uint64_t m_max_transid;
time_t m_started;
BeesCrawlState();
bool operator<(const BeesCrawlState &that) const;
};
class BeesCrawl {
shared_ptr<BeesContext> m_ctx;
mutex m_mutex;
set<BeesFileRange> m_extents;
DefaultBool m_deferred;
mutex m_state_mutex;
BeesCrawlState m_state;
bool fetch_extents();
void fetch_extents_harder();
bool next_transid();
public:
BeesCrawl(shared_ptr<BeesContext> ctx, BeesCrawlState initial_state);
BeesFileRange peek_front();
BeesFileRange pop_front();
BeesCrawlState get_state();
void set_state(const BeesCrawlState &bcs);
};
class BeesRoots {
shared_ptr<BeesContext> m_ctx;
BeesStringFile m_crawl_state_file;
BeesCrawlState m_crawl_current;
map<uint64_t, shared_ptr<BeesCrawl>> m_root_crawl_map;
mutex m_mutex;
condition_variable m_condvar;
DefaultBool m_crawl_dirty;
Timer m_crawl_timer;
BeesThread m_crawl_thread;
BeesThread m_writeback_thread;
void insert_new_crawl();
void insert_root(const BeesCrawlState &bcs);
Fd open_root_nocache(uint64_t root);
Fd open_root_ino_nocache(uint64_t root, uint64_t ino);
uint64_t transid_min();
uint64_t transid_max();
void state_load();
void state_save();
void crawl_roots();
string crawl_state_filename() const;
BeesCrawlState crawl_state_get(uint64_t root);
void crawl_state_set_dirty();
void crawl_state_erase(const BeesCrawlState &bcs);
void crawl_thread();
void writeback_thread();
uint64_t next_root(uint64_t root = 0);
void current_state_set(const BeesCrawlState &bcs);
friend class BeesFdCache;
friend class BeesCrawl;
public:
BeesRoots(shared_ptr<BeesContext> ctx);
Fd open_root(uint64_t root);
Fd open_root_ino(uint64_t root, uint64_t ino);
Fd open_root_ino(const BeesFileId &bfi) { return open_root_ino(bfi.root(), bfi.ino()); }
};
struct BeesHash {
using Type = uint64_t;
BeesHash() : m_hash(0) { }
BeesHash(Type that) : m_hash(that) { }
operator Type() const { return m_hash; }
BeesHash& operator=(const Type that) { m_hash = that; return *this; }
private:
Type m_hash;
};
ostream & operator<<(ostream &os, const BeesHash &bh);
class BeesBlockData {
using Blob = vector<char>;
mutable Fd m_fd;
off_t m_offset;
off_t m_length;
mutable BeesAddress m_addr;
mutable Blob m_data;
mutable BeesHash m_hash;
mutable DefaultBool m_hash_done;
public:
// Constructor with the immutable fields
BeesBlockData(Fd fd, off_t offset, size_t read_length = BLOCK_SIZE_SUMS);
BeesBlockData();
// Non-lazy accessors
Fd fd() const { return m_fd; }
// Renaming
off_t begin() const { return m_offset; }
off_t end() const { return m_offset + m_length; }
off_t size() const { return m_length; }
bool empty() const { return !m_length; }
// Lazy accessors may modify const things
const Blob &data() const;
BeesHash hash() const;
BeesAddress addr() const;
bool is_data_zero() const;
bool is_data_equal(const BeesBlockData &that) const;
// Setters
BeesBlockData &addr(const BeesAddress &a);
friend ostream &operator<<(ostream &, const BeesBlockData &);
};
class BeesRangePair : public pair<BeesFileRange, BeesFileRange> {
public:
BeesRangePair(const BeesFileRange &src, const BeesFileRange &dst);
bool grow(shared_ptr<BeesContext> ctx, bool constrained);
BeesRangePair copy_closed() const;
bool operator<(const BeesRangePair &that) const;
friend ostream & operator<<(ostream &os, const BeesRangePair &brp);
};
class BeesWorkQueueBase {
string m_name;
protected:
static mutex s_mutex;
static set<BeesWorkQueueBase *> s_all_workers;
public:
virtual ~BeesWorkQueueBase();
BeesWorkQueueBase(const string &name);
string name() const;
void name(const string &new_name);
virtual size_t active_size() const = 0;
virtual list<string> peek_active(size_t count) const = 0;
static void for_each_work_queue(function<void(BeesWorkQueueBase *)> f);
};
template <class Task>
class BeesWorkQueue : public BeesWorkQueueBase {
WorkQueue<Task> m_active_queue;
public:
BeesWorkQueue(const string &name);
~BeesWorkQueue();
void push_active(const Task &task, size_t limit);
void push_active(const Task &task);
size_t active_size() const override;
list<string> peek_active(size_t count) const override;
Task pop();
};
class BeesTempFile {
shared_ptr<BeesContext> m_ctx;
Fd m_fd;
off_t m_end_offset;
void create();
void realign();
void resize(off_t new_end_offset);
public:
BeesTempFile(shared_ptr<BeesContext> ctx);
BeesFileRange make_hole(off_t count);
BeesFileRange make_copy(const BeesFileRange &src);
};
class BeesFdCache {
LRUCache<Fd, shared_ptr<BeesContext>, uint64_t> m_root_cache;
LRUCache<Fd, shared_ptr<BeesContext>, uint64_t, uint64_t> m_file_cache;
Timer m_root_cache_timer;
public:
BeesFdCache();
Fd open_root(shared_ptr<BeesContext> ctx, uint64_t root);
Fd open_root_ino(shared_ptr<BeesContext> ctx, uint64_t root, uint64_t ino);
void insert_root_ino(shared_ptr<BeesContext> ctx, Fd fd);
};
struct BeesResolveAddrResult {
BeesResolveAddrResult();
vector<BtrfsInodeOffsetRoot> m_biors;
DefaultBool m_is_toxic;
bool is_toxic() const { return m_is_toxic; }
};
class BeesContext : public enable_shared_from_this<BeesContext> {
shared_ptr<BeesContext> m_parent_ctx;
Fd m_home_fd;
shared_ptr<BeesFdCache> m_fd_cache;
shared_ptr<BeesHashTable> m_hash_table;
shared_ptr<BeesRoots> m_roots;
map<thread::id, shared_ptr<BeesTempFile>> m_tmpfiles;
LRUCache<BeesResolveAddrResult, BeesAddress> m_resolve_cache;
string m_root_path;
Fd m_root_fd;
string m_root_uuid;
mutable mutex m_blacklist_mutex;
set<BeesFileId> m_blacklist;
string m_uuid;
Timer m_total_timer;
void set_root_fd(Fd fd);
BeesResolveAddrResult resolve_addr_uncached(BeesAddress addr);
BeesFileRange scan_one_extent(const BeesFileRange &bfr, const Extent &e);
void rewrite_file_range(const BeesFileRange &bfr);
public:
BeesContext(shared_ptr<BeesContext> parent_ctx = nullptr);
void set_root_path(string path);
Fd root_fd() const { return m_root_fd; }
Fd home_fd() const { return m_home_fd; }
string root_path() const { return m_root_path; }
string root_uuid() const { return m_root_uuid; }
BeesFileRange scan_forward(const BeesFileRange &bfr);
BeesRangePair dup_extent(const BeesFileRange &src);
bool dedup(const BeesRangePair &brp);
void blacklist_add(const BeesFileId &fid);
bool is_blacklisted(const BeesFileId &fid) const;
BeesResolveAddrResult resolve_addr(BeesAddress addr);
void invalidate_addr(BeesAddress addr);
void dump_status();
void show_progress();
shared_ptr<BeesFdCache> fd_cache();
shared_ptr<BeesHashTable> hash_table();
shared_ptr<BeesRoots> roots();
shared_ptr<BeesTempFile> tmpfile();
const Timer &total_timer() const { return m_total_timer; }
// TODO: move the rest of the FD cache methods here
void insert_root_ino(Fd fd);
};
class BeesResolver {
shared_ptr<BeesContext> m_ctx;
BeesAddress m_addr;
vector<BtrfsInodeOffsetRoot> m_biors;
set<BeesFileRange> m_ranges;
unsigned m_bior_count;
// We found matching data, so we can dedup
DefaultBool m_found_data;
// We found matching data, so we *did* dedup
DefaultBool m_found_dup;
// We found matching hash, so the hash table is still correct
DefaultBool m_found_hash;
// We found matching physical address, so the hash table isn't totally wrong
DefaultBool m_found_addr;
// We found matching physical address, but data did not match
DefaultBool m_wrong_data;
// The whole thing is a placebo to avoid crippling btrfs performance bugs
DefaultBool m_is_toxic;
BeesFileRange chase_extent_ref(const BtrfsInodeOffsetRoot &bior, BeesBlockData &needle_bbd);
BeesBlockData adjust_offset(const BeesFileRange &haystack, const BeesBlockData &needle);
void find_matches(bool just_one, BeesBlockData &bbd);
// FIXME: Do we need these? We probably always have at least one BBD
BeesFileRange chase_extent_ref(const BtrfsInodeOffsetRoot &bior, BeesHash hash);
BeesBlockData adjust_offset(const BeesFileRange &haystack, bool inexact, BeesHash needle);
void find_matches(bool just_one, BeesHash hash);
public:
BeesResolver(shared_ptr<BeesContext> ctx, BeesAddress addr);
BeesAddress addr(BeesAddress new_addr);
// visitor returns true to stop loop, false to continue
bool for_each_extent_ref(BeesBlockData bbd, function<bool(const BeesFileRange &bfr)> visitor);
set<BeesFileRange> find_all_matches(BeesBlockData &bbd);
set<BeesFileRange> find_all_matches(BeesHash hash);
// TODO: Replace these with "for_each_extent_ref"
BeesFileRange find_one_match(BeesBlockData &bbd);
BeesFileRange find_one_match(BeesHash hash);
void replace_src(const BeesFileRange &src_bfr);
BeesFileRange replace_dst(const BeesFileRange &dst_bfr);
bool found_addr() const { return m_found_addr; }
bool found_data() const { return m_found_data; }
bool found_dup() const { return m_found_dup; }
bool found_hash() const { return m_found_hash; }
bool is_toxic() const { return m_is_toxic; }
size_t count() const { return m_bior_count; }
BeesAddress addr() const { return m_addr; }
bool operator<(const BeesResolver &that) const;
};
class BeesTooLong : public Timer {
using func_type = function<void(ostream &)>;
double m_limit;
func_type m_func;
public:
BeesTooLong(const func_type &func = [](ostream &os) { os << __PRETTY_FUNCTION__; }, double limit = BEES_TOO_LONG);
BeesTooLong(const string &s, double limit = BEES_TOO_LONG);
BeesTooLong &operator=(const func_type &s);
~BeesTooLong();
void check() const;
};
// And now, a giant pile of extern declarations
string pretty(double d);
extern RateLimiter bees_info_rate_limit;
void bees_sync(int fd);
string format_time(time_t t);
#endif

52
src/fiemap.cc Normal file
View File

@@ -0,0 +1,52 @@
#include "crucible/fd.h"
#include "crucible/fs.h"
#include "crucible/error.h"
#include "crucible/string.h"
#include <iostream>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
using namespace crucible;
using namespace std;
int
main(int argc, char **argv)
{
catch_all([&]() {
THROW_CHECK1(invalid_argument, argc, argc > 1);
string filename = argv[1];
cout << "File: " << filename << endl;
Fd fd = open_or_die(filename, O_RDONLY);
Fiemap fm;
fm.m_max_count = 100;
if (argc > 2) { fm.fm_start = stoull(argv[2], nullptr, 0); }
if (argc > 3) { fm.fm_length = stoull(argv[3], nullptr, 0); }
if (argc > 4) { fm.fm_flags = stoull(argv[4], nullptr, 0); }
fm.fm_length = min(fm.fm_length, FIEMAP_MAX_OFFSET - fm.fm_start);
uint64_t stop_at = fm.fm_start + fm.fm_length;
uint64_t last_byte = fm.fm_start;
do {
fm.do_ioctl(fd);
// cerr << fm;
uint64_t last_logical = FIEMAP_MAX_OFFSET;
for (auto &extent : fm.m_extents) {
if (extent.fe_logical > last_byte) {
cout << "Log " << to_hex(last_byte) << ".." << to_hex(extent.fe_logical) << " Hole" << endl;
}
cout << "Log " << to_hex(extent.fe_logical) << ".." << to_hex(extent.fe_logical + extent.fe_length)
<< " Phy " << to_hex(extent.fe_physical) << ".." << to_hex(extent.fe_physical + extent.fe_length)
<< " Flags " << fiemap_extent_flags_ntoa(extent.fe_flags) << endl;
last_logical = extent.fe_logical + extent.fe_length;
last_byte = last_logical;
}
fm.fm_start = last_logical;
} while (fm.fm_start < stop_at);
});
exit(EXIT_SUCCESS);
}
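// Example invocation (hypothetical path):
//   ./fiemap /mnt/data/file 0 0x100000
// prints the logical-to-physical extent map for the first 1 MiB of the file.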

40
src/fiewalk.cc Normal file
View File

@@ -0,0 +1,40 @@
#include "crucible/extentwalker.h"
#include "crucible/error.h"
#include "crucible/string.h"
#include <iostream>
#include <fcntl.h>
#include <unistd.h>
using namespace crucible;
using namespace std;
int
main(int argc, char **argv)
{
catch_all([&]() {
THROW_CHECK1(invalid_argument, argc, argc > 1);
string filename = argv[1];
cout << "File: " << filename << endl;
Fd fd = open_or_die(filename, O_RDONLY);
BtrfsExtentWalker ew(fd);
off_t pos = 0;
if (argc > 2) { pos = stoull(argv[2], nullptr, 0); }
ew.seek(pos);
do {
// cout << "\n\n>>>" << ew.current() << "<<<\n\n" << endl;
cout << ew.current() << endl;
} while (ew.next());
#if 0
cout << "\n\n\nAnd now, backwards...\n\n\n" << endl;
do {
cout << "\n\n>>>" << ew.current() << "<<<\n\n" << endl;
} while (ew.prev());
cout << "\n\n\nDone!\n\n\n" << endl;
#endif
});
exit(EXIT_SUCCESS);
}
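// Example invocation (hypothetical path):
//   ./fiewalk /mnt/data/file 0x10000
// walks the file's extents with BtrfsExtentWalker starting at offset 0x10000.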