diff --git a/.gitignore b/.gitignore index cb02a07..e00f61d 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ make.log make.log.new localconf scripts/beesd +scripts/beesd@.service diff --git a/Makefile b/Makefile index 5da1465..363289c 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,9 @@ PREFIX ?= / -LIBEXEC_PREFIX ?= $(PREFIX)/usr/lib/bees +LIBDIR ?= lib +USR_PREFIX ?= $(PREFIX)/usr +USRLIB_PREFIX ?= $(USR_PREFIX)/$(LIBDIR) +SYSTEMD_LIB_PREFIX ?= $(PREFIX)/lib/systemd +LIBEXEC_PREFIX ?= $(USRLIB_PREFIX)/bees MARKDOWN := $(firstword $(shell which markdown markdown2 markdown_py 2>/dev/null)) MARKDOWN ?= markdown @@ -7,10 +11,10 @@ MARKDOWN ?= markdown # allow local configuration to override above variables -include localconf -default all: lib src test README.html +default all: lib src scripts test README.html clean: ## Cleanup - git clean -dfx + git clean -dfx -e localconf .PHONY: lib src test @@ -25,10 +29,7 @@ test: ## Run tests test: lib src $(MAKE) -C test -scripts/beesd: scripts/beesd.in - sed -e's#@LIBEXEC_PREFIX@#$(LIBEXEC_PREFIX)#' -e's#@PREFIX@#$(PREFIX)#' "$<" >"$@" - -scripts/beesd@.service: scripts/beesd@.service.in +scripts/%: scripts/%.in sed -e's#@LIBEXEC_PREFIX@#$(LIBEXEC_PREFIX)#' -e's#@PREFIX@#$(PREFIX)#' "$<" >"$@" scripts: scripts/beesd scripts/beesd@.service @@ -37,16 +38,22 @@ README.html: README.md $(MARKDOWN) README.md > README.html.new mv -f README.html.new README.html -install: ## Install bees + libs -install: lib src test - install -Dm644 lib/libcrucible.so $(PREFIX)/usr/lib/libcrucible.so - install -Dm755 bin/bees $(LIBEXEC_PREFIX)/bees +install_bees: ## Install bees + libs +install_bees: lib src test + install -Dm644 lib/libcrucible.so $(DESTDIR)$(USRLIB_PREFIX)/libcrucible.so + install -Dm755 bin/bees $(DESTDIR)$(LIBEXEC_PREFIX)/bees install_scripts: ## Install scipts -install_scripts: - install -Dm755 scripts/beesd $(PREFIX)/usr/sbin/beesd - install -Dm644 scripts/beesd.conf.sample $(PREFIX)/etc/bees/beesd.conf.sample - 
install -Dm644 scripts/beesd@.service $(PREFIX)/lib/systemd/system/beesd@.service +install_scripts: scripts + install -Dm755 scripts/beesd $(DESTDIR)$(USR_PREFIX)/sbin/beesd + install -Dm644 scripts/beesd.conf.sample $(DESTDIR)$(PREFIX)/etc/bees/beesd.conf.sample + install -Dm644 scripts/beesd@.service $(DESTDIR)$(SYSTEMD_LIB_PREFIX)/system/beesd@.service + +install: ## Install distribution +install: install_bees install_scripts help: ## Show help @fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/\t/' + +bees: all +fly: install diff --git a/README.md b/README.md index 6d483f7..40f3f49 100644 --- a/README.md +++ b/README.md @@ -322,13 +322,41 @@ Not really a bug, but a gotcha nonetheless: To limit this delay, Bees closes all FDs in its file FD cache every 15 minutes. -Build ------ +Installation +============ + +Bees can be installed by following one of these instructions: + +Arch package +------------ + +Bees is available in Arch Linux AUR. Install with: + +`$ pacaur -S bees-git` + +Gentoo ebuild +------------- + +Bees is available as a Gentoo ebuild. Just copy `bees-9999.ebuild` from +`contrib/gentoo` including the `files` subdirectory to your local +overlay category `sys-fs`. + +You can copy the ebuild to match a Bees version number, and it will +build that tagged version. It is partly supported since v0.5, +previous versions won't work. + +Build from source +----------------- Build with `make`. The build produces `bin/bees` and `lib/libcrucible.so`, which must be copied to somewhere in `$PATH` and `$LD_LIBRARY_PATH` on the target system respectively. +It will also generate `scripts/beesd@.service` for systemd users. This +service makes use of a helper script `scripts/beesd` to boot the service. +Both of the latter use the filesystem UUID to mount the root subvolume +within a temporary runtime directory.
+ ### Ubuntu 16.04 - 17.04: `$ apt -y install build-essential btrfs-tools uuid-dev markdown && make` @@ -360,10 +388,15 @@ Dependencies * markdown +* util-linux version that provides `blkid` command for the helper + script `scripts/beesd` to work Setup ----- +If you don't want to use the helper script `scripts/beesd` to set up and +configure bees, here's how you manually set up bees. + Create a directory for bees state files: export BEESHOME=/some/path diff --git a/contrib/gentoo/bees-9999.ebuild b/contrib/gentoo/bees-9999.ebuild new file mode 100644 index 0000000..d39b895 --- /dev/null +++ b/contrib/gentoo/bees-9999.ebuild @@ -0,0 +1,42 @@ +# Copyright 1999-2018 Gentoo Foundation +# Distributed under the terms of the GNU General Public License v2 + +EAPI=6 + +inherit git-r3 eutils multilib + +DESCRIPTION="Best-Effort Extent-Same, a btrfs dedup agent" +HOMEPAGE="https://github.com/Zygo/bees" + +if [[ ${PV} == "9999" ]] ; then + EGIT_REPO_URI="https://github.com/kakra/bees.git" + EGIT_BRANCH="integration" +else + IUSE="" + + SRC_URI="https://github.com/Zygo/bees/archive/v${PV}.tar.gz -> bees-${PV}.tar.gz" +fi + +PATCHES=" + ${FILESDIR}/v0.5-gentoo_build.patch +" + +LICENSE="GPL-3" +SLOT="0" +KEYWORDS="" +IUSE="" + +DEPEND=" + >=sys-apps/util-linux-2.30.2 + >=sys-devel/gcc-4.9 + >=sys-fs/btrfs-progs-4.1 +" +RDEPEND="${DEPEND}" + +DOCS="README.md COPYING" +HTML_DOCS="README.html" + +src_prepare() { + default + echo LIBDIR=$(get_libdir) >>${S}/localconf +} diff --git a/contrib/gentoo/files/v0.5-gentoo_build.patch b/contrib/gentoo/files/v0.5-gentoo_build.patch new file mode 100644 index 0000000..9f22cbc --- /dev/null +++ b/contrib/gentoo/files/v0.5-gentoo_build.patch @@ -0,0 +1,20 @@ +diff --git a/localconf b/localconf +new file mode 100644 +index 0000000..7705cbb +--- /dev/null ++++ b/localconf +@@ -0,0 +1,2 @@ ++PREFIX=/ ++LIBEXEC_PREFIX=/usr/libexec +diff --git a/makeflags b/makeflags +index f5983cb..0348623 100644 +--- a/makeflags ++++ b/makeflags +@@ -1,4 +1,3 @@
+-CCFLAGS = -Wall -Wextra -Werror -O3 -march=native -I../include -ggdb -D_FILE_OFFSET_BITS=64 +-# CCFLAGS = -Wall -Wextra -Werror -O0 -I../include -ggdb -fpic -D_FILE_OFFSET_BITS=64 +-CFLAGS = $(CCFLAGS) -std=c99 +-CXXFLAGS = $(CCFLAGS) -std=c++11 -Wold-style-cast ++CCFLAGS = -O3 -I../include -fpic -D_FILE_OFFSET_BITS=64 ++CFLAGS += $(CCFLAGS) -std=c99 ++CXXFLAGS += $(CCFLAGS) -std=c++11 -Wold-style-cast diff --git a/lib/Makefile b/lib/Makefile index cf66eb6..e4ed743 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -1,3 +1,5 @@ +TAG := $(shell git describe --always --dirty || echo UNKNOWN) + default: libcrucible.so OBJS = \ @@ -23,13 +25,13 @@ depends.mk: *.cc mv -fv depends.mk.new depends.mk .version.cc: Makefile ../makeflags *.cc ../include/crucible/*.h - echo "namespace crucible { const char *VERSION = \"$(shell git describe --always --dirty || echo UNKNOWN)\"; }" > .version.new.cc + echo "namespace crucible { const char *VERSION = \"$(TAG)\"; }" > .version.new.cc mv -f .version.new.cc .version.cc -include depends.mk %.o: %.cc ../include/crucible/%.h - $(CXX) $(CXXFLAGS) -o $@ -c $< + $(CXX) $(CXXFLAGS) -fPIC -o $@ -c $< libcrucible.so: $(OBJS) Makefile - $(CXX) $(LDFLAGS) -o $@ $(OBJS) -shared -luuid + $(CXX) $(LDFLAGS) -fPIC -o $@ $(OBJS) -shared -Wl,-soname,$@ -luuid diff --git a/makeflags b/makeflags index f5983cb..de4a66f 100644 --- a/makeflags +++ b/makeflags @@ -1,4 +1,4 @@ -CCFLAGS = -Wall -Wextra -Werror -O3 -march=native -I../include -ggdb -fpic -D_FILE_OFFSET_BITS=64 +CCFLAGS = -Wall -Wextra -Werror -O3 -march=native -I../include -ggdb -D_FILE_OFFSET_BITS=64 # CCFLAGS = -Wall -Wextra -Werror -O0 -I../include -ggdb -fpic -D_FILE_OFFSET_BITS=64 CFLAGS = $(CCFLAGS) -std=c99 CXXFLAGS = $(CCFLAGS) -std=c++11 -Wold-style-cast diff --git a/scripts/beesd.in b/scripts/beesd.in index b4c466b..269f77f 100755 --- a/scripts/beesd.in +++ b/scripts/beesd.in @@ -115,4 +115,5 @@ fi MNT_DIR="${MNT_DIR//\/\//\/}" -cd $MNT_DIR && exec @LIBEXEC_PREFIX@/bees ${ARGUMENTS[@]} 
$OPTIONS "$MNT_DIR" +cd "$MNT_DIR" +@LIBEXEC_PREFIX@/bees "${ARGUMENTS[@]}" $OPTIONS "$MNT_DIR" diff --git a/scripts/beesd@.service.in b/scripts/beesd@.service.in index 55df99c..5aeff76 100644 --- a/scripts/beesd@.service.in +++ b/scripts/beesd@.service.in @@ -1,6 +1,7 @@ [Unit] -Description=Bees - Best-Effort Extent-Same, a btrfs deduplicator daemon: %i -After=local-fs.target +Description=Bees (%i) +Documentation=https://github.com/Zygo/bees +After=sysinit.target [Service] Type=simple @@ -21,4 +22,4 @@ CPUAccounting=true MemoryAccounting=true [Install] -WantedBy=local-fs.target +WantedBy=basic.target diff --git a/src/Makefile b/src/Makefile index d96fd96..3a69c9a 100644 --- a/src/Makefile +++ b/src/Makefile @@ -8,7 +8,7 @@ all: $(PROGRAMS) depends.mk include ../makeflags LIBS = -lcrucible -lpthread -LDFLAGS = -L../lib -Wl,-rpath=$(shell realpath ../lib) +LDFLAGS = -L../lib depends.mk: Makefile *.cc for x in *.cc; do $(CXX) $(CXXFLAGS) -M "$$x"; done > depends.mk.new diff --git a/src/bees-hash.cc b/src/bees-hash.cc index 4747d16..d8cabac 100644 --- a/src/bees-hash.cc +++ b/src/bees-hash.cc @@ -24,14 +24,16 @@ operator<<(ostream &os, const BeesHashTable::Cell &bhte) << BeesAddress(bhte.e_addr) << " }"; } +#if 0 +static void -dump_bucket(BeesHashTable::Cell *p, BeesHashTable::Cell *q) +dump_bucket_locked(BeesHashTable::Cell *p, BeesHashTable::Cell *q) { - // Must be called while holding m_bucket_mutex for (auto i = p; i < q; ++i) { BEESLOG("Entry " << i - p << " " << *i); } } +#endif const bool VERIFY_CLEARS_BUGS = false; @@ -91,52 +93,74 @@ BeesHashTable::get_extent_range(HashType hash) return make_pair(bp, ep); } +bool +BeesHashTable::flush_dirty_extent(uint64_t extent_index) +{ + BEESNOTE("flushing extent #" << extent_index << " of " << m_extents << " extents"); + + auto lock = lock_extent_by_index(extent_index); + + // Not dirty, nothing to do + if (!m_extent_metadata.at(extent_index).m_dirty) { + return false; + } + + bool wrote_extent = false; + + 
catch_all([&]() { + uint8_t *dirty_extent = m_extent_ptr[extent_index].p_byte; + uint8_t *dirty_extent_end = m_extent_ptr[extent_index + 1].p_byte; + THROW_CHECK1(out_of_range, dirty_extent, dirty_extent >= m_byte_ptr); + THROW_CHECK1(out_of_range, dirty_extent_end, dirty_extent_end <= m_byte_ptr_end); + THROW_CHECK2(out_of_range, dirty_extent_end, dirty_extent, dirty_extent_end - dirty_extent == BLOCK_SIZE_HASHTAB_EXTENT); + BEESTOOLONG("pwrite(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")"); + // Copy the extent because we might be stuck writing for a while + vector extent_copy(dirty_extent, dirty_extent_end); + + // Mark extent non-dirty while we still hold the lock + m_extent_metadata.at(extent_index).m_dirty = false; + + // Release the lock + lock.unlock(); + + // Write the extent (or not) + pwrite_or_die(m_fd, extent_copy, dirty_extent - m_byte_ptr); + BEESCOUNT(hash_extent_out); + + wrote_extent = true; + }); + + BEESNOTE("flush rate limited after extent #" << extent_index << " of " << m_extents << " extents"); + m_flush_rate_limit.sleep_for(BLOCK_SIZE_HASHTAB_EXTENT); + return wrote_extent; +} + void BeesHashTable::flush_dirty_extents() { THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0); - unique_lock lock(m_extent_mutex); - auto dirty_extent_copy = m_buckets_dirty; - m_buckets_dirty.clear(); - if (dirty_extent_copy.empty()) { - BEESNOTE("idle"); - m_condvar.wait(lock); - return; // please call later, i.e. 
immediately + uint64_t wrote_extents = 0; + for (size_t extent_index = 0; extent_index < m_extents; ++extent_index) { + if (flush_dirty_extent(extent_index)) { + ++wrote_extents; + } } - lock.unlock(); - size_t extent_counter = 0; - for (auto extent_number : dirty_extent_copy) { - ++extent_counter; - BEESNOTE("flush extent #" << extent_number << " (" << extent_counter << " of " << dirty_extent_copy.size() << ")"); - catch_all([&]() { - uint8_t *dirty_extent = m_extent_ptr[extent_number].p_byte; - uint8_t *dirty_extent_end = m_extent_ptr[extent_number + 1].p_byte; - THROW_CHECK1(out_of_range, dirty_extent, dirty_extent >= m_byte_ptr); - THROW_CHECK1(out_of_range, dirty_extent_end, dirty_extent_end <= m_byte_ptr_end); - THROW_CHECK2(out_of_range, dirty_extent_end, dirty_extent, dirty_extent_end - dirty_extent == BLOCK_SIZE_HASHTAB_EXTENT); - BEESTOOLONG("pwrite(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")"); - // Page locks slow us down more than copying the data does - vector extent_copy(dirty_extent, dirty_extent_end); - pwrite_or_die(m_fd, extent_copy, dirty_extent - m_byte_ptr); - BEESCOUNT(hash_extent_out); - }); - BEESNOTE("flush rate limited at extent #" << extent_number << " (" << extent_counter << " of " << dirty_extent_copy.size() << ")"); - m_flush_rate_limit.sleep_for(BLOCK_SIZE_HASHTAB_EXTENT); - } + BEESNOTE("idle after writing " << wrote_extents << " of " << m_extents << " extents"); + unique_lock lock(m_dirty_mutex); + m_dirty_condvar.wait(lock); } void -BeesHashTable::set_extent_dirty(HashType hash) +BeesHashTable::set_extent_dirty_locked(uint64_t extent_index) { - THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0); - auto pr = get_extent_range(hash); - uint64_t extent_number = reinterpret_cast(pr.first) - m_extent_ptr; - THROW_CHECK1(runtime_error, extent_number, extent_number < m_extents); - unique_lock lock(m_extent_mutex); - 
m_buckets_dirty.insert(extent_number); - m_condvar.notify_one(); + // Must already be locked + m_extent_metadata.at(extent_index).m_dirty = true; + + // Signal writeback thread + unique_lock dirty_lock(m_dirty_mutex); + m_dirty_condvar.notify_one(); } void @@ -179,13 +203,13 @@ BeesHashTable::prefetch_loop() size_t unaligned_eof_count = 0; for (uint64_t ext = 0; ext < m_extents; ++ext) { - BEESNOTE("prefetching hash table extent " << ext << " of " << m_extent_ptr_end - m_extent_ptr); + BEESNOTE("prefetching hash table extent " << ext << " of " << m_extents); catch_all([&]() { - fetch_missing_extent(ext * c_buckets_per_extent); + fetch_missing_extent_by_index(ext); - BEESNOTE("analyzing hash table extent " << ext << " of " << m_extent_ptr_end - m_extent_ptr); + BEESNOTE("analyzing hash table extent " << ext << " of " << m_extents); bool duplicate_bugs_found = false; - unique_lock lock(m_bucket_mutex); + auto lock = lock_extent_by_index(ext); for (Bucket *bucket = m_extent_ptr[ext].p_buckets; bucket < m_extent_ptr[ext + 1].p_buckets; ++bucket) { if (verify_cell_range(bucket[0].p_cells, bucket[1].p_cells)) { duplicate_bugs_found = true; @@ -214,9 +238,8 @@ BeesHashTable::prefetch_loop() // Count these instead of calculating the number so we get better stats in case of exceptions occupied_count += this_bucket_occupied_count; } - lock.unlock(); if (duplicate_bugs_found) { - set_extent_dirty(ext); + set_extent_dirty_locked(ext); } }); } @@ -291,52 +314,70 @@ BeesHashTable::prefetch_loop() } } -void -BeesHashTable::fetch_missing_extent(HashType hash) +size_t +BeesHashTable::hash_to_extent_index(HashType hash) +{ + auto pr = get_extent_range(hash); + uint64_t extent_index = reinterpret_cast(pr.first) - m_extent_ptr; + THROW_CHECK2(runtime_error, extent_index, m_extents, extent_index < m_extents); + return extent_index; +} + +BeesHashTable::ExtentMetaData::ExtentMetaData() : + m_mutex_ptr(make_shared()) +{ +} + +unique_lock +BeesHashTable::lock_extent_by_index(uint64_t 
extent_index) +{ + THROW_CHECK2(out_of_range, extent_index, m_extents, extent_index < m_extents); + return unique_lock(*m_extent_metadata.at(extent_index).m_mutex_ptr); +} + +unique_lock +BeesHashTable::lock_extent_by_hash(HashType hash) { BEESTOOLONG("fetch_missing_extent for hash " << to_hex(hash)); - THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0); - auto pr = get_extent_range(hash); - uint64_t extent_number = reinterpret_cast(pr.first) - m_extent_ptr; - THROW_CHECK1(runtime_error, extent_number, extent_number < m_extents); + return lock_extent_by_index(hash_to_extent_index(hash)); +} - unique_lock lock(m_extent_mutex); - if (!m_buckets_missing.count(extent_number)) { +void +BeesHashTable::fetch_missing_extent_by_index(uint64_t extent_index) +{ + BEESNOTE("checking hash extent #" << extent_index << " of " << m_extents << " extents"); + auto lock = lock_extent_by_index(extent_index); + if (!m_extent_metadata.at(extent_index).m_missing) { return; } - size_t missing_buckets = m_buckets_missing.size(); - lock.unlock(); - - BEESNOTE("waiting to fetch hash extent #" << extent_number << ", " << missing_buckets << " left to fetch"); - - // Acquire blocking lock on this extent only - auto extent_lock = m_extent_lock_set.make_lock(extent_number); - - // Check missing again because someone else might have fetched this - // extent for us while we didn't hold any locks - lock.lock(); - if (!m_buckets_missing.count(extent_number)) { - BEESCOUNT(hash_extent_in_twice); - return; - } - lock.unlock(); - // OK we have to read this extent - BEESNOTE("fetching hash extent #" << extent_number << ", " << missing_buckets << " left to fetch"); + BEESNOTE("fetching hash extent #" << extent_index << " of " << m_extents << " extents"); + BEESTRACE("Fetching hash extent #" << extent_index << " of " << m_extents << " extents"); + BEESTOOLONG("Fetching hash extent #" << extent_index << " of " << m_extents << " extents"); - BEESTRACE("Fetching missing hash extent " << extent_number); - 
uint8_t *dirty_extent = m_extent_ptr[extent_number].p_byte; - uint8_t *dirty_extent_end = m_extent_ptr[extent_number + 1].p_byte; + uint8_t *dirty_extent = m_extent_ptr[extent_index].p_byte; + uint8_t *dirty_extent_end = m_extent_ptr[extent_index + 1].p_byte; - { + // If the read fails don't retry, just go with whatever data we have + m_extent_metadata.at(extent_index).m_missing = false; + + catch_all([&]() { BEESTOOLONG("pread(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")"); pread_or_die(m_fd, dirty_extent, dirty_extent_end - dirty_extent, dirty_extent - m_byte_ptr); - } + }); + // Only count extents successfully read BEESCOUNT(hash_extent_in); - lock.lock(); - m_buckets_missing.erase(extent_number); +} + +void +BeesHashTable::fetch_missing_extent_by_hash(HashType hash) +{ + uint64_t extent_index = hash_to_extent_index(hash); + BEESNOTE("waiting to fetch hash extent #" << extent_index << " of " << m_extents << " extents"); + + fetch_missing_extent_by_index(extent_index); } bool @@ -358,10 +399,10 @@ BeesHashTable::find_cell(HashType hash) rv.push_back(toxic_cell); return rv; } - fetch_missing_extent(hash); + fetch_missing_extent_by_hash(hash); BEESTOOLONG("find_cell hash " << BeesHash(hash)); vector rv; - unique_lock lock(m_bucket_mutex); + auto lock = lock_extent_by_hash(hash); auto er = get_cell_range(hash); // FIXME: Weed out zero addresses in the table due to earlier bugs copy_if(er.first, er.second, back_inserter(rv), [=](const Cell &ip) { return ip.e_hash == hash && ip.e_addr >= 0x1000; }); @@ -377,9 +418,9 @@ BeesHashTable::find_cell(HashType hash) void BeesHashTable::erase_hash_addr(HashType hash, AddrType addr) { - fetch_missing_extent(hash); + fetch_missing_extent_by_hash(hash); BEESTOOLONG("erase hash " << to_hex(hash) << " addr " << addr); - unique_lock lock(m_bucket_mutex); + auto lock = lock_extent_by_hash(hash); auto er = get_cell_range(hash); 
Cell mv(hash, addr); Cell *ip = find(er.first, er.second, mv); @@ -387,7 +428,7 @@ BeesHashTable::erase_hash_addr(HashType hash, AddrType addr) if (found) { // Lookups on invalid addresses really hurt us. Kill it with fire! *ip = Cell(0, 0); - set_extent_dirty(hash); + set_extent_dirty_locked(hash_to_extent_index(hash)); BEESCOUNT(hash_erase); #if 0 if (verify_cell_range(er.first, er.second)) { @@ -405,9 +446,9 @@ BeesHashTable::erase_hash_addr(HashType hash, AddrType addr) bool BeesHashTable::push_front_hash_addr(HashType hash, AddrType addr) { - fetch_missing_extent(hash); + fetch_missing_extent_by_hash(hash); BEESTOOLONG("push_front_hash_addr hash " << BeesHash(hash) <<" addr " << BeesAddress(addr)); - unique_lock lock(m_bucket_mutex); + auto lock = lock_extent_by_hash(hash); auto er = get_cell_range(hash); Cell mv(hash, addr); Cell *ip = find(er.first, er.second, mv); @@ -437,7 +478,7 @@ BeesHashTable::push_front_hash_addr(HashType hash, AddrType addr) // There is now a space at the front, insert there if different if (er.first[0] != mv) { er.first[0] = mv; - set_extent_dirty(hash); + set_extent_dirty_locked(hash_to_extent_index(hash)); BEESCOUNT(hash_front); } #if 0 @@ -456,9 +497,9 @@ BeesHashTable::push_front_hash_addr(HashType hash, AddrType addr) bool BeesHashTable::push_random_hash_addr(HashType hash, AddrType addr) { - fetch_missing_extent(hash); + fetch_missing_extent_by_hash(hash); BEESTOOLONG("push_random_hash_addr hash " << BeesHash(hash) << " addr " << BeesAddress(addr)); - unique_lock lock(m_bucket_mutex); + auto lock = lock_extent_by_hash(hash); auto er = get_cell_range(hash); Cell mv(hash, addr); Cell *ip = find(er.first, er.second, mv); @@ -521,14 +562,14 @@ BeesHashTable::push_random_hash_addr(HashType hash, AddrType addr) case_cond = 5; ret_dirty: BEESCOUNT(hash_insert); - set_extent_dirty(hash); + set_extent_dirty_locked(hash_to_extent_index(hash)); ret: #if 0 if (verify_cell_range(er.first, er.second, false)) { BEESLOG("while push_randoming 
(case " << case_cond << ") pos " << pos << " ip " << (ip - er.first) << " " << mv); - // dump_bucket(saved.data(), saved.data() + saved.size()); - // dump_bucket(er.first, er.second); + // dump_bucket_locked(saved.data(), saved.data() + saved.size()); + // dump_bucket_locked(er.first, er.second); } #else (void)case_cond; @@ -657,9 +698,7 @@ BeesHashTable::BeesHashTable(shared_ptr ctx, string filename, off_t } } - for (uint64_t i = 0; i < m_size / sizeof(Extent); ++i) { - m_buckets_missing.insert(i); - } + m_extent_metadata.resize(m_extents); m_writeback_thread.exec([&]() { writeback_loop(); diff --git a/src/bees.h b/src/bees.h index ce16ced..1914ab3 100644 --- a/src/bees.h +++ b/src/bees.h @@ -432,18 +432,24 @@ private: uint64_t m_buckets; uint64_t m_extents; uint64_t m_cells; - set m_buckets_dirty; - set m_buckets_missing; BeesThread m_writeback_thread; BeesThread m_prefetch_thread; RateLimiter m_flush_rate_limit; - mutex m_extent_mutex; - mutex m_bucket_mutex; - condition_variable m_condvar; set m_toxic_hashes; BeesStringFile m_stats_file; - LockSet m_extent_lock_set; + // Mutex/condvar for the writeback thread + mutex m_dirty_mutex; + condition_variable m_dirty_condvar; + + // Per-extent structures + struct ExtentMetaData { + shared_ptr m_mutex_ptr; // Access serializer + bool m_dirty = false; // Needs to be written back to disk + bool m_missing = true; // Needs to be read from disk + ExtentMetaData(); + }; + vector m_extent_metadata; void open_file(); void writeback_loop(); @@ -451,11 +457,17 @@ private: void try_mmap_flags(int flags); pair get_cell_range(HashType hash); pair get_extent_range(HashType hash); - void fetch_missing_extent(HashType hash); - void set_extent_dirty(HashType hash); + void fetch_missing_extent_by_hash(HashType hash); + void fetch_missing_extent_by_index(uint64_t extent_index); + void set_extent_dirty_locked(uint64_t extent_index); void flush_dirty_extents(); + bool flush_dirty_extent(uint64_t extent_index); bool is_toxic_hash(HashType 
h) const; + size_t hash_to_extent_index(HashType ht); + unique_lock lock_extent_by_hash(HashType ht); + unique_lock lock_extent_by_index(uint64_t extent_index); + BeesHashTable(const BeesHashTable &) = delete; BeesHashTable &operator=(const BeesHashTable &) = delete; };