
Merge branch 'master' into subvol-threads

Zygo Blaxell, 2018-01-10 23:26:59 -05:00
commit 93fb29a461
12 changed files with 284 additions and 126 deletions

.gitignore

@@ -12,3 +12,4 @@ make.log
 make.log.new
 localconf
 scripts/beesd
+scripts/beesd@.service

Makefile

@@ -1,5 +1,9 @@
 PREFIX ?= /
-LIBEXEC_PREFIX ?= $(PREFIX)/usr/lib/bees
+LIBDIR ?= lib
+USR_PREFIX ?= $(PREFIX)/usr
+USRLIB_PREFIX ?= $(USR_PREFIX)/$(LIBDIR)
+SYSTEMD_LIB_PREFIX ?= $(PREFIX)/lib/systemd
+LIBEXEC_PREFIX ?= $(USRLIB_PREFIX)/bees
 
 MARKDOWN := $(firstword $(shell which markdown markdown2 markdown_py 2>/dev/null))
 MARKDOWN ?= markdown

@@ -7,10 +11,10 @@ MARKDOWN ?= markdown
 # allow local configuration to override above variables
 -include localconf
 
-default all: lib src test README.html
+default all: lib src scripts test README.html
 
 clean: ## Cleanup
-	git clean -dfx
+	git clean -dfx -e localconf
 
 .PHONY: lib src test

@@ -25,10 +29,7 @@ test: ## Run tests
 test: lib src
 	$(MAKE) -C test
 
-scripts/beesd: scripts/beesd.in
-	sed -e's#@LIBEXEC_PREFIX@#$(LIBEXEC_PREFIX)#' -e's#@PREFIX@#$(PREFIX)#' "$<" >"$@"
-
-scripts/beesd@.service: scripts/beesd@.service.in
+scripts/%: scripts/%.in
 	sed -e's#@LIBEXEC_PREFIX@#$(LIBEXEC_PREFIX)#' -e's#@PREFIX@#$(PREFIX)#' "$<" >"$@"
 
 scripts: scripts/beesd scripts/beesd@.service

@@ -37,16 +38,22 @@ README.html: README.md
 	$(MARKDOWN) README.md > README.html.new
 	mv -f README.html.new README.html
 
-install: ## Install bees + libs
-install: lib src test
-	install -Dm644 lib/libcrucible.so $(PREFIX)/usr/lib/libcrucible.so
-	install -Dm755 bin/bees $(LIBEXEC_PREFIX)/bees
+install_bees: ## Install bees + libs
+install_bees: lib src test
+	install -Dm644 lib/libcrucible.so $(DESTDIR)$(USRLIB_PREFIX)/libcrucible.so
+	install -Dm755 bin/bees $(DESTDIR)$(LIBEXEC_PREFIX)/bees
 
 install_scripts: ## Install scipts
-install_scripts:
-	install -Dm755 scripts/beesd $(PREFIX)/usr/sbin/beesd
-	install -Dm644 scripts/beesd.conf.sample $(PREFIX)/etc/bees/beesd.conf.sample
-	install -Dm644 scripts/beesd@.service $(PREFIX)/lib/systemd/system/beesd@.service
+install_scripts: scripts
+	install -Dm755 scripts/beesd $(DESTDIR)$(USR_PREFIX)/sbin/beesd
+	install -Dm644 scripts/beesd.conf.sample $(DESTDIR)$(PREFIX)/etc/bees/beesd.conf.sample
+	install -Dm644 scripts/beesd@.service $(DESTDIR)$(SYSTEMD_LIB_PREFIX)/system/beesd@.service
+
+install: ## Install distribution
+install: install_bees install_scripts
 
 help: ## Show help
 	@fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/\t/'
 
 bees: all
 fly: install

README.md

@@ -322,13 +322,41 @@ Not really a bug, but a gotcha nonetheless:
 To limit this delay, Bees closes all FDs in its file FD cache every
 15 minutes.
 
-Build
------
+Installation
+============
+
+Bees can be installed by following one these instructions:
+
+Arch package
+------------
+
+Bees is availabe in Arch Linux AUR. Install with:
+
+`$ pacaur -S bees-git`
+
+Gentoo ebuild
+-------------
+
+Bees is available as a Gentoo ebuild. Just copy `bees-9999.ebuild` from
+`contrib/gentoo` including the `files` subdirectory to your local
+overlay category `sys-fs`.
+
+You can copy the ebuild to match a Bees version number, and it will
+build that tagged version. It is partly supported since v0.5,
+previous versions won't work.
+
+Build from source
+-----------------
 
 Build with `make`. The build produces `bin/bees` and `lib/libcrucible.so`,
 which must be copied to somewhere in `$PATH` and `$LD_LIBRARY_PATH`
 on the target system respectively.
 
+It will also generate `scripts/beesd@.service` for systemd users. This
+service makes use of a helper script `scripts/beesd` to boot the service.
+Both of the latter use the filesystem UUID to mount the root subvolume
+within a temporary runtime directory.
+
 ### Ubuntu 16.04 - 17.04:
 `$ apt -y install build-essential btrfs-tools uuid-dev markdown && make`

@@ -360,10 +388,15 @@ Dependencies
 * markdown
 
+* util-linux version that provides `blkid` command for the helper
+  script `scripts/beesd` to work
+
 Setup
 -----
 
+If you don't want to use the helper script `scripts/beesd` to setup and
+configure bees, here's how you manually setup bees.
+
 Create a directory for bees state files:
 
     export BEESHOME=/some/path

contrib/gentoo/bees-9999.ebuild

@@ -0,0 +1,42 @@
# Copyright 1999-2018 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2

EAPI=6

inherit git-r3 eutils multilib

DESCRIPTION="Best-Effort Extent-Same, a btrfs dedup agent"
HOMEPAGE="https://github.com/Zygo/bees"

if [[ ${PV} == "9999" ]] ; then
	EGIT_REPO_URI="https://github.com/kakra/bees.git"
	EGIT_BRANCH="integration"
else
	IUSE=""
	SRC_URI="https://github.com/Zygo/bees/archive/v${PV}.tar.gz -> bees-${PV}.tar.gz"
fi

PATCHES="
	${FILESDIR}/v0.5-gentoo_build.patch
"

LICENSE="GPL-3"
SLOT="0"
KEYWORDS=""
IUSE=""

DEPEND="
	>=sys-apps/util-linux-2.30.2
	>=sys-devel/gcc-4.9
	>=sys-fs/btrfs-progs-4.1
"
RDEPEND="${DEPEND}"

DOCS="README.md COPYING"
HTML_DOCS="README.html"

src_prepare() {
	default
	echo LIBDIR=$(get_libdir) >>${S}/localconf
}

contrib/gentoo/files/v0.5-gentoo_build.patch

@@ -0,0 +1,20 @@
diff --git a/localconf b/localconf
new file mode 100644
index 0000000..7705cbb
--- /dev/null
+++ b/localconf
@@ -0,0 +1,2 @@
+PREFIX=/
+LIBEXEC_PREFIX=/usr/libexec
diff --git a/makeflags b/makeflags
index f5983cb..0348623 100644
--- a/makeflags
+++ b/makeflags
@@ -1,4 +1,3 @@
-CCFLAGS = -Wall -Wextra -Werror -O3 -march=native -I../include -ggdb -D_FILE_OFFSET_BITS=64
-# CCFLAGS = -Wall -Wextra -Werror -O0 -I../include -ggdb -fpic -D_FILE_OFFSET_BITS=64
-CFLAGS = $(CCFLAGS) -std=c99
-CXXFLAGS = $(CCFLAGS) -std=c++11 -Wold-style-cast
+CCFLAGS = -O3 -I../include -fpic -D_FILE_OFFSET_BITS=64
+CFLAGS += $(CCFLAGS) -std=c99
+CXXFLAGS += $(CCFLAGS) -std=c++11 -Wold-style-cast

lib/Makefile

@@ -1,3 +1,5 @@
+TAG := $(shell git describe --always --dirty || echo UNKNOWN)
+
 default: libcrucible.so
 
 OBJS = \

@@ -23,13 +25,13 @@ depends.mk: *.cc
 	mv -fv depends.mk.new depends.mk
 
 .version.cc: Makefile ../makeflags *.cc ../include/crucible/*.h
-	echo "namespace crucible { const char *VERSION = \"$(shell git describe --always --dirty || echo UNKNOWN)\"; }" > .version.new.cc
+	echo "namespace crucible { const char *VERSION = \"$(TAG)\"; }" > .version.new.cc
 	mv -f .version.new.cc .version.cc
 
 -include depends.mk
 
 %.o: %.cc ../include/crucible/%.h
-	$(CXX) $(CXXFLAGS) -o $@ -c $<
+	$(CXX) $(CXXFLAGS) -fPIC -o $@ -c $<
 
 libcrucible.so: $(OBJS) Makefile
-	$(CXX) $(LDFLAGS) -o $@ $(OBJS) -shared -luuid
+	$(CXX) $(LDFLAGS) -fPIC -o $@ $(OBJS) -shared -Wl,-soname,$@ -luuid

makeflags

@@ -1,4 +1,4 @@
-CCFLAGS = -Wall -Wextra -Werror -O3 -march=native -I../include -ggdb -fpic -D_FILE_OFFSET_BITS=64
+CCFLAGS = -Wall -Wextra -Werror -O3 -march=native -I../include -ggdb -D_FILE_OFFSET_BITS=64
 # CCFLAGS = -Wall -Wextra -Werror -O0 -I../include -ggdb -fpic -D_FILE_OFFSET_BITS=64
 CFLAGS = $(CCFLAGS) -std=c99
 CXXFLAGS = $(CCFLAGS) -std=c++11 -Wold-style-cast

scripts/beesd.in

@@ -115,4 +115,5 @@ fi
 MNT_DIR="${MNT_DIR//\/\//\/}"
 
-cd $MNT_DIR && exec @LIBEXEC_PREFIX@/bees ${ARGUMENTS[@]} $OPTIONS "$MNT_DIR"
+cd "$MNT_DIR"
+@LIBEXEC_PREFIX@/bees "${ARGUMENTS[@]}" $OPTIONS "$MNT_DIR"

scripts/beesd@.service.in

@@ -1,6 +1,7 @@
 [Unit]
-Description=Bees - Best-Effort Extent-Same, a btrfs deduplicator daemon: %i
-After=local-fs.target
+Description=Bees (%i)
+Documentation=https://github.com/Zygo/bees
+After=sysinit.target
 
 [Service]
 Type=simple

@@ -21,4 +22,4 @@ CPUAccounting=true
 MemoryAccounting=true
 
 [Install]
-WantedBy=local-fs.target
+WantedBy=basic.target

@@ -8,7 +8,7 @@ all: $(PROGRAMS) depends.mk
 include ../makeflags
 
 LIBS = -lcrucible -lpthread
-LDFLAGS = -L../lib -Wl,-rpath=$(shell realpath ../lib)
+LDFLAGS = -L../lib
 
 depends.mk: Makefile *.cc
 	for x in *.cc; do $(CXX) $(CXXFLAGS) -M "$$x"; done > depends.mk.new

src/bees-hash.cc

@@ -24,14 +24,16 @@ operator<<(ostream &os, const BeesHashTable::Cell &bhte)
 		<< BeesAddress(bhte.e_addr) << " }";
 }
 
+#if 0
+static
 void
-dump_bucket(BeesHashTable::Cell *p, BeesHashTable::Cell *q)
+dump_bucket_locked(BeesHashTable::Cell *p, BeesHashTable::Cell *q)
 {
+	// Must be called while holding m_bucket_mutex
 	for (auto i = p; i < q; ++i) {
 		BEESLOG("Entry " << i - p << " " << *i);
 	}
 }
+#endif
 
 const bool VERIFY_CLEARS_BUGS = false;

@@ -91,52 +93,74 @@ BeesHashTable::get_extent_range(HashType hash)
 	return make_pair(bp, ep);
 }
 
+bool
+BeesHashTable::flush_dirty_extent(uint64_t extent_index)
+{
+	BEESNOTE("flushing extent #" << extent_index << " of " << m_extents << " extents");
+
+	auto lock = lock_extent_by_index(extent_index);
+
+	// Not dirty, nothing to do
+	if (!m_extent_metadata.at(extent_index).m_dirty) {
+		return false;
+	}
+
+	bool wrote_extent = false;
+
+	catch_all([&]() {
+		uint8_t *dirty_extent = m_extent_ptr[extent_index].p_byte;
+		uint8_t *dirty_extent_end = m_extent_ptr[extent_index + 1].p_byte;
+		THROW_CHECK1(out_of_range, dirty_extent, dirty_extent >= m_byte_ptr);
+		THROW_CHECK1(out_of_range, dirty_extent_end, dirty_extent_end <= m_byte_ptr_end);
+		THROW_CHECK2(out_of_range, dirty_extent_end, dirty_extent, dirty_extent_end - dirty_extent == BLOCK_SIZE_HASHTAB_EXTENT);
+		BEESTOOLONG("pwrite(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
+
+		// Copy the extent because we might be stuck writing for a while
+		vector<uint8_t> extent_copy(dirty_extent, dirty_extent_end);
+
+		// Mark extent non-dirty while we still hold the lock
+		m_extent_metadata.at(extent_index).m_dirty = false;
+
+		// Release the lock
+		lock.unlock();
+
+		// Write the extent (or not)
+		pwrite_or_die(m_fd, extent_copy, dirty_extent - m_byte_ptr);
+		BEESCOUNT(hash_extent_out);
+		wrote_extent = true;
+	});
+
+	BEESNOTE("flush rate limited after extent #" << extent_index << " of " << m_extents << " extents");
+	m_flush_rate_limit.sleep_for(BLOCK_SIZE_HASHTAB_EXTENT);
+	return wrote_extent;
+}
+
 void
 BeesHashTable::flush_dirty_extents()
 {
 	THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
 
-	unique_lock<mutex> lock(m_extent_mutex);
-	auto dirty_extent_copy = m_buckets_dirty;
-	m_buckets_dirty.clear();
-	if (dirty_extent_copy.empty()) {
-		BEESNOTE("idle");
-		m_condvar.wait(lock);
-		return; // please call later, i.e. immediately
-	}
-	lock.unlock();
-
-	size_t extent_counter = 0;
-	for (auto extent_number : dirty_extent_copy) {
-		++extent_counter;
-		BEESNOTE("flush extent #" << extent_number << " (" << extent_counter << " of " << dirty_extent_copy.size() << ")");
-		catch_all([&]() {
-			uint8_t *dirty_extent = m_extent_ptr[extent_number].p_byte;
-			uint8_t *dirty_extent_end = m_extent_ptr[extent_number + 1].p_byte;
-			THROW_CHECK1(out_of_range, dirty_extent, dirty_extent >= m_byte_ptr);
-			THROW_CHECK1(out_of_range, dirty_extent_end, dirty_extent_end <= m_byte_ptr_end);
-			THROW_CHECK2(out_of_range, dirty_extent_end, dirty_extent, dirty_extent_end - dirty_extent == BLOCK_SIZE_HASHTAB_EXTENT);
-			BEESTOOLONG("pwrite(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
-			// Page locks slow us down more than copying the data does
-			vector<uint8_t> extent_copy(dirty_extent, dirty_extent_end);
-			pwrite_or_die(m_fd, extent_copy, dirty_extent - m_byte_ptr);
-			BEESCOUNT(hash_extent_out);
-		});
-		BEESNOTE("flush rate limited at extent #" << extent_number << " (" << extent_counter << " of " << dirty_extent_copy.size() << ")");
-		m_flush_rate_limit.sleep_for(BLOCK_SIZE_HASHTAB_EXTENT);
+	uint64_t wrote_extents = 0;
+	for (size_t extent_index = 0; extent_index < m_extents; ++extent_index) {
+		if (flush_dirty_extent(extent_index)) {
+			++wrote_extents;
+		}
 	}
+
+	BEESNOTE("idle after writing " << wrote_extents << " of " << m_extents << " extents");
+	unique_lock<mutex> lock(m_dirty_mutex);
+	m_dirty_condvar.wait(lock);
 }
 
 void
-BeesHashTable::set_extent_dirty(HashType hash)
+BeesHashTable::set_extent_dirty_locked(uint64_t extent_index)
 {
-	THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
-	auto pr = get_extent_range(hash);
-	uint64_t extent_number = reinterpret_cast<Extent *>(pr.first) - m_extent_ptr;
-	THROW_CHECK1(runtime_error, extent_number, extent_number < m_extents);
-	unique_lock<mutex> lock(m_extent_mutex);
-	m_buckets_dirty.insert(extent_number);
-	m_condvar.notify_one();
+	// Must already be locked
+	m_extent_metadata.at(extent_index).m_dirty = true;
+
+	// Signal writeback thread
+	unique_lock<mutex> dirty_lock(m_dirty_mutex);
+	m_dirty_condvar.notify_one();
 }
 
 void

@@ -179,13 +203,13 @@ BeesHashTable::prefetch_loop()
 	size_t unaligned_eof_count = 0;
 
 	for (uint64_t ext = 0; ext < m_extents; ++ext) {
-		BEESNOTE("prefetching hash table extent " << ext << " of " << m_extent_ptr_end - m_extent_ptr);
+		BEESNOTE("prefetching hash table extent " << ext << " of " << m_extents);
 		catch_all([&]() {
-			fetch_missing_extent(ext * c_buckets_per_extent);
+			fetch_missing_extent_by_index(ext);
 
-			BEESNOTE("analyzing hash table extent " << ext << " of " << m_extent_ptr_end - m_extent_ptr);
+			BEESNOTE("analyzing hash table extent " << ext << " of " << m_extents);
 			bool duplicate_bugs_found = false;
-			unique_lock<mutex> lock(m_bucket_mutex);
+			auto lock = lock_extent_by_index(ext);
 			for (Bucket *bucket = m_extent_ptr[ext].p_buckets; bucket < m_extent_ptr[ext + 1].p_buckets; ++bucket) {
 				if (verify_cell_range(bucket[0].p_cells, bucket[1].p_cells)) {
 					duplicate_bugs_found = true;

@@ -214,9 +238,8 @@ BeesHashTable::prefetch_loop()
 				// Count these instead of calculating the number so we get better stats in case of exceptions
 				occupied_count += this_bucket_occupied_count;
 			}
-			lock.unlock();
 			if (duplicate_bugs_found) {
-				set_extent_dirty(ext);
+				set_extent_dirty_locked(ext);
 			}
 		});
 	}

@@ -291,52 +314,70 @@ BeesHashTable::prefetch_loop()
 	}
 }
 
-void
-BeesHashTable::fetch_missing_extent(HashType hash)
+size_t
+BeesHashTable::hash_to_extent_index(HashType hash)
+{
+	auto pr = get_extent_range(hash);
+	uint64_t extent_index = reinterpret_cast<const Extent *>(pr.first) - m_extent_ptr;
+	THROW_CHECK2(runtime_error, extent_index, m_extents, extent_index < m_extents);
+	return extent_index;
+}
+
+BeesHashTable::ExtentMetaData::ExtentMetaData() :
+	m_mutex_ptr(make_shared<mutex>())
+{
+}
+
+unique_lock<mutex>
+BeesHashTable::lock_extent_by_index(uint64_t extent_index)
+{
+	THROW_CHECK2(out_of_range, extent_index, m_extents, extent_index < m_extents);
+	return unique_lock<mutex>(*m_extent_metadata.at(extent_index).m_mutex_ptr);
+}
+
+unique_lock<mutex>
+BeesHashTable::lock_extent_by_hash(HashType hash)
 {
 	BEESTOOLONG("fetch_missing_extent for hash " << to_hex(hash));
-	THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
-	auto pr = get_extent_range(hash);
-	uint64_t extent_number = reinterpret_cast<Extent *>(pr.first) - m_extent_ptr;
-	THROW_CHECK1(runtime_error, extent_number, extent_number < m_extents);
+	return lock_extent_by_index(hash_to_extent_index(hash));
+}
 
-	unique_lock<mutex> lock(m_extent_mutex);
-	if (!m_buckets_missing.count(extent_number)) {
+void
+BeesHashTable::fetch_missing_extent_by_index(uint64_t extent_index)
+{
+	BEESNOTE("checking hash extent #" << extent_index << " of " << m_extents << " extents");
+	auto lock = lock_extent_by_index(extent_index);
+	if (!m_extent_metadata.at(extent_index).m_missing) {
 		return;
 	}
 
-	size_t missing_buckets = m_buckets_missing.size();
-	lock.unlock();
-
-	BEESNOTE("waiting to fetch hash extent #" << extent_number << ", " << missing_buckets << " left to fetch");
-
-	// Acquire blocking lock on this extent only
-	auto extent_lock = m_extent_lock_set.make_lock(extent_number);
-
-	// Check missing again because someone else might have fetched this
-	// extent for us while we didn't hold any locks
-	lock.lock();
-	if (!m_buckets_missing.count(extent_number)) {
-		BEESCOUNT(hash_extent_in_twice);
-		return;
-	}
-	lock.unlock();
-
 	// OK we have to read this extent
-	BEESNOTE("fetching hash extent #" << extent_number << ", " << missing_buckets << " left to fetch");
+	BEESNOTE("fetching hash extent #" << extent_index << " of " << m_extents << " extents");
+	BEESTRACE("Fetching hash extent #" << extent_index << " of " << m_extents << " extents");
+	BEESTOOLONG("Fetching hash extent #" << extent_index << " of " << m_extents << " extents");
 
-	BEESTRACE("Fetching missing hash extent " << extent_number);
-	uint8_t *dirty_extent = m_extent_ptr[extent_number].p_byte;
-	uint8_t *dirty_extent_end = m_extent_ptr[extent_number + 1].p_byte;
-	{
+	uint8_t *dirty_extent = m_extent_ptr[extent_index].p_byte;
+	uint8_t *dirty_extent_end = m_extent_ptr[extent_index + 1].p_byte;
+
+	// If the read fails don't retry, just go with whatever data we have
+	m_extent_metadata.at(extent_index).m_missing = false;
+
+	catch_all([&]() {
 		BEESTOOLONG("pread(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
 		pread_or_die(m_fd, dirty_extent, dirty_extent_end - dirty_extent, dirty_extent - m_byte_ptr);
-	}
-
-	BEESCOUNT(hash_extent_in);
-	lock.lock();
-	m_buckets_missing.erase(extent_number);
+		// Only count extents successfully read
+		BEESCOUNT(hash_extent_in);
+	});
+}
+
+void
+BeesHashTable::fetch_missing_extent_by_hash(HashType hash)
+{
+	uint64_t extent_index = hash_to_extent_index(hash);
+	BEESNOTE("waiting to fetch hash extent #" << extent_index << " of " << m_extents << " extents");
+	fetch_missing_extent_by_index(extent_index);
 }
 
 bool

@@ -358,10 +399,10 @@ BeesHashTable::find_cell(HashType hash)
 		rv.push_back(toxic_cell);
 		return rv;
 	}
-	fetch_missing_extent(hash);
+	fetch_missing_extent_by_hash(hash);
 	BEESTOOLONG("find_cell hash " << BeesHash(hash));
 	vector<Cell> rv;
-	unique_lock<mutex> lock(m_bucket_mutex);
+	auto lock = lock_extent_by_hash(hash);
 	auto er = get_cell_range(hash);
 	// FIXME: Weed out zero addresses in the table due to earlier bugs
 	copy_if(er.first, er.second, back_inserter(rv), [=](const Cell &ip) { return ip.e_hash == hash && ip.e_addr >= 0x1000; });

@@ -377,9 +418,9 @@ BeesHashTable::find_cell(HashType hash)
 void
 BeesHashTable::erase_hash_addr(HashType hash, AddrType addr)
 {
-	fetch_missing_extent(hash);
+	fetch_missing_extent_by_hash(hash);
 	BEESTOOLONG("erase hash " << to_hex(hash) << " addr " << addr);
-	unique_lock<mutex> lock(m_bucket_mutex);
+	auto lock = lock_extent_by_hash(hash);
 	auto er = get_cell_range(hash);
 	Cell mv(hash, addr);
 	Cell *ip = find(er.first, er.second, mv);

@@ -387,7 +428,7 @@ BeesHashTable::erase_hash_addr(HashType hash, AddrType addr)
 	if (found) {
 		// Lookups on invalid addresses really hurt us. Kill it with fire!
 		*ip = Cell(0, 0);
-		set_extent_dirty(hash);
+		set_extent_dirty_locked(hash_to_extent_index(hash));
 		BEESCOUNT(hash_erase);
 #if 0
 		if (verify_cell_range(er.first, er.second)) {

@@ -405,9 +446,9 @@ BeesHashTable::erase_hash_addr(HashType hash, AddrType addr)
 bool
 BeesHashTable::push_front_hash_addr(HashType hash, AddrType addr)
 {
-	fetch_missing_extent(hash);
+	fetch_missing_extent_by_hash(hash);
 	BEESTOOLONG("push_front_hash_addr hash " << BeesHash(hash) <<" addr " << BeesAddress(addr));
-	unique_lock<mutex> lock(m_bucket_mutex);
+	auto lock = lock_extent_by_hash(hash);
 	auto er = get_cell_range(hash);
 	Cell mv(hash, addr);
 	Cell *ip = find(er.first, er.second, mv);

@@ -437,7 +478,7 @@ BeesHashTable::push_front_hash_addr(HashType hash, AddrType addr)
 	// There is now a space at the front, insert there if different
 	if (er.first[0] != mv) {
 		er.first[0] = mv;
-		set_extent_dirty(hash);
+		set_extent_dirty_locked(hash_to_extent_index(hash));
 		BEESCOUNT(hash_front);
 	}
 #if 0

@@ -456,9 +497,9 @@ BeesHashTable::push_front_hash_addr(HashType hash, AddrType addr)
 bool
 BeesHashTable::push_random_hash_addr(HashType hash, AddrType addr)
 {
-	fetch_missing_extent(hash);
+	fetch_missing_extent_by_hash(hash);
 	BEESTOOLONG("push_random_hash_addr hash " << BeesHash(hash) << " addr " << BeesAddress(addr));
-	unique_lock<mutex> lock(m_bucket_mutex);
+	auto lock = lock_extent_by_hash(hash);
 	auto er = get_cell_range(hash);
 	Cell mv(hash, addr);
 	Cell *ip = find(er.first, er.second, mv);

@@ -521,14 +562,14 @@ BeesHashTable::push_random_hash_addr(HashType hash, AddrType addr)
 		case_cond = 5;
 ret_dirty:
 	BEESCOUNT(hash_insert);
-	set_extent_dirty(hash);
+	set_extent_dirty_locked(hash_to_extent_index(hash));
 ret:
 #if 0
 	if (verify_cell_range(er.first, er.second, false)) {
 		BEESLOG("while push_randoming (case " << case_cond << ") pos " << pos
 			<< " ip " << (ip - er.first) << " " << mv);
-		// dump_bucket(saved.data(), saved.data() + saved.size());
-		// dump_bucket(er.first, er.second);
+		// dump_bucket_locked(saved.data(), saved.data() + saved.size());
+		// dump_bucket_locked(er.first, er.second);
 	}
 #else
 	(void)case_cond;

@@ -657,9 +698,7 @@ BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename, off_t
 		}
 	}
 
-	for (uint64_t i = 0; i < m_size / sizeof(Extent); ++i) {
-		m_buckets_missing.insert(i);
-	}
+	m_extent_metadata.resize(m_extents);
 
 	m_writeback_thread.exec([&]() {
 		writeback_loop();
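Taken together, the hunks above replace the global bucket/extent mutexes and the m_buckets_dirty / m_buckets_missing sets with one small metadata record per hash-table extent, plus a dedicated mutex and condition variable that only wake the writeback thread. A minimal, self-contained sketch of that locking pattern follows; the class, names, and the in-memory backing vector are illustrative assumptions, not the actual BeesHashTable code.

#include <condition_variable>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <mutex>
#include <vector>

// Illustrative stand-ins only; the real code uses BeesHashTable's
// ExtentMetaData, lock_extent_by_index() and flush_dirty_extent().
struct ExtentMeta {
	std::shared_ptr<std::mutex> mutex_ptr = std::make_shared<std::mutex>();
	bool dirty = false;   // needs writeback
	bool missing = true;  // needs to be read from disk first
};

class ExtentTable {
	std::vector<ExtentMeta> m_meta;        // one record per hash-table extent
	std::vector<uint8_t> m_backing;        // stand-in for the mmapped table
	std::size_t m_extent_bytes;
	std::mutex m_dirty_mutex;
	std::condition_variable m_dirty_cv;    // wakes the writeback thread

public:
	ExtentTable(std::size_t extents, std::size_t extent_bytes)
		: m_meta(extents), m_backing(extents * extent_bytes), m_extent_bytes(extent_bytes) {}

	std::unique_lock<std::mutex> lock_extent(std::size_t i) {
		return std::unique_lock<std::mutex>(*m_meta.at(i).mutex_ptr);
	}

	// Caller must already hold lock_extent(i), as in set_extent_dirty_locked()
	void set_dirty_locked(std::size_t i) {
		m_meta.at(i).dirty = true;
		std::lock_guard<std::mutex> dl(m_dirty_mutex);
		m_dirty_cv.notify_one();
	}

	// Writeback side: copy and clear the dirty flag under the per-extent lock,
	// then drop the lock before the (slow) write so inserts into the same
	// extent are not blocked by I/O.
	bool flush_extent(std::size_t i) {
		auto lock = lock_extent(i);
		if (!m_meta.at(i).dirty) {
			return false;
		}
		std::vector<uint8_t> copy(m_backing.begin() + i * m_extent_bytes,
		                          m_backing.begin() + (i + 1) * m_extent_bytes);
		m_meta.at(i).dirty = false;
		lock.unlock();
		// the real code issues pwrite_or_die() on the copied bytes here
		return true;
	}
};

The key property this sketch shows is that a slow pwrite on one extent only ever blocks threads touching that same extent, while the writeback thread sleeps on its own condvar when nothing is dirty.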

src/bees.h

@@ -432,18 +432,24 @@ private:
 	uint64_t m_buckets;
 	uint64_t m_extents;
 	uint64_t m_cells;
-	set<uint64_t> m_buckets_dirty;
-	set<uint64_t> m_buckets_missing;
 	BeesThread m_writeback_thread;
 	BeesThread m_prefetch_thread;
 	RateLimiter m_flush_rate_limit;
-	mutex m_extent_mutex;
-	mutex m_bucket_mutex;
-	condition_variable m_condvar;
 	set<HashType> m_toxic_hashes;
 	BeesStringFile m_stats_file;
-	LockSet<uint64_t> m_extent_lock_set;
+
+	// Mutex/condvar for the writeback thread
+	mutex m_dirty_mutex;
+	condition_variable m_dirty_condvar;
+
+	// Per-extent structures
+	struct ExtentMetaData {
+		shared_ptr<mutex> m_mutex_ptr;	// Access serializer
+		bool m_dirty = false;		// Needs to be written back to disk
+		bool m_missing = true;		// Needs to be read from disk
+		ExtentMetaData();
+	};
+	vector<ExtentMetaData> m_extent_metadata;
 
 	void open_file();
 	void writeback_loop();

@@ -451,11 +457,17 @@ private:
 	void try_mmap_flags(int flags);
 	pair<Cell *, Cell *> get_cell_range(HashType hash);
 	pair<uint8_t *, uint8_t *> get_extent_range(HashType hash);
-	void fetch_missing_extent(HashType hash);
-	void set_extent_dirty(HashType hash);
+	void fetch_missing_extent_by_hash(HashType hash);
+	void fetch_missing_extent_by_index(uint64_t extent_index);
+	void set_extent_dirty_locked(uint64_t extent_index);
 	void flush_dirty_extents();
+	bool flush_dirty_extent(uint64_t extent_index);
 	bool is_toxic_hash(HashType h) const;
 
+	size_t hash_to_extent_index(HashType ht);
+	unique_lock<mutex> lock_extent_by_hash(HashType ht);
+	unique_lock<mutex> lock_extent_by_index(uint64_t extent_index);
+
 	BeesHashTable(const BeesHashTable &) = delete;
 	BeesHashTable &operator=(const BeesHashTable &) = delete;
 };
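For context, the lookup and insert paths implemented in bees-hash.cc above (find_cell, erase_hash_addr, push_front_hash_addr, push_random_hash_addr) all follow the same shape after this change: resolve the hash to an extent index, make sure that extent has been read in, then hold only that extent's mutex while scanning its cells. The following is a simplified, self-contained sketch of that call shape with hypothetical names and a plain in-memory table standing in for the mmapped file; it is not the real BeesHashTable API.

#include <cstddef>
#include <cstdint>
#include <mutex>
#include <vector>

struct Cell { uint64_t hash = 0, addr = 0; };

// Hypothetical reader-path sketch; the real code calls hash_to_extent_index(),
// fetch_missing_extent_by_index() and lock_extent_by_index() on BeesHashTable.
std::vector<Cell> find_cells_sketch(uint64_t hash,
                                    std::vector<std::mutex> &extent_locks,
                                    std::vector<std::vector<Cell>> &extent_cells)
{
	const std::size_t extent_index = hash % extent_locks.size(); // stand-in for hash_to_extent_index()
	// fetch_missing_extent_by_index(extent_index) would run here before locking

	std::lock_guard<std::mutex> lock(extent_locks.at(extent_index));
	std::vector<Cell> out;
	for (const Cell &c : extent_cells.at(extent_index)) {
		// Same filter as the copy_if in find_cell(): matching hash, and an
		// address above the low range used to weed out zero/bogus entries
		if (c.hash == hash && c.addr >= 0x1000) {
			out.push_back(c);
		}
	}
	return out;
}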