mirror of
https://github.com/Zygo/bees.git
synced 2025-05-17 21:35:45 +02:00
Merge branch 'master' into subvol-threads
This commit is contained in:
commit
93fb29a461
1
.gitignore
vendored
1
.gitignore
vendored
@ -12,3 +12,4 @@ make.log
|
|||||||
make.log.new
|
make.log.new
|
||||||
localconf
|
localconf
|
||||||
scripts/beesd
|
scripts/beesd
|
||||||
|
scripts/beesd@.service
|
||||||
|
37
Makefile
37
Makefile
@ -1,5 +1,9 @@
|
|||||||
PREFIX ?= /
|
PREFIX ?= /
|
||||||
LIBEXEC_PREFIX ?= $(PREFIX)/usr/lib/bees
|
LIBDIR ?= lib
|
||||||
|
USR_PREFIX ?= $(PREFIX)/usr
|
||||||
|
USRLIB_PREFIX ?= $(USR_PREFIX)/$(LIBDIR)
|
||||||
|
SYSTEMD_LIB_PREFIX ?= $(PREFIX)/lib/systemd
|
||||||
|
LIBEXEC_PREFIX ?= $(USRLIB_PREFIX)/bees
|
||||||
|
|
||||||
MARKDOWN := $(firstword $(shell which markdown markdown2 markdown_py 2>/dev/null))
|
MARKDOWN := $(firstword $(shell which markdown markdown2 markdown_py 2>/dev/null))
|
||||||
MARKDOWN ?= markdown
|
MARKDOWN ?= markdown
|
||||||
@ -7,10 +11,10 @@ MARKDOWN ?= markdown
|
|||||||
# allow local configuration to override above variables
|
# allow local configuration to override above variables
|
||||||
-include localconf
|
-include localconf
|
||||||
|
|
||||||
default all: lib src test README.html
|
default all: lib src scripts test README.html
|
||||||
|
|
||||||
clean: ## Cleanup
|
clean: ## Cleanup
|
||||||
git clean -dfx
|
git clean -dfx -e localconf
|
||||||
|
|
||||||
.PHONY: lib src test
|
.PHONY: lib src test
|
||||||
|
|
||||||
@ -25,10 +29,7 @@ test: ## Run tests
|
|||||||
test: lib src
|
test: lib src
|
||||||
$(MAKE) -C test
|
$(MAKE) -C test
|
||||||
|
|
||||||
scripts/beesd: scripts/beesd.in
|
scripts/%: scripts/%.in
|
||||||
sed -e's#@LIBEXEC_PREFIX@#$(LIBEXEC_PREFIX)#' -e's#@PREFIX@#$(PREFIX)#' "$<" >"$@"
|
|
||||||
|
|
||||||
scripts/beesd@.service: scripts/beesd@.service.in
|
|
||||||
sed -e's#@LIBEXEC_PREFIX@#$(LIBEXEC_PREFIX)#' -e's#@PREFIX@#$(PREFIX)#' "$<" >"$@"
|
sed -e's#@LIBEXEC_PREFIX@#$(LIBEXEC_PREFIX)#' -e's#@PREFIX@#$(PREFIX)#' "$<" >"$@"
|
||||||
|
|
||||||
scripts: scripts/beesd scripts/beesd@.service
|
scripts: scripts/beesd scripts/beesd@.service
|
||||||
@ -37,16 +38,22 @@ README.html: README.md
|
|||||||
$(MARKDOWN) README.md > README.html.new
|
$(MARKDOWN) README.md > README.html.new
|
||||||
mv -f README.html.new README.html
|
mv -f README.html.new README.html
|
||||||
|
|
||||||
install: ## Install bees + libs
|
install_bees: ## Install bees + libs
|
||||||
install: lib src test
|
install_bees: lib src test
|
||||||
install -Dm644 lib/libcrucible.so $(PREFIX)/usr/lib/libcrucible.so
|
install -Dm644 lib/libcrucible.so $(DESTDIR)$(USRLIB_PREFIX)/libcrucible.so
|
||||||
install -Dm755 bin/bees $(LIBEXEC_PREFIX)/bees
|
install -Dm755 bin/bees $(DESTDIR)$(LIBEXEC_PREFIX)/bees
|
||||||
|
|
||||||
install_scripts: ## Install scipts
|
install_scripts: ## Install scipts
|
||||||
install_scripts:
|
install_scripts: scripts
|
||||||
install -Dm755 scripts/beesd $(PREFIX)/usr/sbin/beesd
|
install -Dm755 scripts/beesd $(DESTDIR)$(USR_PREFIX)/sbin/beesd
|
||||||
install -Dm644 scripts/beesd.conf.sample $(PREFIX)/etc/bees/beesd.conf.sample
|
install -Dm644 scripts/beesd.conf.sample $(DESTDIR)$(PREFIX)/etc/bees/beesd.conf.sample
|
||||||
install -Dm644 scripts/beesd@.service $(PREFIX)/lib/systemd/system/beesd@.service
|
install -Dm644 scripts/beesd@.service $(DESTDIR)$(SYSTEMD_LIB_PREFIX)/system/beesd@.service
|
||||||
|
|
||||||
|
install: ## Install distribution
|
||||||
|
install: install_bees install_scripts
|
||||||
|
|
||||||
help: ## Show help
|
help: ## Show help
|
||||||
@fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/\t/'
|
@fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/\t/'
|
||||||
|
|
||||||
|
bees: all
|
||||||
|
fly: install
|
||||||
|
37
README.md
37
README.md
@ -322,13 +322,41 @@ Not really a bug, but a gotcha nonetheless:
|
|||||||
To limit this delay, Bees closes all FDs in its file FD cache every
|
To limit this delay, Bees closes all FDs in its file FD cache every
|
||||||
15 minutes.
|
15 minutes.
|
||||||
|
|
||||||
Build
|
Installation
|
||||||
-----
|
============
|
||||||
|
|
||||||
|
Bees can be installed by following one of these instructions:
|
||||||
|
|
||||||
|
Arch package
|
||||||
|
------------
|
||||||
|
|
||||||
|
Bees is available in the Arch Linux AUR. Install with:
|
||||||
|
|
||||||
|
`$ pacaur -S bees-git`
|
||||||
|
|
||||||
|
Gentoo ebuild
|
||||||
|
-------------
|
||||||
|
|
||||||
|
Bees is available as a Gentoo ebuild. Just copy `bees-9999.ebuild` from
|
||||||
|
`contrib/gentoo` including the `files` subdirectory to your local
|
||||||
|
overlay category `sys-fs`.
|
||||||
|
|
||||||
|
You can copy the ebuild to match a Bees version number, and it will
|
||||||
|
build that tagged version. It is partly supported since v0.5;
|
||||||
|
previous versions won't work.
|
||||||
|
|
||||||
|
Build from source
|
||||||
|
-----------------
|
||||||
|
|
||||||
Build with `make`. The build produces `bin/bees` and `lib/libcrucible.so`,
|
Build with `make`. The build produces `bin/bees` and `lib/libcrucible.so`,
|
||||||
which must be copied to somewhere in `$PATH` and `$LD_LIBRARY_PATH`
|
which must be copied to somewhere in `$PATH` and `$LD_LIBRARY_PATH`
|
||||||
on the target system respectively.
|
on the target system respectively.
|
||||||
|
|
||||||
|
It will also generate `scripts/beesd@.service` for systemd users. This
|
||||||
|
service makes use of a helper script `scripts/beesd` to boot the service.
|
||||||
|
Both of the latter use the filesystem UUID to mount the root subvolume
|
||||||
|
within a temporary runtime directory.
|
||||||
|
|
||||||
### Ubuntu 16.04 - 17.04:
|
### Ubuntu 16.04 - 17.04:
|
||||||
`$ apt -y install build-essential btrfs-tools uuid-dev markdown && make`
|
`$ apt -y install build-essential btrfs-tools uuid-dev markdown && make`
|
||||||
|
|
||||||
@ -360,10 +388,15 @@ Dependencies
|
|||||||
|
|
||||||
* markdown
|
* markdown
|
||||||
|
|
||||||
|
* util-linux version that provides `blkid` command for the helper
|
||||||
|
script `scripts/beesd` to work
|
||||||
|
|
||||||
Setup
|
Setup
|
||||||
-----
|
-----
|
||||||
|
|
||||||
|
If you don't want to use the helper script `scripts/beesd` to set up and
|
||||||
|
configure bees, here's how you manually set up bees.
|
||||||
|
|
||||||
Create a directory for bees state files:
|
Create a directory for bees state files:
|
||||||
|
|
||||||
export BEESHOME=/some/path
|
export BEESHOME=/some/path
|
||||||
|
42
contrib/gentoo/bees-9999.ebuild
Normal file
42
contrib/gentoo/bees-9999.ebuild
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
# Copyright 1999-2018 Gentoo Foundation
|
||||||
|
# Distributed under the terms of the GNU General Public License v2
|
||||||
|
|
||||||
|
EAPI=6
|
||||||
|
|
||||||
|
inherit git-r3 eutils multilib
|
||||||
|
|
||||||
|
DESCRIPTION="Best-Effort Extent-Same, a btrfs dedup agent"
|
||||||
|
HOMEPAGE="https://github.com/Zygo/bees"
|
||||||
|
|
||||||
|
if [[ ${PV} == "9999" ]] ; then
|
||||||
|
EGIT_REPO_URI="https://github.com/kakra/bees.git"
|
||||||
|
EGIT_BRANCH="integration"
|
||||||
|
else
|
||||||
|
IUSE=""
|
||||||
|
|
||||||
|
SRC_URI="https://github.com/Zygo/bees/archive/v${PV}.tar.gz -> bees-${PV}.tar.gz"
|
||||||
|
fi
|
||||||
|
|
||||||
|
PATCHES="
|
||||||
|
${FILESDIR}/v0.5-gentoo_build.patch
|
||||||
|
"
|
||||||
|
|
||||||
|
LICENSE="GPL-3"
|
||||||
|
SLOT="0"
|
||||||
|
KEYWORDS=""
|
||||||
|
IUSE=""
|
||||||
|
|
||||||
|
DEPEND="
|
||||||
|
>=sys-apps/util-linux-2.30.2
|
||||||
|
>=sys-devel/gcc-4.9
|
||||||
|
>=sys-fs/btrfs-progs-4.1
|
||||||
|
"
|
||||||
|
RDEPEND="${DEPEND}"
|
||||||
|
|
||||||
|
DOCS="README.md COPYING"
|
||||||
|
HTML_DOCS="README.html"
|
||||||
|
|
||||||
|
src_prepare() {
|
||||||
|
default
|
||||||
|
echo LIBDIR=$(get_libdir) >>${S}/localconf
|
||||||
|
}
|
20
contrib/gentoo/files/v0.5-gentoo_build.patch
Normal file
20
contrib/gentoo/files/v0.5-gentoo_build.patch
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
diff --git a/localconf b/localconf
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..7705cbb
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/localconf
|
||||||
|
@@ -0,0 +1,2 @@
|
||||||
|
+PREFIX=/
|
||||||
|
+LIBEXEC_PREFIX=/usr/libexec
|
||||||
|
diff --git a/makeflags b/makeflags
|
||||||
|
index f5983cb..0348623 100644
|
||||||
|
--- a/makeflags
|
||||||
|
+++ b/makeflags
|
||||||
|
@@ -1,4 +1,3 @@
|
||||||
|
-CCFLAGS = -Wall -Wextra -Werror -O3 -march=native -I../include -ggdb -D_FILE_OFFSET_BITS=64
|
||||||
|
-# CCFLAGS = -Wall -Wextra -Werror -O0 -I../include -ggdb -fpic -D_FILE_OFFSET_BITS=64
|
||||||
|
-CFLAGS = $(CCFLAGS) -std=c99
|
||||||
|
-CXXFLAGS = $(CCFLAGS) -std=c++11 -Wold-style-cast
|
||||||
|
+CCFLAGS = -O3 -I../include -fpic -D_FILE_OFFSET_BITS=64
|
||||||
|
+CFLAGS += $(CCFLAGS) -std=c99
|
||||||
|
+CXXFLAGS += $(CCFLAGS) -std=c++11 -Wold-style-cast
|
@ -1,3 +1,5 @@
|
|||||||
|
TAG := $(shell git describe --always --dirty || echo UNKNOWN)
|
||||||
|
|
||||||
default: libcrucible.so
|
default: libcrucible.so
|
||||||
|
|
||||||
OBJS = \
|
OBJS = \
|
||||||
@ -23,13 +25,13 @@ depends.mk: *.cc
|
|||||||
mv -fv depends.mk.new depends.mk
|
mv -fv depends.mk.new depends.mk
|
||||||
|
|
||||||
.version.cc: Makefile ../makeflags *.cc ../include/crucible/*.h
|
.version.cc: Makefile ../makeflags *.cc ../include/crucible/*.h
|
||||||
echo "namespace crucible { const char *VERSION = \"$(shell git describe --always --dirty || echo UNKNOWN)\"; }" > .version.new.cc
|
echo "namespace crucible { const char *VERSION = \"$(TAG)\"; }" > .version.new.cc
|
||||||
mv -f .version.new.cc .version.cc
|
mv -f .version.new.cc .version.cc
|
||||||
|
|
||||||
-include depends.mk
|
-include depends.mk
|
||||||
|
|
||||||
%.o: %.cc ../include/crucible/%.h
|
%.o: %.cc ../include/crucible/%.h
|
||||||
$(CXX) $(CXXFLAGS) -o $@ -c $<
|
$(CXX) $(CXXFLAGS) -fPIC -o $@ -c $<
|
||||||
|
|
||||||
libcrucible.so: $(OBJS) Makefile
|
libcrucible.so: $(OBJS) Makefile
|
||||||
$(CXX) $(LDFLAGS) -o $@ $(OBJS) -shared -luuid
|
$(CXX) $(LDFLAGS) -fPIC -o $@ $(OBJS) -shared -Wl,-soname,$@ -luuid
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
CCFLAGS = -Wall -Wextra -Werror -O3 -march=native -I../include -ggdb -fpic -D_FILE_OFFSET_BITS=64
|
CCFLAGS = -Wall -Wextra -Werror -O3 -march=native -I../include -ggdb -D_FILE_OFFSET_BITS=64
|
||||||
# CCFLAGS = -Wall -Wextra -Werror -O0 -I../include -ggdb -fpic -D_FILE_OFFSET_BITS=64
|
# CCFLAGS = -Wall -Wextra -Werror -O0 -I../include -ggdb -fpic -D_FILE_OFFSET_BITS=64
|
||||||
CFLAGS = $(CCFLAGS) -std=c99
|
CFLAGS = $(CCFLAGS) -std=c99
|
||||||
CXXFLAGS = $(CCFLAGS) -std=c++11 -Wold-style-cast
|
CXXFLAGS = $(CCFLAGS) -std=c++11 -Wold-style-cast
|
||||||
|
@ -115,4 +115,5 @@ fi
|
|||||||
|
|
||||||
MNT_DIR="${MNT_DIR//\/\//\/}"
|
MNT_DIR="${MNT_DIR//\/\//\/}"
|
||||||
|
|
||||||
cd $MNT_DIR && exec @LIBEXEC_PREFIX@/bees ${ARGUMENTS[@]} $OPTIONS "$MNT_DIR"
|
cd "$MNT_DIR"
|
||||||
|
@LIBEXEC_PREFIX@/bees "${ARGUMENTS[@]}" $OPTIONS "$MNT_DIR"
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
[Unit]
|
[Unit]
|
||||||
Description=Bees - Best-Effort Extent-Same, a btrfs deduplicator daemon: %i
|
Description=Bees (%i)
|
||||||
After=local-fs.target
|
Documentation=https://github.com/Zygo/bees
|
||||||
|
After=sysinit.target
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=simple
|
Type=simple
|
||||||
@ -21,4 +22,4 @@ CPUAccounting=true
|
|||||||
MemoryAccounting=true
|
MemoryAccounting=true
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=local-fs.target
|
WantedBy=basic.target
|
||||||
|
@ -8,7 +8,7 @@ all: $(PROGRAMS) depends.mk
|
|||||||
include ../makeflags
|
include ../makeflags
|
||||||
|
|
||||||
LIBS = -lcrucible -lpthread
|
LIBS = -lcrucible -lpthread
|
||||||
LDFLAGS = -L../lib -Wl,-rpath=$(shell realpath ../lib)
|
LDFLAGS = -L../lib
|
||||||
|
|
||||||
depends.mk: Makefile *.cc
|
depends.mk: Makefile *.cc
|
||||||
for x in *.cc; do $(CXX) $(CXXFLAGS) -M "$$x"; done > depends.mk.new
|
for x in *.cc; do $(CXX) $(CXXFLAGS) -M "$$x"; done > depends.mk.new
|
||||||
|
223
src/bees-hash.cc
223
src/bees-hash.cc
@ -24,14 +24,16 @@ operator<<(ostream &os, const BeesHashTable::Cell &bhte)
|
|||||||
<< BeesAddress(bhte.e_addr) << " }";
|
<< BeesAddress(bhte.e_addr) << " }";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
static
|
||||||
void
|
void
|
||||||
dump_bucket(BeesHashTable::Cell *p, BeesHashTable::Cell *q)
|
dump_bucket_locked(BeesHashTable::Cell *p, BeesHashTable::Cell *q)
|
||||||
{
|
{
|
||||||
// Must be called while holding m_bucket_mutex
|
|
||||||
for (auto i = p; i < q; ++i) {
|
for (auto i = p; i < q; ++i) {
|
||||||
BEESLOG("Entry " << i - p << " " << *i);
|
BEESLOG("Entry " << i - p << " " << *i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
const bool VERIFY_CLEARS_BUGS = false;
|
const bool VERIFY_CLEARS_BUGS = false;
|
||||||
|
|
||||||
@ -91,52 +93,74 @@ BeesHashTable::get_extent_range(HashType hash)
|
|||||||
return make_pair(bp, ep);
|
return make_pair(bp, ep);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
BeesHashTable::flush_dirty_extent(uint64_t extent_index)
|
||||||
|
{
|
||||||
|
BEESNOTE("flushing extent #" << extent_index << " of " << m_extents << " extents");
|
||||||
|
|
||||||
|
auto lock = lock_extent_by_index(extent_index);
|
||||||
|
|
||||||
|
// Not dirty, nothing to do
|
||||||
|
if (!m_extent_metadata.at(extent_index).m_dirty) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool wrote_extent = false;
|
||||||
|
|
||||||
|
catch_all([&]() {
|
||||||
|
uint8_t *dirty_extent = m_extent_ptr[extent_index].p_byte;
|
||||||
|
uint8_t *dirty_extent_end = m_extent_ptr[extent_index + 1].p_byte;
|
||||||
|
THROW_CHECK1(out_of_range, dirty_extent, dirty_extent >= m_byte_ptr);
|
||||||
|
THROW_CHECK1(out_of_range, dirty_extent_end, dirty_extent_end <= m_byte_ptr_end);
|
||||||
|
THROW_CHECK2(out_of_range, dirty_extent_end, dirty_extent, dirty_extent_end - dirty_extent == BLOCK_SIZE_HASHTAB_EXTENT);
|
||||||
|
BEESTOOLONG("pwrite(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
|
||||||
|
// Copy the extent because we might be stuck writing for a while
|
||||||
|
vector<uint8_t> extent_copy(dirty_extent, dirty_extent_end);
|
||||||
|
|
||||||
|
// Mark extent non-dirty while we still hold the lock
|
||||||
|
m_extent_metadata.at(extent_index).m_dirty = false;
|
||||||
|
|
||||||
|
// Release the lock
|
||||||
|
lock.unlock();
|
||||||
|
|
||||||
|
// Write the extent (or not)
|
||||||
|
pwrite_or_die(m_fd, extent_copy, dirty_extent - m_byte_ptr);
|
||||||
|
BEESCOUNT(hash_extent_out);
|
||||||
|
|
||||||
|
wrote_extent = true;
|
||||||
|
});
|
||||||
|
|
||||||
|
BEESNOTE("flush rate limited after extent #" << extent_index << " of " << m_extents << " extents");
|
||||||
|
m_flush_rate_limit.sleep_for(BLOCK_SIZE_HASHTAB_EXTENT);
|
||||||
|
return wrote_extent;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
BeesHashTable::flush_dirty_extents()
|
BeesHashTable::flush_dirty_extents()
|
||||||
{
|
{
|
||||||
THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
|
THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
|
||||||
|
|
||||||
unique_lock<mutex> lock(m_extent_mutex);
|
uint64_t wrote_extents = 0;
|
||||||
auto dirty_extent_copy = m_buckets_dirty;
|
for (size_t extent_index = 0; extent_index < m_extents; ++extent_index) {
|
||||||
m_buckets_dirty.clear();
|
if (flush_dirty_extent(extent_index)) {
|
||||||
if (dirty_extent_copy.empty()) {
|
++wrote_extents;
|
||||||
BEESNOTE("idle");
|
}
|
||||||
m_condvar.wait(lock);
|
|
||||||
return; // please call later, i.e. immediately
|
|
||||||
}
|
}
|
||||||
lock.unlock();
|
|
||||||
|
|
||||||
size_t extent_counter = 0;
|
BEESNOTE("idle after writing " << wrote_extents << " of " << m_extents << " extents");
|
||||||
for (auto extent_number : dirty_extent_copy) {
|
unique_lock<mutex> lock(m_dirty_mutex);
|
||||||
++extent_counter;
|
m_dirty_condvar.wait(lock);
|
||||||
BEESNOTE("flush extent #" << extent_number << " (" << extent_counter << " of " << dirty_extent_copy.size() << ")");
|
|
||||||
catch_all([&]() {
|
|
||||||
uint8_t *dirty_extent = m_extent_ptr[extent_number].p_byte;
|
|
||||||
uint8_t *dirty_extent_end = m_extent_ptr[extent_number + 1].p_byte;
|
|
||||||
THROW_CHECK1(out_of_range, dirty_extent, dirty_extent >= m_byte_ptr);
|
|
||||||
THROW_CHECK1(out_of_range, dirty_extent_end, dirty_extent_end <= m_byte_ptr_end);
|
|
||||||
THROW_CHECK2(out_of_range, dirty_extent_end, dirty_extent, dirty_extent_end - dirty_extent == BLOCK_SIZE_HASHTAB_EXTENT);
|
|
||||||
BEESTOOLONG("pwrite(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
|
|
||||||
// Page locks slow us down more than copying the data does
|
|
||||||
vector<uint8_t> extent_copy(dirty_extent, dirty_extent_end);
|
|
||||||
pwrite_or_die(m_fd, extent_copy, dirty_extent - m_byte_ptr);
|
|
||||||
BEESCOUNT(hash_extent_out);
|
|
||||||
});
|
|
||||||
BEESNOTE("flush rate limited at extent #" << extent_number << " (" << extent_counter << " of " << dirty_extent_copy.size() << ")");
|
|
||||||
m_flush_rate_limit.sleep_for(BLOCK_SIZE_HASHTAB_EXTENT);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
BeesHashTable::set_extent_dirty(HashType hash)
|
BeesHashTable::set_extent_dirty_locked(uint64_t extent_index)
|
||||||
{
|
{
|
||||||
THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
|
// Must already be locked
|
||||||
auto pr = get_extent_range(hash);
|
m_extent_metadata.at(extent_index).m_dirty = true;
|
||||||
uint64_t extent_number = reinterpret_cast<Extent *>(pr.first) - m_extent_ptr;
|
|
||||||
THROW_CHECK1(runtime_error, extent_number, extent_number < m_extents);
|
// Signal writeback thread
|
||||||
unique_lock<mutex> lock(m_extent_mutex);
|
unique_lock<mutex> dirty_lock(m_dirty_mutex);
|
||||||
m_buckets_dirty.insert(extent_number);
|
m_dirty_condvar.notify_one();
|
||||||
m_condvar.notify_one();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -179,13 +203,13 @@ BeesHashTable::prefetch_loop()
|
|||||||
size_t unaligned_eof_count = 0;
|
size_t unaligned_eof_count = 0;
|
||||||
|
|
||||||
for (uint64_t ext = 0; ext < m_extents; ++ext) {
|
for (uint64_t ext = 0; ext < m_extents; ++ext) {
|
||||||
BEESNOTE("prefetching hash table extent " << ext << " of " << m_extent_ptr_end - m_extent_ptr);
|
BEESNOTE("prefetching hash table extent " << ext << " of " << m_extents);
|
||||||
catch_all([&]() {
|
catch_all([&]() {
|
||||||
fetch_missing_extent(ext * c_buckets_per_extent);
|
fetch_missing_extent_by_index(ext);
|
||||||
|
|
||||||
BEESNOTE("analyzing hash table extent " << ext << " of " << m_extent_ptr_end - m_extent_ptr);
|
BEESNOTE("analyzing hash table extent " << ext << " of " << m_extents);
|
||||||
bool duplicate_bugs_found = false;
|
bool duplicate_bugs_found = false;
|
||||||
unique_lock<mutex> lock(m_bucket_mutex);
|
auto lock = lock_extent_by_index(ext);
|
||||||
for (Bucket *bucket = m_extent_ptr[ext].p_buckets; bucket < m_extent_ptr[ext + 1].p_buckets; ++bucket) {
|
for (Bucket *bucket = m_extent_ptr[ext].p_buckets; bucket < m_extent_ptr[ext + 1].p_buckets; ++bucket) {
|
||||||
if (verify_cell_range(bucket[0].p_cells, bucket[1].p_cells)) {
|
if (verify_cell_range(bucket[0].p_cells, bucket[1].p_cells)) {
|
||||||
duplicate_bugs_found = true;
|
duplicate_bugs_found = true;
|
||||||
@ -214,9 +238,8 @@ BeesHashTable::prefetch_loop()
|
|||||||
// Count these instead of calculating the number so we get better stats in case of exceptions
|
// Count these instead of calculating the number so we get better stats in case of exceptions
|
||||||
occupied_count += this_bucket_occupied_count;
|
occupied_count += this_bucket_occupied_count;
|
||||||
}
|
}
|
||||||
lock.unlock();
|
|
||||||
if (duplicate_bugs_found) {
|
if (duplicate_bugs_found) {
|
||||||
set_extent_dirty(ext);
|
set_extent_dirty_locked(ext);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -291,52 +314,70 @@ BeesHashTable::prefetch_loop()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
size_t
|
||||||
BeesHashTable::fetch_missing_extent(HashType hash)
|
BeesHashTable::hash_to_extent_index(HashType hash)
|
||||||
|
{
|
||||||
|
auto pr = get_extent_range(hash);
|
||||||
|
uint64_t extent_index = reinterpret_cast<const Extent *>(pr.first) - m_extent_ptr;
|
||||||
|
THROW_CHECK2(runtime_error, extent_index, m_extents, extent_index < m_extents);
|
||||||
|
return extent_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
BeesHashTable::ExtentMetaData::ExtentMetaData() :
|
||||||
|
m_mutex_ptr(make_shared<mutex>())
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
unique_lock<mutex>
|
||||||
|
BeesHashTable::lock_extent_by_index(uint64_t extent_index)
|
||||||
|
{
|
||||||
|
THROW_CHECK2(out_of_range, extent_index, m_extents, extent_index < m_extents);
|
||||||
|
return unique_lock<mutex>(*m_extent_metadata.at(extent_index).m_mutex_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
unique_lock<mutex>
|
||||||
|
BeesHashTable::lock_extent_by_hash(HashType hash)
|
||||||
{
|
{
|
||||||
BEESTOOLONG("fetch_missing_extent for hash " << to_hex(hash));
|
BEESTOOLONG("fetch_missing_extent for hash " << to_hex(hash));
|
||||||
THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
|
return lock_extent_by_index(hash_to_extent_index(hash));
|
||||||
auto pr = get_extent_range(hash);
|
}
|
||||||
uint64_t extent_number = reinterpret_cast<Extent *>(pr.first) - m_extent_ptr;
|
|
||||||
THROW_CHECK1(runtime_error, extent_number, extent_number < m_extents);
|
|
||||||
|
|
||||||
unique_lock<mutex> lock(m_extent_mutex);
|
void
|
||||||
if (!m_buckets_missing.count(extent_number)) {
|
BeesHashTable::fetch_missing_extent_by_index(uint64_t extent_index)
|
||||||
|
{
|
||||||
|
BEESNOTE("checking hash extent #" << extent_index << " of " << m_extents << " extents");
|
||||||
|
auto lock = lock_extent_by_index(extent_index);
|
||||||
|
if (!m_extent_metadata.at(extent_index).m_missing) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t missing_buckets = m_buckets_missing.size();
|
|
||||||
lock.unlock();
|
|
||||||
|
|
||||||
BEESNOTE("waiting to fetch hash extent #" << extent_number << ", " << missing_buckets << " left to fetch");
|
|
||||||
|
|
||||||
// Acquire blocking lock on this extent only
|
|
||||||
auto extent_lock = m_extent_lock_set.make_lock(extent_number);
|
|
||||||
|
|
||||||
// Check missing again because someone else might have fetched this
|
|
||||||
// extent for us while we didn't hold any locks
|
|
||||||
lock.lock();
|
|
||||||
if (!m_buckets_missing.count(extent_number)) {
|
|
||||||
BEESCOUNT(hash_extent_in_twice);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
lock.unlock();
|
|
||||||
|
|
||||||
// OK we have to read this extent
|
// OK we have to read this extent
|
||||||
BEESNOTE("fetching hash extent #" << extent_number << ", " << missing_buckets << " left to fetch");
|
BEESNOTE("fetching hash extent #" << extent_index << " of " << m_extents << " extents");
|
||||||
|
BEESTRACE("Fetching hash extent #" << extent_index << " of " << m_extents << " extents");
|
||||||
|
BEESTOOLONG("Fetching hash extent #" << extent_index << " of " << m_extents << " extents");
|
||||||
|
|
||||||
BEESTRACE("Fetching missing hash extent " << extent_number);
|
uint8_t *dirty_extent = m_extent_ptr[extent_index].p_byte;
|
||||||
uint8_t *dirty_extent = m_extent_ptr[extent_number].p_byte;
|
uint8_t *dirty_extent_end = m_extent_ptr[extent_index + 1].p_byte;
|
||||||
uint8_t *dirty_extent_end = m_extent_ptr[extent_number + 1].p_byte;
|
|
||||||
|
|
||||||
{
|
// If the read fails don't retry, just go with whatever data we have
|
||||||
|
m_extent_metadata.at(extent_index).m_missing = false;
|
||||||
|
|
||||||
|
catch_all([&]() {
|
||||||
BEESTOOLONG("pread(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
|
BEESTOOLONG("pread(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
|
||||||
pread_or_die(m_fd, dirty_extent, dirty_extent_end - dirty_extent, dirty_extent - m_byte_ptr);
|
pread_or_die(m_fd, dirty_extent, dirty_extent_end - dirty_extent, dirty_extent - m_byte_ptr);
|
||||||
}
|
});
|
||||||
|
|
||||||
|
// Only count extents successfully read
|
||||||
BEESCOUNT(hash_extent_in);
|
BEESCOUNT(hash_extent_in);
|
||||||
lock.lock();
|
}
|
||||||
m_buckets_missing.erase(extent_number);
|
|
||||||
|
void
|
||||||
|
BeesHashTable::fetch_missing_extent_by_hash(HashType hash)
|
||||||
|
{
|
||||||
|
uint64_t extent_index = hash_to_extent_index(hash);
|
||||||
|
BEESNOTE("waiting to fetch hash extent #" << extent_index << " of " << m_extents << " extents");
|
||||||
|
|
||||||
|
fetch_missing_extent_by_index(extent_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
@ -358,10 +399,10 @@ BeesHashTable::find_cell(HashType hash)
|
|||||||
rv.push_back(toxic_cell);
|
rv.push_back(toxic_cell);
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
fetch_missing_extent(hash);
|
fetch_missing_extent_by_hash(hash);
|
||||||
BEESTOOLONG("find_cell hash " << BeesHash(hash));
|
BEESTOOLONG("find_cell hash " << BeesHash(hash));
|
||||||
vector<Cell> rv;
|
vector<Cell> rv;
|
||||||
unique_lock<mutex> lock(m_bucket_mutex);
|
auto lock = lock_extent_by_hash(hash);
|
||||||
auto er = get_cell_range(hash);
|
auto er = get_cell_range(hash);
|
||||||
// FIXME: Weed out zero addresses in the table due to earlier bugs
|
// FIXME: Weed out zero addresses in the table due to earlier bugs
|
||||||
copy_if(er.first, er.second, back_inserter(rv), [=](const Cell &ip) { return ip.e_hash == hash && ip.e_addr >= 0x1000; });
|
copy_if(er.first, er.second, back_inserter(rv), [=](const Cell &ip) { return ip.e_hash == hash && ip.e_addr >= 0x1000; });
|
||||||
@ -377,9 +418,9 @@ BeesHashTable::find_cell(HashType hash)
|
|||||||
void
|
void
|
||||||
BeesHashTable::erase_hash_addr(HashType hash, AddrType addr)
|
BeesHashTable::erase_hash_addr(HashType hash, AddrType addr)
|
||||||
{
|
{
|
||||||
fetch_missing_extent(hash);
|
fetch_missing_extent_by_hash(hash);
|
||||||
BEESTOOLONG("erase hash " << to_hex(hash) << " addr " << addr);
|
BEESTOOLONG("erase hash " << to_hex(hash) << " addr " << addr);
|
||||||
unique_lock<mutex> lock(m_bucket_mutex);
|
auto lock = lock_extent_by_hash(hash);
|
||||||
auto er = get_cell_range(hash);
|
auto er = get_cell_range(hash);
|
||||||
Cell mv(hash, addr);
|
Cell mv(hash, addr);
|
||||||
Cell *ip = find(er.first, er.second, mv);
|
Cell *ip = find(er.first, er.second, mv);
|
||||||
@ -387,7 +428,7 @@ BeesHashTable::erase_hash_addr(HashType hash, AddrType addr)
|
|||||||
if (found) {
|
if (found) {
|
||||||
// Lookups on invalid addresses really hurt us. Kill it with fire!
|
// Lookups on invalid addresses really hurt us. Kill it with fire!
|
||||||
*ip = Cell(0, 0);
|
*ip = Cell(0, 0);
|
||||||
set_extent_dirty(hash);
|
set_extent_dirty_locked(hash_to_extent_index(hash));
|
||||||
BEESCOUNT(hash_erase);
|
BEESCOUNT(hash_erase);
|
||||||
#if 0
|
#if 0
|
||||||
if (verify_cell_range(er.first, er.second)) {
|
if (verify_cell_range(er.first, er.second)) {
|
||||||
@ -405,9 +446,9 @@ BeesHashTable::erase_hash_addr(HashType hash, AddrType addr)
|
|||||||
bool
|
bool
|
||||||
BeesHashTable::push_front_hash_addr(HashType hash, AddrType addr)
|
BeesHashTable::push_front_hash_addr(HashType hash, AddrType addr)
|
||||||
{
|
{
|
||||||
fetch_missing_extent(hash);
|
fetch_missing_extent_by_hash(hash);
|
||||||
BEESTOOLONG("push_front_hash_addr hash " << BeesHash(hash) <<" addr " << BeesAddress(addr));
|
BEESTOOLONG("push_front_hash_addr hash " << BeesHash(hash) <<" addr " << BeesAddress(addr));
|
||||||
unique_lock<mutex> lock(m_bucket_mutex);
|
auto lock = lock_extent_by_hash(hash);
|
||||||
auto er = get_cell_range(hash);
|
auto er = get_cell_range(hash);
|
||||||
Cell mv(hash, addr);
|
Cell mv(hash, addr);
|
||||||
Cell *ip = find(er.first, er.second, mv);
|
Cell *ip = find(er.first, er.second, mv);
|
||||||
@ -437,7 +478,7 @@ BeesHashTable::push_front_hash_addr(HashType hash, AddrType addr)
|
|||||||
// There is now a space at the front, insert there if different
|
// There is now a space at the front, insert there if different
|
||||||
if (er.first[0] != mv) {
|
if (er.first[0] != mv) {
|
||||||
er.first[0] = mv;
|
er.first[0] = mv;
|
||||||
set_extent_dirty(hash);
|
set_extent_dirty_locked(hash_to_extent_index(hash));
|
||||||
BEESCOUNT(hash_front);
|
BEESCOUNT(hash_front);
|
||||||
}
|
}
|
||||||
#if 0
|
#if 0
|
||||||
@ -456,9 +497,9 @@ BeesHashTable::push_front_hash_addr(HashType hash, AddrType addr)
|
|||||||
bool
|
bool
|
||||||
BeesHashTable::push_random_hash_addr(HashType hash, AddrType addr)
|
BeesHashTable::push_random_hash_addr(HashType hash, AddrType addr)
|
||||||
{
|
{
|
||||||
fetch_missing_extent(hash);
|
fetch_missing_extent_by_hash(hash);
|
||||||
BEESTOOLONG("push_random_hash_addr hash " << BeesHash(hash) << " addr " << BeesAddress(addr));
|
BEESTOOLONG("push_random_hash_addr hash " << BeesHash(hash) << " addr " << BeesAddress(addr));
|
||||||
unique_lock<mutex> lock(m_bucket_mutex);
|
auto lock = lock_extent_by_hash(hash);
|
||||||
auto er = get_cell_range(hash);
|
auto er = get_cell_range(hash);
|
||||||
Cell mv(hash, addr);
|
Cell mv(hash, addr);
|
||||||
Cell *ip = find(er.first, er.second, mv);
|
Cell *ip = find(er.first, er.second, mv);
|
||||||
@ -521,14 +562,14 @@ BeesHashTable::push_random_hash_addr(HashType hash, AddrType addr)
|
|||||||
case_cond = 5;
|
case_cond = 5;
|
||||||
ret_dirty:
|
ret_dirty:
|
||||||
BEESCOUNT(hash_insert);
|
BEESCOUNT(hash_insert);
|
||||||
set_extent_dirty(hash);
|
set_extent_dirty_locked(hash_to_extent_index(hash));
|
||||||
ret:
|
ret:
|
||||||
#if 0
|
#if 0
|
||||||
if (verify_cell_range(er.first, er.second, false)) {
|
if (verify_cell_range(er.first, er.second, false)) {
|
||||||
BEESLOG("while push_randoming (case " << case_cond << ") pos " << pos
|
BEESLOG("while push_randoming (case " << case_cond << ") pos " << pos
|
||||||
<< " ip " << (ip - er.first) << " " << mv);
|
<< " ip " << (ip - er.first) << " " << mv);
|
||||||
// dump_bucket(saved.data(), saved.data() + saved.size());
|
// dump_bucket_locked(saved.data(), saved.data() + saved.size());
|
||||||
// dump_bucket(er.first, er.second);
|
// dump_bucket_locked(er.first, er.second);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
(void)case_cond;
|
(void)case_cond;
|
||||||
@ -657,9 +698,7 @@ BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename, off_t
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (uint64_t i = 0; i < m_size / sizeof(Extent); ++i) {
|
m_extent_metadata.resize(m_extents);
|
||||||
m_buckets_missing.insert(i);
|
|
||||||
}
|
|
||||||
|
|
||||||
m_writeback_thread.exec([&]() {
|
m_writeback_thread.exec([&]() {
|
||||||
writeback_loop();
|
writeback_loop();
|
||||||
|
28
src/bees.h
28
src/bees.h
@ -432,18 +432,24 @@ private:
|
|||||||
uint64_t m_buckets;
|
uint64_t m_buckets;
|
||||||
uint64_t m_extents;
|
uint64_t m_extents;
|
||||||
uint64_t m_cells;
|
uint64_t m_cells;
|
||||||
set<uint64_t> m_buckets_dirty;
|
|
||||||
set<uint64_t> m_buckets_missing;
|
|
||||||
BeesThread m_writeback_thread;
|
BeesThread m_writeback_thread;
|
||||||
BeesThread m_prefetch_thread;
|
BeesThread m_prefetch_thread;
|
||||||
RateLimiter m_flush_rate_limit;
|
RateLimiter m_flush_rate_limit;
|
||||||
mutex m_extent_mutex;
|
|
||||||
mutex m_bucket_mutex;
|
|
||||||
condition_variable m_condvar;
|
|
||||||
set<HashType> m_toxic_hashes;
|
set<HashType> m_toxic_hashes;
|
||||||
BeesStringFile m_stats_file;
|
BeesStringFile m_stats_file;
|
||||||
|
|
||||||
LockSet<uint64_t> m_extent_lock_set;
|
// Mutex/condvar for the writeback thread
|
||||||
|
mutex m_dirty_mutex;
|
||||||
|
condition_variable m_dirty_condvar;
|
||||||
|
|
||||||
|
// Per-extent structures
|
||||||
|
struct ExtentMetaData {
|
||||||
|
shared_ptr<mutex> m_mutex_ptr; // Access serializer
|
||||||
|
bool m_dirty = false; // Needs to be written back to disk
|
||||||
|
bool m_missing = true; // Needs to be read from disk
|
||||||
|
ExtentMetaData();
|
||||||
|
};
|
||||||
|
vector<ExtentMetaData> m_extent_metadata;
|
||||||
|
|
||||||
void open_file();
|
void open_file();
|
||||||
void writeback_loop();
|
void writeback_loop();
|
||||||
@ -451,11 +457,17 @@ private:
|
|||||||
void try_mmap_flags(int flags);
|
void try_mmap_flags(int flags);
|
||||||
pair<Cell *, Cell *> get_cell_range(HashType hash);
|
pair<Cell *, Cell *> get_cell_range(HashType hash);
|
||||||
pair<uint8_t *, uint8_t *> get_extent_range(HashType hash);
|
pair<uint8_t *, uint8_t *> get_extent_range(HashType hash);
|
||||||
void fetch_missing_extent(HashType hash);
|
void fetch_missing_extent_by_hash(HashType hash);
|
||||||
void set_extent_dirty(HashType hash);
|
void fetch_missing_extent_by_index(uint64_t extent_index);
|
||||||
|
void set_extent_dirty_locked(uint64_t extent_index);
|
||||||
void flush_dirty_extents();
|
void flush_dirty_extents();
|
||||||
|
bool flush_dirty_extent(uint64_t extent_index);
|
||||||
bool is_toxic_hash(HashType h) const;
|
bool is_toxic_hash(HashType h) const;
|
||||||
|
|
||||||
|
size_t hash_to_extent_index(HashType ht);
|
||||||
|
unique_lock<mutex> lock_extent_by_hash(HashType ht);
|
||||||
|
unique_lock<mutex> lock_extent_by_index(uint64_t extent_index);
|
||||||
|
|
||||||
BeesHashTable(const BeesHashTable &) = delete;
|
BeesHashTable(const BeesHashTable &) = delete;
|
||||||
BeesHashTable &operator=(const BeesHashTable &) = delete;
|
BeesHashTable &operator=(const BeesHashTable &) = delete;
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user