From 6c67ae0d5e50cf4dec43f4e8fd3e4d0450231ce9 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Mon, 8 Jan 2018 22:31:30 +0100 Subject: [PATCH 01/24] Don't zap localconf in "make clean" When you run "make clean", localconf is being removed. This is probably in most cases not intentional. Signed-off-by: Kai Krakow --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5da1465..c0ff5a3 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ MARKDOWN ?= markdown default all: lib src test README.html clean: ## Cleanup - git clean -dfx + git clean -dfx -e localconf .PHONY: lib src test From abeb6e74b2354763b47c29ba04bb9447e6ed2ff5 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Fri, 24 Nov 2017 14:43:57 +0100 Subject: [PATCH 02/24] Add scripts to "make all" target This prevents scripts being generated by "root" during "sudo make install" phase. Signed-off-by: Kai Krakow --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c0ff5a3..bce6d99 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ MARKDOWN ?= markdown # allow local configuration to override above variables -include localconf -default all: lib src test README.html +default all: lib src scripts test README.html clean: ## Cleanup git clean -dfx -e localconf From f7f99f52b59f0effa194bbedb22afc5b65e4e350 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Fri, 24 Nov 2017 14:43:57 +0100 Subject: [PATCH 03/24] Generalize sed invocation rule Remove the redundant sed call by generalizing the rule to apply sed to .in templates. 
Signed-off-by: Kai Krakow --- Makefile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Makefile b/Makefile index bce6d99..7fa6d05 100644 --- a/Makefile +++ b/Makefile @@ -25,10 +25,7 @@ test: ## Run tests test: lib src $(MAKE) -C test -scripts/beesd: scripts/beesd.in - sed -e's#@LIBEXEC_PREFIX@#$(LIBEXEC_PREFIX)#' -e's#@PREFIX@#$(PREFIX)#' "$<" >"$@" - -scripts/beesd@.service: scripts/beesd@.service.in +scripts/%: scripts/%.in sed -e's#@LIBEXEC_PREFIX@#$(LIBEXEC_PREFIX)#' -e's#@PREFIX@#$(PREFIX)#' "$<" >"$@" scripts: scripts/beesd scripts/beesd@.service From 953c158868831ea81cc5141c824b5db0be7c9347 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Tue, 9 Jan 2018 01:24:35 +0100 Subject: [PATCH 04/24] systemd: Don't start in system-update.target Due to bees installing into the local-fs.target, bees also runs during system-update.target. This should not be done, system-update.target is meant as an isolated bootup mode for applying updates offline, that is: Only essential services are running. Fix this by making it WantedBy basic.target instead. According to system-update.target and "man bootup", system-update.target pulls in sysinit.target, as does basic.target. So essentially, basic.target is not part of the system-update.target transaction. 
Signed-off-by: Kai Krakow --- scripts/beesd@.service.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/beesd@.service.in b/scripts/beesd@.service.in index 55df99c..185620d 100644 --- a/scripts/beesd@.service.in +++ b/scripts/beesd@.service.in @@ -21,4 +21,4 @@ CPUAccounting=true MemoryAccounting=true [Install] -WantedBy=local-fs.target +WantedBy=basic.target From 78f96a9fbdb696390422fcb2da1dc43d4c8c615f Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Tue, 9 Jan 2018 01:29:05 +0100 Subject: [PATCH 05/24] systemd: Don't start without essential system services Starting bees right after local-fs.target is probably not what we want, as basic setup of the system might not have been done (like udev, cryptsetup, sysctl, swap, etc). Let's start only after sysinit.target instead which guarantees that all basic setup has been done, most importantly, sysctl, udev, and swap have been setup which may apply important tweaks, configuration, and tuning. Signed-off-by: Kai Krakow --- scripts/beesd@.service.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/beesd@.service.in b/scripts/beesd@.service.in index 185620d..6115c62 100644 --- a/scripts/beesd@.service.in +++ b/scripts/beesd@.service.in @@ -1,6 +1,6 @@ [Unit] Description=Bees - Best-Effort Extent-Same, a btrfs deduplicator daemon: %i -After=local-fs.target +After=sysinit.target [Service] Type=simple From b959af1a15f2a19f188b9483c7eda3f05f7e30ec Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Tue, 9 Jan 2018 01:32:17 +0100 Subject: [PATCH 06/24] systemd: Provide URL and better description Let's direct users to the support site when they ask systemd for help about the service unit, or by looking at error messages. Also, let's adjust the description to be more pleasing to the eyes. The previous long description with uncommon formatting really stuck out in the boot logs. 
Signed-off-by: Kai Krakow --- scripts/beesd@.service.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/beesd@.service.in b/scripts/beesd@.service.in index 6115c62..5aeff76 100644 --- a/scripts/beesd@.service.in +++ b/scripts/beesd@.service.in @@ -1,5 +1,6 @@ [Unit] -Description=Bees - Best-Effort Extent-Same, a btrfs deduplicator daemon: %i +Description=Bees (%i) +Documentation=https://github.com/Zygo/bees After=sysinit.target [Service] From 8e2139d6edd8e2c57da7695da884bcbf541f338e Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Tue, 9 Jan 2018 01:51:18 +0100 Subject: [PATCH 07/24] Makefile: depend install_scripts on scripts For consistency with the other install target, let's depend install_scripts on its build targets. Signed-off-by: Kai Krakow --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7fa6d05..24fbdaf 100644 --- a/Makefile +++ b/Makefile @@ -40,7 +40,7 @@ install: lib src test install -Dm755 bin/bees $(LIBEXEC_PREFIX)/bees install_scripts: ## Install scipts -install_scripts: +install_scripts: scripts install -Dm755 scripts/beesd $(PREFIX)/usr/sbin/beesd install -Dm644 scripts/beesd.conf.sample $(PREFIX)/etc/bees/beesd.conf.sample install -Dm644 scripts/beesd@.service $(PREFIX)/lib/systemd/system/beesd@.service From f0c516f33b60310a87173e768c97df8c852e5c75 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Tue, 9 Jan 2018 01:54:41 +0100 Subject: [PATCH 08/24] Makefile: let "make install" install the complete distribution It happened more than once that I ran just "make install" only, which doesn't install the scripts. Let's fix this by renaming the previous install target to install_bees, and then make a new install target which depends on each install target and thus installs the complete distribution. It doesn't hurt to install those few scripts. I don't see the point in separating the install targets as it was previously done. 
Signed-off-by: Kai Krakow --- Makefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 24fbdaf..bc5d8f5 100644 --- a/Makefile +++ b/Makefile @@ -34,8 +34,8 @@ README.html: README.md $(MARKDOWN) README.md > README.html.new mv -f README.html.new README.html -install: ## Install bees + libs -install: lib src test +install_bees: ## Install bees + libs +install_bees: lib src test install -Dm644 lib/libcrucible.so $(PREFIX)/usr/lib/libcrucible.so install -Dm755 bin/bees $(LIBEXEC_PREFIX)/bees @@ -45,5 +45,8 @@ install_scripts: scripts install -Dm644 scripts/beesd.conf.sample $(PREFIX)/etc/bees/beesd.conf.sample install -Dm644 scripts/beesd@.service $(PREFIX)/lib/systemd/system/beesd@.service +install: ## Install distribution +install: install_bees install_scripts + help: ## Show help @fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/\t/' From 92aa13a6ae882fad955e532c47bb58e49f7f7c0d Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Tue, 9 Jan 2018 01:56:22 +0100 Subject: [PATCH 09/24] Add beesd@.service to gitignore It's a generated file. We should ignore it, so it won't accidentally be checked in. 
Signed-off-by: Kai Krakow --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index cb02a07..e00f61d 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ make.log make.log.new localconf scripts/beesd +scripts/beesd@.service From ff9e0e3571ab34ff61f332a8a4bb1a96cf036222 Mon Sep 17 00:00:00 2001 From: Timofey Titovets Date: Tue, 9 Jan 2018 22:33:57 +0300 Subject: [PATCH 10/24] Fix: exec bees - breaks bash trap handling of umount bees workdir Signed-off-by: Timofey Titovets --- scripts/beesd.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/beesd.in b/scripts/beesd.in index b4c466b..269f77f 100755 --- a/scripts/beesd.in +++ b/scripts/beesd.in @@ -115,4 +115,5 @@ fi MNT_DIR="${MNT_DIR//\/\//\/}" -cd $MNT_DIR && exec @LIBEXEC_PREFIX@/bees ${ARGUMENTS[@]} $OPTIONS "$MNT_DIR" +cd "$MNT_DIR" +@LIBEXEC_PREFIX@/bees "${ARGUMENTS[@]}" $OPTIONS "$MNT_DIR" From 333b2f77460f7a61b0937ee23796f8ad2d1314ea Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Wed, 10 Jan 2018 20:08:23 +0100 Subject: [PATCH 11/24] Makefile improvement Now you can make bees fly as pointed out in the README... ;-) --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index bc5d8f5..0eb3cdf 100644 --- a/Makefile +++ b/Makefile @@ -50,3 +50,6 @@ install: install_bees install_scripts help: ## Show help @fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/\t/' + +bees: all +fly: install From 1fcf07cc2a73100eaac4a531bc84e98a3aa2e305 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Wed, 10 Jan 2018 20:41:00 +0100 Subject: [PATCH 12/24] Installation: Prepare README Rename a section in preparation for a new install section. 
Signed-off-by: Kai Krakow --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6d483f7..196ed1d 100644 --- a/README.md +++ b/README.md @@ -322,8 +322,8 @@ Not really a bug, but a gotcha nonetheless: To limit this delay, Bees closes all FDs in its file FD cache every 15 minutes. -Build ------ +Build from source +----------------- Build with `make`. The build produces `bin/bees` and `lib/libcrucible.so`, which must be copied to somewhere in `$PATH` and `$LD_LIBRARY_PATH` From 361ef0bebfa566a4b6e576feb5693731857834f7 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Wed, 10 Jan 2018 20:41:37 +0100 Subject: [PATCH 13/24] Installation: Add new section to README --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 196ed1d..a1eef32 100644 --- a/README.md +++ b/README.md @@ -322,6 +322,11 @@ Not really a bug, but a gotcha nonetheless: To limit this delay, Bees closes all FDs in its file FD cache every 15 minutes. +Installation +============ + +Bees can be installed by following one these instructions: + Build from source ----------------- From a465d997bdac16e7df996c9356a89df7cf50f057 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Wed, 10 Jan 2018 20:41:56 +0100 Subject: [PATCH 14/24] Makefile: Document Makefile changes --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index a1eef32..9bf3312 100644 --- a/README.md +++ b/README.md @@ -334,6 +334,11 @@ Build with `make`. The build produces `bin/bees` and `lib/libcrucible.so`, which must be copied to somewhere in `$PATH` and `$LD_LIBRARY_PATH` on the target system respectively. +It will also generate `scripts/beesd@.service` for systemd users. This +service makes use of a helper script `scripts/beesd` to boot the service. +Both of the latter use the filesystem UUID to mount the root subvolume +within a temporary runtime directory. 
+ ### Ubuntu 16.04 - 17.04: `$ apt -y install build-essential btrfs-tools uuid-dev markdown && make` From 0fce10991b906f1863f60ff45a5775e2cbd91a49 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Wed, 10 Jan 2018 20:43:54 +0100 Subject: [PATCH 15/24] Installation: Add Arch Linux instructions Closes #34 Signed-off-by: Kai Krakow --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 9bf3312..71c14ce 100644 --- a/README.md +++ b/README.md @@ -327,6 +327,13 @@ Installation Bees can be installed by following one these instructions: +Arch package +------------ + +Bees is availabe Arch Linux AUR. Install with: + +`$ pacaur -S bees-git` + Build from source ----------------- From 421641e2428f0a352f758a123211f1ae91a1e9e0 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Wed, 10 Jan 2018 21:06:49 +0100 Subject: [PATCH 16/24] Makefile: Document scripts/beesd Add a paragraph about the helper script `scripts/beesd` to automatically setup and configure bees. Signed-off-by: Kai Krakow --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 71c14ce..9ca855d 100644 --- a/README.md +++ b/README.md @@ -381,6 +381,9 @@ Dependencies Setup ----- +If you don't want to use the helper script `scripts/beesd` to setup and +configure bees, here's how you manually setup bees. + Create a directory for bees state files: export BEESHOME=/some/path From f0e02478ef77df6049c29fcda5cef72afecd52ee Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Wed, 10 Jan 2018 21:47:01 +0100 Subject: [PATCH 17/24] Installation: Document optional dependency on blkid If using `scripts/beesd`, we need `blkid` which is part of util-linux. It should be available on every distribution but let's document it anyway. 
Signed-off-by: Kai Krakow --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 9ca855d..85f208c 100644 --- a/README.md +++ b/README.md @@ -377,6 +377,8 @@ Dependencies * markdown +* util-linux version that provides `blkid` command for the helper + script `scripts/beesd` to work Setup ----- From 60cd9c6165cfcc102d5759c40417e871bf971860 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Wed, 10 Jan 2018 22:35:22 +0100 Subject: [PATCH 18/24] Installation: Introduce DESTDIR into Makefile In Gentoo, usage of DESTDIR is automatically handled by the build system to support installation into a clean image from which the package is created. Thus, let's add DESTDIR to the install targets. One can now correctly install bees with packaging systems simply by running: $ DESTDIR=/tmp/bees-image make all install This will no longer interfere with the PREFIX setting. CC: Timofey Titovets Signed-off-by: Kai Krakow --- Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 0eb3cdf..899748c 100644 --- a/Makefile +++ b/Makefile @@ -36,14 +36,14 @@ README.html: README.md install_bees: ## Install bees + libs install_bees: lib src test - install -Dm644 lib/libcrucible.so $(PREFIX)/usr/lib/libcrucible.so - install -Dm755 bin/bees $(LIBEXEC_PREFIX)/bees + install -Dm644 lib/libcrucible.so $(DESTDIR)$(PREFIX)/usr/lib/libcrucible.so + install -Dm755 bin/bees $(DESTDIR)$(LIBEXEC_PREFIX)/bees install_scripts: ## Install scipts install_scripts: scripts - install -Dm755 scripts/beesd $(PREFIX)/usr/sbin/beesd - install -Dm644 scripts/beesd.conf.sample $(PREFIX)/etc/bees/beesd.conf.sample - install -Dm644 scripts/beesd@.service $(PREFIX)/lib/systemd/system/beesd@.service + install -Dm755 scripts/beesd $(DESTDIR)$(PREFIX)/usr/sbin/beesd + install -Dm644 scripts/beesd.conf.sample $(DESTDIR)$(PREFIX)/etc/bees/beesd.conf.sample + install -Dm644 scripts/beesd@.service 
$(DESTDIR)$(PREFIX)/lib/systemd/system/beesd@.service install: ## Install distribution install: install_bees install_scripts From fdd835023946770d94a29027922df03ac87fb998 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Wed, 10 Jan 2018 23:05:43 +0100 Subject: [PATCH 19/24] Installation: Improve filesystem layout flexibility In preparation for Gentoo QA checks during ebuild merge phase, let's make some more of the filesystem layout adjustable. Signed-off-by: Kai Krakow --- Makefile | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 899748c..363289c 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,9 @@ PREFIX ?= / -LIBEXEC_PREFIX ?= $(PREFIX)/usr/lib/bees +LIBDIR ?= lib +USR_PREFIX ?= $(PREFIX)/usr +USRLIB_PREFIX ?= $(USR_PREFIX)/$(LIBDIR) +SYSTEMD_LIB_PREFIX ?= $(PREFIX)/lib/systemd +LIBEXEC_PREFIX ?= $(USRLIB_PREFIX)/bees MARKDOWN := $(firstword $(shell which markdown markdown2 markdown_py 2>/dev/null)) MARKDOWN ?= markdown @@ -36,14 +40,14 @@ README.html: README.md install_bees: ## Install bees + libs install_bees: lib src test - install -Dm644 lib/libcrucible.so $(DESTDIR)$(PREFIX)/usr/lib/libcrucible.so + install -Dm644 lib/libcrucible.so $(DESTDIR)$(USRLIB_PREFIX)/libcrucible.so install -Dm755 bin/bees $(DESTDIR)$(LIBEXEC_PREFIX)/bees install_scripts: ## Install scipts install_scripts: scripts - install -Dm755 scripts/beesd $(DESTDIR)$(PREFIX)/usr/sbin/beesd + install -Dm755 scripts/beesd $(DESTDIR)$(USR_PREFIX)/sbin/beesd install -Dm644 scripts/beesd.conf.sample $(DESTDIR)$(PREFIX)/etc/bees/beesd.conf.sample - install -Dm644 scripts/beesd@.service $(DESTDIR)$(PREFIX)/lib/systemd/system/beesd@.service + install -Dm644 scripts/beesd@.service $(DESTDIR)$(SYSTEMD_LIB_PREFIX)/system/beesd@.service install: ## Install distribution install: install_bees install_scripts From 3391593cb960958d16a83d214d28cdd9aa1d4c7d Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Thu, 11 Jan 2018 01:22:45 +0100 Subject: [PATCH 20/24] 
Installation: Keep version tag in a variable To prepare soname handling, we need to keep the version tag in a variable. Signed-off-by: Kai Krakow --- lib/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/Makefile b/lib/Makefile index 178128f..a2d607d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -1,3 +1,5 @@ +TAG := $(shell git describe --always --dirty || echo UNKNOWN) + default: libcrucible.so OBJS = \ @@ -22,7 +24,7 @@ depends.mk: *.cc mv -fv depends.mk.new depends.mk .version.cc: Makefile ../makeflags *.cc ../include/crucible/*.h - echo "namespace crucible { const char *VERSION = \"$(shell git describe --always --dirty || echo UNKNOWN)\"; }" > .version.new.cc + echo "namespace crucible { const char *VERSION = \"$(TAG)\"; }" > .version.new.cc mv -f .version.new.cc .version.cc -include depends.mk From 3a24cd30106ee15d4bd8381d0e8291250488413b Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Wed, 10 Jan 2018 23:21:25 +0100 Subject: [PATCH 21/24] Installation: Fix soname QA warning in Gentoo Gentoo warns about libs missing a proper soname during QA phase. Let's fix this. 
Signed-off-by: Kai Krakow --- lib/Makefile | 2 +- src/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Makefile b/lib/Makefile index a2d607d..5aaf468 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -33,4 +33,4 @@ depends.mk: *.cc $(CXX) $(CXXFLAGS) -o $@ -c $< libcrucible.so: $(OBJS) Makefile - $(CXX) $(LDFLAGS) -o $@ $(OBJS) -shared -luuid + $(CXX) $(LDFLAGS) -o $@ $(OBJS) -shared -Wl,-soname,$@ -luuid diff --git a/src/Makefile b/src/Makefile index d96fd96..3a69c9a 100644 --- a/src/Makefile +++ b/src/Makefile @@ -8,7 +8,7 @@ all: $(PROGRAMS) depends.mk include ../makeflags LIBS = -lcrucible -lpthread -LDFLAGS = -L../lib -Wl,-rpath=$(shell realpath ../lib) +LDFLAGS = -L../lib depends.mk: Makefile *.cc for x in *.cc; do $(CXX) $(CXXFLAGS) -M "$$x"; done > depends.mk.new From 634a1d0bf6732f6b98b3449a23ff11582fdbf52c Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Thu, 11 Jan 2018 01:55:26 +0100 Subject: [PATCH 22/24] Installation: -fPIC should not be used unconditionally According to Gentoo packaging guide, -fPIC should only be used on shared libraries, and not added unconditionally to every linker call. 
Signed-off-by: Kai Krakow --- lib/Makefile | 4 ++-- makeflags | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Makefile b/lib/Makefile index 5aaf468..54df19d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -30,7 +30,7 @@ depends.mk: *.cc -include depends.mk %.o: %.cc ../include/crucible/%.h - $(CXX) $(CXXFLAGS) -o $@ -c $< + $(CXX) $(CXXFLAGS) -fPIC -o $@ -c $< libcrucible.so: $(OBJS) Makefile - $(CXX) $(LDFLAGS) -o $@ $(OBJS) -shared -Wl,-soname,$@ -luuid + $(CXX) $(LDFLAGS) -fPIC -o $@ $(OBJS) -shared -Wl,-soname,$@ -luuid diff --git a/makeflags b/makeflags index f5983cb..de4a66f 100644 --- a/makeflags +++ b/makeflags @@ -1,4 +1,4 @@ -CCFLAGS = -Wall -Wextra -Werror -O3 -march=native -I../include -ggdb -fpic -D_FILE_OFFSET_BITS=64 +CCFLAGS = -Wall -Wextra -Werror -O3 -march=native -I../include -ggdb -D_FILE_OFFSET_BITS=64 # CCFLAGS = -Wall -Wextra -Werror -O0 -I../include -ggdb -fpic -D_FILE_OFFSET_BITS=64 CFLAGS = $(CCFLAGS) -std=c99 CXXFLAGS = $(CCFLAGS) -std=c++11 -Wold-style-cast From 365a913a260434273f6b97d98715b312a960615e Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Thu, 11 Jan 2018 03:03:17 +0100 Subject: [PATCH 23/24] Installation: Add Gentoo ebuild This commit adds an ebuild for Gentoo. Version 9999 is building live from current git, currently using kakra:integration because it has some installation and build fixes important for Gentoo. Signed-off-by: Kai Krakow --- README.md | 13 +++++- contrib/gentoo/bees-9999.ebuild | 42 ++++++++++++++++++++ contrib/gentoo/files/v0.5-gentoo_build.patch | 20 ++++++++++ 3 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 contrib/gentoo/bees-9999.ebuild create mode 100644 contrib/gentoo/files/v0.5-gentoo_build.patch diff --git a/README.md b/README.md index 85f208c..40f3f49 100644 --- a/README.md +++ b/README.md @@ -330,10 +330,21 @@ Bees can be installed by following one these instructions: Arch package ------------ -Bees is availabe Arch Linux AUR. 
Install with: +Bees is availabe in Arch Linux AUR. Install with: `$ pacaur -S bees-git` +Gentoo ebuild +------------- + +Bees is available as a Gentoo ebuild. Just copy `bees-9999.ebuild` from +`contrib/gentoo` including the `files` subdirectory to your local +overlay category `sys-fs`. + +You can copy the ebuild to match a Bees version number, and it will +build that tagged version. It is partly supported since v0.5, +previous versions won't work. + Build from source ----------------- diff --git a/contrib/gentoo/bees-9999.ebuild b/contrib/gentoo/bees-9999.ebuild new file mode 100644 index 0000000..d39b895 --- /dev/null +++ b/contrib/gentoo/bees-9999.ebuild @@ -0,0 +1,42 @@ +# Copyright 1999-2018 Gentoo Foundation +# Distributed under the terms of the GNU General Public License v2 + +EAPI=6 + +inherit git-r3 eutils multilib + +DESCRIPTION="Best-Effort Extent-Same, a btrfs dedup agent" +HOMEPAGE="https://github.com/Zygo/bees" + +if [[ ${PV} == "9999" ]] ; then + EGIT_REPO_URI="https://github.com/kakra/bees.git" + EGIT_BRANCH="integration" +else + IUSE="" + + SRC_URI="https://github.com/Zygo/bees/archive/v${PV}.tar.gz -> bees-${PV}.tar.gz" +fi + +PATCHES=" + ${FILESDIR}/v0.5-gentoo_build.patch +" + +LICENSE="GPL-3" +SLOT="0" +KEYWORDS="" +IUSE="" + +DEPEND=" + >=sys-apps/util-linux-2.30.2 + >=sys-devel/gcc-4.9 + >=sys-fs/btrfs-progs-4.1 +" +RDEPEND="${DEPEND}" + +DOCS="README.md COPYING" +HTML_DOCS="README.html" + +src_prepare() { + default + echo LIBDIR=$(get_libdir) >>${S}/localconf +} diff --git a/contrib/gentoo/files/v0.5-gentoo_build.patch b/contrib/gentoo/files/v0.5-gentoo_build.patch new file mode 100644 index 0000000..9f22cbc --- /dev/null +++ b/contrib/gentoo/files/v0.5-gentoo_build.patch @@ -0,0 +1,20 @@ +diff --git a/localconf b/localconf +new file mode 100644 +index 0000000..7705cbb +--- /dev/null ++++ b/localconf +@@ -0,0 +1,2 @@ ++PREFIX=/ ++LIBEXEC_PREFIX=/usr/libexec +diff --git a/makeflags b/makeflags +index f5983cb..0348623 100644 +--- a/makeflags 
++++ b/makeflags +@@ -1,4 +1,3 @@ +-CCFLAGS = -Wall -Wextra -Werror -O3 -march=native -I../include -ggdb -D_FILE_OFFSET_BITS=64 +-# CCFLAGS = -Wall -Wextra -Werror -O0 -I../include -ggdb -fpic -D_FILE_OFFSET_BITS=64 +-CFLAGS = $(CCFLAGS) -std=c99 +-CXXFLAGS = $(CCFLAGS) -std=c++11 -Wold-style-cast ++CCFLAGS = -O3 -I../include -fpic -D_FILE_OFFSET_BITS=64 ++CFLAGS += $(CCFLAGS) -std=c99 ++CXXFLAGS += $(CCFLAGS) -std=c++11 -Wold-style-cast From 4aa5978a89e8dbf055749316d43568d6aef07414 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Wed, 10 Jan 2018 23:08:22 -0500 Subject: [PATCH 24/24] hash: reduce mutex contention using one mutex per hash table extent This avoids PERFORMANCE warnings when large hash tables are used on slow CPUs or with lots of worker threads. It also simplifies the code (no locksets, only one object-wide mutex instead of two). Fixed a few minor bugs along the way (e.g. we were not setting the dirty flag on the right hash table extent when we detected hash table errors). Simplified error handling: IO errors on the hash table are ignored, instead of throwing an exception into the function that tried to use the hash table. 
Signed-off-by: Zygo Blaxell --- src/bees-hash.cc | 223 ++++++++++++++++++++++++++++------------------- src/bees.h | 28 ++++-- 2 files changed, 151 insertions(+), 100 deletions(-) diff --git a/src/bees-hash.cc b/src/bees-hash.cc index 37b6f6c..7ba562f 100644 --- a/src/bees-hash.cc +++ b/src/bees-hash.cc @@ -24,14 +24,16 @@ operator<<(ostream &os, const BeesHashTable::Cell &bhte) << BeesAddress(bhte.e_addr) << " }"; } +#if 0 +static void -dump_bucket(BeesHashTable::Cell *p, BeesHashTable::Cell *q) +dump_bucket_locked(BeesHashTable::Cell *p, BeesHashTable::Cell *q) { - // Must be called while holding m_bucket_mutex for (auto i = p; i < q; ++i) { BEESLOG("Entry " << i - p << " " << *i); } } +#endif const bool VERIFY_CLEARS_BUGS = false; @@ -91,52 +93,74 @@ BeesHashTable::get_extent_range(HashType hash) return make_pair(bp, ep); } +bool +BeesHashTable::flush_dirty_extent(uint64_t extent_index) +{ + BEESNOTE("flushing extent #" << extent_index << " of " << m_extents << " extents"); + + auto lock = lock_extent_by_index(extent_index); + + // Not dirty, nothing to do + if (!m_extent_metadata.at(extent_index).m_dirty) { + return false; + } + + bool wrote_extent = false; + + catch_all([&]() { + uint8_t *dirty_extent = m_extent_ptr[extent_index].p_byte; + uint8_t *dirty_extent_end = m_extent_ptr[extent_index + 1].p_byte; + THROW_CHECK1(out_of_range, dirty_extent, dirty_extent >= m_byte_ptr); + THROW_CHECK1(out_of_range, dirty_extent_end, dirty_extent_end <= m_byte_ptr_end); + THROW_CHECK2(out_of_range, dirty_extent_end, dirty_extent, dirty_extent_end - dirty_extent == BLOCK_SIZE_HASHTAB_EXTENT); + BEESTOOLONG("pwrite(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")"); + // Copy the extent because we might be stuck writing for a while + vector extent_copy(dirty_extent, dirty_extent_end); + + // Mark extent non-dirty while we still hold the lock + 
m_extent_metadata.at(extent_index).m_dirty = false; + + // Release the lock + lock.unlock(); + + // Write the extent (or not) + pwrite_or_die(m_fd, extent_copy, dirty_extent - m_byte_ptr); + BEESCOUNT(hash_extent_out); + + wrote_extent = true; + }); + + BEESNOTE("flush rate limited after extent #" << extent_index << " of " << m_extents << " extents"); + m_flush_rate_limit.sleep_for(BLOCK_SIZE_HASHTAB_EXTENT); + return wrote_extent; +} + void BeesHashTable::flush_dirty_extents() { THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0); - unique_lock lock(m_extent_mutex); - auto dirty_extent_copy = m_buckets_dirty; - m_buckets_dirty.clear(); - if (dirty_extent_copy.empty()) { - BEESNOTE("idle"); - m_condvar.wait(lock); - return; // please call later, i.e. immediately + uint64_t wrote_extents = 0; + for (size_t extent_index = 0; extent_index < m_extents; ++extent_index) { + if (flush_dirty_extent(extent_index)) { + ++wrote_extents; + } } - lock.unlock(); - size_t extent_counter = 0; - for (auto extent_number : dirty_extent_copy) { - ++extent_counter; - BEESNOTE("flush extent #" << extent_number << " (" << extent_counter << " of " << dirty_extent_copy.size() << ")"); - catch_all([&]() { - uint8_t *dirty_extent = m_extent_ptr[extent_number].p_byte; - uint8_t *dirty_extent_end = m_extent_ptr[extent_number + 1].p_byte; - THROW_CHECK1(out_of_range, dirty_extent, dirty_extent >= m_byte_ptr); - THROW_CHECK1(out_of_range, dirty_extent_end, dirty_extent_end <= m_byte_ptr_end); - THROW_CHECK2(out_of_range, dirty_extent_end, dirty_extent, dirty_extent_end - dirty_extent == BLOCK_SIZE_HASHTAB_EXTENT); - BEESTOOLONG("pwrite(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")"); - // Page locks slow us down more than copying the data does - vector extent_copy(dirty_extent, dirty_extent_end); - pwrite_or_die(m_fd, extent_copy, dirty_extent - m_byte_ptr); - BEESCOUNT(hash_extent_out); 
- }); - BEESNOTE("flush rate limited at extent #" << extent_number << " (" << extent_counter << " of " << dirty_extent_copy.size() << ")"); - m_flush_rate_limit.sleep_for(BLOCK_SIZE_HASHTAB_EXTENT); - } + BEESNOTE("idle after writing " << wrote_extents << " of " << m_extents << " extents"); + unique_lock lock(m_dirty_mutex); + m_dirty_condvar.wait(lock); } void -BeesHashTable::set_extent_dirty(HashType hash) +BeesHashTable::set_extent_dirty_locked(uint64_t extent_index) { - THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0); - auto pr = get_extent_range(hash); - uint64_t extent_number = reinterpret_cast(pr.first) - m_extent_ptr; - THROW_CHECK1(runtime_error, extent_number, extent_number < m_extents); - unique_lock lock(m_extent_mutex); - m_buckets_dirty.insert(extent_number); - m_condvar.notify_one(); + // Must already be locked + m_extent_metadata.at(extent_index).m_dirty = true; + + // Signal writeback thread + unique_lock dirty_lock(m_dirty_mutex); + m_dirty_condvar.notify_one(); } void @@ -179,13 +203,13 @@ BeesHashTable::prefetch_loop() size_t unaligned_eof_count = 0; for (uint64_t ext = 0; ext < m_extents; ++ext) { - BEESNOTE("prefetching hash table extent " << ext << " of " << m_extent_ptr_end - m_extent_ptr); + BEESNOTE("prefetching hash table extent " << ext << " of " << m_extents); catch_all([&]() { - fetch_missing_extent(ext * c_buckets_per_extent); + fetch_missing_extent_by_index(ext); - BEESNOTE("analyzing hash table extent " << ext << " of " << m_extent_ptr_end - m_extent_ptr); + BEESNOTE("analyzing hash table extent " << ext << " of " << m_extents); bool duplicate_bugs_found = false; - unique_lock lock(m_bucket_mutex); + auto lock = lock_extent_by_index(ext); for (Bucket *bucket = m_extent_ptr[ext].p_buckets; bucket < m_extent_ptr[ext + 1].p_buckets; ++bucket) { if (verify_cell_range(bucket[0].p_cells, bucket[1].p_cells)) { duplicate_bugs_found = true; @@ -214,9 +238,8 @@ BeesHashTable::prefetch_loop() // Count these instead of calculating the 
number so we get better stats in case of exceptions occupied_count += this_bucket_occupied_count; } - lock.unlock(); if (duplicate_bugs_found) { - set_extent_dirty(ext); + set_extent_dirty_locked(ext); } }); } @@ -291,52 +314,70 @@ BeesHashTable::prefetch_loop() } } -void -BeesHashTable::fetch_missing_extent(HashType hash) +size_t +BeesHashTable::hash_to_extent_index(HashType hash) +{ + auto pr = get_extent_range(hash); + uint64_t extent_index = reinterpret_cast(pr.first) - m_extent_ptr; + THROW_CHECK2(runtime_error, extent_index, m_extents, extent_index < m_extents); + return extent_index; +} + +BeesHashTable::ExtentMetaData::ExtentMetaData() : + m_mutex_ptr(make_shared()) +{ +} + +unique_lock +BeesHashTable::lock_extent_by_index(uint64_t extent_index) +{ + THROW_CHECK2(out_of_range, extent_index, m_extents, extent_index < m_extents); + return unique_lock(*m_extent_metadata.at(extent_index).m_mutex_ptr); +} + +unique_lock +BeesHashTable::lock_extent_by_hash(HashType hash) { BEESTOOLONG("fetch_missing_extent for hash " << to_hex(hash)); - THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0); - auto pr = get_extent_range(hash); - uint64_t extent_number = reinterpret_cast(pr.first) - m_extent_ptr; - THROW_CHECK1(runtime_error, extent_number, extent_number < m_extents); + return lock_extent_by_index(hash_to_extent_index(hash)); +} - unique_lock lock(m_extent_mutex); - if (!m_buckets_missing.count(extent_number)) { +void +BeesHashTable::fetch_missing_extent_by_index(uint64_t extent_index) +{ + BEESNOTE("checking hash extent #" << extent_index << " of " << m_extents << " extents"); + auto lock = lock_extent_by_index(extent_index); + if (!m_extent_metadata.at(extent_index).m_missing) { return; } - size_t missing_buckets = m_buckets_missing.size(); - lock.unlock(); - - BEESNOTE("waiting to fetch hash extent #" << extent_number << ", " << missing_buckets << " left to fetch"); - - // Acquire blocking lock on this extent only - auto extent_lock = 
m_extent_lock_set.make_lock(extent_number); - - // Check missing again because someone else might have fetched this - // extent for us while we didn't hold any locks - lock.lock(); - if (!m_buckets_missing.count(extent_number)) { - BEESCOUNT(hash_extent_in_twice); - return; - } - lock.unlock(); - // OK we have to read this extent - BEESNOTE("fetching hash extent #" << extent_number << ", " << missing_buckets << " left to fetch"); + BEESNOTE("fetching hash extent #" << extent_index << " of " << m_extents << " extents"); + BEESTRACE("Fetching hash extent #" << extent_index << " of " << m_extents << " extents"); + BEESTOOLONG("Fetching hash extent #" << extent_index << " of " << m_extents << " extents"); - BEESTRACE("Fetching missing hash extent " << extent_number); - uint8_t *dirty_extent = m_extent_ptr[extent_number].p_byte; - uint8_t *dirty_extent_end = m_extent_ptr[extent_number + 1].p_byte; + uint8_t *dirty_extent = m_extent_ptr[extent_index].p_byte; + uint8_t *dirty_extent_end = m_extent_ptr[extent_index + 1].p_byte; - { + // If the read fails don't retry, just go with whatever data we have + m_extent_metadata.at(extent_index).m_missing = false; + + catch_all([&]() { BEESTOOLONG("pread(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")"); pread_or_die(m_fd, dirty_extent, dirty_extent_end - dirty_extent, dirty_extent - m_byte_ptr); - } + }); + // Only count extents successfully read BEESCOUNT(hash_extent_in); - lock.lock(); - m_buckets_missing.erase(extent_number); +} + +void +BeesHashTable::fetch_missing_extent_by_hash(HashType hash) +{ + uint64_t extent_index = hash_to_extent_index(hash); + BEESNOTE("waiting to fetch hash extent #" << extent_index << " of " << m_extents << " extents"); + + fetch_missing_extent_by_index(extent_index); } bool @@ -358,10 +399,10 @@ BeesHashTable::find_cell(HashType hash) rv.push_back(toxic_cell); return rv; } - 
fetch_missing_extent(hash); + fetch_missing_extent_by_hash(hash); BEESTOOLONG("find_cell hash " << BeesHash(hash)); vector rv; - unique_lock lock(m_bucket_mutex); + auto lock = lock_extent_by_hash(hash); auto er = get_cell_range(hash); // FIXME: Weed out zero addresses in the table due to earlier bugs copy_if(er.first, er.second, back_inserter(rv), [=](const Cell &ip) { return ip.e_hash == hash && ip.e_addr >= 0x1000; }); @@ -377,9 +418,9 @@ BeesHashTable::find_cell(HashType hash) void BeesHashTable::erase_hash_addr(HashType hash, AddrType addr) { - fetch_missing_extent(hash); + fetch_missing_extent_by_hash(hash); BEESTOOLONG("erase hash " << to_hex(hash) << " addr " << addr); - unique_lock lock(m_bucket_mutex); + auto lock = lock_extent_by_hash(hash); auto er = get_cell_range(hash); Cell mv(hash, addr); Cell *ip = find(er.first, er.second, mv); @@ -387,7 +428,7 @@ BeesHashTable::erase_hash_addr(HashType hash, AddrType addr) if (found) { // Lookups on invalid addresses really hurt us. Kill it with fire! 
*ip = Cell(0, 0); - set_extent_dirty(hash); + set_extent_dirty_locked(hash_to_extent_index(hash)); BEESCOUNT(hash_erase); #if 0 if (verify_cell_range(er.first, er.second)) { @@ -405,9 +446,9 @@ BeesHashTable::erase_hash_addr(HashType hash, AddrType addr) bool BeesHashTable::push_front_hash_addr(HashType hash, AddrType addr) { - fetch_missing_extent(hash); + fetch_missing_extent_by_hash(hash); BEESTOOLONG("push_front_hash_addr hash " << BeesHash(hash) <<" addr " << BeesAddress(addr)); - unique_lock lock(m_bucket_mutex); + auto lock = lock_extent_by_hash(hash); auto er = get_cell_range(hash); Cell mv(hash, addr); Cell *ip = find(er.first, er.second, mv); @@ -437,7 +478,7 @@ BeesHashTable::push_front_hash_addr(HashType hash, AddrType addr) // There is now a space at the front, insert there if different if (er.first[0] != mv) { er.first[0] = mv; - set_extent_dirty(hash); + set_extent_dirty_locked(hash_to_extent_index(hash)); BEESCOUNT(hash_front); } #if 0 @@ -456,9 +497,9 @@ BeesHashTable::push_front_hash_addr(HashType hash, AddrType addr) bool BeesHashTable::push_random_hash_addr(HashType hash, AddrType addr) { - fetch_missing_extent(hash); + fetch_missing_extent_by_hash(hash); BEESTOOLONG("push_random_hash_addr hash " << BeesHash(hash) << " addr " << BeesAddress(addr)); - unique_lock lock(m_bucket_mutex); + auto lock = lock_extent_by_hash(hash); auto er = get_cell_range(hash); Cell mv(hash, addr); Cell *ip = find(er.first, er.second, mv); @@ -521,14 +562,14 @@ BeesHashTable::push_random_hash_addr(HashType hash, AddrType addr) case_cond = 5; ret_dirty: BEESCOUNT(hash_insert); - set_extent_dirty(hash); + set_extent_dirty_locked(hash_to_extent_index(hash)); ret: #if 0 if (verify_cell_range(er.first, er.second, false)) { BEESLOG("while push_randoming (case " << case_cond << ") pos " << pos << " ip " << (ip - er.first) << " " << mv); - // dump_bucket(saved.data(), saved.data() + saved.size()); - // dump_bucket(er.first, er.second); + // dump_bucket_locked(saved.data(), 
saved.data() + saved.size()); + // dump_bucket_locked(er.first, er.second); } #else (void)case_cond; @@ -652,9 +693,7 @@ BeesHashTable::BeesHashTable(shared_ptr ctx, string filename, off_t } } - for (uint64_t i = 0; i < m_size / sizeof(Extent); ++i) { - m_buckets_missing.insert(i); - } + m_extent_metadata.resize(m_extents); m_writeback_thread.exec([&]() { writeback_loop(); diff --git a/src/bees.h b/src/bees.h index 5155bc8..04af577 100644 --- a/src/bees.h +++ b/src/bees.h @@ -432,18 +432,24 @@ private: uint64_t m_buckets; uint64_t m_extents; uint64_t m_cells; - set m_buckets_dirty; - set m_buckets_missing; BeesThread m_writeback_thread; BeesThread m_prefetch_thread; RateLimiter m_flush_rate_limit; - mutex m_extent_mutex; - mutex m_bucket_mutex; - condition_variable m_condvar; set m_toxic_hashes; BeesStringFile m_stats_file; - LockSet m_extent_lock_set; + // Mutex/condvar for the writeback thread + mutex m_dirty_mutex; + condition_variable m_dirty_condvar; + + // Per-extent structures + struct ExtentMetaData { + shared_ptr m_mutex_ptr; // Access serializer + bool m_dirty = false; // Needs to be written back to disk + bool m_missing = true; // Needs to be read from disk + ExtentMetaData(); + }; + vector m_extent_metadata; void open_file(); void writeback_loop(); @@ -451,11 +457,17 @@ private: void try_mmap_flags(int flags); pair get_cell_range(HashType hash); pair get_extent_range(HashType hash); - void fetch_missing_extent(HashType hash); - void set_extent_dirty(HashType hash); + void fetch_missing_extent_by_hash(HashType hash); + void fetch_missing_extent_by_index(uint64_t extent_index); + void set_extent_dirty_locked(uint64_t extent_index); void flush_dirty_extents(); + bool flush_dirty_extent(uint64_t extent_index); bool is_toxic_hash(HashType h) const; + size_t hash_to_extent_index(HashType ht); + unique_lock lock_extent_by_hash(HashType ht); + unique_lock lock_extent_by_index(uint64_t extent_index); + BeesHashTable(const BeesHashTable &) = delete; BeesHashTable 
&operator=(const BeesHashTable &) = delete; };