mirror of
https://github.com/Zygo/bees.git
synced 2025-08-03 14:23:29 +02:00
Compare commits
57 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
9d295fab4e | ||
|
dc7360397e | ||
|
305ab5dbfa | ||
|
80e4302958 | ||
|
07751885d2 | ||
|
77614a0e99 | ||
|
649ae5bb40 | ||
|
40112faf0f | ||
|
bfb768a079 | ||
|
71514e7229 | ||
|
78d04b1417 | ||
|
47805253e6 | ||
|
629e33b4f3 | ||
|
58157d03dd | ||
|
9d67329ef7 | ||
|
c6be07e158 | ||
|
c6bf6bfe1d | ||
|
29d2d51c47 | ||
|
893595190f | ||
|
0455827989 | ||
|
62626aef7f | ||
|
f59e311809 | ||
|
3bf4e69c4d | ||
|
5622ebd411 | ||
|
04cb25bd04 | ||
|
06b8fd8697 | ||
|
94ab477b90 | ||
|
cceb0480a5 | ||
|
23749eb634 | ||
|
5afbcb99e3 | ||
|
5275249396 | ||
|
a07728bc7e | ||
|
732896b471 | ||
|
5cc5a44661 | ||
|
f6a6992ac9 | ||
|
ceda8ee6c3 | ||
|
18ae15658e | ||
|
339579096f | ||
|
702a8eec8c | ||
|
5f18fcda52 | ||
|
088cbd24ff | ||
|
8c9a44998d | ||
|
a5e2bdff47 | ||
|
703bb7c1a3 | ||
|
4f66d1cb44 | ||
|
3901962379 | ||
|
48aac8a99a | ||
|
b0ba4c4f38 | ||
|
74d256f0fe | ||
|
8cde833863 | ||
|
e0951ed4ba | ||
|
c479b361cd | ||
|
c6c3990d19 | ||
|
3fdc217b4f | ||
|
6c8d2bf428 | ||
|
d6f97edf4a | ||
|
312254a47b |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -10,3 +10,5 @@ html/
|
||||
latex/
|
||||
make.log
|
||||
make.log.new
|
||||
localconf
|
||||
scripts/beesd
|
||||
|
24
Makefile
24
Makefile
@@ -1,11 +1,18 @@
|
||||
PREFIX ?= /
|
||||
LIBEXEC_PREFIX ?= $(PREFIX)/usr/lib/bees
|
||||
|
||||
MARKDOWN := $(firstword $(shell which markdown markdown2 markdown_py 2>/dev/null))
|
||||
MARKDOWN ?= markdown
|
||||
|
||||
# allow local configuration to override above variables
|
||||
-include localconf
|
||||
|
||||
default all: lib src test README.html
|
||||
|
||||
clean: ## Cleanup
|
||||
git clean -dfx
|
||||
|
||||
.PHONY: lib src
|
||||
.PHONY: lib src test
|
||||
|
||||
lib: ## Build libs
|
||||
$(MAKE) -C lib
|
||||
@@ -18,17 +25,26 @@ test: ## Run tests
|
||||
test: lib src
|
||||
$(MAKE) -C test
|
||||
|
||||
scripts/beesd: scripts/beesd.in
|
||||
sed -e's#@LIBEXEC_PREFIX@#$(LIBEXEC_PREFIX)#' -e's#@PREFIX@#$(PREFIX)#' "$<" >"$@"
|
||||
|
||||
scripts/beesd@.service: scripts/beesd@.service.in
|
||||
sed -e's#@LIBEXEC_PREFIX@#$(LIBEXEC_PREFIX)#' -e's#@PREFIX@#$(PREFIX)#' "$<" >"$@"
|
||||
|
||||
scripts: scripts/beesd scripts/beesd@.service
|
||||
|
||||
README.html: README.md
|
||||
markdown README.md > README.html.new
|
||||
$(MARKDOWN) README.md > README.html.new
|
||||
mv -f README.html.new README.html
|
||||
|
||||
install: ## Install bees + libs
|
||||
install: lib src test
|
||||
install -Dm644 lib/libcrucible.so $(PREFIX)/usr/lib/libcrucible.so
|
||||
install -Dm755 bin/bees $(PREFIX)/usr/bin/bees
|
||||
install -Dm755 bin/bees $(LIBEXEC_PREFIX)/bees
|
||||
|
||||
install_scripts: ## Install scripts
|
||||
install -Dm755 scripts/beesd $(PREFIX)/usr/bin/beesd
|
||||
install_scripts:
|
||||
install -Dm755 scripts/beesd $(PREFIX)/usr/sbin/beesd
|
||||
install -Dm644 scripts/beesd.conf.sample $(PREFIX)/etc/bees/beesd.conf.sample
|
||||
install -Dm644 scripts/beesd@.service $(PREFIX)/lib/systemd/system/beesd@.service
|
||||
|
||||
|
137
README.md
137
README.md
@@ -107,9 +107,9 @@ fresh full-filesystem rescan, and restart `bees'.
|
||||
Things You Might Expect That Bees Doesn't Have
|
||||
----------------------------------------------
|
||||
|
||||
* There's no configuration file or getopt command line option processing
|
||||
(patches welcome!). There are some tunables hardcoded in the source
|
||||
that could eventually become configuration options.
|
||||
* There's no configuration file (patches welcome!). There are some tunables
|
||||
hardcoded in the source that could eventually become configuration options.
|
||||
There's also an incomplete option parser (patches welcome!).
|
||||
|
||||
* There's no way to *stop* the Bees daemon. Use SIGKILL, SIGTERM, or
|
||||
Ctrl-C for now. Some of the destructors are unreachable and have never
|
||||
@@ -134,11 +134,6 @@ performance by caching, but really fixing this requires rewriting the
|
||||
crawler to scan the btrfs extent tree directly instead of the subvol
|
||||
FS trees.
|
||||
|
||||
* Bees had support for multiple worker threads in the past; however,
|
||||
this was removed because it made Bees too aggressive to coexist with
|
||||
other applications on the same machine. It also hit the *slow backrefs*
|
||||
on N CPU cores instead of just one.
|
||||
|
||||
* Block reads are currently more allocation- and CPU-intensive than they
|
||||
should be, especially for filesystems on SSD where the IO overhead is
|
||||
much smaller. This is a problem for power-constrained environments
|
||||
@@ -171,6 +166,7 @@ Bees has been tested in combination with the following:
|
||||
* Large (>16M) extents
|
||||
* Huge files (>1TB--although Btrfs performance on such files isn't great in general)
|
||||
* filesystems up to 25T bytes, 100M+ files
|
||||
* btrfs read-only snapshots
|
||||
|
||||
Bad Btrfs Feature Interactions
|
||||
------------------------------
|
||||
@@ -179,14 +175,13 @@ Bees has not been tested with the following, and undesirable interactions may oc
|
||||
|
||||
* Non-4K filesystem data block size (should work if recompiled)
|
||||
* Non-equal hash (SUM) and filesystem data block (CLONE) sizes (probably never will work)
|
||||
* btrfs read-only snapshots (never tested, probably wouldn't work well)
|
||||
* btrfs send/receive (receive is probably OK, but send requires RO snapshots. See above)
|
||||
* btrfs send/receive (receive is probably OK, but send could be confused?)
|
||||
* btrfs qgroups (never tested, no idea what might happen)
|
||||
* btrfs seed filesystems (does anyone even use those?)
|
||||
* btrfs autodefrag mount option (never tested, could fight with Bees)
|
||||
* btrfs nodatacow mount option or inode attribute (*could* work, but might not)
|
||||
* btrfs nodatacow/nodatasum inode attribute or mount option (bees skips all nodatasum files)
|
||||
* btrfs out-of-tree kernel patches (e.g. in-band dedup or encryption)
|
||||
* btrfs-convert from ext2/3/4 (never tested)
|
||||
* btrfs-convert from ext2/3/4 (never tested, might run out of space or ignore significant portions of the filesystem due to sanity checks)
|
||||
* btrfs mixed block groups (don't know a reason why it would *not* work, but never tested)
|
||||
* open(O_DIRECT)
|
||||
* Filesystems mounted *without* the flushoncommit option
|
||||
@@ -194,7 +189,7 @@ Bees has not been tested with the following, and undesirable interactions may oc
|
||||
Other Caveats
|
||||
-------------
|
||||
|
||||
* btrfs balance will invalidate parts of the dedup table. Bees will
|
||||
* btrfs balance will invalidate parts of the dedup hash table. Bees will
|
||||
happily rebuild the table, but it will have to scan all the blocks
|
||||
again.
|
||||
|
||||
@@ -205,17 +200,35 @@ Other Caveats
|
||||
|
||||
* Bees creates temporary files (with O_TMPFILE) and uses them to split
|
||||
and combine extents elsewhere in btrfs. These will take up to 2GB
|
||||
during normal operation.
|
||||
of disk space per thread during normal operation.
|
||||
|
||||
* Like all deduplicators, Bees will replace data blocks with metadata
|
||||
references. It is a good idea to ensure there are several GB of
|
||||
unallocated space (see `btrfs fi df`) on the filesystem before running
|
||||
Bees for the first time. Use
|
||||
references. It is a good idea to ensure there is sufficient unallocated
|
||||
space (see `btrfs fi usage`) on the filesystem to allow the metadata
|
||||
to multiply in size by the number of snapshots before running Bees
|
||||
for the first time. Use
|
||||
|
||||
btrfs balance start -dusage=100,limit=1 /your/filesystem
|
||||
btrfs balance start -dusage=100,limit=N /your/filesystem
|
||||
|
||||
If possible, raise the `limit` parameter to the current size of metadata
|
||||
usage (from `btrfs fi df`) plus 1.
|
||||
where the `limit` parameter 'N' should be calculated as follows:
|
||||
|
||||
* start with the current size of metadata usage (from `btrfs fi
|
||||
df`) in GB, plus 1
|
||||
|
||||
* multiply by the proportion of disk space in subvols with
|
||||
snapshots (i.e. if there are no snapshots, multiply by 0;
|
||||
if all of the data is shared between at least one origin
|
||||
and one snapshot subvol, multiply by 1)
|
||||
|
||||
* multiply by the number of snapshots (i.e. if there is only
|
||||
one subvol, multiply by 0; if there are 3 snapshots and one
|
||||
origin subvol, multiply by 3)
|
||||
|
||||
`limit = (GB_metadata + 1) * (disk_space_in_snapshots / total_disk_space) * number_of_snapshots`
|
||||
|
||||
Monitor unallocated space to ensure that the filesystem never runs out
|
||||
of metadata space (whether Bees is running or not--this is a general
|
||||
btrfs requirement).
|
||||
|
||||
|
||||
A Brief List Of Btrfs Kernel Bugs
|
||||
@@ -228,16 +241,29 @@ Missing features (usually not available in older LTS kernels):
|
||||
* 3.16: `SEARCH_V2` ioctl added. Bees could use `SEARCH` instead.
|
||||
* 4.2: `FILE_EXTENT_SAME` no longer updates mtime, can be used at EOF.
|
||||
|
||||
Future features (kernel features Bees does not yet use, but may rely on
|
||||
in the future):
|
||||
|
||||
* 4.14: `LOGICAL_INO_V2` allows userspace to create forward and backward
|
||||
reference maps to entire physical extents with a single ioctl call,
|
||||
and raises the limit of 2730 references per extent. Bees has not yet
|
||||
been rewritten to take full advantage of these features.
|
||||
|
||||
Bug fixes (sometimes included in older LTS kernels):
|
||||
|
||||
* Bugs fixed prior to 4.4.3 are not listed here.
|
||||
* 4.5: hang in the `INO_PATHS` ioctl used by Bees.
|
||||
* 4.5: use-after-free in the `FILE_EXTENT_SAME` ioctl used by Bees.
|
||||
* 4.6: lost inodes after a rename, crash, and log tree replay
|
||||
(triggered by the fsync() while writing `beescrawl.dat`).
|
||||
* 4.7: *slow backref* bug no longer triggers a softlockup panic. It still
|
||||
too long to resolve a block address to a root/inode/offset triple.
|
||||
* 4.10-rc1: reduced CPU time cost of the LOGICAL_INO ioctl and dedup
|
||||
takes too long to resolve a block address to a root/inode/offset triple.
|
||||
* 4.10: reduced CPU time cost of the LOGICAL_INO ioctl and dedup
|
||||
backref processing in general.
|
||||
* 4.11: yet another dedup deadlock case is fixed.
|
||||
* 4.14: backref performance improvements make LOGICAL_INO even faster.
|
||||
|
||||
Unfixed kernel bugs (as of 4.5.7) with workarounds in Bees:
|
||||
Unfixed kernel bugs (as of 4.11.9) with workarounds in Bees:
|
||||
|
||||
* *slow backrefs* (aka toxic extents): If the number of references to a
|
||||
single shared extent within a single file grows above a few thousand,
|
||||
@@ -246,7 +272,7 @@ Unfixed kernel bugs (as of 4.5.7) with workarounds in Bees:
|
||||
measuring the time the kernel spends performing certain operations
|
||||
and permanently blacklisting any extent or hash where the kernel
|
||||
starts to get slow. Inside Bees, such blocks are marked as 'toxic'
|
||||
hash/block addresses.
|
||||
hash/block addresses. *Needs to be retested after v4.14.*
|
||||
|
||||
* `LOGICAL_INO` output is arbitrarily limited to 2730 references
|
||||
even if more buffer space is provided for results. Once this number
|
||||
@@ -257,35 +283,29 @@ Unfixed kernel bugs (as of 4.5.7) with workarounds in Bees:
|
||||
This places an obvious limit on dedup efficiency for extremely common
|
||||
blocks or filesystems with many snapshots (although this limit is
|
||||
far greater than the effective limit imposed by the *slow backref* bug).
|
||||
*Fixed in v4.14.*
|
||||
|
||||
* `LOGICAL_INO` on compressed extents returns a list of root/inode/offset
|
||||
tuples matching the extent bytenr of its argument. On uncompressed
|
||||
extents, any r/i/o tuple whose extent offset does not match the
|
||||
argument's extent offset is discarded, i.e. only the single 4K block
|
||||
matching the argument is returned, so a complete map of the extent
|
||||
references requires calling `LOGICAL_INO` for every single block of
|
||||
the extent. This is undesirable behavior for Bees, which wants a
|
||||
list of all extent refs referencing a data extent (i.e. Bees wants
|
||||
the compressed-extent behavior in all cases). *Fixed in v4.14.*
|
||||
|
||||
* `LOGICAL_INO` is only called from one thread at any time per process.
|
||||
This means at most one core is irretrievably stuck in this ioctl.
|
||||
|
||||
* `FILE_EXTENT_SAME` is arbitrarily limited to 16MB. This is less than
|
||||
128MB which is the maximum extent size that can be created by defrag
|
||||
or prealloc. Bees avoids feedback loops this can generate while
|
||||
attempting to replace extents over 16MB in length.
|
||||
|
||||
* `DEFRAG_RANGE` is useless. The ioctl attempts to implement `btrfs
|
||||
fi defrag` in the kernel, and will arbitrarily defragment more or
|
||||
less than the range requested to match the behavior expected from the
|
||||
userspace tool. Bees implements its own defrag instead, copying data
|
||||
to a temporary file and using the `FILE_EXTENT_SAME` ioctl to replace
|
||||
precisely the specified range of offending fragmented blocks.
|
||||
|
||||
* When writing BeesStringFile, a crash can cause the directory entry
|
||||
`beescrawl.dat.tmp` to exist without a corresponding inode.
|
||||
This directory entry cannot be renamed or removed; however, it does
|
||||
not prevent the creation of a second directory entry with the same
|
||||
name that functions normally, so it doesn't prevent Bees operation.
|
||||
|
||||
The orphan directory entry can be removed by deleting its subvol,
|
||||
so place BEESHOME on a separate subvol so you can delete these orphan
|
||||
directory entries when they occur (or use btrfs zero-log before mounting
|
||||
the filesystem after a crash). Alternatively, place BEESHOME on a
|
||||
non-btrfs filesystem.
|
||||
|
||||
* If the `fsync()` in `BeesTempFile::make_copy` is removed, the filesystem
|
||||
hangs within a few hours, requiring a reboot to recover. On the other
|
||||
hand, there may be net performance benefits to calling `fsync()` before
|
||||
or after each dedup. This needs further investigation.
|
||||
hand, the `fsync()` only costs about 8% of overall performance.
|
||||
|
||||
Not really a bug, but a gotcha nonetheless:
|
||||
|
||||
@@ -296,9 +316,26 @@ Not really a bug, but a gotcha nonetheless:
|
||||
children* until the FD is closed. Bees avoids this gotcha by closing
|
||||
all of the FDs in its directory FD cache every 15 minutes.
|
||||
|
||||
* If a file is deleted while Bees is caching an open FD to the file,
|
||||
Bees continues to scan the file. For very large files (e.g. VM
|
||||
images), the deletion of the file can be delayed indefinitely.
|
||||
To limit this delay, Bees closes all FDs in its file FD cache every
|
||||
15 minutes.
|
||||
|
||||
Build
|
||||
-----
|
||||
|
||||
Requirements
|
||||
Build with `make`. The build produces `bin/bees` and `lib/libcrucible.so`,
|
||||
which must be copied to somewhere in `$PATH` and `$LD_LIBRARY_PATH`
|
||||
on the target system respectively.
|
||||
|
||||
### Ubuntu 16.04 - 17.04:
|
||||
`$ apt -y install build-essential btrfs-tools uuid-dev markdown && make`
|
||||
|
||||
### Ubuntu 14.04:
|
||||
You can try to carry on the work done here: https://gist.github.com/dagelf/99ee07f5638b346adb8c058ab3d57492
|
||||
|
||||
Dependencies
|
||||
------------
|
||||
|
||||
* C++11 compiler (tested with GCC 4.9 and 6.2.0)
|
||||
@@ -321,14 +358,8 @@ Requirements
|
||||
Don't bother trying to make Bees work with older kernels.
|
||||
It won't end well.
|
||||
|
||||
Build
|
||||
-----
|
||||
* markdown
|
||||
|
||||
Build with `make`.
|
||||
|
||||
The build produces `bin/bees` and `lib/libcrucible.so`, which must be
|
||||
copied to somewhere in `$PATH` and `$LD_LIBRARY_PATH` on the target
|
||||
system respectively.
|
||||
|
||||
Setup
|
||||
-----
|
||||
|
@@ -124,7 +124,7 @@ namespace crucible {
|
||||
if (found == m_map.end()) {
|
||||
// No, release cache lock and acquire key lock
|
||||
lock.unlock();
|
||||
typename LockSet<Key>::Lock key_lock(m_lockset, k);
|
||||
auto key_lock = m_lockset.make_lock(k);
|
||||
|
||||
// Did item appear in cache while we were waiting for key?
|
||||
lock.lock();
|
||||
@@ -197,7 +197,7 @@ namespace crucible {
|
||||
if (found == m_map.end()) {
|
||||
// No, release cache lock and acquire key lock
|
||||
lock.unlock();
|
||||
typename LockSet<Key>::Lock key_lock(m_lockset, k);
|
||||
auto key_lock = m_lockset.make_lock(k);
|
||||
|
||||
// Did item appear in cache while we were waiting for key?
|
||||
lock.lock();
|
||||
|
@@ -45,6 +45,8 @@ namespace crucible {
|
||||
template <class T> Chatter &operator<<(const T& arg);
|
||||
|
||||
~Chatter();
|
||||
|
||||
static void enable_timestamp(bool prefix_timestamp);
|
||||
};
|
||||
|
||||
template <class Argument>
|
||||
|
@@ -100,12 +100,6 @@ namespace crucible {
|
||||
} while (0)
|
||||
|
||||
// macros for checking a constraint
|
||||
#define CHECK_CONSTRAINT(value, expr) do { \
|
||||
if (!(expr)) { \
|
||||
THROW_ERROR(out_of_range, #value << " = " << value << " failed constraint check (" << #expr << ")"); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define THROW_CHECK0(type, expr) do { \
|
||||
if (!(expr)) { \
|
||||
THROW_ERROR(type, "failed constraint check (" << #expr << ")"); \
|
||||
|
@@ -13,6 +13,10 @@
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
// ioctl
|
||||
#include <sys/ioctl.h>
|
||||
#include <linux/fs.h>
|
||||
|
||||
// socket
|
||||
#include <sys/socket.h>
|
||||
|
||||
@@ -141,6 +145,9 @@ namespace crucible {
|
||||
Stat &lstat(const string &filename);
|
||||
};
|
||||
|
||||
int ioctl_iflags_get(int fd);
|
||||
void ioctl_iflags_set(int fd, int attr);
|
||||
|
||||
string st_mode_ntoa(mode_t mode);
|
||||
|
||||
// Because it's not trivial to do correctly
|
||||
|
@@ -112,8 +112,8 @@ namespace crucible {
|
||||
BTRFS_COMPRESS_NONE = 0,
|
||||
BTRFS_COMPRESS_ZLIB = 1,
|
||||
BTRFS_COMPRESS_LZO = 2,
|
||||
BTRFS_COMPRESS_TYPES = 2,
|
||||
BTRFS_COMPRESS_LAST = 3,
|
||||
BTRFS_COMPRESS_ZSTD = 3,
|
||||
BTRFS_COMPRESS_TYPES = 3
|
||||
} btrfs_compression_type;
|
||||
|
||||
struct FiemapExtent : public fiemap_extent {
|
||||
|
@@ -2,14 +2,16 @@
|
||||
#define CRUCIBLE_LOCKSET_H
|
||||
|
||||
#include <crucible/error.h>
|
||||
#include <crucible/process.h>
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include <condition_variable>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
|
||||
namespace crucible {
|
||||
using namespace std;
|
||||
@@ -18,7 +20,7 @@ namespace crucible {
|
||||
class LockSet {
|
||||
|
||||
public:
|
||||
using set_type = set<T>;
|
||||
using set_type = map<T, pid_t>;
|
||||
using key_type = typename set_type::key_type;
|
||||
|
||||
private:
|
||||
@@ -31,6 +33,24 @@ namespace crucible {
|
||||
bool full();
|
||||
bool locked(const key_type &name);
|
||||
|
||||
class Lock {
|
||||
LockSet &m_lockset;
|
||||
key_type m_name;
|
||||
bool m_locked;
|
||||
|
||||
Lock() = delete;
|
||||
Lock(const Lock &) = delete;
|
||||
Lock& operator=(const Lock &) = delete;
|
||||
Lock(Lock &&that) = delete;
|
||||
Lock& operator=(Lock &&that) = delete;
|
||||
public:
|
||||
~Lock();
|
||||
Lock(LockSet &lockset, const key_type &name, bool start_locked = true);
|
||||
void lock();
|
||||
void unlock();
|
||||
bool try_lock();
|
||||
};
|
||||
|
||||
public:
|
||||
~LockSet();
|
||||
LockSet() = default;
|
||||
@@ -45,24 +65,18 @@ namespace crucible {
|
||||
|
||||
void max_size(size_t max);
|
||||
|
||||
class Lock {
|
||||
LockSet &m_lockset;
|
||||
key_type m_name;
|
||||
bool m_locked;
|
||||
class LockHandle {
|
||||
shared_ptr<Lock> m_lock;
|
||||
|
||||
Lock() = delete;
|
||||
Lock(const Lock &) = delete;
|
||||
Lock& operator=(const Lock &) = delete;
|
||||
public:
|
||||
~Lock();
|
||||
Lock(LockSet &lockset, const key_type &m_name, bool start_locked = true);
|
||||
Lock(Lock &&that);
|
||||
Lock& operator=(Lock &&that);
|
||||
void lock();
|
||||
void unlock();
|
||||
bool try_lock();
|
||||
LockHandle(LockSet &lockset, const key_type &name, bool start_locked = true) :
|
||||
m_lock(make_shared<Lock>(lockset, name, start_locked)) {}
|
||||
void lock() { m_lock->lock(); }
|
||||
void unlock() { m_lock->unlock(); }
|
||||
bool try_lock() { return m_lock->try_lock(); }
|
||||
};
|
||||
|
||||
LockHandle make_lock(const key_type &name, bool start_locked = true);
|
||||
};
|
||||
|
||||
template <class T>
|
||||
@@ -104,7 +118,7 @@ namespace crucible {
|
||||
while (full() || locked(name)) {
|
||||
m_condvar.wait(lock);
|
||||
}
|
||||
auto rv = m_set.insert(name);
|
||||
auto rv = m_set.insert(make_pair(name, gettid()));
|
||||
THROW_CHECK0(runtime_error, rv.second);
|
||||
}
|
||||
|
||||
@@ -116,7 +130,7 @@ namespace crucible {
|
||||
if (full() || locked(name)) {
|
||||
return false;
|
||||
}
|
||||
auto rv = m_set.insert(name);
|
||||
auto rv = m_set.insert(make_pair(name, gettid()));
|
||||
THROW_CHECK1(runtime_error, name, rv.second);
|
||||
return true;
|
||||
}
|
||||
@@ -214,26 +228,10 @@ namespace crucible {
|
||||
}
|
||||
|
||||
template <class T>
|
||||
LockSet<T>::Lock::Lock(Lock &&that) :
|
||||
m_lockset(that.lockset),
|
||||
m_name(that.m_name),
|
||||
m_locked(that.m_locked)
|
||||
typename LockSet<T>::LockHandle
|
||||
LockSet<T>::make_lock(const key_type &name, bool start_locked)
|
||||
{
|
||||
that.m_locked = false;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
typename LockSet<T>::Lock &
|
||||
LockSet<T>::Lock::operator=(Lock &&that)
|
||||
{
|
||||
THROW_CHECK2(invalid_argument, &m_lockset, &that.m_lockset, &m_lockset == &that.m_lockset);
|
||||
if (m_locked && that.m_name != m_name) {
|
||||
unlock();
|
||||
}
|
||||
m_name = that.m_name;
|
||||
m_locked = that.m_locked;
|
||||
that.m_locked = false;
|
||||
return *this;
|
||||
return LockHandle(*this, name, start_locked);
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -17,6 +17,7 @@ namespace crucible {
|
||||
|
||||
static shared_ptr<set<string>> chatter_names;
|
||||
static const char *SPACETAB = " \t";
|
||||
static bool add_prefix_timestamp = true;
|
||||
|
||||
static
|
||||
void
|
||||
@@ -48,20 +49,31 @@ namespace crucible {
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
Chatter::enable_timestamp(bool prefix_timestamp)
|
||||
{
|
||||
add_prefix_timestamp = prefix_timestamp;
|
||||
}
|
||||
|
||||
Chatter::~Chatter()
|
||||
{
|
||||
ostringstream header_stream;
|
||||
|
||||
time_t ltime;
|
||||
DIE_IF_MINUS_ONE(time(&ltime));
|
||||
struct tm ltm;
|
||||
DIE_IF_ZERO(localtime_r(&ltime, &ltm));
|
||||
if (add_prefix_timestamp) {
|
||||
time_t ltime;
|
||||
DIE_IF_MINUS_ONE(time(&ltime));
|
||||
struct tm ltm;
|
||||
DIE_IF_ZERO(localtime_r(&ltime, &ltm));
|
||||
|
||||
char buf[1024];
|
||||
DIE_IF_ZERO(strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", &ltm));
|
||||
char buf[1024];
|
||||
DIE_IF_ZERO(strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", &ltm));
|
||||
|
||||
header_stream << buf;
|
||||
header_stream << " " << getpid() << "." << gettid();
|
||||
} else {
|
||||
header_stream << "tid " << gettid();
|
||||
}
|
||||
|
||||
header_stream << buf;
|
||||
header_stream << " " << getpid() << "." << gettid();
|
||||
if (!m_name.empty()) {
|
||||
header_stream << " " << m_name;
|
||||
}
|
||||
|
14
lib/fd.cc
14
lib/fd.cc
@@ -488,6 +488,20 @@ namespace crucible {
|
||||
lstat(filename);
|
||||
}
|
||||
|
||||
int
|
||||
ioctl_iflags_get(int fd)
|
||||
{
|
||||
int attr = 0;
|
||||
DIE_IF_MINUS_ONE(ioctl(fd, FS_IOC_GETFLAGS, &attr));
|
||||
return attr;
|
||||
}
|
||||
|
||||
void
|
||||
ioctl_iflags_set(int fd, int attr)
|
||||
{
|
||||
DIE_IF_MINUS_ONE(ioctl(fd, FS_IOC_SETFLAGS, &attr));
|
||||
}
|
||||
|
||||
string
|
||||
readlink_or_die(const string &path)
|
||||
{
|
||||
|
17
lib/fs.cc
17
lib/fs.cc
@@ -468,6 +468,7 @@ namespace crucible {
|
||||
static const bits_ntoa_table table[] = {
|
||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_COMPRESS_ZLIB),
|
||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_COMPRESS_LZO),
|
||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_COMPRESS_ZSTD),
|
||||
NTOA_TABLE_ENTRY_END()
|
||||
};
|
||||
return bits_ntoa(compress_type, table);
|
||||
@@ -625,7 +626,7 @@ namespace crucible {
|
||||
void
|
||||
Fiemap::do_ioctl(int fd)
|
||||
{
|
||||
CHECK_CONSTRAINT(m_min_count, m_min_count <= m_max_count);
|
||||
THROW_CHECK1(out_of_range, m_min_count, m_min_count <= m_max_count);
|
||||
|
||||
auto extent_count = m_min_count;
|
||||
vector<char> ioctl_arg = vector_copy_struct<fiemap>(this);
|
||||
@@ -716,10 +717,20 @@ namespace crucible {
|
||||
bool
|
||||
BtrfsIoctlSearchKey::do_ioctl_nothrow(int fd)
|
||||
{
|
||||
vector<char> ioctl_arg = vector_copy_struct<btrfs_ioctl_search_key>(this);
|
||||
// Normally we like to be paranoid and fill empty bytes with zero,
|
||||
// but these buffers can be huge. 80% of a 4GHz CPU huge.
|
||||
ioctl_arg.resize(sizeof(btrfs_ioctl_search_args_v2) + m_buf_size);
|
||||
|
||||
// Keep the ioctl buffer from one run to the next to save on malloc costs
|
||||
size_t target_buf_size = sizeof(btrfs_ioctl_search_args_v2) + m_buf_size;
|
||||
|
||||
thread_local vector<char> ioctl_arg;
|
||||
if (ioctl_arg.size() < m_buf_size) {
|
||||
ioctl_arg = vector_copy_struct<btrfs_ioctl_search_key>(this);
|
||||
ioctl_arg.resize(target_buf_size);
|
||||
} else {
|
||||
memcpy(ioctl_arg.data(), static_cast<btrfs_ioctl_search_key*>(this), sizeof(btrfs_ioctl_search_key));
|
||||
}
|
||||
|
||||
btrfs_ioctl_search_args_v2 *ioctl_ptr = reinterpret_cast<btrfs_ioctl_search_args_v2 *>(ioctl_arg.data());
|
||||
|
||||
ioctl_ptr->buf_size = m_buf_size;
|
||||
|
@@ -1,4 +1,4 @@
|
||||
CCFLAGS = -Wall -Wextra -Werror -O3 -march=native -I../include -ggdb -fpic -D_FILE_OFFSET_BITS=64
|
||||
# CCFLAGS = -Wall -Wextra -Werror -O0 -I../include -ggdb -fpic
|
||||
# CCFLAGS = -Wall -Wextra -Werror -O0 -I../include -ggdb -fpic -D_FILE_OFFSET_BITS=64
|
||||
CFLAGS = $(CCFLAGS) -std=c99
|
||||
CXXFLAGS = $(CCFLAGS) -std=c++11 -Wold-style-cast
|
||||
|
@@ -2,8 +2,11 @@
|
||||
## https://github.com/Zygo/bees
|
||||
## It's a default values, change it, if needed
|
||||
|
||||
# How to use?
|
||||
# Copy this file to a new file name and adjust the UUID below
|
||||
|
||||
# Which FS will be used
|
||||
UUID=5d3c0ad5-bedf-463d-8235-b4d4f6f99476
|
||||
UUID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
|
||||
|
||||
## System Vars
|
||||
# Change carefully
|
||||
|
@@ -1,5 +1,4 @@
|
||||
#!/bin/bash
|
||||
# /usr/bin/beesd
|
||||
|
||||
## Helpful functions
|
||||
INFO(){ echo "INFO:" "$@"; }
|
||||
@@ -13,18 +12,34 @@ export CONFIG_FILE
|
||||
export UUID AL16M
|
||||
|
||||
readonly AL16M="$((16*1024*1024))"
|
||||
readonly CONFIG_DIR=/etc/bees/
|
||||
readonly CONFIG_DIR=@PREFIX@/etc/bees/
|
||||
|
||||
## Pre checks
|
||||
{
|
||||
[ ! -d "$CONFIG_DIR" ] && ERRO "Missing: $CONFIG_DIR"
|
||||
[ "$UID" == "0" ] || ERRO "Must be runned as root"
|
||||
[ "$UID" == "0" ] || ERRO "Must be run as root"
|
||||
}
|
||||
|
||||
command -v bees &> /dev/null || ERRO "Missing 'bees' command"
|
||||
command -v @LIBEXEC_PREFIX@/bees &> /dev/null || ERRO "Missing 'bees' agent"
|
||||
|
||||
## Parse args
|
||||
UUID="$1"
|
||||
ARGUMENTS=()
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
-*)
|
||||
ARGUMENTS+=($1)
|
||||
;;
|
||||
*)
|
||||
if [ -z "$UUID" ]; then
|
||||
UUID="$1"
|
||||
else
|
||||
ERRO "Only one filesystem may be supplied"
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
case "$UUID" in
|
||||
*-*-*-*-*)
|
||||
FILE_CONFIG=""
|
||||
@@ -39,7 +54,7 @@ case "$UUID" in
|
||||
source "$FILE_CONFIG"
|
||||
;;
|
||||
*)
|
||||
echo "beesd <btrfs_uuid>"
|
||||
echo "beesd [options] <btrfs_uuid>"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
@@ -50,17 +65,16 @@ BEESHOME="${BEESHOME:-$MNT_DIR/.beeshome}"
|
||||
BEESSTATUS="${BEESSTATUS:-$WORK_DIR/$UUID.status}"
|
||||
DB_SIZE="${DB_SIZE:-$((64*AL16M))}"
|
||||
LOG_SHORT_PATH="${LOG_SHORT_PATH:-N}"
|
||||
LOG_FILTER_TIME="${LOG_FILTER_TIME:-N}"
|
||||
|
||||
INFO "Check: Disk exists"
|
||||
if [ ! -b "/dev/disk/by-uuid/$UUID" ]; then
|
||||
ERRO "Missing disk: /dev/disk/by-uuid/$UUID"
|
||||
fi
|
||||
|
||||
it_btrfs(){ [ "$(blkid -s TYPE -o value "$1")" == "btrfs" ]; }
|
||||
is_btrfs(){ [ "$(blkid -s TYPE -o value "$1")" == "btrfs" ]; }
|
||||
|
||||
INFO "Check: Disk with btrfs"
|
||||
if ! it_btrfs "/dev/disk/by-uuid/$UUID"; then
|
||||
if ! is_btrfs "/dev/disk/by-uuid/$UUID"; then
|
||||
ERRO "Disk not contain btrfs: /dev/disk/by-uuid/$UUID"
|
||||
fi
|
||||
|
||||
@@ -102,19 +116,6 @@ fi
|
||||
|
||||
MNT_DIR="${MNT_DIR//\/\//\/}"
|
||||
|
||||
filter_time(){
|
||||
if YN $LOG_FILTER_TIME; then
|
||||
sed -e 's/^.*crawl:/crawl:/g' \
|
||||
-e 's/^.*status:/status:/g' \
|
||||
-e 's/^.*bees:/bees:/g' \
|
||||
-e 's/^.*crawl_writeback:/crawl_writeback:/g' \
|
||||
-e 's/^.*main:/main:/g' \
|
||||
-e 's/^.*hash_prefetch:/hash_prefetch:/g'
|
||||
else
|
||||
cat
|
||||
fi
|
||||
}
|
||||
|
||||
filter_path(){
|
||||
if YN $LOG_SHORT_PATH; then
|
||||
sed -e "s#$MNT_DIR##g"
|
||||
@@ -123,6 +124,6 @@ filter_path(){
|
||||
fi
|
||||
}
|
||||
|
||||
bees "$MNT_DIR" 3>&1 2>&1 | filter_time | filter_path
|
||||
@LIBEXEC_PREFIX@/bees ${ARGUMENTS[@]} $OPTIONS "$MNT_DIR" 3>&1 2>&1 | filter_path
|
||||
|
||||
exit 0
|
@@ -1,14 +0,0 @@
|
||||
[Unit]
|
||||
Description=Bees - Best-Effort Extent-Same, a btrfs deduplicator daemon: %i
|
||||
After=local-fs.target
|
||||
|
||||
[Service]
|
||||
ExecStart=/usr/bin/beesd %i
|
||||
Nice=19
|
||||
IOSchedulingClass=idle
|
||||
CPUAccounting=true
|
||||
MemoryAccounting=true
|
||||
# CPUQuota=95%
|
||||
|
||||
[Install]
|
||||
WantedBy=local-fs.target
|
24
scripts/beesd@.service.in
Normal file
24
scripts/beesd@.service.in
Normal file
@@ -0,0 +1,24 @@
|
||||
[Unit]
|
||||
Description=Bees - Best-Effort Extent-Same, a btrfs deduplicator daemon: %i
|
||||
After=local-fs.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
ExecStart=/usr/sbin/beesd %i
|
||||
Nice=19
|
||||
KillMode=control-group
|
||||
KillSignal=SIGTERM
|
||||
CPUShares=128
|
||||
StartupCPUShares=256
|
||||
BlockIOWeight=100
|
||||
StartupBlockIOWeight=250
|
||||
IOSchedulingClass=idle
|
||||
IOSchedulingPriority=7
|
||||
CPUSchedulingPolicy=batch
|
||||
Nice=19
|
||||
Restart=on-abnormal
|
||||
CPUAccounting=true
|
||||
MemoryAccounting=true
|
||||
|
||||
[Install]
|
||||
WantedBy=local-fs.target
|
@@ -35,6 +35,7 @@ BeesFdCache::BeesFdCache()
|
||||
BEESCOUNTADD(open_ino_ms, open_timer.age() * 1000);
|
||||
return rv;
|
||||
});
|
||||
m_file_cache.max_size(BEES_FD_CACHE_SIZE);
|
||||
}
|
||||
|
||||
Fd
|
||||
@@ -71,97 +72,6 @@ BeesFdCache::insert_root_ino(shared_ptr<BeesContext> ctx, Fd fd)
|
||||
return m_file_cache.insert(fd, ctx, fid.root(), fid.ino());
|
||||
}
|
||||
|
||||
mutex BeesWorkQueueBase::s_mutex;
|
||||
set<BeesWorkQueueBase*> BeesWorkQueueBase::s_all_workers;
|
||||
|
||||
BeesWorkQueueBase::BeesWorkQueueBase(const string &name) :
|
||||
m_name(name)
|
||||
{
|
||||
}
|
||||
|
||||
BeesWorkQueueBase::~BeesWorkQueueBase()
|
||||
{
|
||||
unique_lock<mutex> lock(s_mutex);
|
||||
s_all_workers.erase(this);
|
||||
}
|
||||
|
||||
void
|
||||
BeesWorkQueueBase::for_each_work_queue(std::function<void (BeesWorkQueueBase*)> f)
|
||||
{
|
||||
unique_lock<mutex> lock(s_mutex);
|
||||
for (auto i : s_all_workers) {
|
||||
f(i);
|
||||
}
|
||||
}
|
||||
|
||||
string
|
||||
BeesWorkQueueBase::name() const
|
||||
{
|
||||
return m_name;
|
||||
}
|
||||
|
||||
void
|
||||
BeesWorkQueueBase::name(const string &new_name)
|
||||
{
|
||||
m_name = new_name;
|
||||
}
|
||||
|
||||
template <class Task>
|
||||
BeesWorkQueue<Task>::~BeesWorkQueue()
|
||||
{
|
||||
}
|
||||
|
||||
template <class Task>
|
||||
BeesWorkQueue<Task>::BeesWorkQueue(const string &name) :
|
||||
BeesWorkQueueBase(name)
|
||||
{
|
||||
unique_lock<mutex> lock(s_mutex);
|
||||
s_all_workers.insert(this);
|
||||
}
|
||||
|
||||
template <class Task>
|
||||
void
|
||||
BeesWorkQueue<Task>::push_active(const Task &t)
|
||||
{
|
||||
BEESNOTE("pushing task " << t);
|
||||
m_active_queue.push(t);
|
||||
}
|
||||
|
||||
template <class Task>
|
||||
void
|
||||
BeesWorkQueue<Task>::push_active(const Task &t, size_t limit)
|
||||
{
|
||||
// BEESNOTE("pushing limit " << limit << " task " << t);
|
||||
m_active_queue.push_wait(t, limit);
|
||||
}
|
||||
|
||||
template <class Task>
|
||||
size_t
|
||||
BeesWorkQueue<Task>::active_size() const
|
||||
{
|
||||
return m_active_queue.size();
|
||||
}
|
||||
|
||||
template <class Task>
|
||||
list<string>
|
||||
BeesWorkQueue<Task>::peek_active(size_t count) const
|
||||
{
|
||||
list<string> rv;
|
||||
for (auto i : m_active_queue.peek(count)) {
|
||||
ostringstream oss;
|
||||
oss << i;
|
||||
rv.push_back(oss.str());
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
template <class Task>
|
||||
Task
|
||||
BeesWorkQueue<Task>::pop()
|
||||
{
|
||||
return m_active_queue.pop();
|
||||
}
|
||||
|
||||
void
|
||||
BeesContext::dump_status()
|
||||
{
|
||||
@@ -188,12 +98,6 @@ BeesContext::dump_status()
|
||||
ofs << "\ttid " << t.first << ": " << t.second << "\n";
|
||||
}
|
||||
|
||||
BeesWorkQueueBase::for_each_work_queue([&](BeesWorkQueueBase *worker) {
|
||||
ofs << "QUEUE: " << worker->name() << " active: " << worker->active_size() << "\n";
|
||||
for (auto t : worker->peek_active(10)) {
|
||||
ofs << "\t" << t << "\n";
|
||||
}
|
||||
});
|
||||
ofs.close();
|
||||
|
||||
BEESNOTE("renaming status file '" << status_file << "'");
|
||||
@@ -229,10 +133,6 @@ BeesContext::show_progress()
|
||||
};
|
||||
lastProgressStats = thisStats;
|
||||
|
||||
BeesWorkQueueBase::for_each_work_queue([&](BeesWorkQueueBase *worker) {
|
||||
BEESLOG("QUEUE: " << worker->name() << " active: " << worker->active_size());
|
||||
});
|
||||
|
||||
BEESLOG("THREADS:");
|
||||
|
||||
for (auto t : BeesNote::get_status()) {
|
||||
@@ -281,29 +181,16 @@ BeesContext::dedup(const BeesRangePair &brp)
|
||||
|
||||
BEESTOOLONG("dedup " << brp);
|
||||
|
||||
thread_local BeesFileId tl_first_fid, tl_second_fid;
|
||||
if (tl_first_fid != brp.first.fid()) {
|
||||
BEESLOG("dedup: src " << name_fd(brp.first.fd()));
|
||||
tl_first_fid = brp.first.fid();
|
||||
tl_second_fid = BeesFileId();
|
||||
}
|
||||
ostringstream dst_line;
|
||||
dst_line << " dst " << pretty(brp.first.size()) << " [" << to_hex(brp.first.begin()) << ".." << to_hex(brp.first.end()) << "]";
|
||||
if (brp.first.begin() != brp.second.begin()) {
|
||||
dst_line << " [" << to_hex(brp.second.begin()) << ".." << to_hex(brp.second.end()) << "]";
|
||||
}
|
||||
BeesAddress first_addr(brp.first.fd(), brp.first.begin());
|
||||
BeesAddress second_addr(brp.second.fd(), brp.second.begin());
|
||||
dst_line << " (" << first_addr << "->" << second_addr << ")";
|
||||
|
||||
BEESLOG("dedup: src " << pretty(brp.first.size()) << " [" << to_hex(brp.first.begin()) << ".." << to_hex(brp.first.end()) << "] {" << first_addr << "} " << name_fd(brp.first.fd()));
|
||||
BEESLOG(" dst " << pretty(brp.second.size()) << " [" << to_hex(brp.second.begin()) << ".." << to_hex(brp.second.end()) << "] {" << second_addr << "} " << name_fd(brp.second.fd()));
|
||||
|
||||
if (first_addr.get_physical_or_zero() == second_addr.get_physical_or_zero()) {
|
||||
BEESLOGTRACE("equal physical addresses in dedup");
|
||||
BEESCOUNT(bug_dedup_same_physical);
|
||||
}
|
||||
if (tl_second_fid != brp.second.fid()) {
|
||||
dst_line << " " << name_fd(brp.second.fd());
|
||||
tl_second_fid = brp.second.fid();
|
||||
}
|
||||
BEESLOG(dst_line.str());
|
||||
|
||||
THROW_CHECK1(invalid_argument, brp, !brp.first.overlaps(brp.second));
|
||||
THROW_CHECK1(invalid_argument, brp, brp.first.size() == brp.second.size());
|
||||
@@ -348,6 +235,7 @@ BeesContext::rewrite_file_range(const BeesFileRange &bfr)
|
||||
// BEESLOG("\torig_bbd " << orig_bbd);
|
||||
BeesBlockData dup_bbd(dup_brp.first.fd(), dup_brp.first.begin(), min(BLOCK_SIZE_SUMS, dup_brp.first.size()));
|
||||
// BEESLOG("BeesResolver br(..., " << bfr << ")");
|
||||
BEESTRACE("BeesContext::rewrite_file_range calling BeesResolver " << bfr);
|
||||
BeesResolver br(m_ctx, BeesAddress(bfr.fd(), bfr.begin()));
|
||||
// BEESLOG("\treplace_src " << dup_bbd);
|
||||
br.replace_src(dup_bbd);
|
||||
@@ -543,6 +431,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
|
||||
if (found_addr.is_toxic()) {
|
||||
BEESINFO("WORKAROUND: abandoned toxic match for hash " << hash << " addr " << found_addr);
|
||||
// Don't push these back in because we'll never delete them.
|
||||
// Extents may become non-toxic so give them a chance to expire.
|
||||
// hash_table->push_front_hash_addr(hash, found_addr);
|
||||
BEESCOUNT(scan_toxic_hash);
|
||||
return bfr;
|
||||
@@ -553,17 +442,16 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
|
||||
catch_all([&]() {
|
||||
BEESNOTE("resolving " << found_addr << " matched " << bbd);
|
||||
BEESTRACE("resolving " << found_addr << " matched " << bbd);
|
||||
BEESTRACE("BeesContext::scan_one_extent calling BeesResolver " << found_addr);
|
||||
BeesResolver resolved(m_ctx, found_addr);
|
||||
// Toxic extents are really toxic
|
||||
if (resolved.is_toxic()) {
|
||||
BEESINFO("WORKAROUND: abandoned toxic match at found_addr " << found_addr << " matching bbd " << bbd);
|
||||
BEESCOUNT(scan_toxic_match);
|
||||
#if 0
|
||||
// Don't push these back in because we'll never delete them.
|
||||
// Make sure we never see this hash again
|
||||
// Make sure we never see this hash again.
|
||||
// It has become toxic since it was inserted into the hash table.
|
||||
found_addr.set_toxic();
|
||||
hash_table->push_front_hash_addr(hash, found_addr);
|
||||
#endif
|
||||
abandon_extent = true;
|
||||
} else if (!resolved.count()) {
|
||||
BEESCOUNT(scan_resolve_zero);
|
||||
@@ -785,13 +673,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
|
||||
|
||||
// Visualize
|
||||
if (bar != string(block_count, '.')) {
|
||||
thread_local BeesFileId last_fid;
|
||||
string file_name;
|
||||
if (bfr.fid() != last_fid) {
|
||||
last_fid = bfr.fid();
|
||||
file_name = " " + name_fd(bfr.fd());
|
||||
}
|
||||
BEESLOG("scan: " << pretty(e.size()) << " " << to_hex(e.begin()) << " [" << bar << "] " << to_hex(e.end()) << file_name);
|
||||
BEESLOG("scan: " << pretty(e.size()) << " " << to_hex(e.begin()) << " [" << bar << "] " << to_hex(e.end()) << ' ' << name_fd(bfr.fd()));
|
||||
}
|
||||
|
||||
return bfr;
|
||||
@@ -1028,8 +910,3 @@ BeesContext::insert_root_ino(Fd fd)
|
||||
{
|
||||
fd_cache()->insert_root_ino(shared_from_this(), fd);
|
||||
}
|
||||
|
||||
// instantiate templates for linkage ----------------------------------------
|
||||
|
||||
template class BeesWorkQueue<BeesFileRange>;
|
||||
template class BeesWorkQueue<BeesRangePair>;
|
||||
|
@@ -311,7 +311,7 @@ BeesHashTable::fetch_missing_extent(HashType hash)
|
||||
BEESNOTE("waiting to fetch hash extent #" << extent_number << ", " << missing_buckets << " left to fetch");
|
||||
|
||||
// Acquire blocking lock on this extent only
|
||||
LockSet<uint64_t>::Lock extent_lock(m_extent_lock_set, extent_number);
|
||||
auto extent_lock = m_extent_lock_set.make_lock(extent_number);
|
||||
|
||||
// Check missing again because someone else might have fetched this
|
||||
// extent for us while we didn't hold any locks
|
||||
|
@@ -174,9 +174,9 @@ BeesRoots::transid_min()
|
||||
uint64_t
|
||||
BeesRoots::transid_max()
|
||||
{
|
||||
BEESNOTE("Calculating transid_max");
|
||||
uint64_t rv = 0;
|
||||
uint64_t root = 0;
|
||||
BEESNOTE("Calculating transid_max (" << rv << " as of root " << root << ")");
|
||||
BEESTRACE("Calculating transid_max...");
|
||||
do {
|
||||
root = next_root(root);
|
||||
@@ -397,7 +397,6 @@ Fd
|
||||
BeesRoots::open_root_nocache(uint64_t rootid)
|
||||
{
|
||||
BEESTRACE("open_root_nocache " << rootid);
|
||||
BEESNOTE("open_root_nocache " << rootid);
|
||||
|
||||
// Stop recursion at the root of the filesystem tree
|
||||
if (rootid == BTRFS_FS_TREE_OBJECTID) {
|
||||
@@ -594,6 +593,27 @@ BeesRoots::open_root_ino_nocache(uint64_t root, uint64_t ino)
|
||||
break;
|
||||
}
|
||||
|
||||
// As of 4.12 the kernel rejects dedup requests with
|
||||
// src and dst that have different datasum flags.
|
||||
//
|
||||
// We can't detect those from userspace reliably, but
|
||||
// we can detect the common case where one file is
|
||||
// marked with the nodatasum (which implies nodatacow)
|
||||
// on a filesystem that is mounted with datacow.
|
||||
// These are arguably out of scope for dedup.
|
||||
//
|
||||
// To fix this properly, we have to keep track of which
|
||||
// pairs of inodes failed to dedup, guess that the reason
|
||||
// for failure was a mismatch of datasum flags, and
|
||||
// create temporary files with the right flags somehow.
|
||||
int attr = ioctl_iflags_get(rv);
|
||||
if (attr & FS_NOCOW_FL) {
|
||||
BEESLOG("Opening " << name_fd(rv) << " found FS_NOCOW_FL flag in " << to_hex(attr));
|
||||
rv = Fd();
|
||||
BEESCOUNT(open_wrong_flags);
|
||||
break;
|
||||
}
|
||||
|
||||
BEESTRACE("mapped " << BeesFileId(root, ino));
|
||||
BEESTRACE("\tto " << name_fd(rv));
|
||||
BEESCOUNT(open_hit);
|
||||
|
@@ -71,7 +71,18 @@ operator<<(ostream &os, const BeesFileRange &bfr)
|
||||
if (bfr.end() == numeric_limits<off_t>::max()) {
|
||||
os << "- [" << to_hex(bfr.begin()) << "..eof]";
|
||||
} else {
|
||||
os << pretty(bfr.size()) << " [" << to_hex(bfr.begin()) << ".." << to_hex(bfr.end()) << "]";
|
||||
os << pretty(bfr.size()) << " ";
|
||||
if (bfr.begin() != 0) {
|
||||
os << "[" << to_hex(bfr.begin());
|
||||
} else {
|
||||
os << "(";
|
||||
}
|
||||
os << ".." << to_hex(bfr.end());
|
||||
if (!!bfr.m_fd && bfr.end() >= bfr.file_size()) {
|
||||
os << ")";
|
||||
} else {
|
||||
os << "]";
|
||||
}
|
||||
}
|
||||
if (bfr.m_fid) {
|
||||
os << " fid = " << bfr.m_fid;
|
||||
@@ -92,8 +103,6 @@ operator<<(ostream &os, const BeesRangePair &brp)
|
||||
<< "\ndst = " << brp.second.fd() << " " << name_fd(brp.second.fd());
|
||||
}
|
||||
|
||||
mutex BeesFileRange::s_mutex;
|
||||
|
||||
bool
|
||||
BeesFileRange::operator<(const BeesFileRange &that) const
|
||||
{
|
||||
@@ -145,7 +154,6 @@ off_t
|
||||
BeesFileRange::file_size() const
|
||||
{
|
||||
if (m_file_size <= 0) {
|
||||
// Use method fd() not member m_fd() so we hold lock
|
||||
Stat st(fd());
|
||||
m_file_size = st.st_size;
|
||||
// These checks could trigger on valid input, but that would mean we have
|
||||
@@ -178,31 +186,21 @@ BeesFileRange::grow_begin(off_t delta)
|
||||
BeesFileRange::BeesFileRange(const BeesBlockData &bbd) :
|
||||
m_fd(bbd.fd()),
|
||||
m_begin(bbd.begin()),
|
||||
m_end(bbd.end()),
|
||||
m_file_size(-1)
|
||||
m_end(bbd.end())
|
||||
{
|
||||
}
|
||||
|
||||
BeesFileRange::BeesFileRange(Fd fd, off_t begin, off_t end) :
|
||||
m_fd(fd),
|
||||
m_begin(begin),
|
||||
m_end(end),
|
||||
m_file_size(-1)
|
||||
m_end(end)
|
||||
{
|
||||
}
|
||||
|
||||
BeesFileRange::BeesFileRange(const BeesFileId &fid, off_t begin, off_t end) :
|
||||
m_fid(fid),
|
||||
m_begin(begin),
|
||||
m_end(end),
|
||||
m_file_size(-1)
|
||||
{
|
||||
}
|
||||
|
||||
BeesFileRange::BeesFileRange() :
|
||||
m_begin(0),
|
||||
m_end(0),
|
||||
m_file_size(-1)
|
||||
m_end(end)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -285,23 +283,18 @@ BeesFileRange::operator BeesBlockData() const
|
||||
Fd
|
||||
BeesFileRange::fd() const
|
||||
{
|
||||
unique_lock<mutex> lock(s_mutex);
|
||||
auto rv = m_fd;
|
||||
return rv;
|
||||
return m_fd;
|
||||
}
|
||||
|
||||
Fd
|
||||
BeesFileRange::fd(const shared_ptr<BeesContext> &ctx) const
|
||||
{
|
||||
unique_lock<mutex> lock(s_mutex);
|
||||
// If we don't have a fid we can't do much here
|
||||
if (m_fid) {
|
||||
if (!m_fd) {
|
||||
// If we don't have a fd, open by fid
|
||||
if (m_fid && ctx) {
|
||||
lock.unlock();
|
||||
Fd new_fd = ctx->roots()->open_root_ino(m_fid);
|
||||
lock.lock();
|
||||
m_fd = new_fd;
|
||||
}
|
||||
} else {
|
||||
@@ -311,8 +304,7 @@ BeesFileRange::fd(const shared_ptr<BeesContext> &ctx) const
|
||||
}
|
||||
}
|
||||
// We either had a fid and opened it, or we didn't and we're just stuck with our fd
|
||||
auto rv = m_fd;
|
||||
return rv;
|
||||
return m_fd;
|
||||
}
|
||||
|
||||
BeesFileRange
|
||||
@@ -938,6 +930,7 @@ BeesBlockData::data() const
|
||||
{
|
||||
if (m_data.empty()) {
|
||||
THROW_CHECK1(invalid_argument, size(), size() > 0);
|
||||
BEESNOTE("Reading BeesBlockData " << *this);
|
||||
BEESTOOLONG("Reading BeesBlockData " << *this);
|
||||
Timer read_timer;
|
||||
|
||||
|
85
src/bees.cc
85
src/bees.cc
@@ -19,18 +19,25 @@
|
||||
#include <linux/fs.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include <getopt.h>
|
||||
|
||||
using namespace crucible;
|
||||
using namespace std;
|
||||
|
||||
int
|
||||
do_cmd_help(const char **argv)
|
||||
do_cmd_help(char *argv[])
|
||||
{
|
||||
cerr << "Usage: " << argv[0] << " fs-root-path [fs-root-path-2...]\n"
|
||||
cerr << "Usage: " << argv[0] << " [options] fs-root-path [fs-root-path-2...]\n"
|
||||
"Performs best-effort extent-same deduplication on btrfs.\n"
|
||||
"\n"
|
||||
"fs-root-path MUST be the root of a btrfs filesystem tree (id 5).\n"
|
||||
"Other directories will be rejected.\n"
|
||||
"\n"
|
||||
"Options:\n"
|
||||
"\t-h, --help\t\tShow this help\n"
|
||||
"\t-t, --timestamps\tShow timestamps in log output (default)\n"
|
||||
"\t-T, --notimestamps\tOmit timestamps in log output\n"
|
||||
"\n"
|
||||
"Optional environment variables:\n"
|
||||
"\tBEESHOME\tPath to hash table and configuration files\n"
|
||||
"\t\t\t(default is .beeshome/ in the root of each filesystem).\n"
|
||||
@@ -51,7 +58,13 @@ thread_local BeesTracer *BeesTracer::tl_next_tracer = nullptr;
|
||||
BeesTracer::~BeesTracer()
|
||||
{
|
||||
if (uncaught_exception()) {
|
||||
m_func();
|
||||
try {
|
||||
m_func();
|
||||
} catch (exception &e) {
|
||||
BEESLOG("Nested exception: " << e.what());
|
||||
} catch (...) {
|
||||
BEESLOG("Nested exception ...");
|
||||
}
|
||||
if (!m_next_tracer) {
|
||||
BEESLOG("--- END TRACE --- exception ---");
|
||||
}
|
||||
@@ -413,6 +426,7 @@ BeesTempFile::create()
|
||||
BEESNOTE("creating temporary file in " << m_ctx->root_path());
|
||||
BEESTOOLONG("creating temporary file in " << m_ctx->root_path());
|
||||
|
||||
Timer create_timer;
|
||||
DIE_IF_MINUS_ONE(m_fd = openat(m_ctx->root_fd(), ".", FLAGS_OPEN_TMPFILE, S_IRUSR | S_IWUSR));
|
||||
BEESCOUNT(tmp_create);
|
||||
|
||||
@@ -420,18 +434,22 @@ BeesTempFile::create()
|
||||
// Resolves won't work there anyway. There are lots of tempfiles
|
||||
// and they're short-lived, so this ends up being just a memory leak
|
||||
// m_ctx->blacklist_add(BeesFileId(m_fd));
|
||||
|
||||
// Put this inode in the cache so we can resolve it later
|
||||
m_ctx->insert_root_ino(m_fd);
|
||||
|
||||
// Set compression attribute
|
||||
int flags = 0;
|
||||
BEESTRACE("Getting FS_COMPR_FL on m_fd " << name_fd(m_fd) << " flags " << to_hex(flags));
|
||||
DIE_IF_MINUS_ONE(ioctl(m_fd, FS_IOC_GETFLAGS, &flags));
|
||||
BEESTRACE("Getting FS_COMPR_FL on m_fd " << name_fd(m_fd));
|
||||
int flags = ioctl_iflags_get(m_fd);
|
||||
flags |= FS_COMPR_FL;
|
||||
BEESTRACE("Setting FS_COMPR_FL on m_fd " << name_fd(m_fd) << " flags " << to_hex(flags));
|
||||
DIE_IF_MINUS_ONE(ioctl(m_fd, FS_IOC_SETFLAGS, &flags));
|
||||
ioctl_iflags_set(m_fd, flags);
|
||||
|
||||
// Always leave first block empty to avoid creating a file with an inline extent
|
||||
m_end_offset = BLOCK_SIZE_CLONE;
|
||||
|
||||
// Count time spent here
|
||||
BEESCOUNTADD(tmp_create_ms, create_timer.age() * 1000);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -445,11 +463,15 @@ BeesTempFile::resize(off_t offset)
|
||||
THROW_CHECK2(invalid_argument, m_end_offset, offset, m_end_offset < offset);
|
||||
|
||||
// Truncate
|
||||
Timer resize_timer;
|
||||
DIE_IF_NON_ZERO(ftruncate(m_fd, offset));
|
||||
BEESCOUNT(tmp_resize);
|
||||
|
||||
// Success
|
||||
m_end_offset = offset;
|
||||
|
||||
// Count time spent here
|
||||
BEESCOUNTADD(tmp_resize_ms, resize_timer.age() * 1000);
|
||||
}
|
||||
|
||||
BeesTempFile::BeesTempFile(shared_ptr<BeesContext> ctx) :
|
||||
@@ -518,6 +540,7 @@ BeesTempFile::make_copy(const BeesFileRange &src)
|
||||
auto end = m_end_offset + src.size();
|
||||
resize(end);
|
||||
|
||||
Timer copy_timer;
|
||||
BeesFileRange rv(m_fd, begin, end);
|
||||
BEESTRACE("copying to: " << rv);
|
||||
BEESNOTE("copying " << src << " to " << rv);
|
||||
@@ -543,10 +566,15 @@ BeesTempFile::make_copy(const BeesFileRange &src)
|
||||
src_p += len;
|
||||
dst_p += len;
|
||||
}
|
||||
BEESCOUNTADD(tmp_copy_ms, copy_timer.age() * 1000);
|
||||
|
||||
// We seem to get lockups without this!
|
||||
if (did_block_write) {
|
||||
#if 1
|
||||
// Is this fixed by "Btrfs: fix deadlock between dedup on same file and starting writeback"?
|
||||
// No.
|
||||
bees_sync(m_fd);
|
||||
#endif
|
||||
}
|
||||
|
||||
BEESCOUNT(tmp_copy);
|
||||
@@ -554,7 +582,7 @@ BeesTempFile::make_copy(const BeesFileRange &src)
|
||||
}
|
||||
|
||||
int
|
||||
bees_main(int argc, const char **argv)
|
||||
bees_main(int argc, char *argv[])
|
||||
{
|
||||
set_catch_explainer([&](string s) {
|
||||
BEESLOG("\n\n*** EXCEPTION ***\n\t" << s << "\n***\n");
|
||||
@@ -568,14 +596,47 @@ bees_main(int argc, const char **argv)
|
||||
shared_ptr<BeesContext> bc;
|
||||
|
||||
THROW_CHECK1(invalid_argument, argc, argc >= 0);
|
||||
vector<string> args(argv + 1, argv + argc);
|
||||
|
||||
// Defaults
|
||||
bool chatter_prefix_timestamp = true;
|
||||
|
||||
// Parse options
|
||||
int c;
|
||||
while (1) {
|
||||
int option_index = 0;
|
||||
// Long-option table for getopt_long().  Each long option returns the
// same character as its short form.
static struct option long_options[] = {
	{ "timestamps",   no_argument, NULL, 't' },
	{ "notimestamps", no_argument, NULL, 'T' },
	{ "help",         no_argument, NULL, 'h' },
	// getopt_long() finds the end of the table by its all-zero entry;
	// without it an unknown long option reads past the array.
	{ NULL,           0,           NULL, 0   }
};
|
||||
|
||||
c = getopt_long(argc, argv, "Tth", long_options, &option_index);
|
||||
if (-1 == c) {
|
||||
break;
|
||||
}
|
||||
|
||||
switch (c) {
|
||||
case 'T':
|
||||
chatter_prefix_timestamp = false;
|
||||
break;
|
||||
case 't':
|
||||
chatter_prefix_timestamp = true;
|
||||
break;
|
||||
case 'h':
|
||||
do_cmd_help(argv); // fallthrough
|
||||
default:
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
Chatter::enable_timestamp(chatter_prefix_timestamp);
|
||||
|
||||
// Create a context and start crawlers
|
||||
bool did_subscription = false;
|
||||
for (string arg : args) {
|
||||
while (optind < argc) {
|
||||
catch_all([&]() {
|
||||
bc = make_shared<BeesContext>(bc);
|
||||
bc->set_root_path(arg);
|
||||
bc->set_root_path(argv[optind++]);
|
||||
did_subscription = true;
|
||||
});
|
||||
}
|
||||
@@ -596,7 +657,7 @@ bees_main(int argc, const char **argv)
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, const char **argv)
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
cerr << "bees version " << BEES_VERSION << endl;
|
||||
|
||||
|
69
src/bees.h
69
src/bees.h
@@ -39,13 +39,6 @@ const off_t BLOCK_SIZE_MAX_EXTENT_SAME = 4096 * 4096;
|
||||
// Maximum length of a compressed extent in bytes
|
||||
const off_t BLOCK_SIZE_MAX_COMPRESSED_EXTENT = 128 * 1024;
|
||||
|
||||
// Try to combine smaller extents into larger ones
|
||||
const off_t BLOCK_SIZE_MIN_EXTENT_DEFRAG = BLOCK_SIZE_MAX_COMPRESSED_EXTENT;
|
||||
|
||||
// Avoid splitting extents that are already too small
|
||||
const off_t BLOCK_SIZE_MIN_EXTENT_SPLIT = BLOCK_SIZE_MAX_COMPRESSED_EXTENT;
|
||||
// const off_t BLOCK_SIZE_MIN_EXTENT_SPLIT = 1024LL * 1024 * 1024 * 1024;
|
||||
|
||||
// Maximum length of any extent in bytes
|
||||
// except we've seen 1.03G extents...
|
||||
// ...FIEMAP is slow and full of lies
|
||||
@@ -54,8 +47,6 @@ const off_t BLOCK_SIZE_MAX_EXTENT = 128 * 1024 * 1024;
|
||||
// Masks, so we don't have to write "(BLOCK_SIZE_CLONE - 1)" everywhere
|
||||
const off_t BLOCK_MASK_CLONE = BLOCK_SIZE_CLONE - 1;
|
||||
const off_t BLOCK_MASK_SUMS = BLOCK_SIZE_SUMS - 1;
|
||||
const off_t BLOCK_MASK_MMAP = BLOCK_SIZE_MMAP - 1;
|
||||
const off_t BLOCK_MASK_MAX_COMPRESSED_EXTENT = BLOCK_SIZE_MAX_COMPRESSED_EXTENT * 2 - 1;
|
||||
|
||||
// Maximum temporary file size
|
||||
const off_t BLOCK_SIZE_MAX_TEMP_FILE = 1024 * 1024 * 1024;
|
||||
@@ -69,29 +60,32 @@ const off_t BLOCK_SIZE_HASHTAB_EXTENT = 16 * 1024 * 1024;
|
||||
// Bytes per second we want to flush (8GB every two hours)
|
||||
const double BEES_FLUSH_RATE = 8.0 * 1024 * 1024 * 1024 / 7200.0;
|
||||
|
||||
// Interval between writing non-hash-table things to disk (15 minutes)
|
||||
const int BEES_WRITEBACK_INTERVAL = 900;
|
||||
// How long we should wait for new btrfs transactions
|
||||
const double BEES_COMMIT_INTERVAL = 900;
|
||||
|
||||
// Interval between writing non-hash-table things to disk, and starting new subvol crawlers
|
||||
const int BEES_WRITEBACK_INTERVAL = BEES_COMMIT_INTERVAL;
|
||||
|
||||
// Statistics reports while scanning
|
||||
const int BEES_STATS_INTERVAL = 3600;
|
||||
|
||||
// Progress shows instantaneous rates and thread status
|
||||
const int BEES_PROGRESS_INTERVAL = 3600;
|
||||
const int BEES_PROGRESS_INTERVAL = BEES_STATS_INTERVAL;
|
||||
|
||||
// Status is output every freakin second. Use a ramdisk.
|
||||
const int BEES_STATUS_INTERVAL = 1;
|
||||
|
||||
// Number of FDs to open (not counting 100 roots)
|
||||
const size_t BEES_FD_CACHE_SIZE = 384;
|
||||
|
||||
// Log warnings when an operation takes too long
|
||||
const double BEES_TOO_LONG = 2.5;
|
||||
|
||||
// Avoid any extent where LOGICAL_INO takes this long
|
||||
const double BEES_TOXIC_DURATION = 9.9;
|
||||
|
||||
// How long we should wait for new btrfs transactions
|
||||
const double BEES_COMMIT_INTERVAL = 900;
|
||||
|
||||
// How long between hash table histograms
|
||||
const double BEES_HASH_TABLE_ANALYZE_INTERVAL = 3600;
|
||||
const double BEES_HASH_TABLE_ANALYZE_INTERVAL = BEES_STATS_INTERVAL;
|
||||
|
||||
// Rate limiting of informational messages
|
||||
const double BEES_INFO_RATE = 10.0;
|
||||
@@ -251,15 +245,14 @@ ostream& operator<<(ostream &os, const BeesFileId &bfi);
|
||||
|
||||
class BeesFileRange {
|
||||
protected:
|
||||
static mutex s_mutex;
|
||||
mutable Fd m_fd;
|
||||
mutable BeesFileId m_fid;
|
||||
off_t m_begin, m_end;
|
||||
mutable off_t m_file_size;
|
||||
off_t m_begin = 0, m_end = 0;
|
||||
mutable off_t m_file_size = -1;
|
||||
|
||||
public:
|
||||
|
||||
BeesFileRange();
|
||||
BeesFileRange() = default;
|
||||
BeesFileRange(Fd fd, off_t begin, off_t end);
|
||||
BeesFileRange(const BeesFileId &fid, off_t begin, off_t end);
|
||||
BeesFileRange(const BeesBlockData &bbd);
|
||||
@@ -604,42 +597,6 @@ public:
|
||||
friend ostream & operator<<(ostream &os, const BeesRangePair &brp);
|
||||
};
|
||||
|
||||
class BeesWorkQueueBase {
|
||||
string m_name;
|
||||
|
||||
protected:
|
||||
static mutex s_mutex;
|
||||
static set<BeesWorkQueueBase *> s_all_workers;
|
||||
|
||||
public:
|
||||
virtual ~BeesWorkQueueBase();
|
||||
BeesWorkQueueBase(const string &name);
|
||||
|
||||
string name() const;
|
||||
void name(const string &new_name);
|
||||
|
||||
virtual size_t active_size() const = 0;
|
||||
virtual list<string> peek_active(size_t count) const = 0;
|
||||
|
||||
static void for_each_work_queue(function<void(BeesWorkQueueBase *)> f);
|
||||
};
|
||||
|
||||
template <class Task>
|
||||
class BeesWorkQueue : public BeesWorkQueueBase {
|
||||
WorkQueue<Task> m_active_queue;
|
||||
|
||||
public:
|
||||
BeesWorkQueue(const string &name);
|
||||
~BeesWorkQueue();
|
||||
void push_active(const Task &task, size_t limit);
|
||||
void push_active(const Task &task);
|
||||
|
||||
size_t active_size() const override;
|
||||
list<string> peek_active(size_t count) const override;
|
||||
|
||||
Task pop();
|
||||
};
|
||||
|
||||
class BeesTempFile {
|
||||
shared_ptr<BeesContext> m_ctx;
|
||||
Fd m_fd;
|
||||
|
Reference in New Issue
Block a user