From 312254a47b0637ed23958169e315970c8c29fe86 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Thu, 30 Mar 2017 00:35:59 -0400 Subject: [PATCH 01/37] crucible: cache: no need to use explicit lock type C++11 'auto' keyword is sufficient. Signed-off-by: Zygo Blaxell (cherry picked from commit 44fedfc928a5ac6b8040fd94907954f0f2f806f1) --- include/crucible/cache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/crucible/cache.h b/include/crucible/cache.h index 767b790..d7fa41d 100644 --- a/include/crucible/cache.h +++ b/include/crucible/cache.h @@ -124,7 +124,7 @@ namespace crucible { if (found == m_map.end()) { // No, release cache lock and acquire key lock lock.unlock(); - typename LockSet::Lock key_lock(m_lockset, k); + auto key_lock = m_lockset.make_lock(k); // Did item appear in cache while we were waiting for key? lock.lock(); @@ -197,7 +197,7 @@ namespace crucible { if (found == m_map.end()) { // No, release cache lock and acquire key lock lock.unlock(); - typename LockSet::Lock key_lock(m_lockset, k); + auto key_lock = m_lockset.make_lock(k); // Did item appear in cache while we were waiting for key? lock.lock(); From d6f97edf4adbf15b628a0efc2c1965996374fb84 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Tue, 7 Feb 2017 21:56:54 -0500 Subject: [PATCH 02/37] crucible: fs: keep ioctl buffer between runs perf blames the SEARCH_V2 ioctl wrapper for a lot of time spent in malloc. Use a thread_local buffer for ioctl results, and reuse it between runs. Signed-off-by: Zygo Blaxell (cherry picked from commit e509210428951e645d33916694a17aed1950991d) --- lib/fs.cc | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/fs.cc b/lib/fs.cc index 511ceae..366934f 100644 --- a/lib/fs.cc +++ b/lib/fs.cc @@ -716,10 +716,20 @@ namespace crucible { bool BtrfsIoctlSearchKey::do_ioctl_nothrow(int fd) { - vector ioctl_arg = vector_copy_struct(this); // Normally we like to be paranoid and fill empty bytes with zero, // but these buffers can be huge. 80% of a 4GHz CPU huge. - ioctl_arg.resize(sizeof(btrfs_ioctl_search_args_v2) + m_buf_size); + + // Keep the ioctl buffer from one run to the next to save on malloc costs + size_t target_buf_size = sizeof(btrfs_ioctl_search_args_v2) + m_buf_size; + + thread_local vector ioctl_arg; + if (ioctl_arg.size() < m_buf_size) { + ioctl_arg = vector_copy_struct(this); + ioctl_arg.resize(target_buf_size); + } else { + memcpy(ioctl_arg.data(), static_cast(this), sizeof(btrfs_ioctl_search_key)); + } + btrfs_ioctl_search_args_v2 *ioctl_ptr = reinterpret_cast(ioctl_arg.data()); ioctl_ptr->buf_size = m_buf_size; From 6c8d2bf42823985e2d957163d2eb93b3c9c741c5 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Thu, 26 Jan 2017 22:03:10 -0500 Subject: [PATCH 03/37] bees: limit FD cache size explicitly This will allow the default size limit for cache objects to be changed with impunity. Signed-off-by: Zygo Blaxell (cherry picked from commit 9daa51edaab44c02ce0917ff94b20683036d7594) --- src/bees-context.cc | 1 + src/bees.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/src/bees-context.cc b/src/bees-context.cc index ada5a1c..31e1b49 100644 --- a/src/bees-context.cc +++ b/src/bees-context.cc @@ -35,6 +35,7 @@ BeesFdCache::BeesFdCache() BEESCOUNTADD(open_ino_ms, open_timer.age() * 1000); return rv; }); + m_file_cache.max_size(BEES_FD_CACHE_SIZE); } Fd diff --git a/src/bees.h b/src/bees.h index 8c74ca2..0c685e2 100644 --- a/src/bees.h +++ b/src/bees.h @@ -81,6 +81,9 @@ const int BEES_PROGRESS_INTERVAL = 3600; // Status is output every freakin second. Use a ramdisk. const int BEES_STATUS_INTERVAL = 1; +// Number of FDs to open (not counting 100 roots) +const size_t BEES_FD_CACHE_SIZE = 384; + // Log warnings when an operation takes too long const double BEES_TOO_LONG = 2.5; From 3fdc217b4f94ee84a54ed6849443a1c4ccf7a295 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Fri, 27 Jan 2017 21:46:41 -0500 Subject: [PATCH 04/37] bees: change formatting for physical bytenr ranges in dedup Use a different character to make it easier to search for bytenr ranges in the logs. Signed-off-by: Zygo Blaxell (cherry picked from commit d43199e3d6e6469264eb10de8b0a783f8573e0e8) --- src/bees-context.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bees-context.cc b/src/bees-context.cc index 31e1b49..dae5171 100644 --- a/src/bees-context.cc +++ b/src/bees-context.cc @@ -295,7 +295,7 @@ BeesContext::dedup(const BeesRangePair &brp) } BeesAddress first_addr(brp.first.fd(), brp.first.begin()); BeesAddress second_addr(brp.second.fd(), brp.second.begin()); - dst_line << " (" << first_addr << "->" << second_addr << ")"; + dst_line << " {" << first_addr << "->" << second_addr << "}"; if (first_addr.get_physical_or_zero() == second_addr.get_physical_or_zero()) { BEESLOGTRACE("equal physical addresses in dedup"); BEESCOUNT(bug_dedup_same_physical); From c6c3990d19ae463bad8f362084c504e1a997e596 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Fri, 27 Jan 2017 21:43:21 -0500 Subject: [PATCH 05/37] bees: types: improve serialization of byte ranges Use () instead of [] when the respective end of the byte range touches the beginning or end of the file. Also omit the '0' at beginning of file. Signed-off-by: Zygo Blaxell (cherry picked from commit 3023b7f57a3003242bc770bcfe55f666227680ff) --- src/bees-types.cc | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/bees-types.cc b/src/bees-types.cc index 54a4959..b418fda 100644 --- a/src/bees-types.cc +++ b/src/bees-types.cc @@ -71,7 +71,18 @@ operator<<(ostream &os, const BeesFileRange &bfr) if (bfr.end() == numeric_limits::max()) { os << "- [" << to_hex(bfr.begin()) << "..eof]"; } else { - os << pretty(bfr.size()) << " [" << to_hex(bfr.begin()) << ".." << to_hex(bfr.end()) << "]"; + os << pretty(bfr.size()) << " "; + if (bfr.begin() != 0) { + os << "[" << to_hex(bfr.begin()); + } else { + os << "("; + } + os << ".." << to_hex(bfr.end()); + if (!!bfr.m_fd && bfr.end() >= bfr.file_size()) { + os << ")"; + } else { + os << "]"; + } } if (bfr.m_fid) { os << " fid = " << bfr.m_fid; From c479b361cd7d647fd3f3bac6a0d64bfb67d5a198 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Fri, 27 Jan 2017 21:43:21 -0500 Subject: [PATCH 06/37] bees: remove file open serialization mutex It is no longer necessary. Signed-off-by: Zygo Blaxell (cherry picked from commit 5c9104555768801701df15f512c4d3d7c579398b) --- src/bees-types.cc | 13 ++----------- src/bees.h | 1 - 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/src/bees-types.cc b/src/bees-types.cc index b418fda..f5f398f 100644 --- a/src/bees-types.cc +++ b/src/bees-types.cc @@ -103,8 +103,6 @@ operator<<(ostream &os, const BeesRangePair &brp) << "\ndst = " << brp.second.fd() << " " << name_fd(brp.second.fd()); } -mutex BeesFileRange::s_mutex; - bool BeesFileRange::operator<(const BeesFileRange &that) const { @@ -156,7 +154,6 @@ off_t BeesFileRange::file_size() const { if (m_file_size <= 0) { - // Use method fd() not member m_fd() so we hold lock Stat st(fd()); m_file_size = st.st_size; // These checks could trigger on valid input, but that would mean we have @@ -296,23 +293,18 @@ BeesFileRange::operator BeesBlockData() const Fd BeesFileRange::fd() const { - unique_lock lock(s_mutex); - auto rv = m_fd; - return rv; + return m_fd; } Fd BeesFileRange::fd(const shared_ptr &ctx) const { - unique_lock lock(s_mutex); // If we don't have a fid we can't do much here if (m_fid) { if (!m_fd) { // If we don't have a fd, open by fid if (m_fid && ctx) { - lock.unlock(); Fd new_fd = ctx->roots()->open_root_ino(m_fid); - lock.lock(); m_fd = new_fd; } } else { @@ -322,8 +314,7 @@ BeesFileRange::fd(const shared_ptr &ctx) const } } // We either had a fid and opened it, or we didn't and we're just stuck with our fd - auto rv = m_fd; - return rv; + return m_fd; } BeesFileRange diff --git a/src/bees.h b/src/bees.h index 0c685e2..e4c079c 100644 --- a/src/bees.h +++ b/src/bees.h @@ -254,7 +254,6 @@ ostream& operator<<(ostream &os, const BeesFileId &bfi); class BeesFileRange { protected: - static mutex s_mutex; mutable Fd m_fd; mutable BeesFileId m_fid; off_t m_begin, m_end; From e0951ed4baeb04523e05d8ba687cc82aca635630 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Fri, 27 Jan 2017 21:43:21 -0500 Subject: [PATCH 07/37] bees: use C++11 syntax for constant initializers This lets us use more default constructors. Signed-off-by: Zygo Blaxell (cherry picked from commit 8a932a632ff4602a0357ed5fbcd3f86b6bc50283) --- src/bees-types.cc | 16 +++------------- src/bees.h | 6 +++--- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/src/bees-types.cc b/src/bees-types.cc index f5f398f..6ae281f 100644 --- a/src/bees-types.cc +++ b/src/bees-types.cc @@ -186,31 +186,21 @@ BeesFileRange::grow_begin(off_t delta) BeesFileRange::BeesFileRange(const BeesBlockData &bbd) : m_fd(bbd.fd()), m_begin(bbd.begin()), - m_end(bbd.end()), - m_file_size(-1) + m_end(bbd.end()) { } BeesFileRange::BeesFileRange(Fd fd, off_t begin, off_t end) : m_fd(fd), m_begin(begin), - m_end(end), - m_file_size(-1) + m_end(end) { } BeesFileRange::BeesFileRange(const BeesFileId &fid, off_t begin, off_t end) : m_fid(fid), m_begin(begin), - m_end(end), - m_file_size(-1) -{ -} - -BeesFileRange::BeesFileRange() : - m_begin(0), - m_end(0), - m_file_size(-1) + m_end(end) { } diff --git a/src/bees.h b/src/bees.h index e4c079c..41ab956 100644 --- a/src/bees.h +++ b/src/bees.h @@ -256,12 +256,12 @@ class BeesFileRange { protected: mutable Fd m_fd; mutable BeesFileId m_fid; - off_t m_begin, m_end; - mutable off_t m_file_size; + off_t m_begin = 0, m_end = 0; + mutable off_t m_file_size = -1; public: - BeesFileRange(); + BeesFileRange() = default; BeesFileRange(Fd fd, off_t begin, off_t end); BeesFileRange(const BeesFileId &fid, off_t begin, off_t end); BeesFileRange(const BeesBlockData &bbd); From 8cde833863c77e575facfdd2ca093f1d81d39200 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Thu, 1 Jun 2017 18:07:47 -0400 Subject: [PATCH 08/37] bees: make a thread note when we read data Reads can block indefinitely due to bugs, low io priority, or poor storage performance. Record the block origin data in the thread state so we can see which reads are problematic. Signed-off-by: Zygo Blaxell (cherry picked from commit f56f736d28970a0f03ee887a5bd5515cc749d413) --- src/bees-types.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/bees-types.cc b/src/bees-types.cc index 6ae281f..df2d2b4 100644 --- a/src/bees-types.cc +++ b/src/bees-types.cc @@ -930,6 +930,7 @@ BeesBlockData::data() const { if (m_data.empty()) { THROW_CHECK1(invalid_argument, size(), size() > 0); + BEESNOTE("Reading BeesBlockData " << *this); BEESTOOLONG("Reading BeesBlockData " << *this); Timer read_timer; From 74d256f0fe1dd0ac95d26dc89e6636ad52c5c66e Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Mon, 30 Jan 2017 01:47:00 -0500 Subject: [PATCH 09/37] bees: handle trace functions that throw exceptions A BEESTRACE closure could throw an exception. Trap those so we don't end up in terminate(). Signed-off-by: Zygo Blaxell (cherry picked from commit 59660cfc00b9ca233eeb1a7cdf6df34a45a2deba) --- src/bees.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/bees.cc b/src/bees.cc index 1efaf78..55fb240 100644 --- a/src/bees.cc +++ b/src/bees.cc @@ -51,7 +51,13 @@ thread_local BeesTracer *BeesTracer::tl_next_tracer = nullptr; BeesTracer::~BeesTracer() { if (uncaught_exception()) { - m_func(); + try { + m_func(); + } catch (exception &e) { + BEESLOG("Nested exception: " << e.what()); + } catch (...) { + BEESLOG("Nested exception ..."); + } if (!m_next_tracer) { BEESLOG("--- END TRACE --- exception ---"); } From b0ba4c4f3802487c610ea8c24d83189e761802ae Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Fri, 3 Feb 2017 13:02:27 -0500 Subject: [PATCH 10/37] bees: time tmpfile create and copy operations Add time spent in file create and copy operations to the stats. Signed-off-by: Zygo Blaxell (cherry picked from commit f01c20f97269083175a74d1a1fd3ebaced2d9560) --- src/bees.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/bees.cc b/src/bees.cc index 55fb240..ffddaba 100644 --- a/src/bees.cc +++ b/src/bees.cc @@ -419,6 +419,7 @@ BeesTempFile::create() BEESNOTE("creating temporary file in " << m_ctx->root_path()); BEESTOOLONG("creating temporary file in " << m_ctx->root_path()); + Timer create_timer; DIE_IF_MINUS_ONE(m_fd = openat(m_ctx->root_fd(), ".", FLAGS_OPEN_TMPFILE, S_IRUSR | S_IWUSR)); BEESCOUNT(tmp_create); @@ -426,6 +427,8 @@ BeesTempFile::create() // Resolves won't work there anyway. There are lots of tempfiles // and they're short-lived, so this ends up being just a memory leak // m_ctx->blacklist_add(BeesFileId(m_fd)); + + // Put this inode in the cache so we can resolve it later m_ctx->insert_root_ino(m_fd); // Set compression attribute @@ -438,6 +441,9 @@ BeesTempFile::create() // Always leave first block empty to avoid creating a file with an inline extent m_end_offset = BLOCK_SIZE_CLONE; + + // Count time spent here + BEESCOUNTADD(tmp_create_ms, create_timer.age() * 1000); } void @@ -451,11 +457,15 @@ BeesTempFile::resize(off_t offset) THROW_CHECK2(invalid_argument, m_end_offset, offset, m_end_offset < offset); // Truncate + Timer resize_timer; DIE_IF_NON_ZERO(ftruncate(m_fd, offset)); BEESCOUNT(tmp_resize); // Success m_end_offset = offset; + + // Count time spent here + BEESCOUNTADD(tmp_resize_ms, resize_timer.age() * 1000); } BeesTempFile::BeesTempFile(shared_ptr ctx) : @@ -524,6 +534,7 @@ BeesTempFile::make_copy(const BeesFileRange &src) auto end = m_end_offset + src.size(); resize(end); + Timer copy_timer; BeesFileRange rv(m_fd, begin, end); BEESTRACE("copying to: " << rv); BEESNOTE("copying " << src << " to " << rv); @@ -549,6 +560,7 @@ BeesTempFile::make_copy(const BeesFileRange &src) src_p += len; dst_p += len; } + BEESCOUNTADD(tmp_copy_ms, copy_timer.age() * 1000); // We seem to get lockups without this! if (did_block_write) { From 48aac8a99a793811b658774b7ad6dd74dacddbcb Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Fri, 3 Feb 2017 13:09:15 -0500 Subject: [PATCH 11/37] bees: drop unused constants BLOCK_SIZE_MIN_EXTENT_DEFRAG, BLOCK_SIZE_MIN_EXTENT_SPLIT, and others are no longer used. Remove them. Signed-off-by: Zygo Blaxell (cherry picked from commit a3d7032edaf5fc584412d0dcf8773f1cafa8f2dc) --- src/bees.h | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/src/bees.h b/src/bees.h index 41ab956..c1b827e 100644 --- a/src/bees.h +++ b/src/bees.h @@ -39,13 +39,6 @@ const off_t BLOCK_SIZE_MAX_EXTENT_SAME = 4096 * 4096; // Maximum length of a compressed extent in bytes const off_t BLOCK_SIZE_MAX_COMPRESSED_EXTENT = 128 * 1024; -// Try to combine smaller extents into larger ones -const off_t BLOCK_SIZE_MIN_EXTENT_DEFRAG = BLOCK_SIZE_MAX_COMPRESSED_EXTENT; - -// Avoid splitting extents that are already too small -const off_t BLOCK_SIZE_MIN_EXTENT_SPLIT = BLOCK_SIZE_MAX_COMPRESSED_EXTENT; -// const off_t BLOCK_SIZE_MIN_EXTENT_SPLIT = 1024LL * 1024 * 1024 * 1024; - // Maximum length of any extent in bytes // except we've seen 1.03G extents... // ...FIEMAP is slow and full of lies @@ -54,8 +47,6 @@ const off_t BLOCK_SIZE_MAX_EXTENT = 128 * 1024 * 1024; // Masks, so we don't have to write "(BLOCK_SIZE_CLONE - 1)" everywhere const off_t BLOCK_MASK_CLONE = BLOCK_SIZE_CLONE - 1; const off_t BLOCK_MASK_SUMS = BLOCK_SIZE_SUMS - 1; -const off_t BLOCK_MASK_MMAP = BLOCK_SIZE_MMAP - 1; -const off_t BLOCK_MASK_MAX_COMPRESSED_EXTENT = BLOCK_SIZE_MAX_COMPRESSED_EXTENT * 2 - 1; // Maximum temporary file size const off_t BLOCK_SIZE_MAX_TEMP_FILE = 1024 * 1024 * 1024; @@ -69,14 +60,17 @@ const off_t BLOCK_SIZE_HASHTAB_EXTENT = 16 * 1024 * 1024; // Bytes per second we want to flush (8GB every two hours) const double BEES_FLUSH_RATE = 8.0 * 1024 * 1024 * 1024 / 7200.0; -// Interval between writing non-hash-table things to disk (15 minutes) -const int BEES_WRITEBACK_INTERVAL = 900; +// How long we should wait for new btrfs transactions +const double BEES_COMMIT_INTERVAL = 900; + +// Interval between writing non-hash-table things to disk, and starting new subvol crawlers +const int BEES_WRITEBACK_INTERVAL = BEES_COMMIT_INTERVAL; // Statistics reports while scanning const int BEES_STATS_INTERVAL = 3600; // Progress shows instantaneous rates and thread status -const int BEES_PROGRESS_INTERVAL = 3600; +const int BEES_PROGRESS_INTERVAL = BEES_STATS_INTERVAL; // Status is output every freakin second. Use a ramdisk. const int BEES_STATUS_INTERVAL = 1; @@ -90,11 +84,8 @@ const double BEES_TOO_LONG = 2.5; // Avoid any extent where LOGICAL_INO takes this long const double BEES_TOXIC_DURATION = 9.9; -// How long we should wait for new btrfs transactions -const double BEES_COMMIT_INTERVAL = 900; - // How long between hash table histograms -const double BEES_HASH_TABLE_ANALYZE_INTERVAL = 3600; +const double BEES_HASH_TABLE_ANALYZE_INTERVAL = BEES_STATS_INTERVAL; // Rate limiting of informational messages const double BEES_INFO_RATE = 10.0; From 3901962379d13f472647fb311e5d71f9f01e0ae8 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Wed, 3 May 2017 21:02:28 -0400 Subject: [PATCH 12/37] bees: trace calls to BeesResolver This helps identify causes of the "same physical address in dedup" exception. Signed-off-by: Zygo Blaxell (cherry picked from commit cc7b4f22b5df3a1f52d27060ee8a6a3352b8cd10) --- src/bees-context.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/bees-context.cc b/src/bees-context.cc index dae5171..8ff0cc3 100644 --- a/src/bees-context.cc +++ b/src/bees-context.cc @@ -349,6 +349,7 @@ BeesContext::rewrite_file_range(const BeesFileRange &bfr) // BEESLOG("\torig_bbd " << orig_bbd); BeesBlockData dup_bbd(dup_brp.first.fd(), dup_brp.first.begin(), min(BLOCK_SIZE_SUMS, dup_brp.first.size())); // BEESLOG("BeesResolver br(..., " << bfr << ")"); + BEESTRACE("BeesContext::rewrite_file_range calling BeesResolver " << bfr); BeesResolver br(m_ctx, BeesAddress(bfr.fd(), bfr.begin())); // BEESLOG("\treplace_src " << dup_bbd); br.replace_src(dup_bbd); @@ -554,6 +555,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e) catch_all([&]() { BEESNOTE("resolving " << found_addr << " matched " << bbd); BEESTRACE("resolving " << found_addr << " matched " << bbd); + BEESTRACE("BeesContext::scan_one_extent calling BeesResolver " << found_addr); BeesResolver resolved(m_ctx, found_addr); // Toxic extents are really toxic if (resolved.is_toxic()) { From 4f66d1cb44948e1c5d96911145b191e2cd27ede1 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Thu, 30 Mar 2017 00:35:59 -0400 Subject: [PATCH 13/37] crucible: lockset: track lockers and use handle type [bees master branch edition] Keep track of the locking thread so we can see why we are deadlocked in gdb. Use a handle type for locks based on shared_ptr. Change the handle type name to flush out any non-auto local variables. Signed-off-by: Zygo Blaxell (cherry picked from commit aa0b22d445664409c36503c6fd808bc49b6816d0) --- include/crucible/lockset.h | 72 ++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 37 deletions(-) diff --git a/include/crucible/lockset.h b/include/crucible/lockset.h index 2af782d..e792e9d 100644 --- a/include/crucible/lockset.h +++ b/include/crucible/lockset.h @@ -2,14 +2,16 @@ #define CRUCIBLE_LOCKSET_H #include +#include #include #include #include #include +#include +#include #include -#include namespace crucible { using namespace std; @@ -18,7 +20,7 @@ namespace crucible { class LockSet { public: - using set_type = set; + using set_type = map; using key_type = typename set_type::key_type; private: @@ -31,6 +33,24 @@ namespace crucible { bool full(); bool locked(const key_type &name); + class Lock { + LockSet &m_lockset; + key_type m_name; + bool m_locked; + + Lock() = delete; + Lock(const Lock &) = delete; + Lock& operator=(const Lock &) = delete; + Lock(Lock &&that) = delete; + Lock& operator=(Lock &&that) = delete; + public: + ~Lock(); + Lock(LockSet &lockset, const key_type &name, bool start_locked = true); + void lock(); + void unlock(); + bool try_lock(); + }; + public: ~LockSet(); LockSet() = default; @@ -45,24 +65,18 @@ namespace crucible { void max_size(size_t max); - class Lock { - LockSet &m_lockset; - key_type m_name; - bool m_locked; + class LockHandle { + shared_ptr m_lock; - Lock() = delete; - Lock(const Lock &) = delete; - Lock& operator=(const Lock &) = delete; public: - ~Lock(); - Lock(LockSet &lockset, const key_type &m_name, bool start_locked = true); - Lock(Lock &&that); - Lock& operator=(Lock &&that); - void lock(); - void unlock(); - bool try_lock(); + LockHandle(LockSet &lockset, const key_type &name, bool start_locked = true) : + m_lock(make_shared(lockset, name, start_locked)) {} + void lock() { m_lock->lock(); } + void unlock() { m_lock->unlock(); } + bool try_lock() { return m_lock->try_lock(); } }; + LockHandle make_lock(const key_type &name, bool start_locked = true); }; template @@ -104,7 +118,7 @@ namespace crucible { while (full() || locked(name)) { m_condvar.wait(lock); } - auto rv = m_set.insert(name); + auto rv = m_set.insert(make_pair(name, gettid())); THROW_CHECK0(runtime_error, rv.second); } @@ -116,7 +130,7 @@ namespace crucible { if (full() || locked(name)) { return false; } - auto rv = m_set.insert(name); + auto rv = m_set.insert(make_pair(name, gettid())); THROW_CHECK1(runtime_error, name, rv.second); return true; } @@ -214,26 +228,10 @@ namespace crucible { } template - LockSet::Lock::Lock(Lock &&that) : - m_lockset(that.lockset), - m_name(that.m_name), - m_locked(that.m_locked) + typename LockSet::LockHandle + LockSet::make_lock(const key_type &name, bool start_locked) { - that.m_locked = false; - } - - template - typename LockSet::Lock & - LockSet::Lock::operator=(Lock &&that) - { - THROW_CHECK2(invalid_argument, &m_lockset, &that.m_lockset, &m_lockset == &that.m_lockset); - if (m_locked && that.m_name != m_name) { - unlock(); - } - m_name = that.m_name; - m_locked = that.m_locked; - that.m_locked = false; - return *this; + return LockHandle(*this, name, start_locked); } } From 703bb7c1a399e84cc523c7b93f947d8a5845fa20 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Thu, 30 Mar 2017 00:35:59 -0400 Subject: [PATCH 14/37] bees: use handle type for hash table extent locks Fixes build breakage after "crucible: lockset: track lockers and use handle type". Signed-off-by: Zygo Blaxell --- src/bees-hash.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bees-hash.cc b/src/bees-hash.cc index 5cc2cb1..37b6f6c 100644 --- a/src/bees-hash.cc +++ b/src/bees-hash.cc @@ -311,7 +311,7 @@ BeesHashTable::fetch_missing_extent(HashType hash) BEESNOTE("waiting to fetch hash extent #" << extent_number << ", " << missing_buckets << " left to fetch"); // Acquire blocking lock on this extent only - LockSet::Lock extent_lock(m_extent_lock_set, extent_number); + auto extent_lock = m_extent_lock_set.make_lock(extent_number); // Check missing again because someone else might have fetched this // extent for us while we didn't hold any locks From a5e2bdff47d7079d022ebfb218edc597ccc03058 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Tue, 12 Sep 2017 02:09:22 +0200 Subject: [PATCH 15/37] Skip nocow files to speed up processing If you have a lot of or a few big nocow files (like vm images) which contain a lot of potential deduplication candidates, bees becomes incredibly slow running through a lot "invalid operation" exceptions. Let's just skip over such files to get more bang for the buck. I did no regression testing as this patch seems trivial (and I cannot imagine any pitfalls either). The process progresses much faster for me now. --- include/crucible/fd.h | 6 ++++++ lib/fd.cc | 8 ++++++++ src/bees-roots.cc | 8 ++++++++ 3 files changed, 22 insertions(+) diff --git a/include/crucible/fd.h b/include/crucible/fd.h index 8492846..9818819 100644 --- a/include/crucible/fd.h +++ b/include/crucible/fd.h @@ -13,6 +13,10 @@ #include #include +// ioctl +#include +#include + // socket #include @@ -141,6 +145,8 @@ namespace crucible { Stat &lstat(const string &filename); }; + int ioctl_iflags_get(int fd); + string st_mode_ntoa(mode_t mode); // Because it's not trivial to do correctly diff --git a/lib/fd.cc b/lib/fd.cc index e0735a9..931167f 100644 --- a/lib/fd.cc +++ b/lib/fd.cc @@ -488,6 +488,14 @@ namespace crucible { lstat(filename); } + int + ioctl_iflags_get(int fd) + { + int attr = 0; + DIE_IF_MINUS_ONE(ioctl(fd, FS_IOC_GETFLAGS, &attr)); + return attr; + } + string readlink_or_die(const string &path) { diff --git a/src/bees-roots.cc b/src/bees-roots.cc index 921132c..e99dc66 100644 --- a/src/bees-roots.cc +++ b/src/bees-roots.cc @@ -576,6 +576,14 @@ BeesRoots::open_root_ino_nocache(uint64_t root, uint64_t ino) break; } + int attr = ioctl_iflags_get(rv); + if (attr & FS_NOCOW_FL) { + BEESLOG("Opening " << name_fd(root_fd) << "/" << file_path << " found incompatible flags " << attr << " (FS_NOCOW_FL)"); + rv = Fd(); + BEESCOUNT(open_wrong_flags); + break; + } + // Correct root? auto file_root = btrfs_get_root_id(rv); if (file_root != root) { From 8c9a44998dfd99325dfb3e02728712da76bac47d Mon Sep 17 00:00:00 2001 From: Coenraad Loubser Date: Sat, 16 Sep 2017 09:40:52 +0200 Subject: [PATCH 16/37] Verbatim Ubuntu build instructions And link to work done so far on 14.04... (Doesn't work yet) --- README.md | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 2c4ead6..0591504 100644 --- a/README.md +++ b/README.md @@ -296,9 +296,18 @@ Not really a bug, but a gotcha nonetheless: children* until the FD is closed. Bees avoids this gotcha by closing all of the FDs in its directory FD cache every 15 minutes. +Build +----- +Build with `make`. The build produces `bin/bees` and `lib/libcrucible.so`, which must be copied to somewhere in `$PATH` and `$LD_LIBRARY_PATH` on the target system respectively. -Requirements +### Ubuntu 16.04 - 17.04: +`$ apt -y install build-essential btrfs-tools uuid-dev markdown && make` + +### Ubuntu 14.04: +You can try to carry on the work done here: https://gist.github.com/dagelf/99ee07f5638b346adb8c058ab3d57492 + +Dependencies ------------ * C++11 compiler (tested with GCC 4.9 and 6.2.0) @@ -320,15 +329,9 @@ Requirements Don't bother trying to make Bees work with older kernels. It won't end well. + +* markdown -Build ------ - -Build with `make`. - -The build produces `bin/bees` and `lib/libcrucible.so`, which must be -copied to somewhere in `$PATH` and `$LD_LIBRARY_PATH` on the target -system respectively. Setup ----- From 5f18fcda5282ec878a5a45cca9912356cd0fb464 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Sat, 16 Sep 2017 14:29:42 -0400 Subject: [PATCH 17/37] crucible: add ioctl_iflags_set to complement ioctl_iflags_get Signed-off-by: Zygo Blaxell --- include/crucible/fd.h | 1 + lib/fd.cc | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/include/crucible/fd.h b/include/crucible/fd.h index 9818819..f905812 100644 --- a/include/crucible/fd.h +++ b/include/crucible/fd.h @@ -146,6 +146,7 @@ namespace crucible { }; int ioctl_iflags_get(int fd); + void ioctl_iflags_set(int fd, int attr); string st_mode_ntoa(mode_t mode); diff --git a/lib/fd.cc b/lib/fd.cc index 931167f..be6177b 100644 --- a/lib/fd.cc +++ b/lib/fd.cc @@ -496,6 +496,12 @@ namespace crucible { return attr; } + void + ioctl_iflags_set(int fd, int attr) + { + DIE_IF_MINUS_ONE(ioctl(fd, FS_IOC_SETFLAGS, &attr)); + } + string readlink_or_die(const string &path) { From 702a8eec8cb84345336274f50e1b73a451b6295f Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Sat, 16 Sep 2017 14:31:43 -0400 Subject: [PATCH 18/37] bees: use ioctl_iflags_get and ioctl_iflags_set instead of opencoded versions Signed-off-by: Zygo Blaxell --- src/bees.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/bees.cc b/src/bees.cc index ffddaba..df39866 100644 --- a/src/bees.cc +++ b/src/bees.cc @@ -432,12 +432,11 @@ BeesTempFile::create() m_ctx->insert_root_ino(m_fd); // Set compression attribute - int flags = 0; - BEESTRACE("Getting FS_COMPR_FL on m_fd " << name_fd(m_fd) << " flags " << to_hex(flags)); - DIE_IF_MINUS_ONE(ioctl(m_fd, FS_IOC_GETFLAGS, &flags)); + BEESTRACE("Getting FS_COMPR_FL on m_fd " << name_fd(m_fd)); + int flags = ioctl_iflags_get(m_fd); flags |= FS_COMPR_FL; BEESTRACE("Setting FS_COMPR_FL on m_fd " << name_fd(m_fd) << " flags " << to_hex(flags)); - DIE_IF_MINUS_ONE(ioctl(m_fd, FS_IOC_SETFLAGS, &flags)); + ioctl_iflags_set(m_fd, flags); // Always leave first block empty to avoid creating a file with an inline extent m_end_offset = BLOCK_SIZE_CLONE; From 339579096f9d2264779bee6953797418e4faa456 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Sat, 16 Sep 2017 14:36:55 -0400 Subject: [PATCH 19/37] roots: move flags check after file identity checks and make error message style consistent If we lose a race and open the wrong file, we will not retry with the next path if the file we opened had incompatible flags. We need to keep trying paths until we open the correct file or run out of paths. Fix by moving the inode flag check after the checks for file identity. Output attributes in hex to be consistent with other attribute error messages. There is no need to report root and file paths separately in the error message for incompatible flags because we have confirmed the identity of the file before the incompatible flag error is detected. Other messages in this loop still output root path and file_path separately because the identity of 'rv' is unknown at the time these messages are emitted. Signed-off-by: Zygo Blaxell --- src/bees-roots.cc | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/src/bees-roots.cc b/src/bees-roots.cc index e99dc66..b861129 100644 --- a/src/bees-roots.cc +++ b/src/bees-roots.cc @@ -576,14 +576,6 @@ BeesRoots::open_root_ino_nocache(uint64_t root, uint64_t ino) break; } - int attr = ioctl_iflags_get(rv); - if (attr & FS_NOCOW_FL) { - BEESLOG("Opening " << name_fd(root_fd) << "/" << file_path << " found incompatible flags " << attr << " (FS_NOCOW_FL)"); - rv = Fd(); - BEESCOUNT(open_wrong_flags); - break; - } - // Correct root? auto file_root = btrfs_get_root_id(rv); if (file_root != root) { @@ -602,6 +594,27 @@ BeesRoots::open_root_ino_nocache(uint64_t root, uint64_t ino) break; } + // As of 4.12 the kernel rejects dedup requests with + // src and dst that have different datasum flags. + // + // We can't detect those from userspace reliably, but + // we can detect the common case where one file is + // marked with the nodatasum (which implies nodatacow) + // on a filesystem that is mounted with datacow. + // These are arguably out of scope for dedup. + // + // To fix this properly, we have to keep track of which + // pairs of inodes failed to dedup, guess that the reason + // for failure was a mismatch of datasum flags, and + // create temporary files with the right flags somehow. + int attr = ioctl_iflags_get(rv); + if (attr & FS_NOCOW_FL) { + BEESLOG("Opening " << name_fd(rv) << " found FS_NOCOW_FL flag in " << to_hex(attr)); + rv = Fd(); + BEESCOUNT(open_wrong_flags); + break; + } + BEESTRACE("mapped " << BeesFileId(root, ino)); BEESTRACE("\tto " << name_fd(rv)); BEESCOUNT(open_hit); From 18ae15658eacdc4f9eb7e27fd3f38b68e8370d71 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Sat, 16 Sep 2017 14:57:24 -0400 Subject: [PATCH 20/37] README: remove stray whitespace No content changes. Signed-off-by: Zygo Blaxell --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 0591504..692c83e 100644 --- a/README.md +++ b/README.md @@ -299,13 +299,15 @@ Not really a bug, but a gotcha nonetheless: Build ----- -Build with `make`. The build produces `bin/bees` and `lib/libcrucible.so`, which must be copied to somewhere in `$PATH` and `$LD_LIBRARY_PATH` on the target system respectively. +Build with `make`. The build produces `bin/bees` and `lib/libcrucible.so`, +which must be copied to somewhere in `$PATH` and `$LD_LIBRARY_PATH` +on the target system respectively. ### Ubuntu 16.04 - 17.04: `$ apt -y install build-essential btrfs-tools uuid-dev markdown && make` ### Ubuntu 14.04: -You can try to carry on the work done here: https://gist.github.com/dagelf/99ee07f5638b346adb8c058ab3d57492 +You can try to carry on the work done here: https://gist.github.com/dagelf/99ee07f5638b346adb8c058ab3d57492 Dependencies ------------ @@ -329,7 +331,7 @@ Dependencies Don't bother trying to make Bees work with older kernels. It won't end well. - + * markdown From ceda8ee6c3939389409c641dc512264f9687178c Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Sat, 16 Sep 2017 16:30:02 -0400 Subject: [PATCH 21/37] Makefile: add test to PHONY list Signed-off-by: Zygo Blaxell --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4cc6576..5c9d511 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ default all: lib src test README.html clean: ## Cleanup git clean -dfx -.PHONY: lib src +.PHONY: lib src test lib: ## Build libs $(MAKE) -C lib From f6a6992ac9b5b67a40af0bd847bf1ba825113305 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Sat, 16 Sep 2017 16:30:50 -0400 Subject: [PATCH 22/37] README: update list of currently known kernel bugs Signed-off-by: Zygo Blaxell --- README.md | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 692c83e..e4f357d 100644 --- a/README.md +++ b/README.md @@ -232,12 +232,16 @@ Bug fixes (sometimes included in older LTS kernels): * 4.5: hang in the `INO_PATHS` ioctl used by Bees. * 4.5: use-after-free in the `FILE_EXTENT_SAME` ioctl used by Bees. +* 4.6: lost inodes after a rename, crash, and log tree replay + (triggered by the fsync() while writing `beescrawl.dat`). * 4.7: *slow backref* bug no longer triggers a softlockup panic. It still too long to resolve a block address to a root/inode/offset triple. -* 4.10-rc1: reduced CPU time cost of the LOGICAL_INO ioctl and dedup +* 4.10: reduced CPU time cost of the LOGICAL_INO ioctl and dedup backref processing in general. +* 4.13 integration trees: 053582a7d423 btrfs: add cond_resched() calls + when resolving backrefs -Unfixed kernel bugs (as of 4.5.7) with workarounds in Bees: +Unfixed kernel bugs (as of 4.11.9) with workarounds in Bees: * *slow backrefs* (aka toxic extents): If the number of references to a single shared extent within a single file grows above a few thousand, @@ -258,6 +262,19 @@ Unfixed kernel bugs (as of 4.5.7) with workarounds in Bees: blocks or filesystems with many snapshots (although this limit is far greater than the effective limit imposed by the *slow backref* bug). +* `LOGICAL_INO` on compressed extents returns a list of root/inode/offset + tuples matching the extent bytenr of its argument. On uncompressed + extents, any r/i/o tuple whose extent offset does not match the + argument's extent offset is discarded, i.e. only the single 4K block + matching the argument is returned, so a complete map of the extent + references requires calling `LOGICAL_INO` for every single block of + the extent. This is undesirable behavior for Bees, which wants a + list of all extent refs referencing a data extent (i.e. Bees wants + the compressed-extent behavior in all cases). + +* `LOGICAL_INO` is only called from one thread at any time per process. + This means at most one core is irretrievably stuck in this ioctl. + * `FILE_EXTENT_SAME` is arbitrarily limited to 16MB. This is less than 128MB which is the maximum extent size that can be created by defrag or prealloc. Bees avoids feedback loops this can generate while @@ -270,22 +287,9 @@ Unfixed kernel bugs (as of 4.5.7) with workarounds in Bees: to a temporary file and using the `FILE_EXTENT_SAME` ioctl to replace precisely the specified range of offending fragmented blocks. -* When writing BeesStringFile, a crash can cause the directory entry - `beescrawl.dat.tmp` to exist without a corresponding inode. - This directory entry cannot be renamed or removed; however, it does - not prevent the creation of a second directory entry with the same - name that functions normally, so it doesn't prevent Bees operation. - - The orphan directory entry can be removed by deleting its subvol, - so place BEESHOME on a separate subvol so you can delete these orphan - directory entries when they occur (or use btrfs zero-log before mounting - the filesystem after a crash). Alternatively, place BEESHOME on a - non-btrfs filesystem. - * If the `fsync()` in `BeesTempFile::make_copy` is removed, the filesystem hangs within a few hours, requiring a reboot to recover. On the other - hand, there may be net performance benefits to calling `fsync()` before - or after each dedup. This needs further investigation. + hand, the `fsync()` only costs about 8% of overall performance. Not really a bug, but a gotcha nonetheless: @@ -296,6 +300,12 @@ Not really a bug, but a gotcha nonetheless: children* until the FD is closed. Bees avoids this gotcha by closing all of the FDs in its directory FD cache every 15 minutes. +* If a file is deleted while Bees is caching an open FD to the file, + Bees continues to scan the file. For very large files (e.g. VM + images), the deletion of the file can be delayed indefinitely. + To limit this delay, Bees closes all FDs in its file FD cache every + 15 minutes. + Build ----- From 5cc5a44661c775148153aed610f3c0ae5f3ae1ec Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Sat, 16 Sep 2017 16:35:42 -0400 Subject: [PATCH 23/37] bees: drop unused BeesWorkQueue classes Signed-off-by: Zygo Blaxell --- src/bees-context.cc | 106 -------------------------------------------- src/bees.h | 36 --------------- 2 files changed, 142 deletions(-) diff --git a/src/bees-context.cc b/src/bees-context.cc index 8ff0cc3..0a63101 100644 --- a/src/bees-context.cc +++ b/src/bees-context.cc @@ -72,97 +72,6 @@ BeesFdCache::insert_root_ino(shared_ptr ctx, Fd fd) return m_file_cache.insert(fd, ctx, fid.root(), fid.ino()); } -mutex BeesWorkQueueBase::s_mutex; -set BeesWorkQueueBase::s_all_workers; - -BeesWorkQueueBase::BeesWorkQueueBase(const string &name) : - m_name(name) -{ -} - -BeesWorkQueueBase::~BeesWorkQueueBase() -{ - unique_lock lock(s_mutex); - s_all_workers.erase(this); -} - -void -BeesWorkQueueBase::for_each_work_queue(std::function f) -{ - unique_lock lock(s_mutex); - for (auto i : s_all_workers) { - f(i); - } -} - -string -BeesWorkQueueBase::name() const -{ - return m_name; -} - -void -BeesWorkQueueBase::name(const string &new_name) -{ - m_name = new_name; -} - -template -BeesWorkQueue::~BeesWorkQueue() -{ -} - -template -BeesWorkQueue::BeesWorkQueue(const string &name) : - BeesWorkQueueBase(name) -{ - unique_lock lock(s_mutex); - s_all_workers.insert(this); -} - -template -void -BeesWorkQueue::push_active(const Task &t) -{ - BEESNOTE("pushing task " << t); - m_active_queue.push(t); -} - -template -void -BeesWorkQueue::push_active(const Task &t, size_t limit) -{ - // BEESNOTE("pushing limit " << limit << " task " << t); - m_active_queue.push_wait(t, limit); -} - -template -size_t -BeesWorkQueue::active_size() const -{ - return m_active_queue.size(); -} - -template -list -BeesWorkQueue::peek_active(size_t count) const -{ - list rv; - for (auto i : m_active_queue.peek(count)) { - ostringstream oss; - oss << i; - rv.push_back(oss.str()); - } - return rv; -} - -template -Task -BeesWorkQueue::pop() -{ - return m_active_queue.pop(); -} - void BeesContext::dump_status() { @@ -189,12 +98,6 @@ BeesContext::dump_status() ofs << "\ttid " << t.first << ": " << t.second << "\n"; } - BeesWorkQueueBase::for_each_work_queue([&](BeesWorkQueueBase *worker) { - ofs << "QUEUE: " << worker->name() << " active: " << worker->active_size() << "\n"; - for (auto t : worker->peek_active(10)) { - ofs << "\t" << t << "\n"; - } - }); ofs.close(); BEESNOTE("renaming status file '" << status_file << "'"); @@ -230,10 +133,6 @@ BeesContext::show_progress() }; lastProgressStats = thisStats; - BeesWorkQueueBase::for_each_work_queue([&](BeesWorkQueueBase *worker) { - BEESLOG("QUEUE: " << worker->name() << " active: " << worker->active_size()); - }); - BEESLOG("THREADS:"); for (auto t : BeesNote::get_status()) { @@ -1031,8 +930,3 @@ BeesContext::insert_root_ino(Fd fd) { fd_cache()->insert_root_ino(shared_from_this(), fd); } - -// instantiate templates for linkage ---------------------------------------- - -template class BeesWorkQueue; -template class BeesWorkQueue; diff --git a/src/bees.h b/src/bees.h index c1b827e..5155bc8 100644 --- a/src/bees.h +++ b/src/bees.h @@ -597,42 +597,6 @@ public: friend ostream & operator<<(ostream &os, const BeesRangePair &brp); }; -class BeesWorkQueueBase { - string m_name; - -protected: - static mutex s_mutex; - static set s_all_workers; - -public: - virtual ~BeesWorkQueueBase(); - BeesWorkQueueBase(const string &name); - - string name() const; - void name(const string &new_name); - - virtual size_t active_size() const = 0; - virtual list peek_active(size_t count) const = 0; - - static void for_each_work_queue(function f); -}; - -template -class BeesWorkQueue : public BeesWorkQueueBase { - WorkQueue m_active_queue; - -public: - BeesWorkQueue(const string &name); - ~BeesWorkQueue(); - void push_active(const Task &task, size_t limit); - void push_active(const Task &task); - - size_t active_size() const override; - list peek_active(size_t count) const override; - - Task pop(); -}; - class BeesTempFile { shared_ptr m_ctx; Fd m_fd; From 732896b47127499d724beb66f95fa567a72a3216 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Sat, 16 Sep 2017 16:42:52 -0400 Subject: [PATCH 24/37] log: simplify output for dedup and scan With many threads it is inconvenient to reassemble the elided parts of the dedup src/dst and scan filenames output. Simply output them unconditionally, and balance the line lengths. Signed-off-by: Zygo Blaxell --- src/bees-context.cc | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/src/bees-context.cc b/src/bees-context.cc index 0a63101..e18711f 100644 --- a/src/bees-context.cc +++ b/src/bees-context.cc @@ -181,29 +181,16 @@ BeesContext::dedup(const BeesRangePair &brp) BEESTOOLONG("dedup " << brp); - thread_local BeesFileId tl_first_fid, tl_second_fid; - if (tl_first_fid != brp.first.fid()) { - BEESLOG("dedup: src " << name_fd(brp.first.fd())); - tl_first_fid = brp.first.fid(); - tl_second_fid = BeesFileId(); - } - ostringstream dst_line; - dst_line << " dst " << pretty(brp.first.size()) << " [" << to_hex(brp.first.begin()) << ".." << to_hex(brp.first.end()) << "]"; - if (brp.first.begin() != brp.second.begin()) { - dst_line << " [" << to_hex(brp.second.begin()) << ".." << to_hex(brp.second.end()) << "]"; - } BeesAddress first_addr(brp.first.fd(), brp.first.begin()); BeesAddress second_addr(brp.second.fd(), brp.second.begin()); - dst_line << " {" << first_addr << "->" << second_addr << "}"; + + BEESLOG("dedup: src " << pretty(brp.first.size()) << " [" << to_hex(brp.first.begin()) << ".." << to_hex(brp.first.end()) << "] {" << first_addr << "} " << name_fd(brp.first.fd())); + BEESLOG(" dst " << pretty(brp.second.size()) << " [" << to_hex(brp.second.begin()) << ".." << to_hex(brp.second.end()) << "] {" << second_addr << "} " << name_fd(brp.second.fd())); + if (first_addr.get_physical_or_zero() == second_addr.get_physical_or_zero()) { BEESLOGTRACE("equal physical addresses in dedup"); BEESCOUNT(bug_dedup_same_physical); } - if (tl_second_fid != brp.second.fid()) { - dst_line << " " << name_fd(brp.second.fd()); - tl_second_fid = brp.second.fid(); - } - BEESLOG(dst_line.str()); THROW_CHECK1(invalid_argument, brp, !brp.first.overlaps(brp.second)); THROW_CHECK1(invalid_argument, brp, brp.first.size() == brp.second.size()); @@ -687,13 +674,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e) // Visualize if (bar != string(block_count, '.')) { - thread_local BeesFileId last_fid; - string file_name; - if (bfr.fid() != last_fid) { - last_fid = bfr.fid(); - file_name = " " + name_fd(bfr.fd()); - } - BEESLOG("scan: " << pretty(e.size()) << " " << to_hex(e.begin()) << " [" << bar << "] " << to_hex(e.end()) << file_name); + BEESLOG("scan: " << pretty(e.size()) << " " << to_hex(e.begin()) << " [" << bar << "] " << to_hex(e.end()) << ' ' << name_fd(bfr.fd())); } return bfr; From a07728bc7e6793d3e1f06f5520c2de5f83efa122 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Sat, 16 Sep 2017 16:46:39 -0400 Subject: [PATCH 25/37] tmpfiles: note that kernel race condition is not yet fixed Signed-off-by: Zygo Blaxell --- src/bees.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/bees.cc b/src/bees.cc index df39866..d555899 100644 --- a/src/bees.cc +++ b/src/bees.cc @@ -563,7 +563,11 @@ BeesTempFile::make_copy(const BeesFileRange &src) // We seem to get lockups without this! if (did_block_write) { +#if 1 + // Is this fixed by "Btrfs: fix deadlock between dedup on same file and starting writeback"? + // No. bees_sync(m_fd); +#endif } BEESCOUNT(tmp_copy); From 52752493962753a87f492c82958b8708e8eb6da5 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Sat, 16 Sep 2017 16:47:35 -0400 Subject: [PATCH 26/37] roots: trace transid_max calculation transid_max calculations can take considerable time. Report their progress in more detail. Signed-off-by: Zygo Blaxell --- src/bees-roots.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bees-roots.cc b/src/bees-roots.cc index b861129..818c730 100644 --- a/src/bees-roots.cc +++ b/src/bees-roots.cc @@ -174,9 +174,9 @@ BeesRoots::transid_min() uint64_t BeesRoots::transid_max() { - BEESNOTE("Calculating transid_max"); uint64_t rv = 0; uint64_t root = 0; + BEESNOTE("Calculating transid_max (" << rv << " as of root " << root << ")"); BEESTRACE("Calculating transid_max..."); do { root = next_root(root); From 5afbcb99e36bf5a649842ba859af0e7d7a8577ea Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Sat, 16 Sep 2017 21:16:40 -0400 Subject: [PATCH 27/37] roots: drop open_root_nocache log entry After a few hundred subvol threads start running, the inode cache starts to thrash, and the log gets spammed with messages of the form: "open_root_nocache : " Ideally there would be some way to schedule work to minimize inode thrashing. Until that gets done, just silence the messages for now. Signed-off-by: Zygo Blaxell --- src/bees-roots.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/bees-roots.cc b/src/bees-roots.cc index 818c730..bc0c9ae 100644 --- a/src/bees-roots.cc +++ b/src/bees-roots.cc @@ -397,7 +397,6 @@ Fd BeesRoots::open_root_nocache(uint64_t rootid) { BEESTRACE("open_root_nocache " << rootid); - BEESNOTE("open_root_nocache " << rootid); // Stop recursion at the root of the filesystem tree if (rootid == BTRFS_FS_TREE_OBJECTID) { From 23749eb634c6bf15db77b41402af65f607775281 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Mon, 18 Sep 2017 01:29:39 +0200 Subject: [PATCH 28/37] Enable detect of markdown binary Some distributions do not provide markdown as "markdown". Let's figure out which version to use during build. --- Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4cc6576..aacb0c5 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,8 @@ PREFIX ?= / +MARKDOWN := $(shell which markdown markdown2 markdown_py 2>/dev/null) +MARKDOWN ?= markdown + default all: lib src test README.html clean: ## Cleanup @@ -19,7 +22,7 @@ test: lib src $(MAKE) -C test README.html: README.md - markdown README.md > README.html.new + $(MARKDOWN) README.md > README.html.new mv -f README.html.new README.html install: ## Install bees + libs From cceb0480a58d2d2fcebf4eba8166abd071ede34b Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Mon, 18 Sep 2017 01:44:07 +0200 Subject: [PATCH 29/37] Change README.md reflecting nodatacow inode attribute The previous patch changed behavior regarding nodatacow inode attribute. Let's document the new behavior. --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2c4ead6..813d6be 100644 --- a/README.md +++ b/README.md @@ -184,7 +184,8 @@ Bees has not been tested with the following, and undesirable interactions may oc * btrfs qgroups (never tested, no idea what might happen) * btrfs seed filesystems (does anyone even use those?) * btrfs autodefrag mount option (never tested, could fight with Bees) -* btrfs nodatacow mount option or inode attribute (*could* work, but might not) +* btrfs nodatacow inode attribute (needs datasum detection on extents, skipped for now) +* btrfs nodatacow mount option (*could* work, but might not, skipped due to above note) * btrfs out-of-tree kernel patches (e.g. in-band dedup or encryption) * btrfs-convert from ext2/3/4 (never tested) * btrfs mixed block groups (don't know a reason why it would *not* work, but never tested) From 04cb25bd04da1088f643a3183e76db6cd7b02735 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Tue, 19 Sep 2017 20:30:51 +0200 Subject: [PATCH 30/37] Move bees to libexec install dir When bees is meant to be run mainly through the service frontend script, we should move the bees binary to the libexec directory. --- Makefile | 2 +- scripts/beesd | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 35378fb..8160da2 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ README.html: README.md install: ## Install bees + libs install: lib src test install -Dm644 lib/libcrucible.so $(PREFIX)/usr/lib/libcrucible.so - install -Dm755 bin/bees $(PREFIX)/usr/bin/bees + install -Dm755 bin/bees $(PREFIX)/usr/libexec/bees install_scripts: ## Install scipts install -Dm755 scripts/beesd $(PREFIX)/usr/bin/beesd diff --git a/scripts/beesd b/scripts/beesd index cfe256c..cd52c9a 100755 --- a/scripts/beesd +++ b/scripts/beesd @@ -21,7 +21,7 @@ readonly CONFIG_DIR=/etc/bees/ [ "$UID" == "0" ] || ERRO "Must be runned as root" } -command -v bees &> /dev/null || ERRO "Missing 'bees' command" +command -v /usr/libexec/bees &> /dev/null || ERRO "Missing 'bees' agent" ## Parse args UUID="$1" @@ -123,6 +123,6 @@ filter_path(){ fi } -bees "$MNT_DIR" 3>&1 2>&1 | filter_time | filter_path +/usr/libexec/bees "$MNT_DIR" 3>&1 2>&1 | filter_time | filter_path exit 0 From 5622ebd4111d24465b36385703406cf26f96e9c4 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Tue, 19 Sep 2017 20:32:09 +0200 Subject: [PATCH 31/37] Bees is meant to be run as root only As bees is meant to be run as root only, move it to /usr/sbin which is usually not part of normal users path environment. --- Makefile | 2 +- scripts/beesd | 2 +- scripts/beesd@.service | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 8160da2..0d1ce79 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ install: lib src test install -Dm755 bin/bees $(PREFIX)/usr/libexec/bees install_scripts: ## Install scipts - install -Dm755 scripts/beesd $(PREFIX)/usr/bin/beesd + install -Dm755 scripts/beesd $(PREFIX)/usr/sbin/beesd install -Dm644 scripts/beesd.conf.sample $(PREFIX)/etc/bees/beesd.conf.sample install -Dm644 scripts/beesd@.service $(PREFIX)/lib/systemd/system/beesd@.service diff --git a/scripts/beesd b/scripts/beesd index cd52c9a..4c6e99e 100755 --- a/scripts/beesd +++ b/scripts/beesd @@ -18,7 +18,7 @@ readonly CONFIG_DIR=/etc/bees/ ## Pre checks { [ ! -d "$CONFIG_DIR" ] && ERRO "Missing: $CONFIG_DIR" - [ "$UID" == "0" ] || ERRO "Must be runned as root" + [ "$UID" == "0" ] || ERRO "Must be run as root" } command -v /usr/libexec/bees &> /dev/null || ERRO "Missing 'bees' agent" diff --git a/scripts/beesd@.service b/scripts/beesd@.service index da73864..537daca 100644 --- a/scripts/beesd@.service +++ b/scripts/beesd@.service @@ -3,7 +3,7 @@ Description=Bees - Best-Effort Extent-Same, a btrfs deduplicator daemon: %i After=local-fs.target [Service] -ExecStart=/usr/bin/beesd %i +ExecStart=/usr/sbin/beesd %i Nice=19 IOSchedulingClass=idle CPUAccounting=true From 3bf4e69c4d3ae84ca2e861f78dd5f6e06c314b6e Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Tue, 19 Sep 2017 20:33:03 +0200 Subject: [PATCH 32/37] Make config example more clear A pre-defined UUID should not be part of the sample config file. Instead, make it more clear how the config file is intended to be used. --- scripts/beesd.conf.sample | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/beesd.conf.sample b/scripts/beesd.conf.sample index 7245c1b..53060fa 100644 --- a/scripts/beesd.conf.sample +++ b/scripts/beesd.conf.sample @@ -2,8 +2,11 @@ ## https://github.com/Zygo/bees ## It's a default values, change it, if needed +# How to use? +# Copy this file to a new file name and adjust the UUID below + # Which FS will be used -UUID=5d3c0ad5-bedf-463d-8235-b4d4f6f99476 +UUID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx ## System Vars # Change carefully From f59e311809f843323a9fc61f8d849176ec138f66 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Tue, 19 Sep 2017 20:34:00 +0200 Subject: [PATCH 33/37] Explicitly mark systemd unit as Type=simple Bees does not fork, so let's not rely on systemd defaults. --- scripts/beesd@.service | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/beesd@.service b/scripts/beesd@.service index 537daca..920295e 100644 --- a/scripts/beesd@.service +++ b/scripts/beesd@.service @@ -3,6 +3,7 @@ Description=Bees - Best-Effort Extent-Same, a btrfs deduplicator daemon: %i After=local-fs.target [Service] +Type=simple ExecStart=/usr/sbin/beesd %i Nice=19 IOSchedulingClass=idle From 62626aef7f7e842b56631fd9ab1d78baff8f6a35 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Tue, 19 Sep 2017 20:36:13 +0200 Subject: [PATCH 34/37] Adjust service restart and shutdown behavior Explicitly set control-group kill mode, that is: try SIGTERM first, and use SIGKILL after a timeout. This exactly defines how bees is running as a child process within the frontend service starter. Not sure if bees cares about signals but SIGTERM first seems cleaner. On the way, let bees restart on abnormal termination. --- scripts/beesd@.service | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/beesd@.service b/scripts/beesd@.service index 920295e..54fc187 100644 --- a/scripts/beesd@.service +++ b/scripts/beesd@.service @@ -6,7 +6,10 @@ After=local-fs.target Type=simple ExecStart=/usr/sbin/beesd %i Nice=19 +KillMode=control-group +KillSignal=SIGTERM IOSchedulingClass=idle +Restart=on-abnormal CPUAccounting=true MemoryAccounting=true # CPUQuota=95% From 045582798911d959c819f3d3a294d8604426b0b7 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Tue, 19 Sep 2017 20:42:07 +0200 Subject: [PATCH 35/37] Adjust CPU and IO shares when running under systemd Let's remove the CPUQuota example and instead give bees a share of what's available. 128 CPU shares will give it about 12% max CPU under load, give it a slight boost during startup to allow reading the hash table faster. 100 block shares will give it about 10% max disk bandwidht under load, give it a slight boost during startup to allow reading the hash table faster. Then let's adjust the CPU and IO scheduler to prefer other processes. This way bees runs completely in the background, barely noticable during, e.g., gaming. --- scripts/beesd@.service | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/beesd@.service b/scripts/beesd@.service index 54fc187..55df99c 100644 --- a/scripts/beesd@.service +++ b/scripts/beesd@.service @@ -8,11 +8,17 @@ ExecStart=/usr/sbin/beesd %i Nice=19 KillMode=control-group KillSignal=SIGTERM +CPUShares=128 +StartupCPUShares=256 +BlockIOWeight=100 +StartupBlockIOWeight=250 IOSchedulingClass=idle +IOSchedulingPriority=7 +CPUSchedulingPolicy=batch +Nice=19 Restart=on-abnormal CPUAccounting=true MemoryAccounting=true -# CPUQuota=95% [Install] WantedBy=local-fs.target From 893595190fdfb50e32ce48f77e59514ab9f15715 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Wed, 20 Sep 2017 21:00:54 +0200 Subject: [PATCH 36/37] Allow custom libexec location To install for different distributions, LIBEXEC_PREFIX can now be set. It defaults to $(PREFIX)/usr/lib/bees as used in most common distributions. Local overrides are possible by setting variables in a "localconf" file which will be included by the Makefile if it exists. For some distributions you may want to set it to /usr/libexec or /usr/libexec/bees. --- .gitignore | 1 + Makefile | 10 +++++++++- scripts/{beesd => beesd.in} | 7 +++---- 3 files changed, 13 insertions(+), 5 deletions(-) rename scripts/{beesd => beesd.in} (94%) diff --git a/.gitignore b/.gitignore index 7a91af7..d8ca283 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ html/ latex/ make.log make.log.new +localconf diff --git a/Makefile b/Makefile index 0d1ce79..3d75cd7 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,12 @@ PREFIX ?= / +LIBEXEC_PREFIX ?= $(PREFIX)/usr/lib MARKDOWN := $(shell which markdown markdown2 markdown_py 2>/dev/null) MARKDOWN ?= markdown +# allow local configuration to override above variables +-include localconf + default all: lib src test README.html clean: ## Cleanup @@ -21,6 +25,9 @@ test: ## Run tests test: lib src $(MAKE) -C test +scripts/beesd: scripts/beesd.in + sed -e's#@LIBEXEC_PREFIX@#$(LIBEXEC_PREFIX)#' -e's#@PREFIX@#$(PREFIX)#' "$<" >"$@" + README.html: README.md $(MARKDOWN) README.md > README.html.new mv -f README.html.new README.html @@ -28,9 +35,10 @@ README.html: README.md install: ## Install bees + libs install: lib src test install -Dm644 lib/libcrucible.so $(PREFIX)/usr/lib/libcrucible.so - install -Dm755 bin/bees $(PREFIX)/usr/libexec/bees + install -Dm755 bin/bees $(LIBEXEC_PREFIX)/bees/bees install_scripts: ## Install scipts +install_scripts: scripts/beesd install -Dm755 scripts/beesd $(PREFIX)/usr/sbin/beesd install -Dm644 scripts/beesd.conf.sample $(PREFIX)/etc/bees/beesd.conf.sample install -Dm644 scripts/beesd@.service $(PREFIX)/lib/systemd/system/beesd@.service diff --git a/scripts/beesd b/scripts/beesd.in similarity index 94% rename from scripts/beesd rename to scripts/beesd.in index 4c6e99e..f99ea09 100755 --- a/scripts/beesd +++ b/scripts/beesd.in @@ -1,5 +1,4 @@ #!/bin/bash -# /usr/bin/beesd ## Helpful functions INFO(){ echo "INFO:" "$@"; } @@ -13,7 +12,7 @@ export CONFIG_FILE export UUID AL16M readonly AL16M="$((16*1024*1024))" -readonly CONFIG_DIR=/etc/bees/ +readonly CONFIG_DIR=@PREFIX@/etc/bees/ ## Pre checks { @@ -21,7 +20,7 @@ readonly CONFIG_DIR=/etc/bees/ [ "$UID" == "0" ] || ERRO "Must be run as root" } -command -v /usr/libexec/bees &> /dev/null || ERRO "Missing 'bees' agent" +command -v @LIBEXEC_PREFIX@/bees &> /dev/null || ERRO "Missing 'bees' agent" ## Parse args UUID="$1" @@ -123,6 +122,6 @@ filter_path(){ fi } -/usr/libexec/bees "$MNT_DIR" 3>&1 2>&1 | filter_time | filter_path +@LIBEXEC_PREFIX@/bees "$MNT_DIR" 3>&1 2>&1 | filter_time | filter_path exit 0 From 29d2d51c47fbf62e2f0383266c97350c6e19bf2e Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Wed, 20 Sep 2017 21:50:58 +0200 Subject: [PATCH 37/37] Fix libexec prefix discrepancy Whoops... --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 3d75cd7..11567ec 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PREFIX ?= / -LIBEXEC_PREFIX ?= $(PREFIX)/usr/lib +LIBEXEC_PREFIX ?= $(PREFIX)/usr/lib/bees MARKDOWN := $(shell which markdown markdown2 markdown_py 2>/dev/null) MARKDOWN ?= markdown @@ -35,7 +35,7 @@ README.html: README.md install: ## Install bees + libs install: lib src test install -Dm644 lib/libcrucible.so $(PREFIX)/usr/lib/libcrucible.so - install -Dm755 bin/bees $(LIBEXEC_PREFIX)/bees/bees + install -Dm755 bin/bees $(LIBEXEC_PREFIX)/bees install_scripts: ## Install scipts install_scripts: scripts/beesd