From ded26ff044ab1ba88a0469a35fbc135107f90254 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Wed, 24 Jan 2018 23:32:33 -0500 Subject: [PATCH] FdCache: clear cache on every new transid / crawl cycle The periodic cache age check was not protected by a lock, so multiple threads may decide to concurrently clear the cache. This led to duplicate log messages. Fix by moving the cache expiry trigger out of FdCache and into Roots, which knows when transids change and can perform cache clears at exactly the time they are most relevant, i.e. after something that was deleted becomes permanently so. This removes the last references to BEES_COMMIT_INTERVAL, so get rid of its definition too. Signed-off-by: Zygo Blaxell --- src/bees-context.cc | 26 +++++++++++--------------- src/bees-roots.cc | 11 +++++++++++ src/bees.h | 10 ++++------ 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/src/bees-context.cc b/src/bees-context.cc index a9722fe..153d7b5 100644 --- a/src/bees-context.cc +++ b/src/bees-context.cc @@ -40,30 +40,26 @@ BeesFdCache::BeesFdCache() m_file_cache.max_size(BEES_FILE_FD_CACHE_SIZE); } +void +BeesFdCache::clear() +{ + BEESNOTE("Clearing root FD cache to enable subvol delete"); + m_root_cache.clear(); + BEESCOUNT(root_clear); + BEESNOTE("Clearing open FD cache to enable file delete"); + m_file_cache.clear(); + BEESCOUNT(open_clear); +} + Fd BeesFdCache::open_root(shared_ptr ctx, uint64_t root) { - // Don't hold root FDs open too long. - // The open FDs prevent snapshots from being deleted. - // cleaner_kthread just keeps skipping over the open dir and all its children. - if (m_root_cache_timer.age() > BEES_COMMIT_INTERVAL) { - BEESLOGINFO("Clearing root FD cache to enable subvol delete"); - m_root_cache.clear(); - m_root_cache_timer.reset(); - BEESCOUNT(root_clear); - } return m_root_cache(ctx, root); } Fd BeesFdCache::open_root_ino(shared_ptr ctx, uint64_t root, uint64_t ino) { - if (m_file_cache_timer.age() > BEES_COMMIT_INTERVAL) { - BEESLOGINFO("Clearing open FD cache to enable file delete"); - m_file_cache.clear(); - m_file_cache_timer.reset(); - BEESCOUNT(open_clear); - } return m_file_cache(ctx, root, ino); } diff --git a/src/bees-roots.cc b/src/bees-roots.cc index 863378d..0b4c7d7 100644 --- a/src/bees-roots.cc +++ b/src/bees-roots.cc @@ -335,11 +335,22 @@ BeesRoots::crawl_thread() // Monitor transid_max and wake up roots when it changes BEESNOTE("tracking transids"); + auto last_count = m_transid_re.count(); while (true) { // Make sure we have a full complement of crawlers // Calls transid_max() which updates m_transid_re insert_new_crawl(); + // Don't hold root FDs open too long. + // The open FDs prevent snapshots from being deleted. + // cleaner_kthread just keeps skipping over the open dir and all its children. + // Even open files are a problem if they're big enough. + auto new_count = m_transid_re.count(); + if (new_count != last_count) { + m_ctx->fd_cache()->clear(); + } + last_count = new_count; + BEESNOTE("waiting for next transid " << m_transid_re); // We don't use wait_for here because somebody needs to // be updating m_transid_re from time to time. diff --git a/src/bees.h b/src/bees.h index 38d51c4..1b416e8 100644 --- a/src/bees.h +++ b/src/bees.h @@ -59,11 +59,8 @@ const off_t BLOCK_SIZE_HASHTAB_EXTENT = 16 * 1024 * 1024; // Bytes per second we want to flush (8GB every two hours) const double BEES_FLUSH_RATE = 8.0 * 1024 * 1024 * 1024 / 7200.0; -// How long we should wait for new btrfs transactions -const double BEES_COMMIT_INTERVAL = 900; - -// Interval between writing non-hash-table things to disk, and starting new subvol crawlers -const int BEES_WRITEBACK_INTERVAL = BEES_COMMIT_INTERVAL; +// Interval between writing crawl state to disk +const int BEES_WRITEBACK_INTERVAL = 900; // Statistics reports while scanning const int BEES_STATS_INTERVAL = 3600; @@ -92,7 +89,7 @@ const double BEES_TOO_LONG = 5.0; // Avoid any extent where LOGICAL_INO takes this long // const double BEES_TOXIC_DURATION = 9.9; // EXPERIMENT: Kernel v4.14+ may let us ignore toxicity -const double BEES_TOXIC_DURATION = BEES_COMMIT_INTERVAL; +const double BEES_TOXIC_DURATION = 99.9; // How long between hash table histograms const double BEES_HASH_TABLE_ANALYZE_INTERVAL = BEES_STATS_INTERVAL; @@ -657,6 +654,7 @@ public: Fd open_root(shared_ptr ctx, uint64_t root); Fd open_root_ino(shared_ptr ctx, uint64_t root, uint64_t ino); void insert_root_ino(shared_ptr ctx, Fd fd); + void clear(); }; struct BeesResolveAddrResult {