From 8a70bca0118c2aeba4124238488db78d11082b7c Mon Sep 17 00:00:00 2001
From: Zygo Blaxell
Date: Fri, 28 May 2021 02:07:47 -0400
Subject: [PATCH] bees: misc comment updates

These have been accumulating in unpublished bees commits.  Squash them
all into one.

Signed-off-by: Zygo Blaxell
---
 src/bees-context.cc |  3 ++-
 src/bees-resolve.cc |  2 ++
 src/bees-roots.cc   | 23 ++++++++++++-----------
 src/bees.h          |  3 +--
 4 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/src/bees-context.cc b/src/bees-context.cc
index 992c61d..4df4a78 100644
--- a/src/bees-context.cc
+++ b/src/bees-context.cc
@@ -891,9 +891,10 @@ BeesContext::resolve_addr_uncached(BeesAddress addr)
 
 	auto rt_age = resolve_timer.age();
 
-	// Avoid performance bug
 	BeesResolveAddrResult rv;
 	rv.m_biors = log_ino.m_iors;
+
+	// Avoid performance bug
 	if (sys_usage_delta < BEES_TOXIC_SYS_DURATION) {
 		rv.m_is_toxic = false;
 	} else {
diff --git a/src/bees-resolve.cc b/src/bees-resolve.cc
index ae80c2c..99af113 100644
--- a/src/bees-resolve.cc
+++ b/src/bees-resolve.cc
@@ -229,6 +229,8 @@ BeesResolver::chase_extent_ref(const BtrfsInodeOffsetRoot &bior, BeesBlockData &
 	// Search near the resolved address for a matching data block.
 	// ...even if it's not compressed, we should do this sanity
 	// check before considering the block as a duplicate candidate.
+	// FIXME: this is mostly obsolete now and we shouldn't do it here.
+	// Don't bother fixing it because it will all go away with (extent, offset) reads.
 	auto new_bbd = adjust_offset(haystack_bbd, needle_bbd);
 	if (new_bbd.empty()) {
 		// matching offset search failed
diff --git a/src/bees-roots.cc b/src/bees-roots.cc
index 0fb4a79..3a976ac 100644
--- a/src/bees-roots.cc
+++ b/src/bees-roots.cc
@@ -1004,10 +1004,7 @@ BeesCrawl::fetch_extents()
 	sk.min_type = sk.max_type = BTRFS_EXTENT_DATA_KEY;
 	sk.min_offset = old_state.m_offset;
 	sk.min_transid = old_state.m_min_transid;
-	// Don't set max_transid here.  We want to see old extents with
-	// new references, and max_transid filtering in the kernel locks
-	// the filesystem while slowing us down.
-	// sk.max_transid = old_state.m_max_transid;
+	// Don't set max_transid to m_max_transid here.  See below.
 	sk.max_transid = numeric_limits<uint64_t>::max();
 	sk.nr_items = BEES_MAX_CRAWL_ITEMS;
 
@@ -1077,7 +1074,6 @@ BeesCrawl::fetch_extents()
 		if (gen < get_state_end().m_min_transid) {
 			BEESCOUNT(crawl_gen_low);
 			++count_low;
-			// We want (need?) to scan these anyway?
 			// The header generation refers to the transid
 			// of the metadata page holding the current ref.
 			// This includes anything else in that page that
@@ -1085,17 +1081,22 @@ BeesCrawl::fetch_extents()
 			// old it is.
 			// The file_extent_generation refers to the
 			// transid of the extent item's page, which is
-			// a different approximation of what we want.
-			// Combine both of these filters to minimize
-			// the number of times we unnecessarily re-read
-			// an extent.
+			// what we really want when we are slicing up
+			// the extent data by transid.
 			continue;
 		}
 		if (gen > get_state_end().m_max_transid) {
 			BEESCOUNT(crawl_gen_high);
 			++count_high;
-			// We have to filter these here because we can't
-			// do it in the kernel.
+			// We want to see old extents with references in
+			// new pages, which means we have to get extent
+			// refs from every page older than min_transid,
+			// not every page between min_transid and
+			// max_transid.  This means that we will get
+			// refs to new extent data that we don't want to
+			// process yet, because we'll process it again
+			// on the next crawl cycle.  We filter out refs
+			// to new extents here.
 			continue;
 		}
 
diff --git a/src/bees.h b/src/bees.h
index f993131..4059837 100644
--- a/src/bees.h
+++ b/src/bees.h
@@ -50,7 +50,7 @@ const off_t BLOCK_SIZE_MAX_EXTENT = 128 * 1024 * 1024;
 const off_t BLOCK_MASK_CLONE = BLOCK_SIZE_CLONE - 1;
 const off_t BLOCK_MASK_SUMS = BLOCK_SIZE_SUMS - 1;
 
-// Maximum temporary file size
+// Maximum temporary file size (maximum extent size for temporary copy)
const off_t BLOCK_SIZE_MAX_TEMP_FILE = 1024 * 1024 * 1024;
 
 // Bucket size for hash table (size of one hash bucket)
@@ -330,7 +330,6 @@ public:
 
 	// Blocks with no physical address (not yet allocated, hole, or "other").
 	// PREALLOC blocks have a physical address so they're not magic enough to be handled here.
-	// Compressed blocks have a physical address but it's two-dimensional.
 	enum MagicValue {
 		ZERO,		// BeesAddress uninitialized
 		DELALLOC,	// delayed allocation
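
Note, not part of the patch: the updated bees-roots.cc comments describe how the crawler keeps the kernel search unbounded (max_transid = UINT64_MAX) and then windows extent refs by transid in userspace, skipping refs whose extent-item generation is below min_transid (already scanned) or above max_transid (left for the next crawl cycle).  The following is a minimal standalone C++ sketch of that filtering idea; ExtentRef, filter_refs_by_transid, and the field names are hypothetical illustrations, not bees APIs.

	// Illustrative sketch only: transid windowing of extent refs in userspace.
	#include <cstdint>
	#include <vector>

	struct ExtentRef {
		uint64_t file_extent_generation;	// transid of the page holding the extent item
		uint64_t bytenr;			// extent data address (unused here)
	};

	// Keep only refs whose extent-item generation falls inside the crawler's
	// current transid window [min_transid, max_transid].  The kernel search
	// runs with max_transid = UINT64_MAX, so the upper bound is applied here.
	static std::vector<ExtentRef>
	filter_refs_by_transid(const std::vector<ExtentRef> &refs,
			       uint64_t min_transid, uint64_t max_transid)
	{
		std::vector<ExtentRef> rv;
		for (const auto &ref : refs) {
			const auto gen = ref.file_extent_generation;
			if (gen < min_transid) {
				// already scanned in an earlier crawl cycle
				continue;
			}
			if (gen > max_transid) {
				// too new; the next crawl cycle will pick it up
				continue;
			}
			rv.push_back(ref);
		}
		return rv;
	}

In bees itself the equivalent checks are done inline in BeesCrawl::fetch_extents(), where skipped refs are counted with the crawl_gen_low and crawl_gen_high counters.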