1
0
mirror of https://github.com/Zygo/bees.git synced 2025-05-18 05:45:45 +02:00

scan_one_extent: reduce the number of LOGICAL_INO calls before finding a duplicate block range

When we have multiple possible matches for a block, we proceed in three
phases:

1.  retrieve each match's extent refs and put them in a list,
2.  iterate over the list converting viable block matches into range matches,
3.  sort and flatten the list of range matches into a non-overlapping
list of ranges that cover all duplicate blocks exactly once.

The separation of phase 1 and 2 creates a performance issue when there
are many block matches in phase 1, and all the range matches in phase
2 are the same length.  Even if we quickly find the longest possible
matching range early in phase 2, we still first extract all of the
extent refs from every possible matching block in phase 1, even though
most of those refs will never be used.

Fix this by moving the extent ref retrieval in phase 1 into a single
loop in phase 2, and stop looping over matching blocks as soon as any
dedupe range is created.  This avoids iterating over a large list of
blocks with expensive `LOGICAL_INO` ioctls in an attempt to improve the
match when there is no hope of improvement, e.g. when all match ranges
are 4K and the content is extremely prevalent in the data.

If we find a matched block that is part of a short matching range,
we can replace it with a block that is part of a long matching range,
because there is a good chance we will find a matching hash block in
the long range by looking up hashes after the end of the short range.
In that case, overlapping dedupe ranges covering both blocks in the
target extent will be inserted into the dedupe list, and the longest
matches will be selected at phase 3.  This usually provides a similar
result to that of the loop in phase 1, but _much_ more efficiently.

Some operations are left in phase 1, but they all use internal
functions, not ioctls.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
Zygo Blaxell 2024-11-27 20:29:09 -05:00
parent 54ed6e1cff
commit ce0367dafe

View File

@@ -451,13 +451,8 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
const auto found = hash_table->find_cell(hash);
BEESCOUNT(scan_lookup);
set<BeesResolver> resolved_addrs;
set<BeesAddress> found_addrs;
// We know that there is at least one copy of the data and where it is,
// but we don't want to do expensive LOGICAL_INO operations unless there
// are at least two distinct addresses to look at.
found_addrs.insert(addr);
list<BeesAddress> ordered_addrs;
for (const auto &i : found) {
BEESTRACE("found (hash, address): " << i);
@@ -466,6 +461,9 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
// Hash has to match
THROW_CHECK2(runtime_error, i.e_hash, hash, i.e_hash == hash);
// We know that there is at least one copy of the data and where it is.
// Filter out anything that can't possibly match before we pull out the
// LOGICAL_INO hammer.
BeesAddress found_addr(i.e_addr);
// If address already in hash table, move on to next extent.
@@ -484,15 +482,16 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
return;
}
// Block must have matching EOF alignment
if (found_addr.is_unaligned_eof() != addr.is_unaligned_eof()) {
BEESCOUNT(scan_malign);
// Address is a duplicate.
// Check this early so we don't have duplicate counts.
if (!found_addrs.insert(found_addr).second) {
BEESCOUNT(scan_twice);
continue;
}
// Address is a duplicate
if (!found_addrs.insert(found_addr).second) {
BEESCOUNT(scan_twice);
// Block must have matching EOF alignment
if (found_addr.is_unaligned_eof() != addr.is_unaligned_eof()) {
BEESCOUNT(scan_malign);
continue;
}
@@ -506,9 +505,14 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
return;
}
// Distinct address, go resolve it
bool abandon_extent = false;
catch_all([&]() {
// Put this address in the list without changing hash table order
ordered_addrs.push_back(found_addr);
}
// Cheap filtering is now out of the way, now for some heavy lifting
for (auto found_addr : ordered_addrs) {
// Hash table says there's a matching block on the filesystem.
// Go find refs to it.
BEESNOTE("resolving " << found_addr << " matched " << bbd);
BEESTRACE("resolving " << found_addr << " matched " << bbd);
BEESTRACE("BeesContext::scan_one_extent calling BeesResolver " << found_addr);
@@ -521,75 +525,53 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
// It has become toxic since it was inserted into the hash table.
found_addr.set_toxic();
hash_table->push_front_hash_addr(hash, found_addr);
abandon_extent = true;
return;
} else if (!resolved.count()) {
BEESCOUNT(scan_resolve_zero);
// Didn't find a block at the table address, address is dead
BEESLOGDEBUG("Erasing stale addr " << addr << " hash " << hash);
hash_table->erase_hash_addr(hash, found_addr);
continue;
} else {
resolved_addrs.insert(resolved);
BEESCOUNT(scan_resolve_hit);
}
});
if (abandon_extent) {
return;
}
}
// This shouldn't happen (often), so let's count it separately
if (resolved_addrs.size() > 2) {
BEESCOUNT(matched_3_or_more);
}
if (resolved_addrs.size() > 1) {
BEESCOUNT(matched_2_or_more);
}
// No need to do all this unless there are one or more distinct matches
if (!resolved_addrs.empty()) {
// `resolved` contains references to a block on the filesystem that still exists.
bar.at(bar_p) = 'M';
BEESCOUNT(matched_1_or_more);
BEESTRACE("resolved_addrs.size() = " << resolved_addrs.size());
BEESNOTE("resolving " << resolved_addrs.size() << " matches for hash " << hash);
BeesFileRange replaced_bfr;
for (auto it = resolved_addrs.begin(); it != resolved_addrs.end(); ++it) {
auto it_copy = *it;
BEESNOTE("finding one match (out of " << it_copy.count() << ") at " << it_copy.addr() << " for " << bbd);
BEESTRACE("finding one match (out of " << it_copy.count() << ") at " << it_copy.addr() << " for " << bbd);
// If we replaced extents containing the incoming addr,
// push the addr we kept to the front of the hash LRU.
auto replaced_brp = it_copy.replace_dst(bbd);
replaced_bfr = replaced_brp.second;
BEESNOTE("finding one match (out of " << resolved.count() << ") at " << resolved.addr() << " for " << bbd);
BEESTRACE("finding one match (out of " << resolved.count() << ") at " << resolved.addr() << " for " << bbd);
auto replaced_brp = resolved.replace_dst(bbd);
BeesFileRange &replaced_bfr = replaced_brp.second;
BEESTRACE("next_p " << to_hex(next_p) << " -> replaced_bfr " << replaced_bfr);
// If we did find a block, but not this hash, correct the hash table
if (it_copy.found_hash()) {
// If we did find a block, but not this hash, correct the hash table and move on
if (resolved.found_hash()) {
BEESCOUNT(scan_hash_hit);
} else {
BEESLOGDEBUG("Erasing stale hash " << hash << " addr " << it_copy.addr());
hash_table->erase_hash_addr(hash, it_copy.addr());
BEESLOGDEBUG("Erasing stale hash " << hash << " addr " << resolved.addr());
hash_table->erase_hash_addr(hash, resolved.addr());
BEESCOUNT(scan_hash_miss);
continue;
}
if (it_copy.found_dup()) {
// We found a block and it was a duplicate
if (resolved.found_dup()) {
THROW_CHECK0(runtime_error, replaced_bfr);
BEESCOUNT(scan_dup_hit);
// Save this match. If a better match is found later,
// it will be replaced. (FIXME: not really...)
// it will be replaced.
dedupe_list.push_back(replaced_brp);
// Push matching block to front of LRU
front_hash_list.push_back(make_pair(hash, it_copy.addr()));
front_hash_list.push_back(make_pair(hash, resolved.addr()));
off_t bar_p = (p - e.begin()) / BLOCK_SIZE_SUMS;
// This is the block that matched in the replaced bfr
bar.at(bar_p) = '=';
// Invalidate resolve cache so we can count refs correctly
invalidate_addr_list.push_back(it_copy.addr());
invalidate_addr_list.push_back(resolved.addr());
invalidate_addr_list.push_back(bbd.addr());
// next_p may be past EOF so check p only
@@ -598,13 +580,15 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
// We may find duplicate ranges of various lengths, so make sure
// we don't pick a smaller one
next_p = max(next_p, replaced_bfr.end());
// Stop after one dedupe is found. If there's a longer matching range
// out there, we'll find a matching block after the end of this range,
// since the longer range is longer than this one.
break;
} else {
BEESCOUNT(scan_dup_miss);
}
}
} else {
BEESCOUNT(matched_0);
}
}
bool force_insert = false;