
roots: separate crawl sizes into bytes and items

The number of items should be low enough that we don't accumulate too
many stale items, but high enough to amortize system call overhead to
a reasonable ratio.

The number of bytes should be constant:  one worst-case metadata page
(the btrfs limit is 64K, though 16K is much more common) so that we
always have enough buffer space for one worst-case item; otherwise, if
the number of items is set too low and there's a big item in the tree,
the buffer is too small for it, we get EOVERFLOW, and we can't make
further progress.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
Zygo Blaxell 2020-11-12 12:10:29 -05:00
parent d332616eff
commit de6282c6cd
2 changed files with 6 additions and 3 deletions
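
For context, here is a minimal sketch of the TREE_SEARCH_V2 call pattern the commit message describes. This is not bees code: bees drives the ioctl through its BtrfsIoctlSearchKey wrapper, and the helper name, constants, and error handling below are illustrative assumptions.

// Not bees code: a minimal sketch of the BTRFS_IOC_TREE_SEARCH_V2 call
// pattern described in the commit message.  bees wraps this in
// BtrfsIoctlSearchKey; names and error handling here are illustrative only.
#include <linux/btrfs.h>        // btrfs_ioctl_search_args_v2, BTRFS_IOC_TREE_SEARCH_V2
#include <linux/btrfs_tree.h>   // BTRFS_EXTENT_DATA_KEY
#include <sys/ioctl.h>
#include <cerrno>
#include <cstdint>
#include <cstring>
#include <limits>
#include <stdexcept>
#include <vector>

static const size_t MAX_CRAWL_BYTES = 64 * 1024;  // one worst-case metadata page
static const size_t MAX_CRAWL_ITEMS = 8;          // few enough to stay fresh, enough to amortize the syscall

// Fetch one batch of EXTENT_DATA items from subvol `root`, starting at
// `min_objectid`.  Returns the number of items the kernel placed in the buffer.
size_t crawl_one_batch(int fd, uint64_t root, uint64_t min_objectid)
{
	// Zero-initialized search header plus result buffer in one allocation.
	std::vector<uint8_t> raw(sizeof(btrfs_ioctl_search_args_v2) + MAX_CRAWL_BYTES);
	auto *args = reinterpret_cast<btrfs_ioctl_search_args_v2 *>(raw.data());

	args->buf_size         = MAX_CRAWL_BYTES;   // constant: any single item fits
	args->key.tree_id      = root;
	args->key.min_objectid = min_objectid;
	args->key.max_objectid = std::numeric_limits<uint64_t>::max();
	args->key.min_type     = BTRFS_EXTENT_DATA_KEY;
	args->key.max_type     = BTRFS_EXTENT_DATA_KEY;
	args->key.max_offset   = std::numeric_limits<uint64_t>::max();
	args->key.max_transid  = std::numeric_limits<uint64_t>::max();
	args->key.nr_items     = MAX_CRAWL_ITEMS;   // small: limits stale results per call

	if (ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, args) < 0) {
		if (errno == EOVERFLOW) {
			// Only possible if a single item is larger than the buffer,
			// which cannot happen with a full metadata page.
			throw std::runtime_error("TREE_SEARCH_V2: item larger than buffer");
		}
		throw std::runtime_error(std::strerror(errno));
	}
	return args->key.nr_items;  // kernel rewrites nr_items to the count returned
}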

src/bees-roots.cc

@@ -995,7 +995,7 @@ BeesCrawl::fetch_extents()
 	Timer crawl_timer;
-	BtrfsIoctlSearchKey sk(BEES_MAX_CRAWL_SIZE * (sizeof(btrfs_file_extent_item) + sizeof(btrfs_ioctl_search_header)));
+	BtrfsIoctlSearchKey sk(BEES_MAX_CRAWL_BYTES);
 	sk.tree_id = old_state.m_root;
 	sk.min_objectid = old_state.m_objectid;
 	sk.min_type = sk.max_type = BTRFS_EXTENT_DATA_KEY;
@@ -1006,7 +1006,7 @@ BeesCrawl::fetch_extents()
 	// the filesystem while slowing us down.
 	// sk.max_transid = old_state.m_max_transid;
 	sk.max_transid = numeric_limits<uint64_t>::max();
-	sk.nr_items = BEES_MAX_CRAWL_SIZE;
+	sk.nr_items = BEES_MAX_CRAWL_ITEMS;
 	// Lock in the old state
 	set_state(old_state);

src/bees.h

@@ -101,7 +101,10 @@ const double BEES_HASH_TABLE_ANALYZE_INTERVAL = BEES_STATS_INTERVAL;
 const size_t BEES_MAX_QUEUE_SIZE = 128;
 // Read this many items at a time in SEARCHv2
-const size_t BEES_MAX_CRAWL_SIZE = 1024;
+const size_t BEES_MAX_CRAWL_ITEMS = 8;
+
+// Read this many bytes at a time in SEARCHv2 (one maximum-sized metadata page)
+const size_t BEES_MAX_CRAWL_BYTES = 64 * 1024;
 // Insert this many items before switching to a new subvol
 const size_t BEES_MAX_CRAWL_BATCH = 128;
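
A companion sketch (also not bees code, with the same caveats) of how the items returned by such a search are laid out and consumed: each item is a btrfs_ioctl_search_header followed by hdr.len bytes of item data, packed back to back, which is why one full-sized metadata page of buffer always fits at least one worst-case item.

// Not bees code: a sketch of walking the buffer that TREE_SEARCH_V2 fills.
#include <linux/btrfs.h>        // btrfs_ioctl_search_args_v2, btrfs_ioctl_search_header
#include <linux/btrfs_tree.h>   // BTRFS_EXTENT_DATA_KEY, btrfs_file_extent_item, BTRFS_FILE_EXTENT_REG
#include <cstdint>
#include <cstring>

void walk_results(const btrfs_ioctl_search_args_v2 *args)
{
	const uint8_t *p = reinterpret_cast<const uint8_t *>(args->buf);
	for (uint32_t i = 0; i < args->key.nr_items; ++i) {
		btrfs_ioctl_search_header hdr;
		std::memcpy(&hdr, p, sizeof(hdr));  // copy out: headers after variable-length items may be unaligned
		p += sizeof(hdr);
		// Inline extents can be shorter than the full struct, so bound-check
		// before copying, then use the type field to keep only regular extents.
		if (hdr.type == BTRFS_EXTENT_DATA_KEY && hdr.len >= sizeof(btrfs_file_extent_item)) {
			btrfs_file_extent_item fei;
			std::memcpy(&fei, p, sizeof(fei));
			if (fei.type == BTRFS_FILE_EXTENT_REG) {
				// ...hand (hdr.objectid, hdr.offset, fei) to the crawler...
			}
		}
		p += hdr.len;  // item data follows its header
	}
}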