From f5c4714a2818194549553afa9918f28f28cd59a4 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Mon, 29 Nov 2021 00:41:29 -0500 Subject: [PATCH] roots: add 'recent' crawl mode for a mix of new and old data Crawl mode 3 'recent' prioritizes data from new updates to previously scanned subvols over subvols that have not been completely scanned yet. If no such new data exists, falls back to a variation of 'lockstep' scan mode. This enables us to keep up with new data as it arrives, a key weakness of all the other scan modes, and worth violating our unwritten "no new scan modes until we have extent-tree dedupe working" policy for. Signed-off-by: Zygo Blaxell --- src/bees-roots.cc | 44 +++++++++++++++++++++++++++++++++++++++++++- src/bees.h | 1 + 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/bees-roots.cc b/src/bees-roots.cc index 02ea6a9..94e4751 100644 --- a/src/bees-roots.cc +++ b/src/bees-roots.cc @@ -57,6 +57,7 @@ BeesRoots::scan_mode_ntoa(BeesRoots::ScanMode mode) { .n = SCAN_MODE_LOCKSTEP, .mask = ~0ULL, .a = "lockstep" }, { .n = SCAN_MODE_INDEPENDENT, .mask = ~0ULL, .a = "independent" }, { .n = SCAN_MODE_SEQUENTIAL, .mask = ~0ULL, .a = "sequential" }, + { .n = SCAN_MODE_RECENT, .mask = ~0ULL, .a = "recent" }, NTOA_TABLE_ENTRY_END() }; return bits_ntoa(mode, table); @@ -480,7 +481,48 @@ BeesRoots::crawl_roots() break; } - case SCAN_MODE_COUNT: assert(false); break; + case SCAN_MODE_RECENT: { + // Scan highest min_transid first, then oldest, then lockstep + using crawl_tuple = shared_ptr<BeesCrawl>; + vector<crawl_tuple> crawl_vector; + for (const auto &i : crawl_map_copy) { + crawl_vector.push_back(i.second); + } + sort(crawl_vector.begin(), crawl_vector.end(), [&](const crawl_tuple &a, const crawl_tuple &b) { + const auto a_state = a->get_state_end(); + const auto b_state = b->get_state_end(); + return tie( + b_state.m_min_transid, + a_state.m_started, + a_state.m_objectid, + a_state.m_root, + a_state.m_offset + ) < tie( + a_state.m_min_transid, + b_state.m_started, 
+ b_state.m_objectid, + b_state.m_root, + b_state.m_offset + ); + }); + size_t count = 0; + for (const auto &i : crawl_vector) { + ++count; + BEESNOTE("crawling " << count << " of " << crawl_vector.size() << " roots in recent order"); + const auto batch_count = crawl_batch(i); + + if (batch_count) { + return true; + } + } + + break; + } + + case SCAN_MODE_COUNT: + default: + assert(false); + break; } BEESNOTE("Crawl done"); diff --git a/src/bees.h b/src/bees.h index 271cbae..1764a24 100644 --- a/src/bees.h +++ b/src/bees.h @@ -596,6 +596,7 @@ public: SCAN_MODE_LOCKSTEP, SCAN_MODE_INDEPENDENT, SCAN_MODE_SEQUENTIAL, + SCAN_MODE_RECENT, SCAN_MODE_COUNT, // must be last };