1
0
mirror of https://github.com/Zygo/bees.git synced 2025-05-18 05:45:45 +02:00

roots: add 'recent' crawl mode for a mix of new and old data

Crawl mode 3 'recent' prioritizes data from new updates to previously
scanned subvols over subvols that have not been completely scanned yet.
If no such new data exists, falls back to a variation of 'lockstep'
scan mode.

This enables us to keep up with new data as it arrives, a key weakness
of all the other scan modes, and worth violating our unwritten "no new
scan modes until we have extent-tree dedupe working" policy for.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
Zygo Blaxell 2021-11-29 00:41:29 -05:00
parent de96a38460
commit f5c4714a28
2 changed files with 44 additions and 1 deletions

View File

@ -57,6 +57,7 @@ BeesRoots::scan_mode_ntoa(BeesRoots::ScanMode mode)
{ .n = SCAN_MODE_LOCKSTEP, .mask = ~0ULL, .a = "lockstep" },
{ .n = SCAN_MODE_INDEPENDENT, .mask = ~0ULL, .a = "independent" },
{ .n = SCAN_MODE_SEQUENTIAL, .mask = ~0ULL, .a = "sequential" },
{ .n = SCAN_MODE_RECENT, .mask = ~0ULL, .a = "recent" },
NTOA_TABLE_ENTRY_END()
};
return bits_ntoa(mode, table);
@ -480,7 +481,48 @@ BeesRoots::crawl_roots()
break;
}
case SCAN_MODE_COUNT: assert(false); break;
case SCAN_MODE_RECENT: {
	// "recent" mode: visit every crawl in the order
	//   1. descending m_min_transid (highest first),
	//   2. ascending m_started,
	//   3. ascending (m_objectid, m_root, m_offset),
	// and return true as soon as crawl_batch() yields a nonzero result.
	using crawl_ptr = shared_ptr<BeesCrawl>;
	vector<crawl_ptr> ordered_crawls;
	for (const auto &entry : crawl_map_copy) {
		ordered_crawls.push_back(entry.second);
	}
	const auto recent_first = [&](const crawl_ptr &lhs, const crawl_ptr &rhs) {
		const auto lhs_state = lhs->get_state_end();
		const auto rhs_state = rhs->get_state_end();
		// The primary key is compared in reverse so that larger
		// m_min_transid values sort to the front.
		if (rhs_state.m_min_transid < lhs_state.m_min_transid) {
			return true;
		}
		if (lhs_state.m_min_transid < rhs_state.m_min_transid) {
			return false;
		}
		// Equal m_min_transid: the remaining keys compare in ascending order.
		return tie(
			lhs_state.m_started,
			lhs_state.m_objectid,
			lhs_state.m_root,
			lhs_state.m_offset
		) < tie(
			rhs_state.m_started,
			rhs_state.m_objectid,
			rhs_state.m_root,
			rhs_state.m_offset
		);
	};
	sort(ordered_crawls.begin(), ordered_crawls.end(), recent_first);
	// 1-based position of the crawl currently being processed, for the status note
	size_t position = 0;
	for (const auto &crawl : ordered_crawls) {
		++position;
		BEESNOTE("crawling " << position << " of " << ordered_crawls.size() << " roots in recent order");
		if (crawl_batch(crawl)) {
			return true;
		}
	}
	break;
}
case SCAN_MODE_COUNT:
default:
assert(false);
break;
}
BEESNOTE("Crawl done");

View File

@ -596,6 +596,7 @@ public:
SCAN_MODE_LOCKSTEP,
SCAN_MODE_INDEPENDENT,
SCAN_MODE_SEQUENTIAL,
SCAN_MODE_RECENT,
SCAN_MODE_COUNT, // must be last
};