1
0
mirror of https://github.com/Zygo/bees.git synced 2025-05-17 21:35:45 +02:00

progress: estimate actual data sizes for progress report

Replace pointers in the "done" and "total" columns with estimated data
sizes for each size tier.  The estimation is based on statistics
collected from extents scanned during the current bees run.

Move the total size for the entire filesystem up to the heading.

Report the _completed_ position (i.e. the one that would be saved in
`beescrawl.dat`), not the _queued_ position (i.e. the one where the
next Task would be created in memory).

At the end of the data, the crawl pointer ends up at some random point
in the filesystem just after the newest extent, so the progress gets to
99.7% and then goes to some random value like 47% or 3%, not to 100%.
Report "deferred" in the "done" column when the crawler is waiting for
the next transid, and "finished" in the "%done" column when the crawler
has reached the end of the data.  Suppress the ETA when finished.  This
makes it clear that there's no further work to do for these crawlers.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
Zygo Blaxell 2024-11-26 17:47:33 -05:00
parent da32667e02
commit 9987aa8583
2 changed files with 71 additions and 16 deletions

View File

@ -439,6 +439,13 @@ friend ostream& operator<<(ostream &os, const BeesScanModeExtent::MagicCrawl& ma
Timer m_age;
};
friend ostream& operator<<(ostream &os, const BeesScanModeExtent::ExtentRef& todo);
struct ExtentSizeCount {
uint64_t m_bytes = 0;
};
struct ExtentSizeMap {
map<uint64_t, ExtentSizeCount> m_map;
uint64_t m_total = 0;
} m_extent_size;
void init_tasks();
void scan() override;
@ -737,6 +744,7 @@ BeesScanModeExtent::init_tasks()
bec->map_next_extent(subvol);
})));
}
m_extent_size.m_map.insert(make_pair(subvol, ExtentSizeCount {}));
}
}
@ -796,11 +804,23 @@ BeesScanModeExtent::map_next_extent(uint64_t const subvol)
break;
}
// Calculate average proportion of each extent size
const uint64_t this_range_size = this_range.size();
unique_lock<mutex> lock(m_mutex);
for (auto &i : m_extent_size.m_map) {
const auto &svm = s_magic_crawl_map.at(i.first);
if (this_range_size >= svm.m_min_size && this_range_size <= svm.m_max_size) {
i.second.m_bytes += this_range_size;
break;
}
}
m_extent_size.m_total += this_range_size;
lock.unlock();
// Check extent length against size range
const auto &subvol_magic = s_magic_crawl_map.at(subvol);
const uint64_t lower_size_bound = subvol_magic.m_min_size;
const uint64_t upper_size_bound = subvol_magic.m_max_size;
const uint64_t this_range_size = this_range.size();
// If this extent is out of range, move on to the next
if (this_range_size < lower_size_bound || this_range_size > upper_size_bound) {
@ -842,10 +862,10 @@ BeesScanModeExtent::map_next_extent(uint64_t const subvol)
}
const auto bytenr = this_range.fid().ino();
const auto bti = beif.at(bytenr);
// Check extent item generation is in range
// FIXME: we already had this in crawl state, and we threw it away
const auto bti = beif.at(bytenr);
const auto gen = bti.extent_generation();
if (gen < this_state.m_min_transid) {
BEESCOUNT(crawl_gen_low);
@ -859,7 +879,7 @@ BeesScanModeExtent::map_next_extent(uint64_t const subvol)
}
// Map this extent here to regulate task creation
create_extent_map(bytenr, this_crawl->hold_state(this_state), bti.offset());
create_extent_map(bytenr, this_crawl->hold_state(this_state), this_range_size);
BEESCOUNT(crawl_extent);
const auto search_calls = BtrfsIoctlSearchKey::s_calls - init_s_calls;
@ -892,10 +912,13 @@ BeesScanModeExtent::next_transid(const CrawlMap &crawl_map)
init_tasks();
// insert_root does this for non-magic subvols, we have to do it ourselves
map<uint64_t,pair<bool,bool>> deferred_map;
for (const auto &i : s_magic_crawl_map) {
const auto subvol = i.first;
const auto found = crawl_map.find(subvol);
if (found != crawl_map.end()) {
// Have to save these for the progress table
deferred_map.insert(make_pair(subvol, make_pair(found->second->deferred(), found->second->finished())));
found->second->deferred(false);
}
}
@ -949,6 +972,19 @@ BeesScanModeExtent::next_transid(const CrawlMap &crawl_map)
return;
}
// Grab a copy of the extent size statistics so we can use it without it changing under us
lock.lock();
const auto mes = m_extent_size;
// Decay the extent size map averages
static const double decay = .99;
for (auto &i : m_extent_size.m_map) {
i.second.m_bytes *= decay;
}
m_extent_size.m_total *= decay;
lock.unlock();
const bool mes_sample_size_ok = mes.m_total > fs_size * .001;
// Report on progress using extent bytenr map
Table::Table eta;
for (const auto &i : s_magic_crawl_map) {
@ -965,14 +1001,10 @@ BeesScanModeExtent::next_transid(const CrawlMap &crawl_map)
const auto this_crawl = found->second;
THROW_CHECK1(runtime_error, subvol, this_crawl);
const auto this_range = this_crawl->peek_front();
if (!this_range) {
BEESLOGDEBUG("PROGRESS: completed crawl " << magic);
BEESCOUNT(progress_complete);
continue;
}
// Get the last _completed_ state
const auto this_state = this_crawl->get_state_begin();
const auto bytenr = this_range.fid().ino();
auto bytenr = this_state.m_objectid;
const auto bg_found = bg_info_map.lower_bound(bytenr);
if (bg_found == bg_info_map.end()) {
BEESLOGDEBUG("PROGRESS: bytenr " << to_hex(bytenr) << " not found in a block group for " << magic);
@ -980,6 +1012,10 @@ BeesScanModeExtent::next_transid(const CrawlMap &crawl_map)
continue;
}
const auto &bi = bg_found->second;
if (!bytenr) {
// Zero bytenr means we have just started a crawl. Point to the first defined bytenr instead
bytenr = bi.first_bytenr;
}
const auto bi_last_bytenr = bg_found->first;
if (bytenr > bi_last_bytenr || bytenr < bi.first_bytenr) {
// This can happen if the crawler happens to be in a metadata block group,
@ -989,12 +1025,12 @@ BeesScanModeExtent::next_transid(const CrawlMap &crawl_map)
}
const auto bytenr_offset = min(bi_last_bytenr, max(bytenr, bi.first_bytenr)) - bi.first_bytenr + bi.first_total;
const auto bytenr_percent = bytenr_offset / (0.01 * fs_size);
const auto this_state = this_crawl->get_state_end();
const auto now = time(NULL);
const auto time_so_far = now - min(now, this_state.m_started);
string eta_stamp = "-";
string eta_pretty = "-";
if (time_so_far > 1 && bytenr_percent > 0) {
const bool finished = deferred_map.at(subvol).second;
if (time_so_far > 1 && bytenr_percent > 0 && !finished) {
const time_t eta_duration = time_so_far / (bytenr_percent / 100);
const time_t eta_time = eta_duration + now;
struct tm ltm = { 0 };
@ -1005,10 +1041,15 @@ BeesScanModeExtent::next_transid(const CrawlMap &crawl_map)
eta_stamp = string(buf);
eta_pretty = pretty_seconds(eta_duration);
}
const auto &mma = mes.m_map.at(subvol);
const auto mma_ratio = mes_sample_size_ok ? (mma.m_bytes / double(mes.m_total)) : 1.0;
const auto pos_text = Table::Text(deferred_map.at(subvol).first ? "deferred" : pretty(bytenr_offset * mma_ratio));
const auto pct_text = Table::Text(finished ? "finished" : astringprintf("%.4f%%", bytenr_percent));
const auto size_text = Table::Text( mes_sample_size_ok ? pretty(fs_size * mma_ratio) : "-");
eta.insert_row(Table::endpos, vector<Table::Content> {
Table::Text(pretty(bytenr_offset)),
Table::Text(pretty(fs_size)),
Table::Text(astringprintf("%.4f%%", bytenr_percent)),
pos_text,
size_text,
pct_text,
Table::Number(subvol),
Table::Text(pretty(magic.m_min_size & ~BLOCK_MASK_CLONE)),
Table::Text(pretty(magic.m_max_size)),
@ -1025,7 +1066,7 @@ BeesScanModeExtent::next_transid(const CrawlMap &crawl_map)
eta.right("");
eta.insert_row(0, vector<Table::Content> {
Table::Text("done"),
Table::Text("total"),
Table::Text(pretty(fs_size)),
Table::Text("%done"),
Table::Text("sub"),
Table::Text("szmn"),
@ -2203,3 +2244,15 @@ BeesCrawl::deferred(bool def_setting)
unique_lock<mutex> lock(m_state_mutex);
m_deferred = def_setting;
}
bool
BeesCrawl::deferred() const
{
return m_deferred;
}
bool
BeesCrawl::finished() const
{
return m_finished;
}

View File

@ -533,6 +533,8 @@ public:
BeesCrawlState get_state_end() const;
void set_state(const BeesCrawlState &bcs);
void deferred(bool def_setting);
bool deferred() const;
bool finished() const;
};
class BeesScanMode;