1
0
mirror of https://github.com/Zygo/bees.git synced 2025-05-17 21:35:45 +02:00

throttle: add delays to match deferred request rate to btrfs completion rate

Measure the time spent running various operations that extend btrfs
transaction completion times (`LOGICAL_INO`, tmpfiles, and dedupe)
and arrange for each operation to run for not less than the average
amount of time by adding a sleep after each operation that takes less
than the average.

The delay after each operation is intended to slow down the rate of
deferred and long-running requests from bees to match the rate at which
btrfs is actually completing them.  This may help avoid big spikes in
latency if btrfs has so many requests queued that it has to force a
commit to release memory.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
Zygo Blaxell 2024-12-10 00:36:28 -05:00
parent f209cafcd8
commit ea45982293
4 changed files with 58 additions and 9 deletions

View File

@ -20,7 +20,6 @@
using namespace crucible; using namespace crucible;
using namespace std; using namespace std;
BeesFdCache::BeesFdCache(shared_ptr<BeesContext> ctx) : BeesFdCache::BeesFdCache(shared_ptr<BeesContext> ctx) :
m_ctx(ctx) m_ctx(ctx)
{ {
@ -214,6 +213,7 @@ BeesContext::dedup(const BeesRangePair &brp_in)
{ {
// TOOLONG and NOTE can retroactively fill in the filename details, but LOG can't // TOOLONG and NOTE can retroactively fill in the filename details, but LOG can't
BEESNOTE("dedup " << brp_in); BEESNOTE("dedup " << brp_in);
BEESTRACE("dedup " << brp_in);
if (is_root_ro(brp_in.second.fid().root())) { if (is_root_ro(brp_in.second.fid().root())) {
// BEESLOGDEBUG("WORKAROUND: dst root " << (brp_in.second.fid().root()) << " is read-only); // BEESLOGDEBUG("WORKAROUND: dst root " << (brp_in.second.fid().root()) << " is read-only);
@ -262,6 +262,7 @@ BeesContext::dedup(const BeesRangePair &brp_in)
BEESLOGWARN("NO Dedup! " << brp); BEESLOGWARN("NO Dedup! " << brp);
} }
bees_throttle(dedup_timer.age(), "dedup");
return rv; return rv;
} catch (const std::system_error &e) { } catch (const std::system_error &e) {
if (e.code().value() == EAGAIN) { if (e.code().value() == EAGAIN) {
@ -989,6 +990,7 @@ BeesContext::resolve_addr_uncached(BeesAddress addr)
Timer resolve_timer; Timer resolve_timer;
struct rusage usage_before; struct rusage usage_before;
struct rusage usage_after;
{ {
BEESNOTE("waiting to resolve addr " << addr << " with LOGICAL_INO"); BEESNOTE("waiting to resolve addr " << addr << " with LOGICAL_INO");
const auto lock = MultiLocker::get_lock("logical_ino"); const auto lock = MultiLocker::get_lock("logical_ino");
@ -1005,13 +1007,12 @@ BeesContext::resolve_addr_uncached(BeesAddress addr)
} else { } else {
BEESCOUNT(resolve_fail); BEESCOUNT(resolve_fail);
} }
BEESCOUNTADD(resolve_ms, resolve_timer.age() * 1000); DIE_IF_MINUS_ONE(getrusage(RUSAGE_THREAD, &usage_after));
const auto resolve_timer_age = resolve_timer.age();
BEESCOUNTADD(resolve_ms, resolve_timer_age * 1000);
bees_throttle(resolve_timer_age, "resolve_addr");
} }
// Again!
struct rusage usage_after;
DIE_IF_MINUS_ONE(getrusage(RUSAGE_THREAD, &usage_after));
const double sys_usage_delta = const double sys_usage_delta =
(usage_after.ru_stime.tv_sec + usage_after.ru_stime.tv_usec / 1000000.0) - (usage_after.ru_stime.tv_sec + usage_after.ru_stime.tv_usec / 1000000.0) -
(usage_before.ru_stime.tv_sec + usage_before.ru_stime.tv_usec / 1000000.0); (usage_before.ru_stime.tv_sec + usage_before.ru_stime.tv_usec / 1000000.0);

View File

@ -605,8 +605,10 @@ BeesScanModeExtent::create_extent_map(const uint64_t bytenr, const ProgressTrack
} else { } else {
BEESCOUNT(extent_fail); BEESCOUNT(extent_fail);
} }
const auto resolve_age = resolve_timer.age();
BEESCOUNTADD(extent_ms, resolve_timer.age() * 1000); BEESCOUNTADD(extent_ms, resolve_age * 1000);
bees_throttle(resolve_age, "extent_map");
} }
const size_t rv_count = log_ino.m_iors.size(); const size_t rv_count = log_ino.m_iors.size();

View File

@ -305,6 +305,48 @@ bees_unreadahead(int const fd, off_t offset, size_t size)
BEESCOUNTADD(readahead_unread_ms, unreadahead_timer.age() * 1000); BEESCOUNTADD(readahead_unread_ms, unreadahead_timer.age() * 1000);
} }
static double bees_throttle_factor = 1.0;
void
bees_throttle(const double time_used, const char *const context)
{
static mutex s_mutex;
unique_lock<mutex> throttle_lock(s_mutex);
struct time_pair {
double time_used = 0;
double time_count = 0;
double longest_sleep_time = 0;
};
static map<string, time_pair> s_time_map;
auto &this_time = s_time_map[context];
auto &this_time_used = this_time.time_used;
auto &this_time_count = this_time.time_count;
auto &longest_sleep_time = this_time.longest_sleep_time;
this_time_used += time_used;
++this_time_count;
// Keep the timing data fresh
static Timer s_fresh_timer;
if (s_fresh_timer.age() > 300) {
s_fresh_timer.reset();
this_time_count *= 0.9;
this_time_used *= 0.9;
}
// Wait for enough data to calculate rates
if (this_time_used < 1.0 || this_time_count < 1.0) return;
const auto avg_time = this_time_used / this_time_count;
const auto sleep_time = min(60.0, bees_throttle_factor * avg_time - time_used);
if (sleep_time <= 0) {
return;
}
if (sleep_time > longest_sleep_time) {
BEESLOGDEBUG(context << ": throttle delay " << sleep_time << " s, time used " << time_used << " s, avg time " << avg_time << " s");
longest_sleep_time = sleep_time;
}
throttle_lock.unlock();
BEESNOTE(context << ": throttle delay " << sleep_time << " s, time used " << time_used << " s, avg time " << avg_time << " s");
nanosleep(sleep_time);
}
thread_local random_device bees_random_device; thread_local random_device bees_random_device;
thread_local uniform_int_distribution<default_random_engine::result_type> bees_random_seed_dist( thread_local uniform_int_distribution<default_random_engine::result_type> bees_random_seed_dist(
numeric_limits<default_random_engine::result_type>::min(), numeric_limits<default_random_engine::result_type>::min(),
@ -401,6 +443,8 @@ BeesTempFile::resize(off_t offset)
// Count time spent here // Count time spent here
BEESCOUNTADD(tmp_resize_ms, resize_timer.age() * 1000); BEESCOUNTADD(tmp_resize_ms, resize_timer.age() * 1000);
bees_throttle(resize_timer.age(), "tmpfile_resize");
} }
void void
@ -536,6 +580,8 @@ BeesTempFile::make_copy(const BeesFileRange &src)
} }
BEESCOUNTADD(tmp_copy_ms, copy_timer.age() * 1000); BEESCOUNTADD(tmp_copy_ms, copy_timer.age() * 1000);
bees_throttle(copy_timer.age(), "tmpfile_copy");
BEESCOUNT(tmp_copy); BEESCOUNT(tmp_copy);
return rv; return rv;
} }

View File

@ -550,9 +550,8 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> {
Timer m_crawl_timer; Timer m_crawl_timer;
BeesThread m_crawl_thread; BeesThread m_crawl_thread;
BeesThread m_writeback_thread; BeesThread m_writeback_thread;
bool m_workaround_btrfs_send = false;
RateEstimator m_transid_re; RateEstimator m_transid_re;
bool m_workaround_btrfs_send = false;
shared_ptr<BeesScanMode> m_scanner; shared_ptr<BeesScanMode> m_scanner;
@ -889,6 +888,7 @@ string pretty(double d);
void bees_readahead(int fd, off_t offset, size_t size); void bees_readahead(int fd, off_t offset, size_t size);
void bees_readahead_pair(int fd, off_t offset, size_t size, int fd2, off_t offset2, size_t size2); void bees_readahead_pair(int fd, off_t offset, size_t size, int fd2, off_t offset2, size_t size2);
void bees_unreadahead(int fd, off_t offset, size_t size); void bees_unreadahead(int fd, off_t offset, size_t size);
void bees_throttle(double time_used, const char *context);
string format_time(time_t t); string format_time(time_t t);
#endif #endif