1
0
mirror of https://github.com/Zygo/bees.git synced 2025-08-02 13:53:28 +02:00

5 Commits

Author SHA1 Message Date
Zygo Blaxell
3430f16998 context: create a Pool of BtrfsIoctlLogicalInoArgs objects
Each object contains a 16 MiB buffer, which is very heavy for some
malloc implementations.

Keep the objects in a Pool so that their buffers are only allocated and
deallocated once in the process lifetime.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
2023-02-23 22:45:31 -05:00
Zygo Blaxell
7c764a73c8 fs: allow BtrfsIoctlLogicalInoArgs to be reused, remove virtual methods
Some malloc implementations will try to mmap() and munmap() large buffers
every time they are used, causing a severe loss of performance.

Nothing ever overrode the virtual methods, and there was no virtual
destructor, so they cause compiler warnings at build time when used with
a template that tries to delete pointers to them.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
2023-02-23 22:40:12 -05:00
Zygo Blaxell
a9a5cd03a5 ProgressTracker: reduce memory usage with long-running work items
ProgressTracker was only freeing memory for work items when they reach
the head of the work tracking queue.  If the first work item takes
hours to complete, and thousands of items are processed every second,
this leads to millions of completed items tracked in memory at a time,
wasting gigabytes of system RAM.

Rewrite ProgressHolderState methods to keep only incomplete work items
in memory, regardless of the order in which they are added or removed.

Also fix the unit tests which were relying on the memory leak to work,
and add test cases for code coverage.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
2023-02-23 22:33:35 -05:00
Zygo Blaxell
299509ce32 seeker: fix the test for ILP32 platforms
Not sure what I was thinking, but the argument here should clearly
be uint64_t.

Fixes: https://github.com/Zygo/bees/issues/248
Signed-off-by: Zygo Blaxell <bees@furryterror.org>
2023-02-20 11:30:56 -05:00
Zygo Blaxell
d5a99c2f5e roots: don't share a RootFetcher between threads
If the send workaround is enabled, it is possible for two threads (a
thread running the crawl_new task, and a thread attempting to apply the
send workaround) to access the same RootFetcher object at the same time.
That never ends well.

Give each function its own BtrfsRootFetcher object.

Fixes: https://github.com/Zygo/bees/issues/250
Signed-off-by: Zygo Blaxell <bees@furryterror.org>
2023-02-20 11:14:34 -05:00
8 changed files with 108 additions and 34 deletions

View File

@@ -69,9 +69,11 @@ namespace crucible {
uint64_t get_flags() const;
void set_flags(uint64_t new_flags);
void set_logical(uint64_t new_logical);
void set_size(uint64_t new_size);
virtual void do_ioctl(int fd);
virtual bool do_ioctl_nothrow(int fd);
void do_ioctl(int fd);
bool do_ioctl_nothrow(int fd);
struct BtrfsInodeOffsetRootSpan {
using iterator = BtrfsInodeOffsetRoot*;

View File

@@ -4,13 +4,20 @@
#include "crucible/error.h"
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <cassert>
namespace crucible {
using namespace std;
/// A class to track progress of multiple workers using only two points:
/// the first and last incomplete state. The first incomplete
/// state can be recorded as a checkpoint to resume later on.
/// The last completed state is the starting point for workers that
/// need something to do.
template <class T>
class ProgressTracker {
struct ProgressTrackerState;
@@ -19,8 +26,16 @@ namespace crucible {
using value_type = T;
using ProgressHolder = shared_ptr<ProgressHolderState>;
/// Create ProgressTracker with initial begin and end state 'v'.
ProgressTracker(const value_type &v);
/// The first incomplete state. This is not "sticky",
/// it will revert to the end state if there are no
/// items in progress.
value_type begin() const;
/// The last incomplete state. This is "sticky",
/// it can only increase and never decrease.
value_type end() const;
ProgressHolder hold(const value_type &v);
@@ -31,7 +46,7 @@ namespace crucible {
struct ProgressTrackerState {
using key_type = pair<value_type, ProgressHolderState *>;
mutex m_mutex;
map<key_type, bool> m_in_progress;
set<key_type> m_in_progress;
value_type m_begin;
value_type m_end;
};
@@ -39,6 +54,7 @@ namespace crucible {
class ProgressHolderState {
shared_ptr<ProgressTrackerState> m_state;
const value_type m_value;
using key_type = typename ProgressTrackerState::key_type;
public:
ProgressHolderState(shared_ptr<ProgressTrackerState> state, const value_type &v);
~ProgressHolderState();
@@ -86,7 +102,11 @@ namespace crucible {
m_value(v)
{
unique_lock<mutex> lock(m_state->m_mutex);
m_state->m_in_progress[make_pair(m_value, this)] = true;
const auto rv = m_state->m_in_progress.insert(key_type(m_value, this));
THROW_CHECK1(runtime_error, m_value, rv.second);
// Set the beginning to the first existing in-progress item
m_state->m_begin = m_state->m_in_progress.begin()->first;
// If this value is past the end, move the end, but don't go backwards
if (m_state->m_end < m_value) {
m_state->m_end = m_value;
}
@@ -96,17 +116,15 @@ namespace crucible {
ProgressTracker<T>::ProgressHolderState::~ProgressHolderState()
{
unique_lock<mutex> lock(m_state->m_mutex);
m_state->m_in_progress[make_pair(m_value, this)] = false;
auto p = m_state->m_in_progress.begin();
while (p != m_state->m_in_progress.end()) {
if (p->second) {
break;
}
if (m_state->m_begin < p->first.first) {
m_state->m_begin = p->first.first;
}
m_state->m_in_progress.erase(p);
p = m_state->m_in_progress.begin();
const auto rv = m_state->m_in_progress.erase(key_type(m_value, this));
// THROW_CHECK2(runtime_error, m_value, rv, rv == 1);
assert(rv == 1);
if (m_state->m_in_progress.empty()) {
// If we made the list empty, then m_begin == m_end
m_state->m_begin = m_state->m_end;
} else {
// If we deleted the first element, then m_begin = current first element
m_state->m_begin = m_state->m_in_progress.begin()->first;
}
}

View File

@@ -315,6 +315,18 @@ namespace crucible {
return m_flags;
}
void
BtrfsIoctlLogicalInoArgs::set_logical(uint64_t new_logical)
{
m_logical = new_logical;
}
void
BtrfsIoctlLogicalInoArgs::set_size(uint64_t new_size)
{
m_container_size = new_size;
}
bool
BtrfsIoctlLogicalInoArgs::do_ioctl_nothrow(int fd)
{

View File

@@ -757,6 +757,15 @@ BeesResolveAddrResult::BeesResolveAddrResult()
{
}
shared_ptr<BtrfsIoctlLogicalInoArgs>
BeesContext::logical_ino(const uint64_t logical, const bool all_refs)
{
const auto rv = m_logical_ino_pool();
rv->set_logical(logical);
rv->set_flags(all_refs ? BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET : 0);
return rv;
}
BeesResolveAddrResult
BeesContext::resolve_addr_uncached(BeesAddress addr)
{
@@ -768,7 +777,8 @@ BeesContext::resolve_addr_uncached(BeesAddress addr)
// transaction latency, competing threads, and freeze/SIGSTOP
// pausing the bees process.
BtrfsIoctlLogicalInoArgs log_ino(addr.get_physical_or_zero());
const auto log_ino_ptr = logical_ino(addr.get_physical_or_zero(), false);
auto &log_ino = *log_ino_ptr;
// Time how long this takes
Timer resolve_timer;
@@ -910,6 +920,9 @@ BeesContext::start()
m_tmpfile_pool.generator([=]() -> shared_ptr<BeesTempFile> {
return make_shared<BeesTempFile>(shared_from_this());
});
m_logical_ino_pool.generator([]() {
return make_shared<BtrfsIoctlLogicalInoArgs>(0);
});
m_tmpfile_pool.checkin([](const shared_ptr<BeesTempFile> &btf) {
catch_all([&](){
btf->reset();

View File

@@ -500,7 +500,8 @@ BeesRoots::transid_max_nocache()
// We look for the root of the extent tree and read its transid.
// Should run in O(1) time and be fairly reliable.
const auto bti = m_root_fetcher.root(BTRFS_EXTENT_TREE_OBJECTID);
BtrfsRootFetcher root_fetcher(m_ctx->root_fd());
const auto bti = root_fetcher.root(BTRFS_EXTENT_TREE_OBJECTID);
BEESTRACE("extracting transid from " << bti);
const auto rv = bti.transid();
@@ -927,7 +928,6 @@ BeesRoots::state_load()
BeesRoots::BeesRoots(shared_ptr<BeesContext> ctx) :
m_ctx(ctx),
m_root_fetcher(ctx->root_fd()),
m_crawl_state_file(ctx->home_fd(), crawl_state_filename()),
m_crawl_thread("crawl_transid"),
m_writeback_thread("crawl_writeback")
@@ -1101,7 +1101,8 @@ BeesRoots::is_root_ro(uint64_t root)
BEESTRACE("checking subvol flags on root " << root);
const auto item = m_root_fetcher.root(root);
BtrfsRootFetcher root_fetcher(m_ctx->root_fd());
const auto item = root_fetcher.root(root);
// If we can't access the subvol's root item...guess it's ro?
if (!item || item.root_flags() & BTRFS_ROOT_SUBVOL_RDONLY) {
return true;

View File

@@ -534,7 +534,6 @@ class BeesScanMode;
class BeesRoots : public enable_shared_from_this<BeesRoots> {
shared_ptr<BeesContext> m_ctx;
BtrfsRootFetcher m_root_fetcher;
BeesStringFile m_crawl_state_file;
map<uint64_t, shared_ptr<BeesCrawl>> m_root_crawl_map;
mutex m_mutex;
@@ -715,6 +714,7 @@ class BeesContext : public enable_shared_from_this<BeesContext> {
shared_ptr<BeesHashTable> m_hash_table;
shared_ptr<BeesRoots> m_roots;
Pool<BeesTempFile> m_tmpfile_pool;
Pool<BtrfsIoctlLogicalInoArgs> m_logical_ino_pool;
LRUCache<BeesResolveAddrResult, BeesAddress> m_resolve_cache;
@@ -754,6 +754,8 @@ public:
bool scan_forward(const BeesFileRange &bfr);
shared_ptr<BtrfsIoctlLogicalInoArgs> logical_ino(uint64_t bytenr, bool all_refs);
bool is_root_ro(uint64_t root);
BeesRangePair dup_extent(const BeesFileRange &src, const shared_ptr<BeesTempFile> &tmpfile);
bool dedup(const BeesRangePair &brp);

View File

@@ -12,23 +12,49 @@ using namespace std;
void
test_progress()
{
// On create, begin == end == constructor argument
ProgressTracker<uint64_t> pt(123);
auto hold = pt.hold(234);
auto hold2 = pt.hold(345);
assert(pt.begin() == 123);
assert(pt.end() == 345);
auto hold3 = pt.hold(456);
assert(pt.begin() == 123);
assert(pt.end() == 456);
hold2.reset();
assert(pt.begin() == 123);
assert(pt.end() == 456);
hold.reset();
assert(pt.end() == 123);
// Holding a position past the end increases the end (and moves begin to match)
auto hold345 = pt.hold(345);
assert(pt.begin() == 345);
assert(pt.end() == 345);
// Holding a position before begin reduces begin, without changing end
auto hold234 = pt.hold(234);
assert(pt.begin() == 234);
assert(pt.end() == 345);
// Holding a position past the end increases the end, without affecting begin
auto hold456 = pt.hold(456);
assert(pt.begin() == 234);
assert(pt.end() == 456);
hold3.reset();
// Releasing a position in the middle affects neither begin nor end
hold345.reset();
assert(pt.begin() == 234);
assert(pt.end() == 456);
// Hold another position in the middle to test begin moving forward
auto hold400 = pt.hold(400);
// Releasing a position at the beginning moves begin forward
hold234.reset();
assert(pt.begin() == 400);
assert(pt.end() == 456);
// Releasing a position at the end doesn't move end backward
hold456.reset();
assert(pt.begin() == 400);
assert(pt.end() == 456);
// Releasing a position in the middle doesn't move end backward but does move begin forward
hold400.reset();
assert(pt.begin() == 456);
assert(pt.end() == 456);
}
int

View File

@@ -28,7 +28,7 @@ static bool test_fails = false;
static
void
seeker_test(const vector<uint64_t> &vec, size_t target)
seeker_test(const vector<uint64_t> &vec, uint64_t const target)
{
cerr << "Find " << target << " in {";
for (auto i : vec) {
@@ -42,7 +42,7 @@ seeker_test(const vector<uint64_t> &vec, size_t target)
return seeker_finder(vec, lower, upper);
});
cerr << found;
size_t my_found = 0;
uint64_t my_found = 0;
for (auto i : vec) {
if (i <= target) {
my_found = i;