mirror of
https://github.com/Zygo/bees.git
synced 2025-06-17 10:06:16 +02:00
context: move TempFile from TLS to Pool and fix some FdCache issues
Get rid of the thread-local TempFiles and use Pool instead. This eliminates a potential FD leak when the loadavg governor repeatedly creates and destroys threads. With the old per-thread TempFiles, we were guaranteed to have exclusive ownership of the TempFile object within the current thread. Pool is somewhat stricter: it only guarantees ownership while the checked-out Handle exists. Adjust the users of TempFile objects to ensure they hold the Handle object until they are finished using the TempFile. It appears that maintaining large, heavily-reflinked, long-lived temporary files costs more than truncating after every use: btrfs has to write multiple references to the temporary file's extents, then some commits later, remove references as the temporary file is deleted or truncated. Using the temporary file in a dedupe operation flushes the data to disk, so nothing is saved by pretending that there is writeback pipelining and trying to avoid flushes in truncate. Pool provides usage tracking and a checkin callback, so use it to truncate the temporary file immediately after every use. Redesign TempFile so that every instance creates exactly one Fd which persists over the lifetime of the TempFile object. Provide a reset() method which resets the file back to the initial state and call it from the Pool checkin callback. This makes TempFile's lifetime equivalent to its Fd's lifetime, which simplifies interactions with FdCache and Roots. This change means we can now blacklist temporary files without having an effective memory leak, so do that. We also have a reason to ever remove something from the blacklist, so add a method for that too. In order to move to extent-centric addressing, we need to be able to reliably open temporary files by root and inode number. Previously we would place TempFile fd's into the cache with insert_root_ino, but the cache would be cleared periodically, and it would not be possible to reopen temporary files after that happened. Now that the TempFile's lifetime is the same as the TempFile Fd's lifetime, we can have TempFile manage a separate FileId -> Fd map in Roots which is unaffected by the periodic cache clearing. BeesRoots::open_root_ino_nocache will check this map before attempting to open the file via btrfs root+ino lookup, and return it through the cache as if Roots had opened the file via btrfs. Hold a reference to BeesRoots in BeesTempFile because the usual way to get such a reference now throws an exception in BeesTempFile's destructor. These changes make method BeesTempFile::create() and all methods named insert_root_ino unnecessary, so delete them. We construct and destroy TempFiles much less often now, so make their constructor and destructor more informative. Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
@ -62,13 +62,6 @@ BeesFdCache::open_root_ino(shared_ptr<BeesContext> ctx, uint64_t root, uint64_t
|
||||
return m_file_cache(ctx, root, ino);
|
||||
}
|
||||
|
||||
void
|
||||
BeesFdCache::insert_root_ino(shared_ptr<BeesContext> ctx, Fd fd)
|
||||
{
|
||||
BeesFileId fid(fd);
|
||||
return m_file_cache.insert(fd, ctx, fid.root(), fid.ino());
|
||||
}
|
||||
|
||||
void
|
||||
BeesContext::dump_status()
|
||||
{
|
||||
@ -256,11 +249,11 @@ BeesContext::dedup(const BeesRangePair &brp)
|
||||
}
|
||||
|
||||
BeesRangePair
|
||||
BeesContext::dup_extent(const BeesFileRange &src)
|
||||
BeesContext::dup_extent(const BeesFileRange &src, const shared_ptr<BeesTempFile> &tmpfile)
|
||||
{
|
||||
BEESTRACE("dup_extent " << src);
|
||||
BEESCOUNTADD(dedup_copy, src.size());
|
||||
return BeesRangePair(tmpfile()->make_copy(src), src);
|
||||
return BeesRangePair(tmpfile->make_copy(src), src);
|
||||
}
|
||||
|
||||
void
|
||||
@ -268,7 +261,8 @@ BeesContext::rewrite_file_range(const BeesFileRange &bfr)
|
||||
{
|
||||
auto m_ctx = shared_from_this();
|
||||
BEESNOTE("Rewriting bfr " << bfr);
|
||||
BeesRangePair dup_brp(dup_extent(BeesFileRange(bfr.fd(), bfr.begin(), min(bfr.file_size(), bfr.end()))));
|
||||
auto rewrite_tmpfile = tmpfile();
|
||||
BeesRangePair dup_brp(dup_extent(BeesFileRange(bfr.fd(), bfr.begin(), min(bfr.file_size(), bfr.end())), rewrite_tmpfile));
|
||||
// BEESLOG("\tdup_brp " << dup_brp);
|
||||
BeesBlockData orig_bbd(bfr.fd(), bfr.begin(), min(BLOCK_SIZE_SUMS, bfr.size()));
|
||||
// BEESLOG("\torig_bbd " << orig_bbd);
|
||||
@ -964,6 +958,16 @@ BeesContext::start()
|
||||
BEESLOGNOTICE("Starting bees main loop...");
|
||||
BEESNOTE("starting BeesContext");
|
||||
|
||||
// Set up temporary file pool
|
||||
m_tmpfile_pool.generator([=]() -> shared_ptr<BeesTempFile> {
|
||||
return make_shared<BeesTempFile>(shared_from_this());
|
||||
});
|
||||
m_tmpfile_pool.checkin([](const shared_ptr<BeesTempFile> &btf) {
|
||||
catch_all([&](){
|
||||
btf->reset();
|
||||
});
|
||||
});
|
||||
|
||||
// Force these to exist now so we don't have recursive locking
|
||||
// operations trying to access them
|
||||
fd_cache();
|
||||
@ -1022,7 +1026,7 @@ BeesContext::stop()
|
||||
|
||||
BEESNOTE("closing tmpfiles");
|
||||
BEESLOGDEBUG("Closing tmpfiles");
|
||||
m_tmpfiles.clear();
|
||||
m_tmpfile_pool.clear();
|
||||
|
||||
BEESNOTE("closing FD caches");
|
||||
BEESLOGDEBUG("Closing FD caches");
|
||||
@ -1058,44 +1062,44 @@ BeesContext::stop_requested() const
|
||||
}
|
||||
|
||||
void
|
||||
BeesContext::blacklist_add(const BeesFileId &fid)
|
||||
BeesContext::blacklist_insert(const BeesFileId &fid)
|
||||
{
|
||||
BEESLOGDEBUG("Adding " << fid << " to blacklist");
|
||||
unique_lock<mutex> lock(m_blacklist_mutex);
|
||||
m_blacklist.insert(fid);
|
||||
}
|
||||
|
||||
void
|
||||
BeesContext::blacklist_erase(const BeesFileId &fid)
|
||||
{
|
||||
BEESLOGDEBUG("Removing " << fid << " from blacklist");
|
||||
unique_lock<mutex> lock(m_blacklist_mutex);
|
||||
m_blacklist.erase(fid);
|
||||
}
|
||||
|
||||
bool
|
||||
BeesContext::is_blacklisted(const BeesFileId &fid) const
|
||||
{
|
||||
// Everything on root 1 is blacklisted, no locks necessary.
|
||||
// Everything on root 1 is blacklisted (it is mostly free space cache), no locks necessary.
|
||||
if (fid.root() == 1) {
|
||||
return true;
|
||||
}
|
||||
unique_lock<mutex> lock(m_blacklist_mutex);
|
||||
return m_blacklist.count(fid);
|
||||
return m_blacklist.find(fid) != m_blacklist.end();
|
||||
}
|
||||
|
||||
shared_ptr<BeesTempFile>
|
||||
BeesContext::tmpfile()
|
||||
{
|
||||
// FIXME: this whole thing leaks FDs (quite slowly). Make a pool instead.
|
||||
|
||||
unique_lock<mutex> lock(m_stop_mutex);
|
||||
|
||||
if (m_stop_requested) {
|
||||
throw BeesHalt();
|
||||
}
|
||||
|
||||
if (!m_tmpfiles[this_thread::get_id()]) {
|
||||
// We know we are the only possible accessor of this,
|
||||
// so drop the lock to avoid a deadlock loop
|
||||
lock.unlock();
|
||||
auto rv = make_shared<BeesTempFile>(shared_from_this());
|
||||
lock.lock();
|
||||
m_tmpfiles[this_thread::get_id()] = rv;
|
||||
}
|
||||
return m_tmpfiles[this_thread::get_id()];
|
||||
lock.unlock();
|
||||
|
||||
return m_tmpfile_pool();
|
||||
}
|
||||
|
||||
shared_ptr<BeesFdCache>
|
||||
@ -1147,9 +1151,3 @@ BeesContext::set_root_path(string path)
|
||||
m_root_path = path;
|
||||
set_root_fd(open_or_die(m_root_path, FLAGS_OPEN_DIR));
|
||||
}
|
||||
|
||||
void
|
||||
BeesContext::insert_root_ino(Fd fd)
|
||||
{
|
||||
fd_cache()->insert_root_ino(shared_from_this(), fd);
|
||||
}
|
||||
|
Reference in New Issue
Block a user