diff --git a/src/bees-context.cc b/src/bees-context.cc index a1d3471..3d7d22e 100644 --- a/src/bees-context.cc +++ b/src/bees-context.cc @@ -62,13 +62,6 @@ BeesFdCache::open_root_ino(shared_ptr ctx, uint64_t root, uint64_t return m_file_cache(ctx, root, ino); } -void -BeesFdCache::insert_root_ino(shared_ptr ctx, Fd fd) -{ - BeesFileId fid(fd); - return m_file_cache.insert(fd, ctx, fid.root(), fid.ino()); -} - void BeesContext::dump_status() { @@ -256,11 +249,11 @@ BeesContext::dedup(const BeesRangePair &brp) } BeesRangePair -BeesContext::dup_extent(const BeesFileRange &src) +BeesContext::dup_extent(const BeesFileRange &src, const shared_ptr &tmpfile) { BEESTRACE("dup_extent " << src); BEESCOUNTADD(dedup_copy, src.size()); - return BeesRangePair(tmpfile()->make_copy(src), src); + return BeesRangePair(tmpfile->make_copy(src), src); } void @@ -268,7 +261,8 @@ BeesContext::rewrite_file_range(const BeesFileRange &bfr) { auto m_ctx = shared_from_this(); BEESNOTE("Rewriting bfr " << bfr); - BeesRangePair dup_brp(dup_extent(BeesFileRange(bfr.fd(), bfr.begin(), min(bfr.file_size(), bfr.end())))); + auto rewrite_tmpfile = tmpfile(); + BeesRangePair dup_brp(dup_extent(BeesFileRange(bfr.fd(), bfr.begin(), min(bfr.file_size(), bfr.end())), rewrite_tmpfile)); // BEESLOG("\tdup_brp " << dup_brp); BeesBlockData orig_bbd(bfr.fd(), bfr.begin(), min(BLOCK_SIZE_SUMS, bfr.size())); // BEESLOG("\torig_bbd " << orig_bbd); @@ -964,6 +958,16 @@ BeesContext::start() BEESLOGNOTICE("Starting bees main loop..."); BEESNOTE("starting BeesContext"); + // Set up temporary file pool + m_tmpfile_pool.generator([=]() -> shared_ptr { + return make_shared(shared_from_this()); + }); + m_tmpfile_pool.checkin([](const shared_ptr &btf) { + catch_all([&](){ + btf->reset(); + }); + }); + // Force these to exist now so we don't have recursive locking // operations trying to access them fd_cache(); @@ -1022,7 +1026,7 @@ BeesContext::stop() BEESNOTE("closing tmpfiles"); BEESLOGDEBUG("Closing tmpfiles"); - m_tmpfiles.clear(); + m_tmpfile_pool.clear(); BEESNOTE("closing FD caches"); BEESLOGDEBUG("Closing FD caches"); @@ -1058,44 +1062,44 @@ BeesContext::stop_requested() const } void -BeesContext::blacklist_add(const BeesFileId &fid) +BeesContext::blacklist_insert(const BeesFileId &fid) { BEESLOGDEBUG("Adding " << fid << " to blacklist"); unique_lock lock(m_blacklist_mutex); m_blacklist.insert(fid); } +void +BeesContext::blacklist_erase(const BeesFileId &fid) +{ + BEESLOGDEBUG("Removing " << fid << " from blacklist"); + unique_lock lock(m_blacklist_mutex); + m_blacklist.erase(fid); +} + bool BeesContext::is_blacklisted(const BeesFileId &fid) const { - // Everything on root 1 is blacklisted, no locks necessary. + // Everything on root 1 is blacklisted (it is mostly free space cache), no locks necessary. if (fid.root() == 1) { return true; } unique_lock lock(m_blacklist_mutex); - return m_blacklist.count(fid); + return m_blacklist.find(fid) != m_blacklist.end(); } shared_ptr BeesContext::tmpfile() { - // FIXME: this whole thing leaks FDs (quite slowly). Make a pool instead. - unique_lock lock(m_stop_mutex); if (m_stop_requested) { throw BeesHalt(); } - if (!m_tmpfiles[this_thread::get_id()]) { - // We know we are the only possible accessor of this, - // so drop the lock to avoid a deadlock loop - lock.unlock(); - auto rv = make_shared(shared_from_this()); - lock.lock(); - m_tmpfiles[this_thread::get_id()] = rv; - } - return m_tmpfiles[this_thread::get_id()]; + lock.unlock(); + + return m_tmpfile_pool(); } shared_ptr @@ -1147,9 +1151,3 @@ BeesContext::set_root_path(string path) m_root_path = path; set_root_fd(open_or_die(m_root_path, FLAGS_OPEN_DIR)); } - -void -BeesContext::insert_root_ino(Fd fd) -{ - fd_cache()->insert_root_ino(shared_from_this(), fd); -} diff --git a/src/bees-hash.cc b/src/bees-hash.cc index d6b0204..29a9081 100644 --- a/src/bees-hash.cc +++ b/src/bees-hash.cc @@ -740,7 +740,7 @@ BeesHashTable::BeesHashTable(shared_ptr ctx, string filename, off_t // Blacklist might fail if the hash table is not stored on a btrfs catch_all([&]() { - m_ctx->blacklist_add(BeesFileId(m_fd)); + m_ctx->blacklist_insert(BeesFileId(m_fd)); }); } diff --git a/src/bees-roots.cc b/src/bees-roots.cc index 28821b6..696ab89 100644 --- a/src/bees-roots.cc +++ b/src/bees-roots.cc @@ -796,6 +796,16 @@ BeesRoots::open_root_ino_nocache(uint64_t root, uint64_t ino) { BEESTRACE("opening root " << root << " ino " << ino); + // Check the tmpfiles map first + { + unique_lock lock(m_tmpfiles_mutex); + auto found = m_tmpfiles.find(BeesFileId(root, ino)); + if (found != m_tmpfiles.end()) { + BEESCOUNT(open_tmpfile); + return found->second; + } + } + Fd root_fd = open_root(root); if (!root_fd) { BEESCOUNT(open_no_root); @@ -922,6 +932,25 @@ BeesRoots::transid_re() return m_transid_re; } +void +BeesRoots::insert_tmpfile(Fd fd) +{ + BeesFileId fid(fd); + unique_lock lock(m_tmpfiles_mutex); + auto rv = m_tmpfiles.insert(make_pair(fid, fd)); + THROW_CHECK1(runtime_error, fd, rv.second); +} + +void +BeesRoots::erase_tmpfile(Fd fd) +{ + BeesFileId fid(fd); + unique_lock lock(m_tmpfiles_mutex); + auto found = m_tmpfiles.find(fid); + THROW_CHECK1(runtime_error, fd, found != m_tmpfiles.end()); + m_tmpfiles.erase(found); +} + BeesCrawl::BeesCrawl(shared_ptr ctx, BeesCrawlState initial_state) : m_ctx(ctx), m_state(initial_state) diff --git a/src/bees.cc b/src/bees.cc index d936f48..44d4ab1 100644 --- a/src/bees.cc +++ b/src/bees.cc @@ -443,39 +443,6 @@ BeesStringFile::write(string contents) renameat_or_die(m_dir_fd, tmpname, m_dir_fd, m_name); } -void -BeesTempFile::create() -{ - // BEESLOG("creating temporary file in " << m_ctx->root_path()); - BEESNOTE("creating temporary file in " << m_ctx->root_path()); - BEESTOOLONG("creating temporary file in " << m_ctx->root_path()); - - Timer create_timer; - DIE_IF_MINUS_ONE(m_fd = openat(m_ctx->root_fd(), ".", FLAGS_OPEN_TMPFILE, S_IRUSR | S_IWUSR)); - BEESCOUNT(tmp_create); - - // Can't reopen this file, so don't allow any resolves there - // Resolves won't work there anyway. There are lots of tempfiles - // and they're short-lived, so this ends up being just a memory leak - // m_ctx->blacklist_add(BeesFileId(m_fd)); - - // Put this inode in the cache so we can resolve it later - m_ctx->insert_root_ino(m_fd); - - // Set compression attribute - BEESTRACE("Getting FS_COMPR_FL on m_fd " << name_fd(m_fd)); - int flags = ioctl_iflags_get(m_fd); - flags |= FS_COMPR_FL; - BEESTRACE("Setting FS_COMPR_FL on m_fd " << name_fd(m_fd) << " flags " << to_hex(flags)); - ioctl_iflags_set(m_fd, flags); - - // Always leave first block empty to avoid creating a file with an inline extent - m_end_offset = BLOCK_SIZE_CLONE; - - // Count time spent here - BEESCOUNTADD(tmp_create_ms, create_timer.age() * 1000); -} - void BeesTempFile::resize(off_t offset) { @@ -483,9 +450,6 @@ BeesTempFile::resize(off_t offset) BEESNOTE("Resizing temporary file " << name_fd(m_fd) << " to " << to_hex(offset)); BEESTRACE("Resizing temporary file " << name_fd(m_fd) << " to " << to_hex(offset)); - // Ensure that file covers m_end_offset..offset - THROW_CHECK2(invalid_argument, m_end_offset, offset, m_end_offset < offset); - // Truncate Timer resize_timer; DIE_IF_NON_ZERO(ftruncate(m_fd, offset)); @@ -498,17 +462,56 @@ BeesTempFile::resize(off_t offset) BEESCOUNTADD(tmp_resize_ms, resize_timer.age() * 1000); } +void +BeesTempFile::reset() +{ + // Always leave first block empty to avoid creating a file with an inline extent + resize(BLOCK_SIZE_CLONE); +} + + BeesTempFile::~BeesTempFile() { - BEESLOGDEBUG("Destructing BeesTempFile " << this); + BEESLOGDEBUG("destroying temporary file " << this << " in " << m_ctx->root_path() << " fd " << name_fd(m_fd)); + + // Remove this file from open_root_ino lookup table + m_roots->erase_tmpfile(m_fd); + + // Remove from blacklist + m_ctx->blacklist_erase(BeesFileId(m_fd)); } BeesTempFile::BeesTempFile(shared_ptr ctx) : m_ctx(ctx), + m_roots(ctx->roots()), m_end_offset(0) { - BEESLOGDEBUG("Constructing BeesTempFile " << this); - create(); + BEESLOGDEBUG("creating temporary file " << this << " in " << m_ctx->root_path()); + BEESNOTE("creating temporary file in " << m_ctx->root_path()); + BEESTOOLONG("creating temporary file in " << m_ctx->root_path()); + + Timer create_timer; + DIE_IF_MINUS_ONE(m_fd = openat(m_ctx->root_fd(), ".", FLAGS_OPEN_TMPFILE, S_IRUSR | S_IWUSR)); + BEESCOUNT(tmp_create); + + // Don't include this file in new extent scans + m_ctx->blacklist_insert(BeesFileId(m_fd)); + + // Add this file to open_root_ino lookup table + m_roots->insert_tmpfile(m_fd); + + // Set compression attribute + BEESTRACE("Getting FS_COMPR_FL on m_fd " << name_fd(m_fd)); + int flags = ioctl_iflags_get(m_fd); + flags |= FS_COMPR_FL; + BEESTRACE("Setting FS_COMPR_FL on m_fd " << name_fd(m_fd) << " flags " << to_hex(flags)); + ioctl_iflags_set(m_fd, flags); + + // Count time spent here + BEESCOUNTADD(tmp_create_ms, create_timer.age() * 1000); + + // Set initial size + reset(); } void @@ -517,12 +520,14 @@ BeesTempFile::realign() if (m_end_offset > BLOCK_SIZE_MAX_TEMP_FILE) { BEESLOGINFO("temporary file size " << to_hex(m_end_offset) << " > max " << BLOCK_SIZE_MAX_TEMP_FILE); BEESCOUNT(tmp_trunc); - return create(); + reset(); + return; } if (m_end_offset & BLOCK_MASK_CLONE) { // BEESTRACE("temporary file size " << to_hex(m_end_offset) << " not aligned"); BEESCOUNT(tmp_realign); - return create(); + reset(); + return; } // OK as is BEESCOUNT(tmp_aligned); diff --git a/src/bees.h b/src/bees.h index 7606ba5..f836ede 100644 --- a/src/bees.h +++ b/src/bees.h @@ -8,6 +8,7 @@ #include "crucible/fd.h" #include "crucible/fs.h" #include "crucible/lockset.h" +#include "crucible/pool.h" #include "crucible/progress.h" #include "crucible/time.h" #include "crucible/task.h" @@ -546,6 +547,9 @@ class BeesRoots : public enable_shared_from_this { bool m_workaround_btrfs_send = false; LRUCache m_root_ro_cache; + mutex m_tmpfiles_mutex; + map m_tmpfiles; + mutex m_stop_mutex; condition_variable m_stop_condvar; bool m_stop_requested = false; @@ -572,9 +576,12 @@ class BeesRoots : public enable_shared_from_this { RateEstimator& transid_re(); size_t crawl_batch(shared_ptr crawl); void clear_caches(); + void insert_tmpfile(Fd fd); + void erase_tmpfile(Fd fd); friend class BeesFdCache; friend class BeesCrawl; +friend class BeesTempFile; public: BeesRoots(shared_ptr ctx); @@ -668,10 +675,10 @@ friend ostream & operator<<(ostream &os, const BeesRangePair &brp); class BeesTempFile { shared_ptr m_ctx; + shared_ptr m_roots; Fd m_fd; off_t m_end_offset; - void create(); void realign(); void resize(off_t new_end_offset); @@ -680,6 +687,7 @@ public: BeesTempFile(shared_ptr ctx); BeesFileRange make_hole(off_t count); BeesFileRange make_copy(const BeesFileRange &src); + void reset(); }; class BeesFdCache { @@ -692,7 +700,6 @@ public: BeesFdCache(); Fd open_root(shared_ptr ctx, uint64_t root); Fd open_root_ino(shared_ptr ctx, uint64_t root, uint64_t ino); - void insert_root_ino(shared_ptr ctx, Fd fd); void clear(); }; @@ -715,7 +722,7 @@ class BeesContext : public enable_shared_from_this { shared_ptr m_fd_cache; shared_ptr m_hash_table; shared_ptr m_roots; - map> m_tmpfiles; + Pool m_tmpfile_pool; LRUCache m_resolve_cache; @@ -763,10 +770,11 @@ public: BeesFileRange scan_forward(const BeesFileRange &bfr); bool is_root_ro(uint64_t root); - BeesRangePair dup_extent(const BeesFileRange &src); + BeesRangePair dup_extent(const BeesFileRange &src, const shared_ptr &tmpfile); bool dedup(const BeesRangePair &brp); - void blacklist_add(const BeesFileId &fid); + void blacklist_insert(const BeesFileId &fid); + void blacklist_erase(const BeesFileId &fid); bool is_blacklisted(const BeesFileId &fid) const; BeesResolveAddrResult resolve_addr(BeesAddress addr); @@ -786,9 +794,6 @@ public: const Timer &total_timer() const { return m_total_timer; } LockSet &extent_lock_set() { return m_extent_lock_set; } - - // TODO: move the rest of the FD cache methods here - void insert_root_ino(Fd fd); }; class BeesResolver {