From 642581e89aecf5abcb61109b499b1817cc0be79e Mon Sep 17 00:00:00 2001
From: Zygo Blaxell
Date: Thu, 1 Dec 2016 23:44:11 -0500
Subject: [PATCH] hash: remove the experimental shared hash-table and shared
 mmap features

The experiments are over, and the results were not a success.

Having two filesystems cohabiting in the same hash table results in a
lot of false positives, each of which requires some heavy IO to resolve.
Using MAP_SHARED to share a beeshash.dat between processes results in
catastrophically bad performance.

These features were abandoned long ago, but some of the code--and even
worse, its documentation--still remains.

Bees wants a hash table false positive rate below 0.1%.  With a shared
hash table, the FP rate is about the same as the dedup rate: files that
are duplicate on one filesystem are typically duplicate on many
filesystems, and since bees cannot dedup across filesystems, every one
of those cross-filesystem matches is a false positive.

One or both of the Linux VFS and the btrfs mmap(MAP_SHARED)
implementation produce extremely poor performance.  A
five-order-of-magnitude speedup was achieved by implementing paging in
userspace with worker threads.  We no longer need the support code for
the MAP_SHARED case.

It is still possible to run many BeesContexts in a single process, but
now the only thing contexts share is the FD cache.
---
 src/bees-context.cc |  2 --
 src/bees-hash.cc    | 72 ++++++++++++++++++-----------------------------
 src/bees.h          |  6 ----
 3 files changed, 25 insertions(+), 55 deletions(-)

diff --git a/src/bees-context.cc b/src/bees-context.cc
index 10783ca..c0f4e4e 100644
--- a/src/bees-context.cc
+++ b/src/bees-context.cc
@@ -247,8 +247,6 @@ BeesContext::BeesContext(shared_ptr<BeesContext> parent) :
 	m_parent_ctx(parent)
 {
 	if (m_parent_ctx) {
-		m_hash_table = m_parent_ctx->hash_table();
-		m_hash_table->set_shared(true);
 		m_fd_cache = m_parent_ctx->fd_cache();
 	}
 }
diff --git a/src/bees-hash.cc b/src/bees-hash.cc
index cec1d3c..da4b6db 100644
--- a/src/bees-hash.cc
+++ b/src/bees-hash.cc
@@ -101,8 +101,6 @@ BeesHashTable::get_extent_range(HashType hash)
 void
 BeesHashTable::flush_dirty_extents()
 {
-	if (using_shared_map()) return;
-
 	THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
 
 	unique_lock<mutex> lock(m_extent_mutex);
@@ -124,16 +122,12 @@ BeesHashTable::flush_dirty_extents()
 			uint8_t *dirty_extent_end = m_extent_ptr[extent_number + 1].p_byte;
 			THROW_CHECK1(out_of_range, dirty_extent, dirty_extent >= m_byte_ptr);
 			THROW_CHECK1(out_of_range, dirty_extent_end, dirty_extent_end <= m_byte_ptr_end);
-			if (using_shared_map()) {
-				BEESTOOLONG("flush extent " << extent_number);
-				copy(dirty_extent, dirty_extent_end, dirty_extent);
-			} else {
-				BEESTOOLONG("pwrite(fd " << m_fd << " '" << name_fd(m_fd) << "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
-				// Page locks slow us down more than copying the data does
-				vector<uint8_t> extent_copy(dirty_extent, dirty_extent_end);
-				pwrite_or_die(m_fd, extent_copy, dirty_extent - m_byte_ptr);
-				BEESCOUNT(hash_extent_out);
-			}
+			THROW_CHECK2(out_of_range, dirty_extent_end, dirty_extent, dirty_extent_end - dirty_extent == BLOCK_SIZE_HASHTAB_EXTENT);
+			BEESTOOLONG("pwrite(fd " << m_fd << " '" << name_fd(m_fd) << "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
+			// Page locks slow us down more than copying the data does
+			vector<uint8_t> extent_copy(dirty_extent, dirty_extent_end);
+			pwrite_or_die(m_fd, extent_copy, dirty_extent - m_byte_ptr);
+			BEESCOUNT(hash_extent_out);
 		});
 		BEESNOTE("flush rate limited at extent #" << extent_number << " (" << extent_counter << " of " << dirty_extent_copy.size() << ")");
 		m_flush_rate_limit.sleep_for(BLOCK_SIZE_HASHTAB_EXTENT);
@@ -143,7 +137,6 @@ BeesHashTable::flush_dirty_extents()
 void
 BeesHashTable::set_extent_dirty(HashType hash)
 {
-	if (using_shared_map()) return;
 	THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
 	auto pr = get_extent_range(hash);
 	uint64_t extent_number = reinterpret_cast<Extent *>(pr.first) - m_extent_ptr;
@@ -156,10 +149,8 @@ BeesHashTable::set_extent_dirty(HashType hash)
 void
 BeesHashTable::writeback_loop()
 {
-	if (!using_shared_map()) {
-		while (1) {
-			flush_dirty_extents();
-		}
+	while (true) {
+		flush_dirty_extents();
 	}
 }
 
@@ -310,7 +301,6 @@ void
 BeesHashTable::fetch_missing_extent(HashType hash)
 {
 	BEESTOOLONG("fetch_missing_extent for hash " << to_hex(hash));
-	if (using_shared_map()) return;
 	THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
 	auto pr = get_extent_range(hash);
 	uint64_t extent_number = reinterpret_cast<Extent *>(pr.first) - m_extent_ptr;
@@ -396,7 +386,6 @@ BeesHashTable::find_cell(HashType hash)
 void
 BeesHashTable::erase_hash_addr(HashType hash, AddrType addr)
 {
-	// if (m_shared) return;
 	fetch_missing_extent(hash);
 	BEESTOOLONG("erase hash " << to_hex(hash) << " addr " << addr);
 	unique_lock<mutex> lock(m_bucket_mutex);
@@ -573,12 +562,6 @@ BeesHashTable::try_mmap_flags(int flags)
 	}
 }
 
-void
-BeesHashTable::set_shared(bool shared)
-{
-	m_shared = shared;
-}
-
 void
 BeesHashTable::open_file()
 {
@@ -625,12 +608,6 @@ BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename, off_t size) :
 	// Sanity checks to protect the implementation from its weaknesses
 	THROW_CHECK2(invalid_argument, BLOCK_SIZE_HASHTAB_BUCKET, BLOCK_SIZE_HASHTAB_EXTENT, (BLOCK_SIZE_HASHTAB_EXTENT % BLOCK_SIZE_HASHTAB_BUCKET) == 0);
 
-	// Does the union work?
-	THROW_CHECK2(runtime_error, m_void_ptr, m_cell_ptr, m_void_ptr == m_cell_ptr);
-	THROW_CHECK2(runtime_error, m_void_ptr, m_byte_ptr, m_void_ptr == m_byte_ptr);
-	THROW_CHECK2(runtime_error, m_void_ptr, m_bucket_ptr, m_void_ptr == m_bucket_ptr);
-	THROW_CHECK2(runtime_error, m_void_ptr, m_extent_ptr, m_void_ptr == m_extent_ptr);
-
 	// There's more than one union
 	THROW_CHECK2(runtime_error, sizeof(Bucket), BLOCK_SIZE_HASHTAB_BUCKET, BLOCK_SIZE_HASHTAB_BUCKET == sizeof(Bucket));
 	THROW_CHECK2(runtime_error, sizeof(Bucket::p_byte), BLOCK_SIZE_HASHTAB_BUCKET, BLOCK_SIZE_HASHTAB_BUCKET == sizeof(Bucket::p_byte));
@@ -655,27 +632,28 @@ BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename, off_t size) :
 
 	BEESLOG("\tflush rate limit " << BEES_FLUSH_RATE);
 
-	if (using_shared_map()) {
-		try_mmap_flags(MAP_SHARED);
-	} else {
-		try_mmap_flags(MAP_PRIVATE | MAP_ANONYMOUS);
-	}
+	// Try to mmap that much memory
+	try_mmap_flags(MAP_PRIVATE | MAP_ANONYMOUS);
 
 	if (!m_cell_ptr) {
 		THROW_ERRNO("unable to mmap " << filename);
 	}
 
-	if (!using_shared_map()) {
-		// madvise fails if MAP_SHARED
-		if (using_any_madvise()) {
-			// DONTFORK because we sometimes do fork,
-			// but the child doesn't touch any of the many, many pages
-			BEESTOOLONG("madvise(MADV_HUGEPAGE | MADV_DONTFORK)");
-			DIE_IF_NON_ZERO(madvise(m_byte_ptr, m_size, MADV_HUGEPAGE | MADV_DONTFORK));
-		}
-		for (uint64_t i = 0; i < m_size / sizeof(Extent); ++i) {
-			m_buckets_missing.insert(i);
-		}
+	// Do unions work the way we think (and rely on)?
+	THROW_CHECK2(runtime_error, m_void_ptr, m_cell_ptr, m_void_ptr == m_cell_ptr);
+	THROW_CHECK2(runtime_error, m_void_ptr, m_byte_ptr, m_void_ptr == m_byte_ptr);
+	THROW_CHECK2(runtime_error, m_void_ptr, m_bucket_ptr, m_void_ptr == m_bucket_ptr);
+	THROW_CHECK2(runtime_error, m_void_ptr, m_extent_ptr, m_void_ptr == m_extent_ptr);
+
+	// madvise fails if MAP_SHARED
+	if (using_any_madvise()) {
+		// DONTFORK because fork won't end well
+		BEESTOOLONG("madvise(MADV_HUGEPAGE | MADV_DONTFORK)");
+		DIE_IF_NON_ZERO(madvise(m_byte_ptr, m_size, MADV_HUGEPAGE | MADV_DONTFORK));
+	}
+
+	for (uint64_t i = 0; i < m_size / sizeof(Extent); ++i) {
+		m_buckets_missing.insert(i);
 	}
 
 	m_writeback_thread.exec([&]() {
diff --git a/src/bees.h b/src/bees.h
index 82cbdae..e0dc2eb 100644
--- a/src/bees.h
+++ b/src/bees.h
@@ -419,8 +419,6 @@ public:
 	void erase_hash_addr(HashType hash, AddrType addr);
 	bool push_front_hash_addr(HashType hash, AddrType addr);
 
-	void set_shared(bool shared);
-
 private:
 	string m_filename;
 	Fd m_fd;
@@ -456,8 +454,6 @@ private:
 
 	LockSet<uint64_t> m_extent_lock_set;
 
-	DefaultBool m_shared;
-
 	void open_file();
 	void writeback_loop();
 	void prefetch_loop();
@@ -469,8 +465,6 @@ private:
 	void flush_dirty_extents();
 	bool is_toxic_hash(HashType h) const;
 
-	bool using_shared_map() const { return false; }
-
 	BeesHashTable(const BeesHashTable &) = delete;
 	BeesHashTable &operator=(const BeesHashTable &) = delete;
 };
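
For readers skimming the diff, the "paging in userspace" pattern that this
patch commits to is small enough to show whole.  The sketch below is
illustrative only, not bees code: the class DirtyPager, its members, and
extent_bytes are names invented for this example, and the error branch is
reduced to a comment where bees would call pwrite_or_die() and throw.  It
assumes a MAP_PRIVATE|MAP_ANONYMOUS mapping m_map backed by a table file
open on m_fd, as in the constructor above.

	#include <cstddef>
	#include <cstdint>
	#include <mutex>
	#include <set>
	#include <vector>
	#include <unistd.h>

	class DirtyPager {
	public:
		DirtyPager(int fd, uint8_t *map, size_t extent_bytes) :
			m_fd(fd), m_map(map), m_extent_bytes(extent_bytes)
		{
		}

		// Writers mark an extent dirty under a short-lived lock.
		void set_dirty(uint64_t extent)
		{
			std::unique_lock<std::mutex> lock(m_mutex);
			m_dirty.insert(extent);
		}

		// A worker thread takes the dirty set and flushes each extent
		// with pwrite().  Copy first: holding the mapping stable for
		// the duration of a slow write costs more than the memcpy does.
		void flush()
		{
			std::set<uint64_t> batch;
			{
				std::unique_lock<std::mutex> lock(m_mutex);
				batch.swap(m_dirty);
			}
			for (uint64_t extent : batch) {
				uint8_t *begin = m_map + extent * m_extent_bytes;
				std::vector<uint8_t> copy(begin, begin + m_extent_bytes);
				off_t offset = off_t(extent * m_extent_bytes);
				if (pwrite(m_fd, copy.data(), copy.size(), offset) != ssize_t(copy.size())) {
					// bees throws here (pwrite_or_die)
				}
			}
		}

	private:
		int m_fd;
		uint8_t *m_map;
		size_t m_extent_bytes;
		std::mutex m_mutex;
		std::set<uint64_t> m_dirty;
	};

A writeback thread then just loops on flush(), which is exactly the shape of
the simplified writeback_loop() in this patch: readers and writers touch only
the private mapping and a mutex, never a page lock on a shared file mapping.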