
hash: remove the experimental shared hash-table and shared mmap features

The experiments are over, and the results were not a success.

Having two filesystems cohabiting in the same hash table results in a
lot of false positives, each of which requires some heavy IO to resolve.
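A hit from the hash table is only a candidate: the block it names must be
read back and byte-compared before anything can be deduped, and that read
is the heavy IO referred to above.  A minimal sketch of the verification
step, with hypothetical names (this is not the bees implementation):

    #include <sys/types.h>
    #include <unistd.h>
    #include <cstdint>
    #include <vector>

    // Hypothetical sketch: a hash-table hit must be verified by
    // re-reading the candidate block.  An entry that points into the
    // other filesystem can never match, so the read is pure overhead.
    static bool
    verify_candidate(int fd, off_t offset, const std::vector<uint8_t> &new_data)
    {
        std::vector<uint8_t> old_data(new_data.size());
        ssize_t rv = pread(fd, old_data.data(), old_data.size(), offset);
        // Short read or mismatch: a false positive, IO spent for nothing
        return rv == static_cast<ssize_t>(old_data.size()) && old_data == new_data;
    }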

Using MAP_SHARED to share a beeshash.dat between processes results in
catastrophically bad performance.

These features were abandoned long ago, but some of the code (and, even
worse, its documentation) still remains.

Bees wants a hash table false positive rate below 0.1%.  With a shared
hash table the FP rate is about the same as the dedup rate, because files
that are duplicated on one filesystem are typically duplicated on many
filesystems.
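For example, at a 20% dedup rate a shared table would also see an FP rate
near 20%, roughly two hundred times the 0.1% target, with every one of
those false positives paid for in read IO.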

Some combination of the Linux VFS and the btrfs mmap(MAP_SHARED)
implementation produces extremely poor performance.  A five-order-of-magnitude
speedup was achieved by implementing paging in userspace with worker
threads.  We no longer need the support code for the MAP_SHARED case.
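In outline, the userspace paging scheme backs the table with anonymous
private memory, reads an extent in with pread() the first time it is
touched, and writes dirty extents back with pwrite() from a worker thread.
A minimal sketch under those assumptions, with hypothetical names and
error handling elided (the real code is in the diff below):

    #include <sys/types.h>
    #include <unistd.h>
    #include <cstdint>
    #include <mutex>
    #include <set>
    #include <vector>

    // Hypothetical sketch of paging in userspace.  Anonymous memory backs
    // the whole table; extents move to and from the file explicitly.
    struct UserPager {
        uint8_t            *m_mem;          // from mmap(MAP_PRIVATE | MAP_ANONYMOUS)
        int                 m_fd;           // beeshash.dat
        size_t              m_extent_size;  // e.g. BLOCK_SIZE_HASHTAB_EXTENT
        std::set<uint64_t>  m_missing;      // extents not yet read in
        std::set<uint64_t>  m_dirty;        // extents modified since last flush
        std::mutex          m_mutex;

        // Page an extent in on first touch (cf. fetch_missing_extent)
        void fetch(uint64_t extent) {
            std::unique_lock<std::mutex> lock(m_mutex);
            if (!m_missing.count(extent)) return;
            off_t off = extent * m_extent_size;
            pread(m_fd, m_mem + off, m_extent_size, off);
            m_missing.erase(extent);
        }

        // Write dirty extents back (cf. flush_dirty_extents).  Each extent
        // is copied before the write so the lock is not held across IO;
        // the page locks cost more than the copy does.
        void flush() {
            std::set<uint64_t> dirty_copy;
            {
                std::unique_lock<std::mutex> lock(m_mutex);
                dirty_copy.swap(m_dirty);
            }
            for (uint64_t extent : dirty_copy) {
                off_t off = extent * m_extent_size;
                std::vector<uint8_t> copy(m_mem + off, m_mem + off + m_extent_size);
                pwrite(m_fd, copy.data(), copy.size(), off);
            }
        }
    };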

It is still possible to run many BeesContexts in a single process,
but now the only thing contexts share is the FD cache.
Author: Zygo Blaxell
Date:   2016-12-01 23:44:11 -05:00
Commit: 642581e89a
Parent: fdfa78a81b
3 changed files with 25 additions and 55 deletions

src/bees-context.cc

@@ -247,8 +247,6 @@ BeesContext::BeesContext(shared_ptr<BeesContext> parent) :
     m_parent_ctx(parent)
 {
     if (m_parent_ctx) {
-        m_hash_table = m_parent_ctx->hash_table();
-        m_hash_table->set_shared(true);
         m_fd_cache = m_parent_ctx->fd_cache();
     }
 }

src/bees-hash.cc

@@ -101,8 +101,6 @@ BeesHashTable::get_extent_range(HashType hash)
 void
 BeesHashTable::flush_dirty_extents()
 {
-    if (using_shared_map()) return;
-
     THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
     unique_lock<mutex> lock(m_extent_mutex);
@@ -124,16 +122,12 @@
         uint8_t *dirty_extent_end = m_extent_ptr[extent_number + 1].p_byte;
         THROW_CHECK1(out_of_range, dirty_extent, dirty_extent >= m_byte_ptr);
         THROW_CHECK1(out_of_range, dirty_extent_end, dirty_extent_end <= m_byte_ptr_end);
-        if (using_shared_map()) {
-            BEESTOOLONG("flush extent " << extent_number);
-            copy(dirty_extent, dirty_extent_end, dirty_extent);
-        } else {
-            BEESTOOLONG("pwrite(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
-            // Page locks slow us down more than copying the data does
-            vector<uint8_t> extent_copy(dirty_extent, dirty_extent_end);
-            pwrite_or_die(m_fd, extent_copy, dirty_extent - m_byte_ptr);
-            BEESCOUNT(hash_extent_out);
-        }
         THROW_CHECK2(out_of_range, dirty_extent_end, dirty_extent, dirty_extent_end - dirty_extent == BLOCK_SIZE_HASHTAB_EXTENT);
+        BEESTOOLONG("pwrite(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
+        // Page locks slow us down more than copying the data does
+        vector<uint8_t> extent_copy(dirty_extent, dirty_extent_end);
+        pwrite_or_die(m_fd, extent_copy, dirty_extent - m_byte_ptr);
+        BEESCOUNT(hash_extent_out);
     });
     BEESNOTE("flush rate limited at extent #" << extent_number << " (" << extent_counter << " of " << dirty_extent_copy.size() << ")");
     m_flush_rate_limit.sleep_for(BLOCK_SIZE_HASHTAB_EXTENT);
@@ -143,7 +137,6 @@
 void
 BeesHashTable::set_extent_dirty(HashType hash)
 {
-    if (using_shared_map()) return;
     THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
     auto pr = get_extent_range(hash);
     uint64_t extent_number = reinterpret_cast<Extent *>(pr.first) - m_extent_ptr;
@@ -156,10 +149,8 @@ BeesHashTable::set_extent_dirty(HashType hash)
 void
 BeesHashTable::writeback_loop()
 {
-    if (!using_shared_map()) {
-        while (1) {
-            flush_dirty_extents();
-        }
-    }
+    while (true) {
+        flush_dirty_extents();
+    }
 }
@@ -310,7 +301,6 @@ void
 BeesHashTable::fetch_missing_extent(HashType hash)
 {
     BEESTOOLONG("fetch_missing_extent for hash " << to_hex(hash));
-    if (using_shared_map()) return;
     THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
     auto pr = get_extent_range(hash);
     uint64_t extent_number = reinterpret_cast<Extent *>(pr.first) - m_extent_ptr;
@@ -396,7 +386,6 @@ BeesHashTable::find_cell(HashType hash)
 void
 BeesHashTable::erase_hash_addr(HashType hash, AddrType addr)
 {
-    // if (m_shared) return;
     fetch_missing_extent(hash);
     BEESTOOLONG("erase hash " << to_hex(hash) << " addr " << addr);
     unique_lock<mutex> lock(m_bucket_mutex);
@@ -573,12 +562,6 @@ BeesHashTable::try_mmap_flags(int flags)
     }
 }
 
-void
-BeesHashTable::set_shared(bool shared)
-{
-    m_shared = shared;
-}
-
 void
 BeesHashTable::open_file()
 {
@@ -625,12 +608,6 @@ BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename, off_t
     // Sanity checks to protect the implementation from its weaknesses
     THROW_CHECK2(invalid_argument, BLOCK_SIZE_HASHTAB_BUCKET, BLOCK_SIZE_HASHTAB_EXTENT, (BLOCK_SIZE_HASHTAB_EXTENT % BLOCK_SIZE_HASHTAB_BUCKET) == 0);
 
-    // Does the union work?
-    THROW_CHECK2(runtime_error, m_void_ptr, m_cell_ptr, m_void_ptr == m_cell_ptr);
-    THROW_CHECK2(runtime_error, m_void_ptr, m_byte_ptr, m_void_ptr == m_byte_ptr);
-    THROW_CHECK2(runtime_error, m_void_ptr, m_bucket_ptr, m_void_ptr == m_bucket_ptr);
-    THROW_CHECK2(runtime_error, m_void_ptr, m_extent_ptr, m_void_ptr == m_extent_ptr);
-
     // There's more than one union
     THROW_CHECK2(runtime_error, sizeof(Bucket), BLOCK_SIZE_HASHTAB_BUCKET, BLOCK_SIZE_HASHTAB_BUCKET == sizeof(Bucket));
     THROW_CHECK2(runtime_error, sizeof(Bucket::p_byte), BLOCK_SIZE_HASHTAB_BUCKET, BLOCK_SIZE_HASHTAB_BUCKET == sizeof(Bucket::p_byte));
@@ -655,27 +632,28 @@ BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename, off_t
     BEESLOG("\tflush rate limit " << BEES_FLUSH_RATE);
 
-    if (using_shared_map()) {
-        try_mmap_flags(MAP_SHARED);
-    } else {
-        try_mmap_flags(MAP_PRIVATE | MAP_ANONYMOUS);
-    }
+    // Try to mmap that much memory
+    try_mmap_flags(MAP_PRIVATE | MAP_ANONYMOUS);
 
     if (!m_cell_ptr) {
         THROW_ERRNO("unable to mmap " << filename);
     }
 
-    if (!using_shared_map()) {
-        // madvise fails if MAP_SHARED
-        if (using_any_madvise()) {
-            // DONTFORK because we sometimes do fork,
-            // but the child doesn't touch any of the many, many pages
-            BEESTOOLONG("madvise(MADV_HUGEPAGE | MADV_DONTFORK)");
-            DIE_IF_NON_ZERO(madvise(m_byte_ptr, m_size, MADV_HUGEPAGE | MADV_DONTFORK));
-        }
-        for (uint64_t i = 0; i < m_size / sizeof(Extent); ++i) {
-            m_buckets_missing.insert(i);
-        }
-    }
+    // Do unions work the way we think (and rely on)?
+    THROW_CHECK2(runtime_error, m_void_ptr, m_cell_ptr, m_void_ptr == m_cell_ptr);
+    THROW_CHECK2(runtime_error, m_void_ptr, m_byte_ptr, m_void_ptr == m_byte_ptr);
+    THROW_CHECK2(runtime_error, m_void_ptr, m_bucket_ptr, m_void_ptr == m_bucket_ptr);
+    THROW_CHECK2(runtime_error, m_void_ptr, m_extent_ptr, m_void_ptr == m_extent_ptr);
+
+    // madvise fails if MAP_SHARED
+    if (using_any_madvise()) {
+        // DONTFORK because fork won't end well
+        BEESTOOLONG("madvise(MADV_HUGEPAGE | MADV_DONTFORK)");
+        DIE_IF_NON_ZERO(madvise(m_byte_ptr, m_size, MADV_HUGEPAGE | MADV_DONTFORK));
+    }
+
+    for (uint64_t i = 0; i < m_size / sizeof(Extent); ++i) {
+        m_buckets_missing.insert(i);
+    }
 
     m_writeback_thread.exec([&]() {

src/bees.h

@@ -419,8 +419,6 @@ public:
     void erase_hash_addr(HashType hash, AddrType addr);
     bool push_front_hash_addr(HashType hash, AddrType addr);
 
-    void set_shared(bool shared);
-
 private:
     string m_filename;
     Fd m_fd;
@@ -456,8 +454,6 @@ private:
     LockSet<uint64_t> m_extent_lock_set;
 
-    DefaultBool m_shared;
-
     void open_file();
     void writeback_loop();
     void prefetch_loop();
@@ -469,8 +465,6 @@ private:
     void flush_dirty_extents();
     bool is_toxic_hash(HashType h) const;
 
-    bool using_shared_map() const { return false; }
-
     BeesHashTable(const BeesHashTable &) = delete;
     BeesHashTable &operator=(const BeesHashTable &) = delete;
 };