
hash: remove preloaded toxic hash blacklist

Faster and more reliable toxic extent detection means we can now be much
less paranoid about creating toxic extents.

The paranoia has a significant impact on dedupe hit rates because every
extent that contains even one toxic hash is abandoned.  The preloaded
toxic hashes were chosen because they occur more frequently than any
other block contents in typical filesystem data.  The combination of
these two factors resulted in as much as 30% of duplicate extents being
left untouched.

Remove the preloaded toxic extent blacklist, and rely on the new
kernel-CPU-usage-based workaround instead.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>

Author: Zygo Blaxell  2018-10-31 23:03:01 -04:00
parent 6e6b08ea0e
commit aa74a238b3
2 changed files with 0 additions and 25 deletions
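
Below is a minimal sketch of what a "kernel-CPU-usage-based" toxic check can
look like, included only to illustrate the idea the commit message refers to;
it is not the bees implementation. The helper names and the threshold
(thread_sys_cpu_seconds, resolve_extent_refs, TOXIC_SYS_CPU_LIMIT) are
illustrative assumptions. The point is that an extent is judged toxic by how
much kernel CPU its reference lookup actually burns, rather than by matching
its hash against a preloaded blacklist.

// Sketch only: names and the threshold are assumptions, not bees identifiers.
#include <sys/resource.h>

// System CPU time consumed so far by the calling thread (Linux-specific).
static double thread_sys_cpu_seconds()
{
	struct rusage ru;
	getrusage(RUSAGE_THREAD, &ru);
	return ru.ru_stime.tv_sec + ru.ru_stime.tv_usec / 1e6;
}

// Run an extent-reference lookup (e.g. a LOGICAL_INO ioctl wrapped in a
// callable) and report whether it consumed enough kernel CPU to be treated
// as toxic.
template <class ResolveFn>
bool resolve_is_toxic(ResolveFn resolve_extent_refs)
{
	static constexpr double TOXIC_SYS_CPU_LIMIT = 0.1;	// seconds, assumed
	const double sys_before = thread_sys_cpu_seconds();
	resolve_extent_refs();
	const double sys_after = thread_sys_cpu_seconds();
	return (sys_after - sys_before) > TOXIC_SYS_CPU_LIMIT;
}

A caller would record the result on the extent's address (much as the removed
code below uses BeesAddress::set_toxic()), so later hash-table matches against
that extent can be skipped cheaply without giving up on every other duplicate
in the extent.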

@@ -384,25 +384,9 @@ BeesHashTable::fetch_missing_extent_by_hash(HashType hash)
 	fetch_missing_extent_by_index(extent_index);
 }
 
-bool
-BeesHashTable::is_toxic_hash(BeesHashTable::HashType hash) const
-{
-	return m_toxic_hashes.find(hash) != m_toxic_hashes.end();
-}
-
 vector<BeesHashTable::Cell>
 BeesHashTable::find_cell(HashType hash)
 {
-	// This saves a lot of time prefilling the hash table, and there's no risk of eviction
-	if (is_toxic_hash(hash)) {
-		BEESCOUNT(hash_toxic);
-		BeesAddress toxic_addr(0x1000);
-		toxic_addr.set_toxic();
-		Cell toxic_cell(hash, toxic_addr);
-		vector<Cell> rv;
-		rv.push_back(toxic_cell);
-		return rv;
-	}
 	fetch_missing_extent_by_hash(hash);
 	BEESTOOLONG("find_cell hash " << BeesHash(hash));
 	vector<Cell> rv;
@@ -716,13 +700,6 @@ BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename, off_t
 	catch_all([&]() {
 		m_ctx->blacklist_add(BeesFileId(m_fd));
 	});
-
-	// Skip zero because we already weed that out before it gets near a hash function
-	for (unsigned i = 1; i < 256; ++i) {
-		vector<uint8_t> v(BLOCK_SIZE_SUMS, i);
-		HashType hash = Digest::CRC::crc64(v.data(), v.size());
-		m_toxic_hashes.insert(hash);
-	}
 }
 
 BeesHashTable::~BeesHashTable()

@@ -441,7 +441,6 @@ private:
 	BeesThread m_writeback_thread;
 	BeesThread m_prefetch_thread;
 	RateLimiter m_flush_rate_limit;
-	set<HashType> m_toxic_hashes;
 	BeesStringFile m_stats_file;
 
 	// Mutex/condvar for the writeback thread
@@ -468,7 +467,6 @@ private:
 	void set_extent_dirty_locked(uint64_t extent_index);
 	void flush_dirty_extents();
 	bool flush_dirty_extent(uint64_t extent_index);
-	bool is_toxic_hash(HashType h) const;
 
 	size_t hash_to_extent_index(HashType ht);
 	unique_lock<mutex> lock_extent_by_hash(HashType ht);