1
0
mirror of https://github.com/Zygo/bees.git synced 2025-06-17 01:56:16 +02:00

hash: remove preloaded toxic hash blacklist

Faster and more reliable toxic extent detection means we can now be much
less paranoid about creating toxic extents.

The paranoia has significant impact on dedupe hit rates because every
extent that contains even one toxic hash is abandoned.  The preloaded
toxic hashes were chosen because they occur more frequently than any
other block contents in typical filesystem data.  The combination of these
resulted in as much as 30% of duplicate extents being left untouched.

Remove the preloaded toxic extent blacklist, and rely on the new
kernel-CPU-usage-based workaround instead.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
Zygo Blaxell
2018-10-31 23:03:01 -04:00
parent 6e6b08ea0e
commit aa74a238b3
2 changed files with 0 additions and 25 deletions

View File

@ -384,25 +384,9 @@ BeesHashTable::fetch_missing_extent_by_hash(HashType hash)
fetch_missing_extent_by_index(extent_index);
}
bool
BeesHashTable::is_toxic_hash(BeesHashTable::HashType hash) const
{
return m_toxic_hashes.find(hash) != m_toxic_hashes.end();
}
vector<BeesHashTable::Cell>
BeesHashTable::find_cell(HashType hash)
{
// This saves a lot of time prefilling the hash table, and there's no risk of eviction
if (is_toxic_hash(hash)) {
BEESCOUNT(hash_toxic);
BeesAddress toxic_addr(0x1000);
toxic_addr.set_toxic();
Cell toxic_cell(hash, toxic_addr);
vector<Cell> rv;
rv.push_back(toxic_cell);
return rv;
}
fetch_missing_extent_by_hash(hash);
BEESTOOLONG("find_cell hash " << BeesHash(hash));
vector<Cell> rv;
@ -716,13 +700,6 @@ BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename, off_t
catch_all([&]() {
m_ctx->blacklist_add(BeesFileId(m_fd));
});
// Skip zero because we already weed that out before it gets near a hash function
for (unsigned i = 1; i < 256; ++i) {
vector<uint8_t> v(BLOCK_SIZE_SUMS, i);
HashType hash = Digest::CRC::crc64(v.data(), v.size());
m_toxic_hashes.insert(hash);
}
}
BeesHashTable::~BeesHashTable()