mirror of
https://github.com/Zygo/bees.git
synced 2025-06-17 01:56:16 +02:00
hash: remove preloaded toxic hash blacklist
Faster and more reliable toxic extent detection means we can now be much less paranoid about creating toxic extents. The paranoia has a significant impact on dedupe hit rates because every extent that contains even one toxic hash is abandoned. The preloaded toxic hashes were chosen because they occur more frequently than any other block contents in typical filesystem data. The combination of these two factors resulted in as much as 30% of duplicate extents being left untouched. Remove the preloaded toxic extent blacklist, and rely on the new kernel-CPU-usage-based workaround instead. Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
@ -384,25 +384,9 @@ BeesHashTable::fetch_missing_extent_by_hash(HashType hash)
|
||||
fetch_missing_extent_by_index(extent_index);
|
||||
}
|
||||
|
||||
// Report whether `hash` is present in m_toxic_hashes, the set of preloaded
// toxic hashes. Returns true on a match, false otherwise; the set is only
// read here, never modified.
bool
|
||||
BeesHashTable::is_toxic_hash(BeesHashTable::HashType hash) const
|
||||
{
|
||||
return m_toxic_hashes.find(hash) != m_toxic_hashes.end();
|
||||
}
|
||||
|
||||
vector<BeesHashTable::Cell>
|
||||
BeesHashTable::find_cell(HashType hash)
|
||||
{
|
||||
// This saves a lot of time prefilling the hash table, and there's no risk of eviction
|
||||
if (is_toxic_hash(hash)) {
|
||||
BEESCOUNT(hash_toxic);
|
||||
BeesAddress toxic_addr(0x1000);
|
||||
toxic_addr.set_toxic();
|
||||
Cell toxic_cell(hash, toxic_addr);
|
||||
vector<Cell> rv;
|
||||
rv.push_back(toxic_cell);
|
||||
return rv;
|
||||
}
|
||||
fetch_missing_extent_by_hash(hash);
|
||||
BEESTOOLONG("find_cell hash " << BeesHash(hash));
|
||||
vector<Cell> rv;
|
||||
@ -716,13 +700,6 @@ BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename, off_t
|
||||
catch_all([&]() {
|
||||
m_ctx->blacklist_add(BeesFileId(m_fd));
|
||||
});
|
||||
|
||||
// Skip zero because we already weed that out before it gets near a hash function
|
||||
for (unsigned i = 1; i < 256; ++i) {
|
||||
vector<uint8_t> v(BLOCK_SIZE_SUMS, i);
|
||||
HashType hash = Digest::CRC::crc64(v.data(), v.size());
|
||||
m_toxic_hashes.insert(hash);
|
||||
}
|
||||
}
|
||||
|
||||
BeesHashTable::~BeesHashTable()
|
||||
|
Reference in New Issue
Block a user