diff --git a/src/bees-hash.cc b/src/bees-hash.cc
index 60e60e7..91b669d 100644
--- a/src/bees-hash.cc
+++ b/src/bees-hash.cc
@@ -452,6 +452,34 @@ BeesHashTable::fetch_missing_extent_by_index(uint64_t extent_index)
 			readahead(m_fd, dirty_extent_offset + dirty_extent_size, dirty_extent_size);
 		}
 	});
+
+	Cell *cell     = m_extent_ptr[extent_index    ].p_buckets[0].p_cells;
+	Cell *cell_end = m_extent_ptr[extent_index + 1].p_buckets[0].p_cells;
+	size_t toxic_cleared_count = 0;
+	set<Cell> seen_it(cell, cell_end);
+	while (cell < cell_end) {
+		if (cell->e_addr & BeesAddress::c_toxic_mask) {
+			++toxic_cleared_count;
+			cell->e_addr &= ~BeesAddress::c_toxic_mask;
+			// Clearing the toxic bit might mean we now have a duplicate.
+			// This could be due to a race between two
+			// inserts, one finds the extent toxic while the
+			// other does not. That's arguably a bug elsewhere,
+			// but we should rewrite the whole extent lookup/insert
+			// loop, not spend time fixing code that will be
+			// thrown out later anyway.
+			// If there is a cell that is identical to this one
+			// except for the toxic bit, then we don't need this one.
+			if (seen_it.count(*cell)) {
+				cell->e_addr = 0;
+				cell->e_hash = 0;
+			}
+		}
+		++cell;
+	}
+	if (toxic_cleared_count) {
+		BEESLOGDEBUG("Cleared " << toxic_cleared_count << " hashes while fetching hash table extent " << extent_index);
+	}
 }
 
 void
diff --git a/src/bees.h b/src/bees.h
index 870e2be..ea81116 100644
--- a/src/bees.h
+++ b/src/bees.h
@@ -93,7 +93,7 @@ const double BEES_DEFAULT_THREAD_FACTOR = 1.0;
 const double BEES_TOO_LONG = 5.0;
 
 // Avoid any extent where LOGICAL_INO takes this much kernel CPU time
-const double BEES_TOXIC_SYS_DURATION = 0.1;
+const double BEES_TOXIC_SYS_DURATION = 5.0;
 
 // Maximum number of refs to a single extent before we have other problems
 // If we have more than 10K refs to an extent, adding another will save 0.01% space