mirror of https://github.com/Zygo/bees.git
scan_one_extent: eliminate nuisance dedupes, drop caches after reading data
A laundry list of problems fixed:

* Track which physical blocks have been read recently without making
  any changes, and don't read them again.

* Separate dedupe, split, and hole-punching operations into distinct
  planning and execution phases.

* Keep the longest dedupe from overlapping dedupe matches, and flatten
  them into non-overlapping operations.

* Don't scan extents that have blocks already in the hash table. We
  can't (yet) touch such an extent without making unreachable space.
  Let them go.

* Give better information in the scan summary visualization: show
  dedupe range start and end points (<ddd>), matching blocks (=),
  copy blocks (+), zero blocks (0), inserted blocks (.), unresolved
  match blocks (M), should-have-been-inserted-but-for-some-reason-wasn't
  blocks (i), and there's-a-bug-we-didn't-do-this-one blocks (#).

* Drop cached data from extents that have been inserted into the hash
  table without modification.

* Rewrite the hole punching for uncompressed extents, which apparently
  hasn't worked properly since the beginning.

Nuisance dedupe elimination:

* Don't do more than 100 dedupe, copy, or hole-punch operations per
  extent ref.

* Don't split an extent or punch a hole unless dedupe would save at
  least half of the extent ref's size.

* Write a "skip:" summary showing the planned work when nuisance
  dedupe elimination decides to skip an extent.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
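Two of the items above are worth illustrating before reading the diff. The first is the "don't read them again" tracking: extent refs that were read and inserted into the hash table without modification are remembered, so they are not read again. Below is a minimal standalone C++ sketch of that idea, not the code from this commit; the names SeenRange, SeenTracker, and max_size are invented for illustration. The patch itself uses a BeesSeenRange keyed on (bytenr, offset, length) in a static, mutex-guarded std::set that is cleared outright when it exceeds BEES_MAX_EXTENT_REF_COUNT:

    #include <cstdint>
    #include <mutex>
    #include <set>
    #include <tuple>

    // One recently scanned extent ref: the physical extent (bytenr)
    // plus the ref's offset and length within it.
    struct SeenRange {
        uint64_t bytenr;
        int64_t offset;
        int64_t length;
        bool operator<(const SeenRange &o) const {
            return std::tie(bytenr, offset, length) <
                   std::tie(o.bytenr, o.offset, o.length);
        }
    };

    // Remembers refs that were scanned but not modified, so the scanner
    // can skip re-reading them.  Cleared wholesale when it grows too
    // large, trading hit rate for a hard memory bound.
    class SeenTracker {
        std::mutex m_mutex;
        std::set<SeenRange> m_seen;
        const size_t m_max_size;
    public:
        explicit SeenTracker(size_t max_size) : m_max_size(max_size) {}

        // True if r was already seen; the caller skips the read.
        bool seen(const SeenRange &r) {
            std::lock_guard<std::mutex> lock(m_mutex);
            if (m_seen.size() > m_max_size) {
                m_seen.clear();
            }
            return m_seen.count(r) != 0;
        }

        // Record a ref that was scanned without modification.
        void insert(const SeenRange &r) {
            std::lock_guard<std::mutex> lock(m_mutex);
            m_seen.insert(r);
        }

        // Forget a ref that was just rewritten; it must be scanned again.
        void erase(const SeenRange &r) {
            std::lock_guard<std::mutex> lock(m_mutex);
            m_seen.erase(r);
        }
    };

The second is the flattening of overlapping dedupe matches into non-overlapping operations. Again a sketch under simplifying assumptions, using plain half-open intervals instead of BeesRangePair (Range and flatten are invented names): sort candidates longest-first, then clip each shorter range against the ones already kept, splitting a range in two when a kept range lands in its middle.

    #include <cstdint>
    #include <list>

    struct Range {
        int64_t begin, end;   // half-open [begin, end)
        int64_t size() const { return end - begin; }
    };

    std::list<Range> flatten(std::list<Range> in) {
        // Longest first, so every kept range wins against anything it clips
        in.sort([](const Range &a, const Range &b) { return a.size() > b.size(); });
        std::list<Range> out;
        // std::list iteration tolerates push_back during the loop, which
        // is how the split case queues its leftover right-hand piece.
        for (auto it = in.begin(); it != in.end(); ++it) {
            Range i = *it;
            bool keep = true;
            for (const Range &j : out) {
                if (j.end <= i.begin || j.begin >= i.end) {
                    continue;         // no overlap with this kept range
                }
                if (j.begin <= i.begin && j.end >= i.end) {
                    keep = false;     // j swallows i entirely
                    break;
                }
                if (i.begin < j.begin && i.end > j.end) {
                    // i straddles j: keep the left part now, queue the right part
                    in.push_back(Range{j.end, i.end});
                    i.end = j.begin;
                    continue;
                }
                if (i.begin < j.begin) {
                    i.end = j.begin;  // clip i's tail
                } else {
                    i.begin = j.end;  // clip i's head
                }
                if (i.size() <= 0) {
                    keep = false;
                    break;
                }
            }
            if (keep && i.size() > 0) {
                out.push_back(i);
            }
        }
        return out;
    }

Sorting longest-first is what makes "keep the longest dedupe" fall out naturally: a shorter match can only lose territory to a longer one, never the other way around. The planned dedupe, copy, and hole-punch byte totals are then compared before anything is executed, which is where the nuisance-dedupe skip decision happens.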
This commit is contained in:
parent 97eab9655c
commit 24b08ef7b7
src/bees-context.cc

@@ -264,6 +264,7 @@ BeesContext::rewrite_file_range(const BeesFileRange &bfr)
 	// BEESLOG("BeesResolver br(..., " << bfr << ")");
 	BEESTRACE("BeesContext::rewrite_file_range calling BeesResolver " << bfr);
 	BeesResolver br(m_ctx, BeesAddress(bfr.fd(), bfr.begin()));
+	BEESTRACE("BeesContext::rewrite_file_range calling replace_src " << dup_bbd);
 	// BEESLOG("\treplace_src " << dup_bbd);
 	br.replace_src(dup_bbd);
 	BEESCOUNT(scan_rewrite);
@@ -291,13 +292,35 @@ BeesContext::rewrite_file_range(const BeesFileRange &bfr)
 	}
 }
 
-BeesFileRange
+struct BeesSeenRange {
+	uint64_t bytenr;
+	off_t offset;
+	off_t length;
+};
+
+static
+bool
+operator<(const BeesSeenRange &bsr1, const BeesSeenRange &bsr2)
+{
+	return tie(bsr1.bytenr, bsr1.offset, bsr1.length) < tie(bsr2.bytenr, bsr2.offset, bsr2.length);
+}
+
+static
+__attribute__((unused))
+ostream&
+operator<<(ostream &os, const BeesSeenRange &tup)
+{
+	return os << "BeesSeenRange { " << to_hex(tup.bytenr) << ", " << to_hex(tup.offset) << "+" << pretty(tup.length) << " }";
+}
+
+void
 BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 {
 	BEESNOTE("Scanning " << pretty(e.size()) << " "
 		<< to_hex(e.begin()) << ".." << to_hex(e.end())
 		<< " " << name_fd(bfr.fd()) );
 	BEESTRACE("scan extent " << e);
 	BEESTRACE("scan bfr " << bfr);
 	BEESCOUNT(scan_extent);
 
 	// We keep moving this method around
@@ -319,7 +342,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 	if (e.flags() & Extent::HOLE) {
 		// Nothing here, dispose of this early
 		BEESCOUNT(scan_hole);
-		return bfr;
+		return;
 	}
 
 	if (e.flags() & Extent::PREALLOC) {
@@ -338,38 +361,57 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 			if (m_ctx->dedup(brp)) {
 				BEESCOUNT(dedup_prealloc_hit);
 				BEESCOUNTADD(dedup_prealloc_bytes, e.size());
-				return bfr;
+				return;
 			} else {
 				BEESCOUNT(dedup_prealloc_miss);
 			}
 		}
 	}
 
+	// If we already read this extent and inserted it into the hash table, no need to read it again
+	static mutex s_seen_mutex;
+	unique_lock<mutex> lock_seen(s_seen_mutex);
+	const BeesSeenRange tup = {
+		.bytenr = e.bytenr(),
+		.offset = e.offset(),
+		.length = e.size(),
+	};
+	static set<BeesSeenRange> s_seen;
+	if (s_seen.size() > BEES_MAX_EXTENT_REF_COUNT) {
+		s_seen.clear();
+		BEESCOUNT(scan_seen_clear);
+	}
+	const auto seen_rv = s_seen.find(tup) != s_seen.end();
+	if (!seen_rv) {
+		BEESCOUNT(scan_seen_miss);
+	} else {
+		// BEESLOGDEBUG("Skip " << tup << " " << e);
+		BEESCOUNT(scan_seen_hit);
+		return;
+	}
+	lock_seen.unlock();
+
 	// OK we need to read extent now
 	bees_readahead(bfr.fd(), bfr.begin(), bfr.size());
 
 	map<off_t, pair<BeesHash, BeesAddress>> insert_map;
-	set<off_t> noinsert_set;
-
-	// Hole handling
-	bool extent_compressed = e.flags() & FIEMAP_EXTENT_ENCODED;
-	bool extent_contains_zero = false;
-	bool extent_contains_nonzero = false;
-
-	// Need to replace extent
-	bool rewrite_extent = false;
+	set<off_t> dedupe_set;
+	set<off_t> zero_set;
 
 	// Pretty graphs
 	off_t block_count = ((e.size() + BLOCK_MASK_SUMS) & ~BLOCK_MASK_SUMS) / BLOCK_SIZE_SUMS;
 	BEESTRACE(e << " block_count " << block_count);
 	string bar(block_count, '#');
 
-	off_t next_p = e.begin();
-	for (off_t p = e.begin(); p < e.end(); p += BLOCK_SIZE_SUMS) {
+	// List of dedupes found
+	list<BeesRangePair> dedupe_list;
+	list<BeesFileRange> copy_list;
+	list<pair<BeesHash, BeesAddress>> front_hash_list;
+	list<uint64_t> invalidate_addr_list;
+
+	for (off_t next_p = e.begin(); next_p < e.end(); ) {
+
+		// Guarantee forward progress
+		off_t p = next_p;
+		next_p += BLOCK_SIZE_SUMS;
 
-		off_t bar_p = (p - e.begin()) / BLOCK_SIZE_SUMS;
+		const off_t bar_p = (p - e.begin()) / BLOCK_SIZE_SUMS;
 		BeesAddress addr(e, p);
 
 		// This extent should consist entirely of non-magic blocks
@@ -384,41 +426,29 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 
 		// Calculate the hash first because it lets us shortcut on is_data_zero
 		BEESNOTE("scan hash " << bbd);
-		BeesHash hash = bbd.hash();
+		const BeesHash hash = bbd.hash();
+
+		// Weed out zero blocks
+		BEESNOTE("is_data_zero " << bbd);
+		const bool data_is_zero = bbd.is_data_zero();
+		if (data_is_zero) {
+			bar.at(bar_p) = '0';
+			zero_set.insert(p);
+			BEESCOUNT(scan_zero);
+			continue;
+		}
 
 		// Schedule this block for insertion if we decide to keep this extent.
 		BEESCOUNT(scan_hash_preinsert);
 		BEESTRACE("Pushing hash " << hash << " addr " << addr << " bbd " << bbd);
 		insert_map.insert(make_pair(p, make_pair(hash, addr)));
-		bar.at(bar_p) = 'R';
+		bar.at(bar_p) = 'i';
 
-		// Weed out zero blocks
-		BEESNOTE("is_data_zero " << bbd);
-		bool extent_is_zero = bbd.is_data_zero();
-		if (extent_is_zero) {
-			bar.at(bar_p) = '0';
-			if (extent_compressed) {
-				if (!extent_contains_zero) {
-					// BEESLOG("compressed zero bbd " << bbd << "\n\tin extent " << e);
-				}
-				extent_contains_zero = true;
-				// Do not attempt to lookup hash of zero block
-				continue;
-			} else {
-				BEESLOGINFO("zero bbd " << bbd << "\n\tin extent " << e);
-				BEESCOUNT(scan_zero_uncompressed);
-				rewrite_extent = true;
-				break;
-			}
-		} else {
-			if (extent_contains_zero && !extent_contains_nonzero) {
-				// BEESLOG("compressed nonzero bbd " << bbd << "\n\tin extent " << e);
-			}
-			extent_contains_nonzero = true;
-		}
+		// Ensure we fill in the entire insert_map without skipping any non-zero blocks
+		if (p < next_p) continue;
 
 		BEESNOTE("lookup hash " << bbd);
-		auto found = hash_table->find_cell(hash);
+		const auto found = hash_table->find_cell(hash);
 		BEESCOUNT(scan_lookup);
 
 		set<BeesResolver> resolved_addrs;
@@ -429,7 +459,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 		// are at least two distinct addresses to look at.
 		found_addrs.insert(addr);
 
-		for (auto i : found) {
+		for (const auto &i : found) {
 			BEESTRACE("found (hash, address): " << i);
 			BEESCOUNT(scan_found);
 
@@ -438,15 +468,21 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 
 			BeesAddress found_addr(i.e_addr);
 
-			// If address already in hash table, move on to next extent.
-			// We've already seen this block and may have made additional references to it.
-			// The current extent is effectively "pinned" and can't be modified any more.
+#if 0
+			// Only extents that are scanned but not modified are inserted, so if there's
+			// a matching hash:address pair in the hash table:
+			// 1. We have already scanned this extent.
+			// 2. We may have already created references to this extent.
+			// 3. We won't scan this extent again.
+			// The current extent is effectively "pinned" and can't be modified
+			// without rescanning all the existing references.
 			if (found_addr.get_physical_or_zero() == addr.get_physical_or_zero()) {
 				// No log message because this happens to many thousands of blocks
 				// when bees is interrupted.
 				// BEESLOGDEBUG("Found matching hash " << hash << " at same address " << addr << ", skipping " << bfr);
 				BEESCOUNT(scan_already);
-				return bfr;
+				return;
 			}
+#endif
 
 			// Block must have matching EOF alignment
 			if (found_addr.is_unaligned_eof() != addr.is_unaligned_eof()) {
@@ -467,7 +503,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 				// Extents may become non-toxic so give them a chance to expire.
 				// hash_table->push_front_hash_addr(hash, found_addr);
 				BEESCOUNT(scan_toxic_hash);
-				return bfr;
+				return;
 			}
 
 			// Distinct address, go resolve it
@@ -488,8 +524,8 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 					abandon_extent = true;
 				} else if (!resolved.count()) {
 					BEESCOUNT(scan_resolve_zero);
-					// Didn't find anything, address is dead
-					BEESTRACE("matched hash " << hash << " addr " << addr << " count zero");
+					// Didn't find a block at the table address, address is dead
+					BEESLOGDEBUG("Erasing stale addr " << addr << " hash " << hash);
 					hash_table->erase_hash_addr(hash, found_addr);
 				} else {
 					resolved_addrs.insert(resolved);
@@ -498,7 +534,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 			});
 
 			if (abandon_extent) {
-				return bfr;
+				return;
 			}
 		}
 
@@ -510,7 +546,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 			BEESCOUNT(matched_2_or_more);
 		}
 
-		// No need to do all this unless there are two or more distinct matches
+		// No need to do all this unless there are one or more distinct matches
 		if (!resolved_addrs.empty()) {
 			bar.at(bar_p) = 'M';
 			BEESCOUNT(matched_1_or_more);
@@ -519,149 +555,307 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 
 			BeesFileRange replaced_bfr;
 
-			BeesAddress last_replaced_addr;
 			for (auto it = resolved_addrs.begin(); it != resolved_addrs.end(); ++it) {
-				// FIXME: Need to terminate this loop on replace_dst exception condition
-				// catch_all([&]() {
-					auto it_copy = *it;
-					BEESNOTE("finding one match (out of " << it_copy.count() << ") at " << it_copy.addr() << " for " << bbd);
-					BEESTRACE("finding one match (out of " << it_copy.count() << ") at " << it_copy.addr() << " for " << bbd);
-					replaced_bfr = it_copy.replace_dst(bbd);
-					BEESTRACE("next_p " << to_hex(next_p) << " -> replaced_bfr " << replaced_bfr);
-
-					// If we didn't find this hash where the hash table said it would be,
-					// correct the hash table.
-					if (it_copy.found_hash()) {
-						BEESCOUNT(scan_hash_hit);
-					} else {
-						// BEESLOGDEBUG("erase src hash " << hash << " addr " << it_copy.addr());
-						BEESCOUNT(scan_hash_miss);
-						hash_table->erase_hash_addr(hash, it_copy.addr());
-					}
-
-					if (it_copy.found_dup()) {
-						BEESCOUNT(scan_dup_hit);
-
-						// FIXME: we will thrash if we let multiple references to identical blocks
-						// exist in the hash table. Erase all but the last one.
-						if (last_replaced_addr) {
-							BEESLOGINFO("Erasing redundant hash " << hash << " addr " << last_replaced_addr);
-							hash_table->erase_hash_addr(hash, last_replaced_addr);
-							BEESCOUNT(scan_erase_redundant);
-						}
-						last_replaced_addr = it_copy.addr();
-
-						// Invalidate resolve cache so we can count refs correctly
-						m_ctx->invalidate_addr(it_copy.addr());
-						m_ctx->invalidate_addr(bbd.addr());
-
-						// Remove deduped blocks from insert map
-						THROW_CHECK0(runtime_error, replaced_bfr);
-						for (off_t ip = replaced_bfr.begin(); ip < replaced_bfr.end(); ip += BLOCK_SIZE_SUMS) {
-							BEESCOUNT(scan_dup_block);
-							noinsert_set.insert(ip);
-							if (ip >= e.begin() && ip < e.end()) {
-								off_t bar_p = (ip - e.begin()) / BLOCK_SIZE_SUMS;
-								bar.at(bar_p) = 'd';
-							}
-						}
-
-						// next_p may be past EOF so check p only
-						THROW_CHECK2(runtime_error, p, replaced_bfr, p < replaced_bfr.end());
-
-						BEESCOUNT(scan_bump);
-						next_p = replaced_bfr.end();
-					} else {
-						BEESCOUNT(scan_dup_miss);
-					}
-				// });
-			}
-			if (last_replaced_addr) {
-				// If we replaced extents containing the incoming addr,
-				// push the addr we kept to the front of the hash LRU.
-				hash_table->push_front_hash_addr(hash, last_replaced_addr);
-				BEESCOUNT(scan_push_front);
+				auto it_copy = *it;
+				BEESNOTE("finding one match (out of " << it_copy.count() << ") at " << it_copy.addr() << " for " << bbd);
+				BEESTRACE("finding one match (out of " << it_copy.count() << ") at " << it_copy.addr() << " for " << bbd);
+				auto replaced_brp = it_copy.replace_dst(bbd);
+				replaced_bfr = replaced_brp.second;
+				BEESTRACE("next_p " << to_hex(next_p) << " -> replaced_bfr " << replaced_bfr);
+
+				// If we did find a block, but not this hash, correct the hash table
+				if (it_copy.found_hash()) {
+					BEESCOUNT(scan_hash_hit);
+				} else {
+					BEESLOGDEBUG("Erasing stale hash " << hash << " addr " << it_copy.addr());
+					hash_table->erase_hash_addr(hash, it_copy.addr());
+					BEESCOUNT(scan_hash_miss);
+				}
+
+				if (it_copy.found_dup()) {
+					THROW_CHECK0(runtime_error, replaced_bfr);
+					BEESCOUNT(scan_dup_hit);
+
+					// Save this match. If a better match is found later,
+					// it will be replaced. (FIXME: not really...)
+					dedupe_list.push_back(replaced_brp);
+
+					// Push matching block to front of LRU
+					front_hash_list.push_back(make_pair(hash, it_copy.addr()));
+
+					off_t bar_p = (p - e.begin()) / BLOCK_SIZE_SUMS;
+					bar.at(bar_p) = '=';
+
+					// Invalidate resolve cache so we can count refs correctly
+					invalidate_addr_list.push_back(it_copy.addr());
+					invalidate_addr_list.push_back(bbd.addr());
+
+					// next_p may be past EOF so check p only
+					THROW_CHECK2(runtime_error, p, replaced_bfr, p < replaced_bfr.end());
+
+					// We may find duplicate ranges of various lengths, so make sure
+					// we don't pick a smaller one
+					next_p = max(next_p, replaced_bfr.end());
+				} else {
+					BEESCOUNT(scan_dup_miss);
+				}
 			}
 		} else {
 			BEESCOUNT(matched_0);
 		}
 	}
 
-	// If the extent was compressed and all zeros, nuke entire thing
-	if (!rewrite_extent && (extent_contains_zero && !extent_contains_nonzero)) {
-		rewrite_extent = true;
-		BEESCOUNT(scan_zero_compressed);
-	}
-
-	// If we deduped any blocks then we must rewrite the remainder of the extent
-	if (!noinsert_set.empty()) {
-		rewrite_extent = true;
-	}
+	bool force_insert = false;
+
+	// We don't want to punch holes into compressed extents, unless:
+	// 1. There was dedupe of non-zero blocks, so we always have to copy the rest of the extent
+	// 2. The entire extent is zero and the whole thing can be replaced with a single hole
+	const bool extent_compressed = e.flags() & FIEMAP_EXTENT_ENCODED;
+	if (extent_compressed && dedupe_list.empty() && !insert_map.empty()) {
+		// BEESLOGDEBUG("Compressed extent with non-zero data and no dedupe, skipping");
+		BEESCOUNT(scan_compressed_no_dedup);
+		force_insert = true;
+	}
+
+	// FIXME: dedupe_list contains a lot of overlapping matches. Get rid of all but one.
+	list<BeesRangePair> dedupe_list_out;
+	dedupe_list.sort([](const BeesRangePair &a, const BeesRangePair &b) {
+		return b.second.size() < a.second.size();
+	});
+	// Shorten each dedupe brp by removing any overlap with earlier (longer) extents in list
+	for (auto i : dedupe_list) {
+		bool insert_i = true;
+		BEESTRACE("i = " << i << " insert_i " << insert_i);
+		for (const auto &j : dedupe_list_out) {
+			BEESTRACE("j = " << j);
+			// No overlap, try next one
+			if (j.second.end() <= i.second.begin() || j.second.begin() >= i.second.end()) {
+				continue;
+			}
+			// j fully overlaps or is the same as i, drop i
+			if (j.second.begin() <= i.second.begin() && j.second.end() >= i.second.end()) {
+				insert_i = false;
+				break;
+			}
+			// i begins outside j, i ends inside j, remove the end of i
+			if (i.second.end() > j.second.begin() && i.second.begin() <= j.second.begin()) {
+				const auto delta = i.second.end() - j.second.begin();
+				if (delta == i.second.size()) {
+					insert_i = false;
+					break;
+				}
+				i.shrink_end(delta);
+				continue;
+			}
+			// i begins inside j, ends outside j, remove the begin of i
+			if (i.second.begin() < j.second.end() && i.second.end() >= j.second.end()) {
+				const auto delta = j.second.end() - i.second.begin();
+				if (delta == i.second.size()) {
+					insert_i = false;
+					break;
+				}
+				i.shrink_begin(delta);
+				continue;
+			}
+			// i fully overlaps j, split i into two parts, push the other part onto dedupe_list
+			if (j.second.begin() > i.second.begin() && j.second.end() < i.second.end()) {
+				auto other_i = i;
+				const auto end_left_delta = i.second.end() - j.second.begin();
+				const auto begin_right_delta = i.second.begin() - j.second.end();
+				i.shrink_end(end_left_delta);
+				other_i.shrink_begin(begin_right_delta);
+				dedupe_list.push_back(other_i);
+				continue;
+			}
+			// None of the sbove. Oops!
+			THROW_CHECK0(runtime_error, false);
+		}
+		if (insert_i) {
+			dedupe_list_out.push_back(i);
+		}
+	}
+	dedupe_list = dedupe_list_out;
+	dedupe_list_out.clear();
+
+	// Count total dedupes
+	uint64_t bytes_deduped = 0;
+	for (const auto &i : dedupe_list) {
+		// Remove deduped blocks from insert map and zero map
+		for (off_t ip = i.second.begin(); ip < i.second.end(); ip += BLOCK_SIZE_SUMS) {
+			BEESCOUNT(scan_dup_block);
+			dedupe_set.insert(ip);
+			zero_set.erase(ip);
+		}
+		bytes_deduped += i.second.size();
+	}
 
-	// If we need to replace part of the extent, rewrite all instances of it
-	if (rewrite_extent) {
-		bool blocks_rewritten = false;
+	// Copy all blocks of the extent that were not deduped or zero, but don't copy an entire extent
+	uint64_t bytes_zeroed = 0;
+	if (!force_insert) {
 		BEESTRACE("Rewriting extent " << e);
 		off_t last_p = e.begin();
 		off_t p = last_p;
-		off_t next_p;
+		off_t next_p = last_p;
 		BEESTRACE("next_p " << to_hex(next_p) << " p " << to_hex(p) << " last_p " << to_hex(last_p));
 		for (next_p = e.begin(); next_p < e.end(); ) {
 			p = next_p;
-			next_p += BLOCK_SIZE_SUMS;
+			next_p = min(next_p + BLOCK_SIZE_SUMS, e.end());
 
-			// BEESLOG("noinsert_set.count(" << to_hex(p) << ") " << noinsert_set.count(p));
-			if (noinsert_set.count(p)) {
+			// Can't be both dedupe and zero
+			THROW_CHECK2(runtime_error, zero_set.count(p), dedupe_set.count(p), zero_set.count(p) + dedupe_set.count(p) < 2);
+			if (zero_set.count(p)) {
+				bytes_zeroed += next_p - p;
+			}
+			// BEESLOG("dedupe_set.count(" << to_hex(p) << ") " << dedupe_set.count(p));
+			if (dedupe_set.count(p)) {
 				if (p - last_p > 0) {
-					rewrite_file_range(BeesFileRange(bfr.fd(), last_p, p));
-					blocks_rewritten = true;
+					THROW_CHECK2(runtime_error, p, e.end(), p <= e.end());
+					copy_list.push_back(BeesFileRange(bfr.fd(), last_p, p));
 				}
 				last_p = next_p;
-			} else {
-				off_t bar_p = (p - e.begin()) / BLOCK_SIZE_SUMS;
-				bar.at(bar_p) = '+';
 			}
 		}
 		BEESTRACE("last");
-		if (next_p - last_p > 0) {
-			rewrite_file_range(BeesFileRange(bfr.fd(), last_p, next_p));
-			blocks_rewritten = true;
-		}
-		if (blocks_rewritten) {
-			// Nothing left to insert, all blocks clobbered
-			insert_map.clear();
-		} else {
-			// BEESLOG("No blocks rewritten");
-			BEESCOUNT(scan_no_rewrite);
-		}
-	}
-
-	// We did not rewrite the extent and it contained data, so insert it.
-	for (auto i : insert_map) {
-		off_t bar_p = (i.first - e.begin()) / BLOCK_SIZE_SUMS;
-		BEESTRACE("e " << e << "bar_p = " << bar_p << " i.first-e.begin() " << i.first - e.begin() << " i.second " << i.second.first << ", " << i.second.second);
-		if (noinsert_set.count(i.first)) {
-			// FIXME: we removed one reference to this copy. Avoid thrashing?
-			hash_table->erase_hash_addr(i.second.first, i.second.second);
-			// Block was clobbered, do not insert
-			// Will look like 'Ddddd' because we skip deduped blocks
-			bar.at(bar_p) = 'D';
-			BEESCOUNT(inserted_clobbered);
-		} else {
-			hash_table->push_random_hash_addr(i.second.first, i.second.second);
-			bar.at(bar_p) = '.';
-			BEESCOUNT(inserted_block);
-		}
-	}
+		if (next_p > last_p) {
+			THROW_CHECK2(runtime_error, next_p, e.end(), next_p <= e.end());
+			copy_list.push_back(BeesFileRange(bfr.fd(), last_p, next_p));
+		}
+	}
+
+	// Don't copy an entire extent
+	if (!bytes_zeroed && copy_list.size() == 1 && copy_list.begin()->size() == e.size()) {
+		copy_list.clear();
+	}
+
+	// Count total copies
+	uint64_t bytes_copied = 0;
+	for (const auto &i : copy_list) {
+		bytes_copied += i.size();
+	}
+
+	BEESTRACE("bar: " << bar);
+
+	// Don't do nuisance dedupes part 1: free more blocks than we create
+	THROW_CHECK3(runtime_error, bytes_copied, bytes_zeroed, bytes_deduped, bytes_copied >= bytes_zeroed);
+	const auto cost_copy = bytes_copied - bytes_zeroed;
+	const auto gain_dedupe = bytes_deduped + bytes_zeroed;
+	if (cost_copy > gain_dedupe) {
+		BEESLOGDEBUG("Too many bytes copied (" << pretty(bytes_copied) << ") for bytes deduped (" << pretty(bytes_deduped) << ") and holes punched (" << pretty(bytes_zeroed) << "), skipping extent");
+		BEESCOUNT(scan_skip_bytes);
+		force_insert = true;
+	}
+
+	// Don't do nuisance dedupes part 2: nobody needs more than 100 dedupe/copy ops in one extent
+	if (dedupe_list.size() + copy_list.size() > 100) {
+		BEESLOGDEBUG("Too many dedupe (" << dedupe_list.size() << ") and copy (" << copy_list.size() << ") operations, skipping extent");
+		BEESCOUNT(scan_skip_ops);
+		force_insert = true;
+	}
+
+	// Track whether we rewrote anything
+	bool extent_modified = false;
+
+	// If we didn't delete the dedupe list, do the dedupes now
+	for (const auto &i : dedupe_list) {
+		BEESNOTE("dedup " << i);
+		if (force_insert || m_ctx->dedup(i)) {
+			BEESCOUNT(replacedst_dedup_hit);
+			THROW_CHECK0(runtime_error, i.second);
+			for (off_t ip = i.second.begin(); ip < i.second.end(); ip += BLOCK_SIZE_SUMS) {
+				if (ip >= e.begin() && ip < e.end()) {
+					off_t bar_p = (ip - e.begin()) / BLOCK_SIZE_SUMS;
+					if (bar.at(bar_p) != '=') {
+						if (ip == i.second.begin()) {
+							bar.at(bar_p) = '<';
+						} else if (ip + BLOCK_SIZE_SUMS >= i.second.end()) {
+							bar.at(bar_p) = '>';
+						} else {
+							bar.at(bar_p) = 'd';
+						}
+					}
+				}
+			}
+			extent_modified = !force_insert;
+		} else {
+			BEESLOGINFO("dedup failed: " << i);
+			BEESCOUNT(replacedst_dedup_miss);
+			// User data changed while we were looking up the extent, or we have a bug.
+			// We can't fix this, but we can immediately stop wasting effort.
+			return;
+		}
+	}
+
+	// Then the copy/rewrites
+	for (const auto &i : copy_list) {
+		if (!force_insert) {
+			rewrite_file_range(i);
+			extent_modified = true;
+		}
+		for (auto p = i.begin(); p < i.end(); p += BLOCK_SIZE_SUMS) {
+			off_t bar_p = (p - e.begin()) / BLOCK_SIZE_SUMS;
+			// Leave zeros as-is because they aren't really copies
+			if (bar.at(bar_p) != '0') {
+				bar.at(bar_p) = '+';
+			}
+		}
+	}
+
+	if (!force_insert) {
+		// Push matched hashes to front
+		for (const auto &i : front_hash_list) {
+			hash_table->push_front_hash_addr(i.first, i.second);
+			BEESCOUNT(scan_push_front);
+		}
+		// Invalidate cached resolves
+		for (const auto &i : invalidate_addr_list) {
+			m_ctx->invalidate_addr(i);
+		}
+	}
+
+	// Don't insert hashes pointing to an extent we just deleted
+	if (!extent_modified) {
+		// We did not rewrite the extent and it contained data, so insert it.
+		// BEESLOGDEBUG("Inserting " << insert_map.size() << " hashes from " << bfr);
+		for (const auto &i : insert_map) {
+			hash_table->push_random_hash_addr(i.second.first, i.second.second);
+			off_t bar_p = (i.first - e.begin()) / BLOCK_SIZE_SUMS;
+			if (bar.at(bar_p) == 'i') {
+				bar.at(bar_p) = '.';
+			}
+			BEESCOUNT(scan_hash_insert);
+		}
+	}
 
 	// Visualize
 	if (bar != string(block_count, '.')) {
-		BEESLOGINFO("scan: " << pretty(e.size()) << " " << to_hex(e.begin()) << " [" << bar << "] " << to_hex(e.end()) << ' ' << name_fd(bfr.fd()));
+		BEESLOGINFO(
+			(force_insert ? "skip" : "scan") << ": "
+			<< pretty(e.size()) << " "
+			<< dedupe_list.size() << "d" << copy_list.size() << "c"
+			<< ((bytes_zeroed + BLOCK_SIZE_SUMS - 1) / BLOCK_SIZE_SUMS) << "p {"
+			<< to_hex(e.bytenr()) << "+" << to_hex(e.offset()) << "} "
+			<< to_hex(e.begin()) << " [" << bar << "] " << to_hex(e.end())
+			<< ' ' << name_fd(bfr.fd())
+		);
 	}
 
-	// Costs 10% on benchmarks
-	// bees_unreadahead(bfr.fd(), bfr.begin(), bfr.size());
-	return bfr;
+	// Put this extent into the recently seen list if we didn't rewrite it,
+	// and remove it if we did.
+	lock_seen.lock();
+	if (extent_modified) {
+		s_seen.erase(tup);
+		BEESCOUNT(scan_seen_erase);
+	} else {
+		// BEESLOGDEBUG("Seen " << tup << " " << e);
+		s_seen.insert(tup);
+		BEESCOUNT(scan_seen_insert);
+	}
+	lock_seen.unlock();
+
+	// Still hurts benchmarks...or does it?
+	bees_unreadahead(bfr.fd(), bfr.begin(), bfr.size());
 }
 
 shared_ptr<Exclusion>
@@ -725,6 +919,7 @@ BeesContext::scan_forward(const BeesFileRange &bfr_in)
 		}
 		Timer one_extent_timer;
 		scan_one_extent(bfr, e);
+		// BEESLOGDEBUG("Scanned " << e << " " << bfr);
 		BEESCOUNTADD(scanf_extent_ms, one_extent_timer.age() * 1000);
 		BEESCOUNT(scanf_extent);
 	});
src/bees-resolver.cc

@@ -384,7 +384,7 @@ BeesResolver::for_each_extent_ref(BeesBlockData bbd, function<bool(const BeesFileRange &)>
 	return stop_now;
 }
 
-BeesFileRange
+BeesRangePair
 BeesResolver::replace_dst(const BeesFileRange &dst_bfr_in)
 {
 	BEESTRACE("replace_dst dst_bfr " << dst_bfr_in);
@@ -400,6 +400,7 @@ BeesResolver::replace_dst(const BeesFileRange &dst_bfr_in)
 	BEESTRACE("overlap_bfr " << overlap_bfr);
 
 	BeesBlockData bbd(dst_bfr);
+	BeesRangePair rv = { BeesFileRange(), BeesFileRange() };
 
 	for_each_extent_ref(bbd, [&](const BeesFileRange &src_bfr_in) -> bool {
 		// Open src
@@ -436,21 +437,12 @@ BeesResolver::replace_dst(const BeesFileRange &dst_bfr_in)
 			BEESCOUNT(replacedst_grown);
 		}
 
-		// Dedup
-		BEESNOTE("dedup " << brp);
-		if (m_ctx->dedup(brp)) {
-			BEESCOUNT(replacedst_dedup_hit);
-			m_found_dup = true;
-			overlap_bfr = brp.second;
-			// FIXME: find best range first, then dedupe that
-			return true; // i.e. break
-		} else {
-			BEESCOUNT(replacedst_dedup_miss);
-			return false; // i.e. continue
-		}
+		rv = brp;
+		m_found_dup = true;
+		return true;
 	});
 	// BEESLOG("overlap_bfr after " << overlap_bfr);
-	return overlap_bfr.copy_closed();
+	return rv;
 }
 
 BeesFileRange
src/bees.h

@@ -749,7 +749,7 @@ class BeesContext : public enable_shared_from_this<BeesContext> {
 
 	BeesResolveAddrResult resolve_addr_uncached(BeesAddress addr);
 
-	BeesFileRange scan_one_extent(const BeesFileRange &bfr, const Extent &e);
+	void scan_one_extent(const BeesFileRange &bfr, const Extent &e);
 	void rewrite_file_range(const BeesFileRange &bfr);
 
 public:
@@ -842,7 +842,7 @@ public:
 	BeesFileRange find_one_match(BeesHash hash);
 
 	void replace_src(const BeesFileRange &src_bfr);
-	BeesFileRange replace_dst(const BeesFileRange &dst_bfr);
+	BeesRangePair replace_dst(const BeesFileRange &dst_bfr);
 
 	bool found_addr() const { return m_found_addr; }
 	bool found_data() const { return m_found_data; }