diff --git a/docs/event-counters.md b/docs/event-counters.md index be2ace4..fa90d5d 100644 --- a/docs/event-counters.md +++ b/docs/event-counters.md @@ -333,6 +333,7 @@ The `scan` event group consists of operations related to scanning incoming data. * `scan_eof`: Scan past EOF was attempted. * `scan_erase_redundant`: Blocks in the hash table were removed because they were removed from the filesystem by dedupe. * `scan_extent`: An extent was scanned (`scan_one_extent`). + * `scan_extent_tiny`: An extent smaller than 128K that was neither at the beginning nor at the end of a file was scanned. No action is currently taken for these--they are merely counted. * `scan_forward`: A logical byte range was scanned (`scan_forward`). * `scan_found`: An entry was found in the hash table matching a scanned block from the filesystem. * `scan_hash_hit`: A block was found on the filesystem corresponding to a block found in the hash table. diff --git a/src/bees-context.cc b/src/bees-context.cc index ff08863..fe1020d 100644 --- a/src/bees-context.cc +++ b/src/bees-context.cc @@ -292,6 +292,15 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e) BEESTRACE("scan extent " << e); BEESCOUNT(scan_extent); + // EXPERIMENT: Don't bother with tiny extents unless they touch the start or end of the file. + // We'll take a tiny extent at BOF or EOF but not in between. + if (e.begin() && e.size() < 128 * 1024 && e.end() != Stat(bfr.fd()).st_size) { + BEESCOUNT(scan_extent_tiny); + // This doesn't work properly with the current architecture, + // so we don't do an early return here. + // return bfr; + } + // We keep moving this method around auto m_ctx = shared_from_this();