diff --git a/src/bees-context.cc b/src/bees-context.cc index 27a8923..992c61d 100644 --- a/src/bees-context.cc +++ b/src/bees-context.cc @@ -344,7 +344,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e) } // OK we need to read extent now - readahead(bfr.fd(), bfr.begin(), bfr.size()); + bees_readahead(bfr.fd(), bfr.begin(), bfr.size()); map> insert_map; set noinsert_set; diff --git a/src/bees-types.cc b/src/bees-types.cc index 1bc9380..f3f1c86 100644 --- a/src/bees-types.cc +++ b/src/bees-types.cc @@ -385,8 +385,8 @@ BeesRangePair::grow(shared_ptr ctx, bool constrained) BEESTRACE("e_second " << e_second); // Preread entire extent - readahead(second.fd(), e_second.begin(), e_second.size()); - readahead(first.fd(), e_second.begin() + first.begin() - second.begin(), e_second.size()); + bees_readahead(second.fd(), e_second.begin(), e_second.size()); + bees_readahead(first.fd(), e_second.begin() + first.begin() - second.begin(), e_second.size()); auto hash_table = ctx->hash_table(); @@ -405,7 +405,7 @@ BeesRangePair::grow(shared_ptr ctx, bool constrained) BEESCOUNT(pairbackward_hole); break; } - readahead(second.fd(), e_second.begin(), e_second.size()); + bees_readahead(second.fd(), e_second.begin(), e_second.size()); #else // This tends to repeatedly process extents that were recently processed. // We tend to catch duplicate blocks early since we scan them forwards. 
@@ -514,7 +514,7 @@ BeesRangePair::grow(shared_ptr ctx, bool constrained)
 				BEESCOUNT(pairforward_hole);
 				break;
 			}
-			readahead(second.fd(), e_second.begin(), e_second.size());
+			bees_readahead(second.fd(), e_second.begin(), e_second.size());
 		}
 
 		BEESCOUNT(pairforward_try);
diff --git a/src/bees.cc b/src/bees.cc
index 03b8215..c61eec3 100644
--- a/src/bees.cc
+++ b/src/bees.cc
@@ -371,6 +371,52 @@ bees_sync(int fd)
 	BEESCOUNTADD(sync_ms, sync_timer.age() * 1000);
 }
 
+// Best-effort prefetch of the byte range [offset, offset + size) of fd.
+//
+// Calls readahead(2) first (a failure there goes through DIE_IF_NON_ZERO),
+// then pread()s the range into a scratch buffer in chunks of at most
+// BEES_READAHEAD_SIZE bytes.  The data read is discarded.
+//
+// pread() errors and short reads are tolerated.  Counters updated:
+//   readahead_count  preads that returned data
+//   readahead_bytes  bytes actually returned by pread
+//   readahead_fail   preads that returned an error
+//   readahead_ms     readahead_timer.age() * 1000 for the whole call
+void
+bees_readahead(int const fd, off_t offset, size_t size)
+{
+	Timer readahead_timer;
+	BEESNOTE("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
+	BEESTOOLONG("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
+	// This might not do anything?
+	DIE_IF_NON_ZERO(readahead(fd, offset, size));
+	// Make sure this data is in page cache
+	// Note spelling: readahead vs read ahead
+	BEESNOTE("read ahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
+	while (size) {
+		// Scratch space only: nothing reads these bytes back.
+		// NOTE(review): static, so concurrent callers would share this
+		// buffer -- confirm that is acceptable.
+		static uint8_t dummy[BEES_READAHEAD_SIZE];
+		const size_t this_read_size = min(size, sizeof(dummy));
+		// Tolerate errors and short reads (it turns out our size parameter
+		// isn't all that accurate), but count what really happened.
+		const ssize_t read_rv = pread(fd, dummy, this_read_size, offset);
+		if (read_rv < 0) {
+			BEESCOUNT(readahead_fail);
+		} else if (read_rv == 0) {
+			// End of file: no data at this offset or beyond, stop here.
+			break;
+		} else {
+			BEESCOUNT(readahead_count);
+			BEESCOUNTADD(readahead_bytes, static_cast<size_t>(read_rv));
+		}
+		offset += this_read_size;
+		size -= this_read_size;
+	}
+	BEESCOUNTADD(readahead_ms, readahead_timer.age() * 1000);
+}
+
 BeesStringFile::BeesStringFile(Fd dir_fd, string name, size_t limit) :
 	m_dir_fd(dir_fd),
 	m_name(name),
diff --git a/src/bees.h b/src/bees.h
index 19b53d8..f993131 100644
--- a/src/bees.h
+++ b/src/bees.h
@@ -119,6 +119,9 @@ const bool BEES_SERIALIZE_RESOLVE = false;
 // Workaround for tree mod log bugs
 const bool BEES_SERIALIZE_BALANCE = false;
 
+// Workaround for silly dedupe / ineffective readahead behavior
+const size_t BEES_READAHEAD_SIZE = 1024 * 1024;
+
 // Flags
 const int FLAGS_OPEN_COMMON = O_NOFOLLOW | O_NONBLOCK | O_CLOEXEC | O_NOATIME | O_LARGEFILE | O_NOCTTY;
 const int FLAGS_OPEN_DIR = FLAGS_OPEN_COMMON | O_RDONLY | O_DIRECTORY;
@@ -880,6 +883,7 @@ extern const char *BEES_USAGE;
 extern const char *BEES_VERSION;
 string pretty(double d);
 void bees_sync(int fd);
+void bees_readahead(int fd, off_t offset, size_t size);
 string format_time(time_t t);
 
 #endif