1
0
mirror of https://github.com/Zygo/bees.git synced 2025-07-07 02:42:27 +02:00

bees: use helper function for readahead

There seem to be multiple ways to do readahead in Linux, and only some
of them work.  Hopefully reading the actual data is one of them.

This is an attempt to avoid page-by-page reads in the generic dedupe code.
We load both extents into the VFS cache (read sequentially) and hope they
are still there by the time we call dedupe on them.

We also call readahead(2) and hopefully that either helps or does nothing.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
Zygo Blaxell
2021-05-28 01:58:16 -04:00
parent 0afd2850f4
commit 20b8f8ae0b
4 changed files with 34 additions and 5 deletions

View File

@ -371,6 +371,31 @@ bees_sync(int fd)
BEESCOUNTADD(sync_ms, sync_timer.age() * 1000);
}
/**
 * Best-effort attempt to get a byte range of a file into the page cache.
 *
 * Issues readahead(2) for the range, then reads the same range with
 * pread() in chunks of at most BEES_READAHEAD_SIZE bytes and throws
 * the data away.  Activity is recorded in the readahead_count,
 * readahead_bytes and readahead_ms counters.
 *
 * @param fd      file descriptor to read from
 * @param offset  byte offset where the range starts
 * @param size    length of the range in bytes; this is known to be
 *                inexact, so the range may run past end of file
 */
void
bees_readahead(int const fd, off_t offset, size_t size)
{
	Timer readahead_timer;
	BEESNOTE("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
	BEESTOOLONG("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
	// This might not do anything?
	// NOTE(review): DIE_IF_NON_ZERO presumably aborts this call when
	// readahead(2) reports failure, skipping the reads below -- confirm.
	DIE_IF_NON_ZERO(readahead(fd, offset, size));
	// Make sure this data is in page cache
	// Note spelling: readahead vs read ahead
	BEESNOTE("read ahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
	while (size) {
		// Scratch buffer shared by every call; its contents are never examined.
		static uint8_t dummy[BEES_READAHEAD_SIZE];
		const size_t this_read_size = min(size, sizeof(dummy));
		// Errors and short reads do not abort the loop.
		// It turns out our size parameter isn't all that accurate.
		const ssize_t bytes_read = pread(fd, dummy, this_read_size, offset);
		// Counts every pread() issued, whatever its outcome.
		BEESCOUNT(readahead_count);
		if (bytes_read > 0) {
			// Count what was actually read, not what was asked for,
			// so short reads do not inflate the statistic.
			BEESCOUNTADD(readahead_bytes, static_cast<size_t>(bytes_read));
		} else if (bytes_read == 0) {
			// End of file: every later offset is past EOF as well,
			// so further reads would be wasted syscalls.
			break;
		}
		// Advance by the requested chunk even after an error or a short
		// read, so one bad region does not stall the rest of the range.
		offset += this_read_size;
		size -= this_read_size;
	}
	BEESCOUNTADD(readahead_ms, readahead_timer.age() * 1000);
}
BeesStringFile::BeesStringFile(Fd dir_fd, string name, size_t limit) :
m_dir_fd(dir_fd),
m_name(name),