From a5e2bdff47d7079d022ebfb218edc597ccc03058 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Tue, 12 Sep 2017 02:09:22 +0200 Subject: [PATCH] Skip nocow files to speed up processing If you have many nocow files, or a few big ones (like VM images), which contain a lot of potential deduplication candidates, bees becomes incredibly slow running through a lot of "invalid operation" exceptions. Let's just skip over such files to get more bang for the buck. I did no regression testing as this patch seems trivial (and I cannot imagine any pitfalls either). The process progresses much faster for me now. --- include/crucible/fd.h | 6 ++++++ lib/fd.cc | 8 ++++++++ src/bees-roots.cc | 8 ++++++++ 3 files changed, 22 insertions(+) diff --git a/include/crucible/fd.h b/include/crucible/fd.h index 8492846..9818819 100644 --- a/include/crucible/fd.h +++ b/include/crucible/fd.h @@ -13,6 +13,10 @@ #include #include +// ioctl +#include +#include + // socket #include @@ -141,6 +145,8 @@ namespace crucible { Stat &lstat(const string &filename); }; + int ioctl_iflags_get(int fd); + string st_mode_ntoa(mode_t mode); // Because it's not trivial to do correctly diff --git a/lib/fd.cc b/lib/fd.cc index e0735a9..931167f 100644 --- a/lib/fd.cc +++ b/lib/fd.cc @@ -488,6 +488,14 @@ namespace crucible { lstat(filename); } + int + ioctl_iflags_get(int fd) + { + int attr = 0; + DIE_IF_MINUS_ONE(ioctl(fd, FS_IOC_GETFLAGS, &attr)); + return attr; + } + string readlink_or_die(const string &path) { diff --git a/src/bees-roots.cc b/src/bees-roots.cc index 921132c..e99dc66 100644 --- a/src/bees-roots.cc +++ b/src/bees-roots.cc @@ -576,6 +576,14 @@ BeesRoots::open_root_ino_nocache(uint64_t root, uint64_t ino) break; } + int attr = ioctl_iflags_get(rv); + if (attr & FS_NOCOW_FL) { + BEESLOG("Opening " << name_fd(root_fd) << "/" << file_path << " found incompatible flags " << attr << " (FS_NOCOW_FL)"); + rv = Fd(); + BEESCOUNT(open_wrong_flags); + break; + } + // Correct root? 
auto file_root = btrfs_get_root_id(rv); if (file_root != root) {