1
0
mirror of https://github.com/Zygo/bees.git synced 2025-08-02 13:53:28 +02:00

1 Commits

Author SHA1 Message Date
Zygo Blaxell
11f69ff6c1 fanotify-watch: Not really part of Bees, but a useful tool nonetheless 2016-11-18 12:48:40 -05:00
19 changed files with 141 additions and 143 deletions

View File

@@ -1,52 +1,30 @@
BEES
====
Best-Effort Extent-Same, a btrfs dedup agent.
Best-Effort Extent-Same, a btrfs deduplication daemon.
About Bees
----------
Bees is a block-oriented userspace dedup agent designed to avoid
scalability problems on large filesystems.
Bees is a daemon designed to run continuously on live file servers.
Bees scans and deduplicates whole filesystems in a single pass instead
of separate scan and dedup phases. RAM usage does _not_ depend on
unique data size or the number of input files. Hash tables and scan
progress are stored persistently so the daemon can resume after a reboot.
Bees uses the Linux kernel's `dedupe_file_range` feature to ensure data
is handled safely even if other applications concurrently modify it.
Bees is designed to degrade gracefully when underprovisioned with RAM.
Bees does not use more RAM or storage as filesystem data size increases.
The dedup hash table size is fixed at creation time and does not change.
The effective dedup block size is dynamic and adjusts automatically to
fit the hash table into the configured RAM limit. Hash table overflow
is not implemented to eliminate the IO overhead of hash table overflow.
Hash table entries are only 16 bytes per dedup block to keep the average
dedup block size small.
Bees is intentionally btrfs-specific for performance and capability.
Bees uses the btrfs `SEARCH_V2` ioctl to scan for new data without the
overhead of repeatedly walking filesystem trees with the POSIX API.
Bees uses `LOGICAL_INO` and `INO_PATHS` to leverage btrfs's existing
metadata instead of building its own redundant data structures.
Bees can cope with Btrfs filesystem compression. Bees can reassemble
Btrfs extents to deduplicate extents that contain a mix of duplicate
and unique data blocks.
Bees does not require alignment between dedup blocks or extent boundaries
(i.e. it can handle any multiple-of-4K offset between dup block pairs).
Bees rearranges blocks into shared and unique extents if required to
work within current btrfs kernel dedup limitations.
Bees can dedup any combination of compressed and uncompressed extents.
Bees operates in a single pass which removes duplicate extents immediately
during scan. There are no separate scanning and dedup phases.
Bees uses only data-safe btrfs kernel operations, so it can dedup live
data (e.g. build servers, sqlite databases, VM disk images). It does
not modify file attributes or timestamps.
Bees does not store any information about filesystem structure, so it is
not affected by the number or size of files (except to the extent that
these cause performance problems for btrfs in general). It retrieves such
information on demand through btrfs SEARCH_V2 and LOGICAL_INO ioctls.
This eliminates the storage required to maintain the equivalents of
these functions in userspace. It's also why bees has no XFS support.
Bees is a daemon designed to run continuously and maintain its state
across crahes and reboots. Bees uses checkpoints for persistence to
eliminate the IO overhead of a transactional data store. On restart,
bees will dedup any data that was added to the filesystem since the
last checkpoint.
Bees is used to dedup filesystems ranging in size from 16GB to 35TB, with
hash tables ranging in size from 128MB to 11GB.
Bees includes a number of workarounds for Btrfs kernel bugs to (try to)
avoid ruining your day. You're welcome.
How Bees Works
--------------
@@ -292,10 +270,9 @@ Not really a bug, but a gotcha nonetheless:
Requirements
------------
* C++11 compiler (tested with GCC 4.9 and 6.2.0)
* C++11 compiler (tested with GCC 4.9)
Sorry. I really like closures and shared_ptr, so support
for earlier compiler versions is unlikely.
Sorry. I really like closures.
* btrfs-progs (tested with 4.1..4.7)

View File

@@ -8,7 +8,6 @@
#include <map>
#include <mutex>
#include <tuple>
#include <vector>
namespace crucible {
using namespace std;

View File

@@ -86,6 +86,16 @@ namespace crucible {
}
};
template <>
struct ChatterTraits<ostream &> {
Chatter &
operator()(Chatter &c, ostream & arg)
{
c.get_os() << arg;
return c;
}
};
class ChatterBox {
string m_file;
int m_line;

View File

@@ -120,9 +120,6 @@ namespace crucible {
template<> void pread_or_die<string>(int fd, string& str, off_t offset);
template<> void pread_or_die<vector<char>>(int fd, vector<char>& str, off_t offset);
template<> void pread_or_die<vector<uint8_t>>(int fd, vector<uint8_t>& str, off_t offset);
template<> void pwrite_or_die<string>(int fd, const string& str, off_t offset);
template<> void pwrite_or_die<vector<char>>(int fd, const vector<char>& str, off_t offset);
template<> void pwrite_or_die<vector<uint8_t>>(int fd, const vector<uint8_t>& str, off_t offset);
// A different approach to reading a simple string
string read_string(int fd, size_t size);

View File

@@ -171,7 +171,7 @@ namespace crucible {
ostream & operator<<(ostream &os, const BtrfsIoctlSearchKey &key);
string btrfs_search_type_ntoa(unsigned type);
string btrfs_search_objectid_ntoa(uint64_t objectid);
string btrfs_search_objectid_ntoa(unsigned objectid);
uint64_t btrfs_get_root_id(int fd);
uint64_t btrfs_get_root_transid(int fd);

View File

@@ -7,12 +7,12 @@ namespace crucible {
using namespace std;
struct bits_ntoa_table {
unsigned long long n;
unsigned long long mask;
unsigned long n;
unsigned long mask;
const char *a;
};
string bits_ntoa(unsigned long long n, const bits_ntoa_table *a);
string bits_ntoa(unsigned long n, const bits_ntoa_table *a);
};

View File

@@ -23,7 +23,7 @@ namespace crucible {
private:
struct Item {
Timestamp m_time;
unsigned long m_id;
unsigned m_id;
Task m_task;
bool operator<(const Item &that) const {

View File

@@ -15,7 +15,7 @@
namespace crucible {
using namespace std;
static shared_ptr<set<string>> chatter_names;
static auto_ptr<set<string>> chatter_names;
static const char *SPACETAB = " \t";
static

View File

@@ -72,10 +72,14 @@ namespace crucible {
catch_all([&]() {
parent_fd->close();
import_fd_fn(child_fd);
// system("ls -l /proc/$$/fd/ >&2");
rv = f();
});
_exit(rv);
cerr << "PID " << getpid() << " TID " << gettid() << "STILL ALIVE" << endl;
system("ls -l /proc/$$/task/ >&2");
exit(EXIT_FAILURE);
}
}

View File

@@ -426,27 +426,6 @@ namespace crucible {
return pread_or_die(fd, text.data(), text.size(), offset);
}
template<>
void
pwrite_or_die<vector<uint8_t>>(int fd, const vector<uint8_t> &text, off_t offset)
{
return pwrite_or_die(fd, text.data(), text.size(), offset);
}
template<>
void
pwrite_or_die<vector<char>>(int fd, const vector<char> &text, off_t offset)
{
return pwrite_or_die(fd, text.data(), text.size(), offset);
}
template<>
void
pwrite_or_die<string>(int fd, const string &text, off_t offset)
{
return pwrite_or_die(fd, text.data(), text.size(), offset);
}
Stat::Stat()
{
memset_zero<stat>(this);

View File

@@ -834,7 +834,7 @@ namespace crucible {
}
string
btrfs_search_objectid_ntoa(uint64_t objectid)
btrfs_search_objectid_ntoa(unsigned objectid)
{
static const bits_ntoa_table table[] = {
NTOA_TABLE_ENTRY_ENUM(BTRFS_ROOT_TREE_OBJECTID),

View File

@@ -7,7 +7,7 @@
namespace crucible {
using namespace std;
string bits_ntoa(unsigned long long n, const bits_ntoa_table *table)
string bits_ntoa(unsigned long n, const bits_ntoa_table *table)
{
string out;
while (n && table->a) {

View File

@@ -1,5 +1,6 @@
PROGRAMS = \
../bin/bees \
../bin/fanotify-watch \
../bin/fiemap \
../bin/fiewalk \

View File

@@ -5,7 +5,6 @@
#include <fstream>
#include <iostream>
#include <vector>
using namespace crucible;
using namespace std;

View File

@@ -50,18 +50,9 @@ string
BeesRoots::crawl_state_filename() const
{
string rv;
// Legacy filename included UUID
rv += "beescrawl.";
rv += m_ctx->root_uuid();
rv += ".dat";
struct stat buf;
if (fstatat(m_ctx->home_fd(), rv.c_str(), &buf, AT_SYMLINK_NOFOLLOW)) {
// Use new filename
rv = "beescrawl.dat";
}
return rv;
}
@@ -110,12 +101,6 @@ BeesRoots::state_save()
m_crawl_state_file.write(ofs.str());
// Renaming things is hard after release
if (m_crawl_state_file.name() != "beescrawl.dat") {
renameat(m_ctx->home_fd(), m_crawl_state_file.name().c_str(), m_ctx->home_fd(), "beescrawl.dat");
m_crawl_state_file.name("beescrawl.dat");
}
BEESNOTE("relocking crawl state");
lock.lock();
// Not really correct but probably close enough

View File

@@ -351,18 +351,6 @@ BeesStringFile::BeesStringFile(Fd dir_fd, string name, size_t limit) :
BEESLOG("BeesStringFile " << name_fd(m_dir_fd) << "/" << m_name << " max size " << pretty(m_limit));
}
void
BeesStringFile::name(const string &new_name)
{
m_name = new_name;
}
string
BeesStringFile::name() const
{
return m_name;
}
string
BeesStringFile::read()
{

View File

@@ -374,8 +374,6 @@ public:
BeesStringFile(Fd dir_fd, string name, size_t limit = 1024 * 1024);
string read();
void write(string contents);
void name(const string &new_name);
string name() const;
};
class BeesHashTable {

91
src/fanotify-watch.cc Normal file
View File

@@ -0,0 +1,91 @@
#include <crucible/error.h>
#include <crucible/fd.h>
#include <crucible/ntoa.h>
#include <iostream>
#include <iomanip>
#include <sstream>
#include <string>
#include <unistd.h>
#include <sys/fanotify.h>
using namespace crucible;
using namespace std;
static
void
usage(const char *name)
{
cerr << "Usage: " << name << " directory" << endl;
cerr << "Reports fanotify events from directory" << endl;
}
struct fan_read_block {
struct fanotify_event_metadata fem;
// more here in the future. Maybe.
};
static inline
string
fan_flag_ntoa(uint64_t ui)
{
static const bits_ntoa_table flag_names[] = {
NTOA_TABLE_ENTRY_BITS(FAN_ACCESS),
NTOA_TABLE_ENTRY_BITS(FAN_OPEN),
NTOA_TABLE_ENTRY_BITS(FAN_MODIFY),
NTOA_TABLE_ENTRY_BITS(FAN_CLOSE),
NTOA_TABLE_ENTRY_BITS(FAN_CLOSE_WRITE),
NTOA_TABLE_ENTRY_BITS(FAN_CLOSE_NOWRITE),
NTOA_TABLE_ENTRY_BITS(FAN_Q_OVERFLOW),
NTOA_TABLE_ENTRY_BITS(FAN_ACCESS_PERM),
NTOA_TABLE_ENTRY_BITS(FAN_OPEN_PERM),
NTOA_TABLE_ENTRY_END()
};
return bits_ntoa(ui, flag_names);
}
int
main(int argc, char **argv)
{
if (argc < 1) {
usage(argv[0]);
exit(EXIT_FAILURE);
}
Fd fd;
DIE_IF_MINUS_ONE(fd = fanotify_init(FAN_CLASS_NOTIF, O_RDONLY | O_LARGEFILE | O_CLOEXEC | O_NOATIME));
for (char **argvp = argv + 1; *argvp; ++argvp) {
cerr << "fanotify_mark(" << *argvp << ")..." << flush;
DIE_IF_MINUS_ONE(fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_MOUNT, FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE | FAN_OPEN, FAN_NOFD, *argvp));
cerr << endl;
}
while (1) {
struct fan_read_block frb;
read_or_die(fd, frb);
#if 0
cout << "event_len\t= " << frb.fem.event_len << endl;
cout << "vers\t= " << static_cast<int>(frb.fem.vers) << endl;
cout << "reserved\t= " << static_cast<int>(frb.fem.reserved) << endl;
cout << "metadata_len\t= " << frb.fem.metadata_len << endl;
cout << "mask\t= " << hex << frb.fem.mask << dec << "\t" << fan_flag_ntoa(frb.fem.mask) << endl;
cout << "fd\t= " << frb.fem.fd << endl;
cout << "pid\t= " << frb.fem.pid << endl;
#endif
cout << "flags " << fan_flag_ntoa(frb.fem.mask) << " pid " << frb.fem.pid << ' ' << flush;
Fd event_fd(frb.fem.fd);
ostringstream oss;
oss << "/proc/self/fd/" << event_fd;
cout << "file " << readlink_or_die(oss.str()) << endl;
// cout << endl;
}
return EXIT_SUCCESS;
}

View File

@@ -126,13 +126,7 @@ test_cast_0x80000000_to_things()
{
auto sv = 0x80000000LL;
auto uv = 0x80000000ULL;
if (sizeof(off_t) == 4) {
SHOULD_FAIL(ranged_cast<off_t>(sv));
} else if (sizeof(off_t) == 8) {
SHOULD_PASS(ranged_cast<off_t>(sv), sv);
} else {
assert(!"unhandled case, please add code for off_t here");
}
SHOULD_PASS(ranged_cast<uint64_t>(uv), uv);
SHOULD_PASS(ranged_cast<uint32_t>(uv), uv);
SHOULD_FAIL(ranged_cast<uint16_t>(uv));
@@ -147,13 +141,7 @@ test_cast_0x80000000_to_things()
SHOULD_FAIL(ranged_cast<unsigned short>(uv));
SHOULD_FAIL(ranged_cast<unsigned char>(uv));
SHOULD_PASS(ranged_cast<signed long long>(sv), sv);
if (sizeof(long) == 4) {
SHOULD_FAIL(ranged_cast<signed long>(sv));
} else if (sizeof(long) == 8) {
SHOULD_PASS(ranged_cast<signed long>(sv), sv);
} else {
assert(!"unhandled case, please add code for long here");
}
SHOULD_FAIL(ranged_cast<signed short>(sv));
SHOULD_FAIL(ranged_cast<signed char>(sv));
if (sizeof(int) == 4) {
@@ -161,7 +149,7 @@ test_cast_0x80000000_to_things()
} else if (sizeof(int) == 8) {
SHOULD_PASS(ranged_cast<signed int>(sv), sv);
} else {
assert(!"unhandled case, please add code for int here");
assert(!"unhandled case, please add code here");
}
}
@@ -171,13 +159,7 @@ test_cast_0xffffffff_to_things()
{
auto sv = 0xffffffffLL;
auto uv = 0xffffffffULL;
if (sizeof(off_t) == 4) {
SHOULD_FAIL(ranged_cast<off_t>(sv));
} else if (sizeof(off_t) == 8) {
SHOULD_PASS(ranged_cast<off_t>(sv), sv);
} else {
assert(!"unhandled case, please add code for off_t here");
}
SHOULD_PASS(ranged_cast<uint64_t>(uv), uv);
SHOULD_PASS(ranged_cast<uint32_t>(uv), uv);
SHOULD_FAIL(ranged_cast<uint16_t>(uv));
@@ -192,13 +174,7 @@ test_cast_0xffffffff_to_things()
SHOULD_FAIL(ranged_cast<unsigned short>(uv));
SHOULD_FAIL(ranged_cast<unsigned char>(uv));
SHOULD_PASS(ranged_cast<signed long long>(sv), sv);
if (sizeof(long) == 4) {
SHOULD_FAIL(ranged_cast<signed long>(sv));
} else if (sizeof(long) == 8) {
SHOULD_PASS(ranged_cast<signed long>(sv), sv);
} else {
assert(!"unhandled case, please add code for long here");
}
SHOULD_FAIL(ranged_cast<signed short>(sv));
SHOULD_FAIL(ranged_cast<signed char>(sv));
if (sizeof(int) == 4) {
@@ -206,7 +182,7 @@ test_cast_0xffffffff_to_things()
} else if (sizeof(int) == 8) {
SHOULD_PASS(ranged_cast<signed int>(sv), sv);
} else {
assert(!"unhandled case, please add code for int here");
assert(!"unhandled case, please add code here");
}
}
@@ -216,13 +192,7 @@ test_cast_0xfffffffff_to_things()
{
auto sv = 0xfffffffffLL;
auto uv = 0xfffffffffULL;
if (sizeof(off_t) == 4) {
SHOULD_FAIL(ranged_cast<off_t>(sv));
} else if (sizeof(off_t) == 8) {
SHOULD_PASS(ranged_cast<off_t>(sv), sv);
} else {
assert(!"unhandled case, please add code for off_t here");
}
SHOULD_PASS(ranged_cast<uint64_t>(uv), uv);
SHOULD_FAIL(ranged_cast<uint32_t>(uv));
SHOULD_FAIL(ranged_cast<uint16_t>(uv));