mirror of
https://github.com/Zygo/bees.git
synced 2025-08-03 14:23:29 +02:00
Compare commits
2 Commits
v0.1
...
fanotify-w
Author | SHA1 | Date | |
---|---|---|---|
|
11f69ff6c1 | ||
|
876b76d761 |
68
README.md
68
README.md
@@ -7,19 +7,18 @@ About Bees
|
|||||||
----------
|
----------
|
||||||
|
|
||||||
Bees is a daemon designed to run continuously on live file servers.
|
Bees is a daemon designed to run continuously on live file servers.
|
||||||
Bees consumes entire filesystems and deduplicates in a single pass, using
|
Bees scans and deduplicates whole filesystems in a single pass instead
|
||||||
minimal RAM to store data. Bees maintains persistent state so it can be
|
of separate scan and dedup phases. RAM usage does _not_ depend on
|
||||||
interrupted and resumed, whether by planned upgrades or unplanned crashes.
|
unique data size or the number of input files. Hash tables and scan
|
||||||
Bees makes continuous incremental progress instead of using separate
|
progress are stored persistently so the daemon can resume after a reboot.
|
||||||
scan and dedup phases. Bees uses the Linux kernel's `dedupe_file_range`
|
Bees uses the Linux kernel's `dedupe_file_range` feature to ensure data
|
||||||
system call to ensure data is handled safely even if other applications
|
is handled safely even if other applications concurrently modify it.
|
||||||
concurrently modify it.
|
|
||||||
|
|
||||||
Bees is intentionally btrfs-specific for performance and capability.
|
Bees is intentionally btrfs-specific for performance and capability.
|
||||||
Bees uses the btrfs `SEARCH_V2` ioctl to scan for new data
|
Bees uses the btrfs `SEARCH_V2` ioctl to scan for new data without the
|
||||||
without the overhead of repeatedly walking filesystem trees with the
|
overhead of repeatedly walking filesystem trees with the POSIX API.
|
||||||
POSIX API. Bees uses `LOGICAL_INO` and `INO_PATHS` to leverage btrfs's
|
Bees uses `LOGICAL_INO` and `INO_PATHS` to leverage btrfs's existing
|
||||||
existing metadata instead of building its own redundant data structures.
|
metadata instead of building its own redundant data structures.
|
||||||
Bees can cope with Btrfs filesystem compression. Bees can reassemble
|
Bees can cope with Btrfs filesystem compression. Bees can reassemble
|
||||||
Btrfs extents to deduplicate extents that contain a mix of duplicate
|
Btrfs extents to deduplicate extents that contain a mix of duplicate
|
||||||
and unique data blocks.
|
and unique data blocks.
|
||||||
@@ -37,7 +36,8 @@ using a weighted sampling algorithm. This allows Bees to adapt itself
|
|||||||
to its filesystem size without forcing admins to do math at install time.
|
to its filesystem size without forcing admins to do math at install time.
|
||||||
At the same time, the duplicate block alignment constraint can be as low
|
At the same time, the duplicate block alignment constraint can be as low
|
||||||
as 4K, allowing efficient deduplication of files with narrowly-aligned
|
as 4K, allowing efficient deduplication of files with narrowly-aligned
|
||||||
duplicate block offsets (e.g. compiled binaries and VM/disk images).
|
duplicate block offsets (e.g. compiled binaries and VM/disk images)
|
||||||
|
even if the effective block size is much larger.
|
||||||
|
|
||||||
The Bees hash table is loaded into RAM at startup (using hugepages if
|
The Bees hash table is loaded into RAM at startup (using hugepages if
|
||||||
available), mlocked, and synced to persistent storage by trickle-writing
|
available), mlocked, and synced to persistent storage by trickle-writing
|
||||||
@@ -78,6 +78,12 @@ and some metadata bits). Each entry represents a minimum of 4K on disk.
|
|||||||
1TB 16MB 1024K
|
1TB 16MB 1024K
|
||||||
64TB 1GB 1024K
|
64TB 1GB 1024K
|
||||||
|
|
||||||
|
It is possible to resize the hash table by changing the size of
|
||||||
|
`beeshash.dat` (e.g. with `truncate`) and restarting `bees`. This
|
||||||
|
does not preserve all the existing hash table entries, but it does
|
||||||
|
preserve more than zero of them--especially if the old and new sizes
|
||||||
|
are a power-of-two multiple of each other.
|
||||||
|
|
||||||
Things You Might Expect That Bees Doesn't Have
|
Things You Might Expect That Bees Doesn't Have
|
||||||
----------------------------------------------
|
----------------------------------------------
|
||||||
|
|
||||||
@@ -113,6 +119,16 @@ this was removed because it made Bees too aggressive to coexist with
|
|||||||
other applications on the same machine. It also hit the *slow backrefs*
|
other applications on the same machine. It also hit the *slow backrefs*
|
||||||
on N CPU cores instead of just one.
|
on N CPU cores instead of just one.
|
||||||
|
|
||||||
|
* Block reads are currently more allocation- and CPU-intensive than they
|
||||||
|
should be, especially for filesystems on SSD where the IO overhead is
|
||||||
|
much smaller. This is a problem for power-constrained environments
|
||||||
|
(e.g. laptops with slow CPU).
|
||||||
|
|
||||||
|
* Bees can currently fragment extents when required to remove duplicate
|
||||||
|
blocks, but has no defragmentation capability yet. When possible, Bees
|
||||||
|
will attempt to work with existing extent boundaries, but it will not
|
||||||
|
aggregate blocks together from multiple extents to create larger ones.
|
||||||
|
|
||||||
Good Btrfs Feature Interactions
|
Good Btrfs Feature Interactions
|
||||||
-------------------------------
|
-------------------------------
|
||||||
|
|
||||||
@@ -175,7 +191,7 @@ Other Caveats
|
|||||||
unallocated space (see `btrfs fi df`) on the filesystem before running
|
unallocated space (see `btrfs fi df`) on the filesystem before running
|
||||||
Bees for the first time. Use
|
Bees for the first time. Use
|
||||||
|
|
||||||
btrfs balance start -dusage=100,limit=1 /your/filesystem
|
btrfs balance start -dusage=100,limit=1 /your/filesystem
|
||||||
|
|
||||||
If possible, raise the `limit` parameter to the current size of metadata
|
If possible, raise the `limit` parameter to the current size of metadata
|
||||||
usage (from `btrfs fi df`) plus 1.
|
usage (from `btrfs fi df`) plus 1.
|
||||||
@@ -295,14 +311,14 @@ Setup
|
|||||||
|
|
||||||
Create a directory for bees state files:
|
Create a directory for bees state files:
|
||||||
|
|
||||||
export BEESHOME=/some/path
|
export BEESHOME=/some/path
|
||||||
mkdir -p "$BEESHOME"
|
mkdir -p "$BEESHOME"
|
||||||
|
|
||||||
Create an empty hash table (your choice of size, but it must be a multiple
|
Create an empty hash table (your choice of size, but it must be a multiple
|
||||||
of 16M). This example creates a 1GB hash table:
|
of 16M). This example creates a 1GB hash table:
|
||||||
|
|
||||||
truncate -s 1g "$BEESHOME/beeshash.dat"
|
truncate -s 1g "$BEESHOME/beeshash.dat"
|
||||||
chmod 700 "$BEESHOME/beeshash.dat"
|
chmod 700 "$BEESHOME/beeshash.dat"
|
||||||
|
|
||||||
Configuration
|
Configuration
|
||||||
-------------
|
-------------
|
||||||
@@ -324,11 +340,11 @@ Running
|
|||||||
|
|
||||||
We created this directory in the previous section:
|
We created this directory in the previous section:
|
||||||
|
|
||||||
export BEESHOME=/some/path
|
export BEESHOME=/some/path
|
||||||
|
|
||||||
Use a tmpfs for BEESSTATUS, it updates once per second:
|
Use a tmpfs for BEESSTATUS, it updates once per second:
|
||||||
|
|
||||||
export BEESSTATUS=/run/bees.status
|
export BEESSTATUS=/run/bees.status
|
||||||
|
|
||||||
bees can only process the root subvol of a btrfs (seriously--if the
|
bees can only process the root subvol of a btrfs (seriously--if the
|
||||||
argument is not the root subvol directory, Bees will just throw an
|
argument is not the root subvol directory, Bees will just throw an
|
||||||
@@ -336,20 +352,20 @@ exception and stop).
|
|||||||
|
|
||||||
Use a bind mount, and let only bees access it:
|
Use a bind mount, and let only bees access it:
|
||||||
|
|
||||||
mount -osubvol=/ /dev/<your-filesystem> /var/lib/bees/root
|
mount -osubvol=/ /dev/<your-filesystem> /var/lib/bees/root
|
||||||
|
|
||||||
Reduce CPU and IO priority to be kinder to other applications
|
Reduce CPU and IO priority to be kinder to other applications
|
||||||
sharing this host (or raise them for more aggressive disk space
|
sharing this host (or raise them for more aggressive disk space
|
||||||
recovery). If you use cgroups, put bees in its own cgroup, then reduce
|
recovery). If you use cgroups, put `bees` in its own cgroup, then reduce
|
||||||
the `blkio.weight` and `cpu.shares` parameters. You can also use
|
the `blkio.weight` and `cpu.shares` parameters. You can also use
|
||||||
`schedtool` and `ionice in the shell script that launches bees:
|
`schedtool` and `ionice` in the shell script that launches `bees`:
|
||||||
|
|
||||||
schedtool -D -n20 $$
|
schedtool -D -n20 $$
|
||||||
ionice -c3 -p $$
|
ionice -c3 -p $$
|
||||||
|
|
||||||
Let the bees fly:
|
Let the bees fly:
|
||||||
|
|
||||||
bees /var/lib/bees/root >> /var/log/bees.log 2>&1
|
bees /var/lib/bees/root >> /var/log/bees.log 2>&1
|
||||||
|
|
||||||
You'll probably want to arrange for /var/log/bees.log to be rotated
|
You'll probably want to arrange for /var/log/bees.log to be rotated
|
||||||
periodically. You may also want to set umask to 077 to prevent disclosure
|
periodically. You may also want to set umask to 077 to prevent disclosure
|
||||||
@@ -363,7 +379,7 @@ Email bug reports and patches to Zygo Blaxell <bees@furryterror.org>.
|
|||||||
|
|
||||||
You can also use Github:
|
You can also use Github:
|
||||||
|
|
||||||
https://github.com/Zygo/bees
|
https://github.com/Zygo/bees
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@@ -1,5 +1,6 @@
|
|||||||
PROGRAMS = \
|
PROGRAMS = \
|
||||||
../bin/bees \
|
../bin/bees \
|
||||||
|
../bin/fanotify-watch \
|
||||||
../bin/fiemap \
|
../bin/fiemap \
|
||||||
../bin/fiewalk \
|
../bin/fiewalk \
|
||||||
|
|
||||||
|
91
src/fanotify-watch.cc
Normal file
91
src/fanotify-watch.cc
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
#include <crucible/error.h>
#include <crucible/fd.h>
#include <crucible/ntoa.h>

#include <iostream>
#include <iomanip>
#include <sstream>
#include <string>

#include <fcntl.h>
#include <unistd.h>
#include <sys/fanotify.h>
|
||||||
|
|
||||||
|
using namespace crucible;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
static
|
||||||
|
void
|
||||||
|
usage(const char *name)
|
||||||
|
{
|
||||||
|
cerr << "Usage: " << name << " directory" << endl;
|
||||||
|
cerr << "Reports fanotify events from directory" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Buffer filled by each read from the fanotify descriptor in main().
// It currently holds only the fixed-size event metadata header; the struct
// exists so further fields can be appended after `fem` later if needed.
struct fan_read_block {
	fanotify_event_metadata fem;	// metadata for one event
};
|
||||||
|
|
||||||
|
static inline
|
||||||
|
string
|
||||||
|
fan_flag_ntoa(uint64_t ui)
|
||||||
|
{
|
||||||
|
static const bits_ntoa_table flag_names[] = {
|
||||||
|
NTOA_TABLE_ENTRY_BITS(FAN_ACCESS),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(FAN_OPEN),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(FAN_MODIFY),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(FAN_CLOSE),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(FAN_CLOSE_WRITE),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(FAN_CLOSE_NOWRITE),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(FAN_Q_OVERFLOW),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(FAN_ACCESS_PERM),
|
||||||
|
NTOA_TABLE_ENTRY_BITS(FAN_OPEN_PERM),
|
||||||
|
NTOA_TABLE_ENTRY_END()
|
||||||
|
};
|
||||||
|
return bits_ntoa(ui, flag_names);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
if (argc < 1) {
|
||||||
|
usage(argv[0]);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
Fd fd;
|
||||||
|
|
||||||
|
DIE_IF_MINUS_ONE(fd = fanotify_init(FAN_CLASS_NOTIF, O_RDONLY | O_LARGEFILE | O_CLOEXEC | O_NOATIME));
|
||||||
|
|
||||||
|
for (char **argvp = argv + 1; *argvp; ++argvp) {
|
||||||
|
cerr << "fanotify_mark(" << *argvp << ")..." << flush;
|
||||||
|
DIE_IF_MINUS_ONE(fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_MOUNT, FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE | FAN_OPEN, FAN_NOFD, *argvp));
|
||||||
|
cerr << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
struct fan_read_block frb;
|
||||||
|
read_or_die(fd, frb);
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
cout << "event_len\t= " << frb.fem.event_len << endl;
|
||||||
|
cout << "vers\t= " << static_cast<int>(frb.fem.vers) << endl;
|
||||||
|
cout << "reserved\t= " << static_cast<int>(frb.fem.reserved) << endl;
|
||||||
|
cout << "metadata_len\t= " << frb.fem.metadata_len << endl;
|
||||||
|
cout << "mask\t= " << hex << frb.fem.mask << dec << "\t" << fan_flag_ntoa(frb.fem.mask) << endl;
|
||||||
|
cout << "fd\t= " << frb.fem.fd << endl;
|
||||||
|
cout << "pid\t= " << frb.fem.pid << endl;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
cout << "flags " << fan_flag_ntoa(frb.fem.mask) << " pid " << frb.fem.pid << ' ' << flush;
|
||||||
|
|
||||||
|
Fd event_fd(frb.fem.fd);
|
||||||
|
ostringstream oss;
|
||||||
|
oss << "/proc/self/fd/" << event_fd;
|
||||||
|
cout << "file " << readlink_or_die(oss.str()) << endl;
|
||||||
|
|
||||||
|
// cout << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
Reference in New Issue
Block a user