mirror of
https://github.com/Zygo/bees.git
synced 2025-08-04 14:53:28 +02:00
Compare commits
7 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
3e96dfdef3 | ||
|
fa04c10ddd | ||
|
02008260f7 | ||
|
03f45045cf | ||
|
136e110e07 | ||
|
78134bfd78 | ||
|
593a55e829 |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,8 +1,7 @@
|
|||||||
*.[ao]
|
*.[ao]
|
||||||
*.bak
|
*.bak
|
||||||
*.dep
|
|
||||||
*.new
|
*.new
|
||||||
*.tmp
|
*.dep
|
||||||
*.so*
|
*.so*
|
||||||
Doxyfile
|
Doxyfile
|
||||||
README.html
|
README.html
|
||||||
|
2
Makefile
2
Makefile
@@ -61,7 +61,7 @@ install_bees: src $(RUN_INSTALL_TESTS)
|
|||||||
install_scripts: ## Install scripts
|
install_scripts: ## Install scripts
|
||||||
install_scripts: scripts
|
install_scripts: scripts
|
||||||
install -Dm755 scripts/beesd $(DESTDIR)$(PREFIX)/sbin/beesd
|
install -Dm755 scripts/beesd $(DESTDIR)$(PREFIX)/sbin/beesd
|
||||||
install -Dm644 scripts/beesd.conf.sample $(DESTDIR)$(ETC_PREFIX)/bees/beesd.conf.sample
|
install -Dm644 scripts/beesd.conf.sample $(DESTDIR)/$(ETC_PREFIX)/bees/beesd.conf.sample
|
||||||
ifneq ($(SYSTEMD_SYSTEM_UNIT_DIR),)
|
ifneq ($(SYSTEMD_SYSTEM_UNIT_DIR),)
|
||||||
install -Dm644 scripts/beesd@.service $(DESTDIR)$(SYSTEMD_SYSTEM_UNIT_DIR)/beesd@.service
|
install -Dm644 scripts/beesd@.service $(DESTDIR)$(SYSTEMD_SYSTEM_UNIT_DIR)/beesd@.service
|
||||||
endif
|
endif
|
||||||
|
@@ -17,7 +17,7 @@ Strengths
|
|||||||
* Space-efficient hash table and matching algorithms - can use as little as 1 GB hash table per 10 TB unique data (0.1GB/TB)
|
* Space-efficient hash table and matching algorithms - can use as little as 1 GB hash table per 10 TB unique data (0.1GB/TB)
|
||||||
* Daemon incrementally dedupes new data using btrfs tree search
|
* Daemon incrementally dedupes new data using btrfs tree search
|
||||||
* Works with btrfs compression - dedupe any combination of compressed and uncompressed files
|
* Works with btrfs compression - dedupe any combination of compressed and uncompressed files
|
||||||
* **NEW** [Works around `btrfs send` problems with dedupe and incremental parent snapshots](docs/options.md)
|
* **NEW** [Works around `btrfs send` problems with dedupe and incremental parent shapshots](docs/options.md)
|
||||||
* Works around btrfs filesystem structure to free more disk space
|
* Works around btrfs filesystem structure to free more disk space
|
||||||
* Persistent hash table for rapid restart after shutdown
|
* Persistent hash table for rapid restart after shutdown
|
||||||
* Whole-filesystem dedupe - including snapshots
|
* Whole-filesystem dedupe - including snapshots
|
||||||
@@ -70,6 +70,6 @@ You can also use Github:
|
|||||||
Copyright & License
|
Copyright & License
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
Copyright 2015-2022 Zygo Blaxell <bees@furryterror.org>.
|
Copyright 2015-2018 Zygo Blaxell <bees@furryterror.org>.
|
||||||
|
|
||||||
GPL (version 3 or later).
|
GPL (version 3 or later).
|
||||||
|
@@ -9,7 +9,7 @@ This issue is fixed in kernel 5.4.14 and later.
|
|||||||
|
|
||||||
**Recommended kernel versions for bees are 4.19, 5.4, 5.10, 5.11, or 5.12,
|
**Recommended kernel versions for bees are 4.19, 5.4, 5.10, 5.11, or 5.12,
|
||||||
with recent LTS and -stable updates.** The latest released kernel as
|
with recent LTS and -stable updates.** The latest released kernel as
|
||||||
of this writing is 5.18.18.
|
of this writing is 5.12.3.
|
||||||
|
|
||||||
4.14, 4.9, and 4.4 LTS kernels with recent updates are OK with
|
4.14, 4.9, and 4.4 LTS kernels with recent updates are OK with
|
||||||
some issues. Older kernels will be slower (a little slower or a lot
|
some issues. Older kernels will be slower (a little slower or a lot
|
||||||
@@ -31,7 +31,7 @@ In some future bees release, this API version may become mandatory.
|
|||||||
Kernel Bug Tracking Table
|
Kernel Bug Tracking Table
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
||||||
These bugs are particularly popular among bees users, though not all are specifically relevant to bees:
|
These bugs are particularly popular among bees users:
|
||||||
|
|
||||||
| First bad kernel | Last bad kernel | Issue Description | Fixed Kernel Versions | Fix Commit
|
| First bad kernel | Last bad kernel | Issue Description | Fixed Kernel Versions | Fix Commit
|
||||||
| :---: | :---: | --- | :---: | ---
|
| :---: | :---: | --- | :---: | ---
|
||||||
@@ -61,11 +61,7 @@ These bugs are particularly popular among bees users, though not all are specifi
|
|||||||
| 5.4 | 5.11 | spurious tree checker failures on extent ref hash | 5.11.5, 5.12 and later | 1119a72e223f btrfs: tree-checker: do not error out if extent ref hash doesn't match
|
| 5.4 | 5.11 | spurious tree checker failures on extent ref hash | 5.11.5, 5.12 and later | 1119a72e223f btrfs: tree-checker: do not error out if extent ref hash doesn't match
|
||||||
| - | 5.11 | tree mod log issue #5 | 4.4.263, 4.9.263, 4.14.227, 4.19.183, 5.4.108, 5.10.26, 5.11.9, 5.12 and later | dbcc7d57bffc btrfs: fix race when cloning extent buffer during rewind of an old root
|
| - | 5.11 | tree mod log issue #5 | 4.4.263, 4.9.263, 4.14.227, 4.19.183, 5.4.108, 5.10.26, 5.11.9, 5.12 and later | dbcc7d57bffc btrfs: fix race when cloning extent buffer during rewind of an old root
|
||||||
| - | 5.12 | tree mod log issue #6 | 4.14.233, 4.19.191, 5.4.118, 5.10.36, 5.11.20, 5.12.3, 5.13 and later | f9690f426b21 btrfs: fix race when picking most recent mod log operation for an old root
|
| - | 5.12 | tree mod log issue #6 | 4.14.233, 4.19.191, 5.4.118, 5.10.36, 5.11.20, 5.12.3, 5.13 and later | f9690f426b21 btrfs: fix race when picking most recent mod log operation for an old root
|
||||||
| 4.15 | 5.16 | spurious warnings from `fs/fs-writeback.c` when `flushoncommit` is enabled | 5.15.27, 5.16.13, 5.17 and later | a0f0cf8341e3 btrfs: get rid of warning on transaction commit when using flushoncommit
|
| 4.15 | - | spurious warnings from `fs/fs-writeback.c` when `flushoncommit` is enabled | - | workaround: comment out the `WARN_ON`
|
||||||
| - | 5.17 | crash during device removal can make filesystem unmountable | 5.15.54, 5.16.20, 5.17.3, 5.18 and later | bbac58698a55 btrfs: remove device item and update super block in the same transaction
|
|
||||||
| - | 5.18 | wrong superblock num_devices makes filesystem unmountable | 4.14.283, 4.19.247, 5.4.198, 5.10.121, 5.15.46, 5.17.14, 5.18.3, 5.19 and later | d201238ccd2f btrfs: repair super block num_devices automatically
|
|
||||||
| 5.18 | 5.19 | parent transid verify failed during log tree replay after a crash during a rename operation | 5.18.18, 5.19.2, 6.0 and later | 723df2bcc9e1 btrfs: join running log transaction when logging new name
|
|
||||||
| 5.4 | - | kernel hang when multiple threads are running `LOGICAL_INO` and dedupe ioctl | - | workaround: reduce bees thread count to 1 with `-c1`
|
|
||||||
|
|
||||||
"Last bad kernel" refers to that version's last stable update from
|
"Last bad kernel" refers to that version's last stable update from
|
||||||
kernel.org. Distro kernels may backport additional fixes. Consult
|
kernel.org. Distro kernels may backport additional fixes. Consult
|
||||||
@@ -81,7 +77,7 @@ A "-" for "first bad kernel" indicates the bug has been present since
|
|||||||
the relevant feature first appeared in btrfs.
|
the relevant feature first appeared in btrfs.
|
||||||
|
|
||||||
A "-" for "last bad kernel" indicates the bug has not yet been fixed as
|
A "-" for "last bad kernel" indicates the bug has not yet been fixed as
|
||||||
of 5.18.18.
|
of 5.8.14.
|
||||||
|
|
||||||
In cases where issues are fixed by commits spread out over multiple
|
In cases where issues are fixed by commits spread out over multiple
|
||||||
kernel versions, "fixed kernel version" refers to the version that
|
kernel versions, "fixed kernel version" refers to the version that
|
||||||
@@ -91,11 +87,6 @@ contains all components of the fix.
|
|||||||
Workarounds for known kernel bugs
|
Workarounds for known kernel bugs
|
||||||
---------------------------------
|
---------------------------------
|
||||||
|
|
||||||
* **Hangs with high worker thread counts**: On kernels newer than
|
|
||||||
5.4, multiple threads running `LOGICAL_INO` and dedupe ioctls
|
|
||||||
at the same time can lead to a kernel hang. The workaround is
|
|
||||||
to reduce the thread count to 1 with `-c1`.
|
|
||||||
|
|
||||||
* **Tree mod log issues**: bees will detect that a btrfs balance is
|
* **Tree mod log issues**: bees will detect that a btrfs balance is
|
||||||
running, and pause bees activity until the balance is done. This avoids
|
running, and pause bees activity until the balance is done. This avoids
|
||||||
running both the `LOGICAL_INO` ioctl and btrfs balance at the same time,
|
running both the `LOGICAL_INO` ioctl and btrfs balance at the same time,
|
||||||
@@ -137,7 +128,7 @@ Workarounds for known kernel bugs
|
|||||||
Unfixed kernel bugs
|
Unfixed kernel bugs
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
As of 5.18.18:
|
As of 5.12.3:
|
||||||
|
|
||||||
* **The kernel does not permit `btrfs send` and dedupe to run at the
|
* **The kernel does not permit `btrfs send` and dedupe to run at the
|
||||||
same time**. Recent kernels no longer crash, but now refuse one
|
same time**. Recent kernels no longer crash, but now refuse one
|
||||||
@@ -160,3 +151,22 @@ As of 5.18.18:
|
|||||||
still saves some IO.
|
still saves some IO.
|
||||||
|
|
||||||
`btrfs receive` is not affected by this issue.
|
`btrfs receive` is not affected by this issue.
|
||||||
|
|
||||||
|
* **Spurious warnings in `fs/fs-writeback.c`** on kernel 4.15 and later
|
||||||
|
when filesystem is mounted with `flushoncommit`. These
|
||||||
|
seem to be harmless (there are other locks which prevent
|
||||||
|
concurrent umount of the filesystem), but the underlying
|
||||||
|
problems that trigger the `WARN_ON` are [not trivial to
|
||||||
|
fix](https://www.spinics.net/lists/linux-btrfs/msg87752.html).
|
||||||
|
|
||||||
|
The warnings can be especially voluminous when bees is running.
|
||||||
|
|
||||||
|
Workarounds:
|
||||||
|
|
||||||
|
1. mount with `-o noflushoncommit`
|
||||||
|
2. patch kernel to remove warning in `fs/fs-writeback.c`.
|
||||||
|
|
||||||
|
Note that using kernels 4.14 and earlier is *not* a viable workaround
|
||||||
|
for this issue, because kernels 4.14 and earlier will eventually
|
||||||
|
deadlock when a filesystem is mounted with `-o flushoncommit` (a single
|
||||||
|
commit fixes one bug and introduces the other).
|
||||||
|
@@ -67,12 +67,11 @@ The `adjust` event group consists of operations related to translating stored vi
|
|||||||
* `adjust_exact`: A block address from the hash table corresponding to an uncompressed data block was processed to find its `(root, inode, offset)` references.
|
* `adjust_exact`: A block address from the hash table corresponding to an uncompressed data block was processed to find its `(root, inode, offset)` references.
|
||||||
* `adjust_exact_correct`: A block address corresponding to an uncompressed block was retrieved from the hash table and resolved to a physical block containing data that matches another block bees has already read.
|
* `adjust_exact_correct`: A block address corresponding to an uncompressed block was retrieved from the hash table and resolved to a physical block containing data that matches another block bees has already read.
|
||||||
* `adjust_exact_wrong`: A block address corresponding to an uncompressed block was retrieved from the hash table and resolved to a physical block containing data that matches the hash but not the data from another block bees has already read (i.e. there was a hash collision).
|
* `adjust_exact_wrong`: A block address corresponding to an uncompressed block was retrieved from the hash table and resolved to a physical block containing data that matches the hash but not the data from another block bees has already read (i.e. there was a hash collision).
|
||||||
* `adjust_hit`: A block address was retrieved from the hash table and resolved to a physical block in an uncompressed extent containing data that matches the data from another block bees has already read (i.e. a duplicate match was found).
|
* `adjust_hit`: A block address was retrieved from the hash table and resolved to a physical block containing data that matches the data from another block bees has already read (i.e. a duplicate match was found).
|
||||||
* `adjust_miss`: A block address was retrieved from the hash table and resolved to a physical block containing a hash that does not match the hash from another block bees has already read (i.e. the hash table contained a stale entry and the data it referred to has since been overwritten in the filesystem).
|
* `adjust_miss`: A block address was retrieved from the hash table and resolved to a physical block containing a hash that does not match the hash from another block bees has already read (i.e. the hash table contained a stale entry and the data it referred to has since been overwritten in the filesystem).
|
||||||
* `adjust_needle_too_long`: A block address was retrieved from the hash table, but when the corresponding extent item was retrieved, its offset or length were out of range to be a match (i.e. the hash table contained a stale entry and the data it referred to has since been overwritten in the filesystem).
|
* `adjust_needle_too_long`: A block address was retrieved from the hash table, but when the corresponding extent item was retrieved, its offset or length were out of range to be a match (i.e. the hash table contained a stale entry and the data it referred to has since been overwritten in the filesystem).
|
||||||
* `adjust_no_match`: A hash collision occurred (i.e. a block on disk was located with the same hash as the hash table entry but different data). Effectively an alias for `hash_collision` as it is not possible to have one event without the other.
|
* `adjust_no_match`: A hash collision occurred (i.e. a block on disk was located with the same hash as the hash table entry but different data). Effectively an alias for `hash_collision` as it is not possible to have one event without the other.
|
||||||
* `adjust_offset_high`: The `LOGICAL_INO` ioctl gave an extent item that does not overlap with the desired block because the extent item ends before the desired block in the extent data.
|
* `adjust_offset_high`: The `LOGICAL_INO` ioctl gave an extent item that does not overlap with the desired block because the extent item ends before the desired block in the extent data.
|
||||||
* `adjust_offset_hit`: A block address was retrieved from the hash table and resolved to a physical block in a compressed extent containing data that matches the data from another block bees has already read (i.e. a duplicate match was found).
|
|
||||||
* `adjust_offset_low`: The `LOGICAL_INO` ioctl gave an extent item that does not overlap with the desired block because the extent item begins after the desired block in the extent data.
|
* `adjust_offset_low`: The `LOGICAL_INO` ioctl gave an extent item that does not overlap with the desired block because the extent item begins after the desired block in the extent data.
|
||||||
* `adjust_try`: A block address and extent item candidate were passed to `BeesResolver::adjust_offset` for processing.
|
* `adjust_try`: A block address and extent item candidate were passed to `BeesResolver::adjust_offset` for processing.
|
||||||
|
|
||||||
@@ -300,7 +299,6 @@ The `resolve` event group consists of operations related to translating a btrfs
|
|||||||
* `resolve_large`: The `LOGICAL_INO` ioctl returned more than 2730 results (the limit of the v1 ioctl).
|
* `resolve_large`: The `LOGICAL_INO` ioctl returned more than 2730 results (the limit of the v1 ioctl).
|
||||||
* `resolve_ms`: Total time spent in the `LOGICAL_INO` ioctl (i.e. wallclock time, not kernel CPU time).
|
* `resolve_ms`: Total time spent in the `LOGICAL_INO` ioctl (i.e. wallclock time, not kernel CPU time).
|
||||||
* `resolve_ok`: The `LOGICAL_INO` ioctl returned success.
|
* `resolve_ok`: The `LOGICAL_INO` ioctl returned success.
|
||||||
* `resolve_overflow`: The `LOGICAL_INO` ioctl returned more than 655050 extents (the limit of the v2 ioctl).
|
|
||||||
* `resolve_toxic`: The `LOGICAL_INO` ioctl took more than 0.1 seconds of kernel CPU time.
|
* `resolve_toxic`: The `LOGICAL_INO` ioctl took more than 0.1 seconds of kernel CPU time.
|
||||||
|
|
||||||
root
|
root
|
||||||
@@ -335,7 +333,6 @@ The `scan` event group consists of operations related to scanning incoming data.
|
|||||||
* `scan_eof`: Scan past EOF was attempted.
|
* `scan_eof`: Scan past EOF was attempted.
|
||||||
* `scan_erase_redundant`: Blocks in the hash table were removed because they were removed from the filesystem by dedupe.
|
* `scan_erase_redundant`: Blocks in the hash table were removed because they were removed from the filesystem by dedupe.
|
||||||
* `scan_extent`: An extent was scanned (`scan_one_extent`).
|
* `scan_extent`: An extent was scanned (`scan_one_extent`).
|
||||||
* `scan_extent_tiny`: An extent below 128K that was not the beginning or end of a file was scanned. No action is currently taken for these--they are merely counted.
|
|
||||||
* `scan_forward`: A logical byte range was scanned (`scan_forward`).
|
* `scan_forward`: A logical byte range was scanned (`scan_forward`).
|
||||||
* `scan_found`: An entry was found in the hash table matching a scanned block from the filesystem.
|
* `scan_found`: An entry was found in the hash table matching a scanned block from the filesystem.
|
||||||
* `scan_hash_hit`: A block was found on the filesystem corresponding to a block found in the hash table.
|
* `scan_hash_hit`: A block was found on the filesystem corresponding to a block found in the hash table.
|
||||||
|
@@ -45,7 +45,7 @@ bees will loop billions of times considering all possibilities. This is
|
|||||||
a waste of time, so an exception is currently used to break out of such
|
a waste of time, so an exception is currently used to break out of such
|
||||||
loops early. The exception text in this case is:
|
loops early. The exception text in this case is:
|
||||||
|
|
||||||
`FIXME: too many duplicate candidates, bailing out here`
|
`FIXME: bailing out here, need to fix this further up the call stack`
|
||||||
|
|
||||||
|
|
||||||
Terminating bees with SIGTERM
|
Terminating bees with SIGTERM
|
||||||
|
@@ -17,7 +17,7 @@ Strengths
|
|||||||
* Space-efficient hash table and matching algorithms - can use as little as 1 GB hash table per 10 TB unique data (0.1GB/TB)
|
* Space-efficient hash table and matching algorithms - can use as little as 1 GB hash table per 10 TB unique data (0.1GB/TB)
|
||||||
* Daemon incrementally dedupes new data using btrfs tree search
|
* Daemon incrementally dedupes new data using btrfs tree search
|
||||||
* Works with btrfs compression - dedupe any combination of compressed and uncompressed files
|
* Works with btrfs compression - dedupe any combination of compressed and uncompressed files
|
||||||
* **NEW** [Works around `btrfs send` problems with dedupe and incremental parent snapshots](options.md)
|
* **NEW** [Works around `btrfs send` problems with dedupe and incremental parent shapshots](options.md)
|
||||||
* Works around btrfs filesystem structure to free more disk space
|
* Works around btrfs filesystem structure to free more disk space
|
||||||
* Persistent hash table for rapid restart after shutdown
|
* Persistent hash table for rapid restart after shutdown
|
||||||
* Whole-filesystem dedupe - including snapshots
|
* Whole-filesystem dedupe - including snapshots
|
||||||
@@ -70,6 +70,6 @@ You can also use Github:
|
|||||||
Copyright & License
|
Copyright & License
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
Copyright 2015-2022 Zygo Blaxell <bees@furryterror.org>.
|
Copyright 2015-2018 Zygo Blaxell <bees@furryterror.org>.
|
||||||
|
|
||||||
GPL (version 3 or later).
|
GPL (version 3 or later).
|
||||||
|
@@ -134,7 +134,7 @@ ulimit -c 0
|
|||||||
|
|
||||||
# If there were core files, generate reports for them
|
# If there were core files, generate reports for them
|
||||||
for x in core*; do
|
for x in core*; do
|
||||||
if [ -e "$x" ]; then
|
if [ -e "$x" ]; then
|
||||||
gdb --core="$x" \
|
gdb --core="$x" \
|
||||||
--eval-command='set pagination off' \
|
--eval-command='set pagination off' \
|
||||||
--eval-command='info shared' \
|
--eval-command='info shared' \
|
||||||
|
@@ -216,28 +216,7 @@ enum btrfs_compression_type {
|
|||||||
#define BTRFS_FS_INFO_FLAG_CSUM_INFO (1 << 0)
|
#define BTRFS_FS_INFO_FLAG_CSUM_INFO (1 << 0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef BTRFS_FS_INFO_FLAG_GENERATION
|
struct btrfs_ioctl_fs_info_args_v2 {
|
||||||
/* Request information about filesystem generation */
|
|
||||||
#define BTRFS_FS_INFO_FLAG_GENERATION (1 << 1)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef BTRFS_FS_INFO_FLAG_METADATA_UUID
|
|
||||||
/* Request information about filesystem metadata UUID */
|
|
||||||
#define BTRFS_FS_INFO_FLAG_METADATA_UUID (1 << 2)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// BTRFS_CSUM_TYPE_CRC32 was a #define from 2008 to 2019.
|
|
||||||
// After that, it's an enum with the other 3 types.
|
|
||||||
// So if we do _not_ have CRC32 defined, it means we have the other 3;
|
|
||||||
// if we _do_ have CRC32 defined, it means we need the other 3.
|
|
||||||
// This seems likely to break some day.
|
|
||||||
#ifdef BTRFS_CSUM_TYPE_CRC32
|
|
||||||
#define BTRFS_CSUM_TYPE_XXHASH 1
|
|
||||||
#define BTRFS_CSUM_TYPE_SHA256 2
|
|
||||||
#define BTRFS_CSUM_TYPE_BLAKE2 3
|
|
||||||
#endif
|
|
||||||
|
|
||||||
struct btrfs_ioctl_fs_info_args_v3 {
|
|
||||||
__u64 max_id; /* out */
|
__u64 max_id; /* out */
|
||||||
__u64 num_devices; /* out */
|
__u64 num_devices; /* out */
|
||||||
__u8 fsid[BTRFS_FSID_SIZE]; /* out */
|
__u8 fsid[BTRFS_FSID_SIZE]; /* out */
|
||||||
@@ -248,9 +227,7 @@ struct btrfs_ioctl_fs_info_args_v3 {
|
|||||||
__u16 csum_type; /* out */
|
__u16 csum_type; /* out */
|
||||||
__u16 csum_size; /* out */
|
__u16 csum_size; /* out */
|
||||||
__u64 flags; /* in/out */
|
__u64 flags; /* in/out */
|
||||||
__u64 generation; /* out */
|
__u8 reserved[968]; /* pad to 1k */
|
||||||
__u8 metadata_uuid[BTRFS_FSID_SIZE]; /* out */
|
|
||||||
__u8 reserved[944]; /* pad to 1k */
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // CRUCIBLE_BTRFS_H
|
#endif // CRUCIBLE_BTRFS_H
|
||||||
|
@@ -1,71 +0,0 @@
|
|||||||
#ifndef _CRUCIBLE_BYTEVECTOR_H_
|
|
||||||
#define _CRUCIBLE_BYTEVECTOR_H_
|
|
||||||
|
|
||||||
#include <memory>
|
|
||||||
|
|
||||||
#include <cstdint>
|
|
||||||
#include <cstdlib>
|
|
||||||
|
|
||||||
namespace crucible {
|
|
||||||
using namespace std;
|
|
||||||
// new[] is a little slower than malloc
|
|
||||||
// shared_ptr is about 2x slower than unique_ptr
|
|
||||||
// vector<uint8_t> is ~160x slower
|
|
||||||
// so we won't bother with unique_ptr because we can't do shared copies with it
|
|
||||||
|
|
||||||
class ByteVector {
|
|
||||||
public:
|
|
||||||
using Pointer = shared_ptr<uint8_t>;
|
|
||||||
using value_type = Pointer::element_type;
|
|
||||||
using iterator = value_type*;
|
|
||||||
|
|
||||||
ByteVector() = default;
|
|
||||||
ByteVector(size_t size);
|
|
||||||
ByteVector(const ByteVector &that, size_t start, size_t length);
|
|
||||||
ByteVector(iterator begin, iterator end, size_t min_size = 0);
|
|
||||||
|
|
||||||
ByteVector at(size_t start, size_t length) const;
|
|
||||||
|
|
||||||
value_type& at(size_t) const;
|
|
||||||
iterator begin() const;
|
|
||||||
void clear();
|
|
||||||
value_type* data() const;
|
|
||||||
bool empty() const;
|
|
||||||
iterator end() const;
|
|
||||||
value_type& operator[](size_t) const;
|
|
||||||
size_t size() const;
|
|
||||||
bool operator==(const ByteVector &that) const;
|
|
||||||
|
|
||||||
// this version of erase only works at the beginning or end of the buffer, else throws exception
|
|
||||||
void erase(iterator first);
|
|
||||||
void erase(iterator first, iterator last);
|
|
||||||
|
|
||||||
// An important use case is ioctls that have a fixed-size header struct
|
|
||||||
// followed by a buffer for further arguments. These templates avoid
|
|
||||||
// doing reinterpret_casts every time.
|
|
||||||
template <class T> ByteVector(const T& object, size_t min_size);
|
|
||||||
template <class T> T* get() const;
|
|
||||||
private:
|
|
||||||
Pointer m_ptr;
|
|
||||||
size_t m_size = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
template <class T>
|
|
||||||
ByteVector::ByteVector(const T& object, size_t min_size)
|
|
||||||
{
|
|
||||||
const auto size = max(min_size, sizeof(T));
|
|
||||||
m_ptr = Pointer(static_cast<value_type*>(malloc(size)), free);
|
|
||||||
memcpy(m_ptr.get(), &object, sizeof(T));
|
|
||||||
m_size = size;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class T>
|
|
||||||
T*
|
|
||||||
ByteVector::get() const
|
|
||||||
{
|
|
||||||
return reinterpret_cast<T*>(data());
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif // _CRUCIBLE_BYTEVECTOR_H_
|
|
@@ -28,7 +28,7 @@ namespace crucible {
|
|||||||
};
|
};
|
||||||
|
|
||||||
template<> struct le_to_cpu_helper<uint16_t> {
|
template<> struct le_to_cpu_helper<uint16_t> {
|
||||||
uint16_t operator()(const uint16_t v) { return le16toh(v); }
|
uint16_t operator()(const uint16_t v) { return le64toh(v); }
|
||||||
};
|
};
|
||||||
|
|
||||||
template<> struct le_to_cpu_helper<uint8_t> {
|
template<> struct le_to_cpu_helper<uint8_t> {
|
||||||
|
@@ -126,13 +126,6 @@ namespace crucible {
|
|||||||
} \
|
} \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
#define THROW_CHECK4(type, value1, value2, value3, value4, expr) do { \
|
|
||||||
if (!(expr)) { \
|
|
||||||
THROW_ERROR(type, #value1 << " = " << (value1) << ", " #value2 << " = " << (value2) << ", " #value3 << " = " << (value3) << ", " #value4 << " = " << (value4) \
|
|
||||||
<< " failed constraint check (" << #expr << ")"); \
|
|
||||||
} \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#define THROW_CHECK_BIN_OP(type, value1, op, value2) do { \
|
#define THROW_CHECK_BIN_OP(type, value1, op, value2) do { \
|
||||||
if (!((value1) op (value2))) { \
|
if (!((value1) op (value2))) { \
|
||||||
THROW_ERROR(type, "failed constraint check " << #value1 << " (" << (value1) << ") " << #op << " " << #value2 << " (" << (value2) << ")"); \
|
THROW_ERROR(type, "failed constraint check " << #value1 << " (" << (value1) << ") " << #op << " " << #value2 << " (" << (value2) << ")"); \
|
||||||
|
@@ -42,6 +42,9 @@ namespace crucible {
|
|||||||
uint64_t bytenr() const;
|
uint64_t bytenr() const;
|
||||||
bool operator==(const Extent &that) const;
|
bool operator==(const Extent &that) const;
|
||||||
bool operator!=(const Extent &that) const { return !(*this == that); }
|
bool operator!=(const Extent &that) const { return !(*this == that); }
|
||||||
|
|
||||||
|
Extent() = default;
|
||||||
|
Extent(const Extent &e) = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
class ExtentWalker {
|
class ExtentWalker {
|
||||||
|
@@ -1,7 +1,6 @@
|
|||||||
#ifndef CRUCIBLE_FD_H
|
#ifndef CRUCIBLE_FD_H
|
||||||
#define CRUCIBLE_FD_H
|
#define CRUCIBLE_FD_H
|
||||||
|
|
||||||
#include "crucible/bytevector.h"
|
|
||||||
#include "crucible/namedptr.h"
|
#include "crucible/namedptr.h"
|
||||||
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
@@ -126,14 +125,11 @@ namespace crucible {
|
|||||||
// Specialization for strings which reads/writes the string content, not the struct string
|
// Specialization for strings which reads/writes the string content, not the struct string
|
||||||
template<> void write_or_die<string>(int fd, const string& str);
|
template<> void write_or_die<string>(int fd, const string& str);
|
||||||
template<> void pread_or_die<string>(int fd, string& str, off_t offset);
|
template<> void pread_or_die<string>(int fd, string& str, off_t offset);
|
||||||
|
template<> void pread_or_die<vector<char>>(int fd, vector<char>& str, off_t offset);
|
||||||
|
template<> void pread_or_die<vector<uint8_t>>(int fd, vector<uint8_t>& str, off_t offset);
|
||||||
template<> void pwrite_or_die<string>(int fd, const string& str, off_t offset);
|
template<> void pwrite_or_die<string>(int fd, const string& str, off_t offset);
|
||||||
template<> void pread_or_die<ByteVector>(int fd, ByteVector& str, off_t offset);
|
template<> void pwrite_or_die<vector<char>>(int fd, const vector<char>& str, off_t offset);
|
||||||
template<> void pwrite_or_die<ByteVector>(int fd, const ByteVector& str, off_t offset);
|
template<> void pwrite_or_die<vector<uint8_t>>(int fd, const vector<uint8_t>& str, off_t offset);
|
||||||
// Deprecated
|
|
||||||
template<> void pread_or_die<vector<uint8_t>>(int fd, vector<uint8_t>& str, off_t offset) = delete;
|
|
||||||
template<> void pwrite_or_die<vector<uint8_t>>(int fd, const vector<uint8_t>& str, off_t offset) = delete;
|
|
||||||
template<> void pread_or_die<vector<char>>(int fd, vector<char>& str, off_t offset) = delete;
|
|
||||||
template<> void pwrite_or_die<vector<char>>(int fd, const vector<char>& str, off_t offset) = delete;
|
|
||||||
|
|
||||||
// A different approach to reading a simple string
|
// A different approach to reading a simple string
|
||||||
string read_string(int fd, size_t size);
|
string read_string(int fd, size_t size);
|
||||||
|
@@ -1,9 +1,9 @@
|
|||||||
#ifndef CRUCIBLE_FS_H
|
#ifndef CRUCIBLE_FS_H
|
||||||
#define CRUCIBLE_FS_H
|
#define CRUCIBLE_FS_H
|
||||||
|
|
||||||
#include "crucible/bytevector.h"
|
|
||||||
#include "crucible/endian.h"
|
#include "crucible/endian.h"
|
||||||
#include "crucible/error.h"
|
#include "crucible/error.h"
|
||||||
|
#include "crucible/spanner.h"
|
||||||
|
|
||||||
// Terribly Linux-specific FS-wrangling functions
|
// Terribly Linux-specific FS-wrangling functions
|
||||||
|
|
||||||
@@ -31,14 +31,12 @@ namespace crucible {
|
|||||||
BtrfsExtentInfo(int dst_fd, off_t dst_offset);
|
BtrfsExtentInfo(int dst_fd, off_t dst_offset);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BtrfsExtentSame {
|
struct BtrfsExtentSame : public btrfs_ioctl_same_args {
|
||||||
virtual ~BtrfsExtentSame();
|
virtual ~BtrfsExtentSame();
|
||||||
BtrfsExtentSame(int src_fd, off_t src_offset, off_t src_length);
|
BtrfsExtentSame(int src_fd, off_t src_offset, off_t src_length);
|
||||||
void add(int fd, off_t offset);
|
void add(int fd, off_t offset);
|
||||||
virtual void do_ioctl();
|
virtual void do_ioctl();
|
||||||
|
|
||||||
uint64_t m_logical_offset = 0;
|
|
||||||
uint64_t m_length = 0;
|
|
||||||
int m_fd;
|
int m_fd;
|
||||||
vector<BtrfsExtentInfo> m_info;
|
vector<BtrfsExtentInfo> m_info;
|
||||||
};
|
};
|
||||||
@@ -55,17 +53,17 @@ namespace crucible {
|
|||||||
|
|
||||||
ostream & operator<<(ostream &os, const BtrfsInodeOffsetRoot &p);
|
ostream & operator<<(ostream &os, const BtrfsInodeOffsetRoot &p);
|
||||||
|
|
||||||
struct BtrfsDataContainer {
|
struct BtrfsDataContainer : public btrfs_data_container {
|
||||||
BtrfsDataContainer(size_t size = 64 * 1024);
|
BtrfsDataContainer(size_t size = 64 * 1024);
|
||||||
void *prepare(size_t size);
|
void *prepare(size_t size);
|
||||||
|
|
||||||
size_t get_size() const;
|
size_t get_size() const;
|
||||||
decltype(btrfs_data_container::bytes_left) get_bytes_left() const;
|
decltype(bytes_left) get_bytes_left() const;
|
||||||
decltype(btrfs_data_container::bytes_missing) get_bytes_missing() const;
|
decltype(bytes_missing) get_bytes_missing() const;
|
||||||
decltype(btrfs_data_container::elem_cnt) get_elem_cnt() const;
|
decltype(elem_cnt) get_elem_cnt() const;
|
||||||
decltype(btrfs_data_container::elem_missed) get_elem_missed() const;
|
decltype(elem_missed) get_elem_missed() const;
|
||||||
|
|
||||||
ByteVector m_data;
|
vector<uint8_t> m_data;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BtrfsIoctlLogicalInoArgs : public btrfs_ioctl_logical_ino_args {
|
struct BtrfsIoctlLogicalInoArgs : public btrfs_ioctl_logical_ino_args {
|
||||||
@@ -143,26 +141,16 @@ namespace crucible {
|
|||||||
off_t end() const;
|
off_t end() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Fiemap {
|
struct Fiemap : public fiemap {
|
||||||
|
|
||||||
// because fiemap.h insists on giving FIEMAP_MAX_OFFSET
|
|
||||||
// a different type from the struct fiemap members
|
|
||||||
static const uint64_t s_fiemap_max_offset = FIEMAP_MAX_OFFSET;
|
|
||||||
|
|
||||||
// Get entire file
|
// Get entire file
|
||||||
Fiemap(uint64_t start = 0, uint64_t length = s_fiemap_max_offset);
|
Fiemap(uint64_t start = 0, uint64_t length = FIEMAP_MAX_OFFSET);
|
||||||
|
|
||||||
void do_ioctl(int fd);
|
void do_ioctl(int fd);
|
||||||
|
|
||||||
vector<FiemapExtent> m_extents;
|
vector<FiemapExtent> m_extents;
|
||||||
decltype(fiemap::fm_extent_count) m_min_count = (4096 - sizeof(fiemap)) / sizeof(fiemap_extent);
|
uint64_t m_min_count = (4096 - sizeof(fiemap)) / sizeof(fiemap_extent);
|
||||||
decltype(fiemap::fm_extent_count) m_max_count = 16 * 1024 * 1024 / sizeof(fiemap_extent);
|
uint64_t m_max_count = 16 * 1024 * 1024 / sizeof(fiemap_extent);
|
||||||
uint64_t m_start;
|
|
||||||
uint64_t m_length;
|
|
||||||
// FIEMAP is slow and full of lies.
|
|
||||||
// This makes FIEMAP even slower, but reduces the lies a little.
|
|
||||||
decltype(fiemap::fm_flags) m_flags = FIEMAP_FLAG_SYNC;
|
|
||||||
friend ostream &operator<<(ostream &, const Fiemap &);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
ostream & operator<<(ostream &os, const fiemap_extent *info);
|
ostream & operator<<(ostream &os, const fiemap_extent *info);
|
||||||
@@ -178,8 +166,8 @@ namespace crucible {
|
|||||||
|
|
||||||
struct BtrfsIoctlSearchHeader : public btrfs_ioctl_search_header {
|
struct BtrfsIoctlSearchHeader : public btrfs_ioctl_search_header {
|
||||||
BtrfsIoctlSearchHeader();
|
BtrfsIoctlSearchHeader();
|
||||||
ByteVector m_data;
|
Spanner<const uint8_t> m_data;
|
||||||
size_t set_data(const ByteVector &v, size_t offset);
|
size_t set_data(const vector<uint8_t> &v, size_t offset);
|
||||||
bool operator<(const BtrfsIoctlSearchHeader &that) const;
|
bool operator<(const BtrfsIoctlSearchHeader &that) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -193,18 +181,17 @@ namespace crucible {
|
|||||||
ostream & operator<<(ostream &os, const BtrfsIoctlSearchHeader &hdr);
|
ostream & operator<<(ostream &os, const BtrfsIoctlSearchHeader &hdr);
|
||||||
|
|
||||||
struct BtrfsIoctlSearchKey : public btrfs_ioctl_search_key {
|
struct BtrfsIoctlSearchKey : public btrfs_ioctl_search_key {
|
||||||
BtrfsIoctlSearchKey(size_t buf_size = 1024);
|
BtrfsIoctlSearchKey(size_t buf_size = 4096);
|
||||||
bool do_ioctl_nothrow(int fd);
|
virtual bool do_ioctl_nothrow(int fd);
|
||||||
void do_ioctl(int fd);
|
virtual void do_ioctl(int fd);
|
||||||
|
|
||||||
// Copy objectid/type/offset so we move forward
|
// Copy objectid/type/offset so we move forward
|
||||||
void next_min(const BtrfsIoctlSearchHeader& ref);
|
void next_min(const BtrfsIoctlSearchHeader& ref);
|
||||||
|
|
||||||
// move forward to next object of a single type
|
|
||||||
void next_min(const BtrfsIoctlSearchHeader& ref, const uint8_t type);
|
|
||||||
|
|
||||||
size_t m_buf_size;
|
size_t m_buf_size;
|
||||||
|
vector<uint8_t> m_ioctl_arg;
|
||||||
set<BtrfsIoctlSearchHeader> m_result;
|
set<BtrfsIoctlSearchHeader> m_result;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
ostream & operator<<(ostream &os, const btrfs_ioctl_search_key &key);
|
ostream & operator<<(ostream &os, const btrfs_ioctl_search_key &key);
|
||||||
@@ -248,12 +235,11 @@ namespace crucible {
|
|||||||
|
|
||||||
template<class V> ostream &hexdump(ostream &os, const V &v);
|
template<class V> ostream &hexdump(ostream &os, const V &v);
|
||||||
|
|
||||||
struct BtrfsIoctlFsInfoArgs : public btrfs_ioctl_fs_info_args_v3 {
|
struct BtrfsIoctlFsInfoArgs : public btrfs_ioctl_fs_info_args_v2 {
|
||||||
BtrfsIoctlFsInfoArgs();
|
BtrfsIoctlFsInfoArgs();
|
||||||
void do_ioctl(int fd);
|
void do_ioctl(int fd);
|
||||||
uint16_t csum_type() const;
|
uint16_t csum_type() const;
|
||||||
uint16_t csum_size() const;
|
uint16_t csum_size() const;
|
||||||
uint64_t generation() const;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
ostream & operator<<(ostream &os, const BtrfsIoctlFsInfoArgs &a);
|
ostream & operator<<(ostream &os, const BtrfsIoctlFsInfoArgs &a);
|
||||||
|
@@ -82,7 +82,7 @@ namespace crucible {
|
|||||||
// "our" map entry if it exists and is expired. The other
|
// "our" map entry if it exists and is expired. The other
|
||||||
// thread would have done the same for us if the race had
|
// thread would have done the same for us if the race had
|
||||||
// a different winner.
|
// a different winner.
|
||||||
const auto found = m_map_rep->m_map.find(m_ret_key);
|
auto found = m_map_rep->m_map.find(m_ret_key);
|
||||||
if (found != m_map_rep->m_map.end() && found->second.expired()) {
|
if (found != m_map_rep->m_map.end() && found->second.expired()) {
|
||||||
m_map_rep->m_map.erase(found);
|
m_map_rep->m_map.erase(found);
|
||||||
}
|
}
|
||||||
@@ -93,10 +93,10 @@ namespace crucible {
|
|||||||
NamedPtr<Return, Arguments...>::lookup_item(const Key &k)
|
NamedPtr<Return, Arguments...>::lookup_item(const Key &k)
|
||||||
{
|
{
|
||||||
// Must be called with lock held
|
// Must be called with lock held
|
||||||
const auto found = m_map_rep->m_map.find(k);
|
auto found = m_map_rep->m_map.find(k);
|
||||||
if (found != m_map_rep->m_map.end()) {
|
if (found != m_map_rep->m_map.end()) {
|
||||||
// Get the strong pointer back
|
// Get the strong pointer back
|
||||||
const auto rv = found->second.lock();
|
auto rv = found->second.lock();
|
||||||
if (rv) {
|
if (rv) {
|
||||||
// Have strong pointer. Return value that shares map entry.
|
// Have strong pointer. Return value that shares map entry.
|
||||||
return shared_ptr<Return>(rv, rv->m_ret_ptr.get());
|
return shared_ptr<Return>(rv, rv->m_ret_ptr.get());
|
||||||
@@ -116,36 +116,34 @@ namespace crucible {
|
|||||||
Key k(args...);
|
Key k(args...);
|
||||||
|
|
||||||
// Is it already in the map?
|
// Is it already in the map?
|
||||||
unique_lock<mutex> lock_lookup(m_map_rep->m_mutex);
|
unique_lock<mutex> lock(m_map_rep->m_mutex);
|
||||||
auto rv = lookup_item(k);
|
auto rv = lookup_item(k);
|
||||||
if (rv) {
|
if (rv) {
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Release map lock and acquire key lock
|
// Release map lock and acquire key lock
|
||||||
lock_lookup.unlock();
|
lock.unlock();
|
||||||
const auto key_lock = m_lockset.make_lock(k);
|
auto key_lock = m_lockset.make_lock(k);
|
||||||
|
|
||||||
// Did item appear in map while we were waiting for key?
|
// Did item appear in map while we were waiting for key?
|
||||||
lock_lookup.lock();
|
lock.lock();
|
||||||
rv = lookup_item(k);
|
rv = lookup_item(k);
|
||||||
if (rv) {
|
if (rv) {
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We now hold key and index locks, but item not in map (or expired).
|
// We now hold key and index locks, but item not in map (or expired).
|
||||||
// Release map lock so other threads can use the map
|
// Release map lock
|
||||||
lock_lookup.unlock();
|
lock.unlock();
|
||||||
|
|
||||||
// Call the function and create a new Value outside of the map
|
|
||||||
const auto new_value_ptr = make_shared<Value>(fn(args...), k, m_map_rep);
|
|
||||||
|
|
||||||
|
// Call the function and create a new Value
|
||||||
|
auto new_value_ptr = make_shared<Value>(fn(args...), k, m_map_rep);
|
||||||
// Function must return a non-null pointer
|
// Function must return a non-null pointer
|
||||||
THROW_CHECK0(runtime_error, new_value_ptr->m_ret_ptr);
|
THROW_CHECK0(runtime_error, new_value_ptr->m_ret_ptr);
|
||||||
|
|
||||||
// Reacquire index lock for map insertion. We still hold the key lock.
|
// Reacquire index lock for map insertion
|
||||||
// Use a different lock object to make exceptions unlock in the right order
|
lock.lock();
|
||||||
unique_lock<mutex> lock_insert(m_map_rep->m_mutex);
|
|
||||||
|
|
||||||
// Insert return value in map or overwrite existing
|
// Insert return value in map or overwrite existing
|
||||||
// empty or expired weak_ptr value.
|
// empty or expired weak_ptr value.
|
||||||
@@ -160,13 +158,14 @@ namespace crucible {
|
|||||||
// to find and fix.
|
// to find and fix.
|
||||||
assert(new_item_ref.expired());
|
assert(new_item_ref.expired());
|
||||||
|
|
||||||
// Update the map slot we are sure is empty
|
// Update the empty map slot
|
||||||
new_item_ref = new_value_ptr;
|
new_item_ref = new_value_ptr;
|
||||||
|
|
||||||
|
// Drop lock so we don't deadlock in constructor exceptions
|
||||||
|
lock.unlock();
|
||||||
|
|
||||||
// Return shared_ptr to Return using strong pointer's reference counter
|
// Return shared_ptr to Return using strong pointer's reference counter
|
||||||
return shared_ptr<Return>(new_value_ptr, new_value_ptr->m_ret_ptr.get());
|
return shared_ptr<Return>(new_value_ptr, new_value_ptr->m_ret_ptr.get());
|
||||||
|
|
||||||
// Release map lock, then key lock
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Return, class... Arguments>
|
template <class Return, class... Arguments>
|
||||||
@@ -189,7 +188,7 @@ namespace crucible {
|
|||||||
NamedPtr<Return, Arguments...>::insert(const Ptr &r, Arguments... args)
|
NamedPtr<Return, Arguments...>::insert(const Ptr &r, Arguments... args)
|
||||||
{
|
{
|
||||||
THROW_CHECK0(invalid_argument, r);
|
THROW_CHECK0(invalid_argument, r);
|
||||||
return insert_item([&](Arguments...) { return r; }, args...);
|
return insert_item([&](Arguments...) -> Ptr { return r; }, args...);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -20,8 +20,8 @@ namespace crucible {
|
|||||||
using ProgressHolder = shared_ptr<ProgressHolderState>;
|
using ProgressHolder = shared_ptr<ProgressHolderState>;
|
||||||
|
|
||||||
ProgressTracker(const value_type &v);
|
ProgressTracker(const value_type &v);
|
||||||
value_type begin() const;
|
value_type begin();
|
||||||
value_type end() const;
|
value_type end();
|
||||||
|
|
||||||
ProgressHolder hold(const value_type &v);
|
ProgressHolder hold(const value_type &v);
|
||||||
|
|
||||||
@@ -51,7 +51,7 @@ namespace crucible {
|
|||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
typename ProgressTracker<T>::value_type
|
typename ProgressTracker<T>::value_type
|
||||||
ProgressTracker<T>::begin() const
|
ProgressTracker<T>::begin()
|
||||||
{
|
{
|
||||||
unique_lock<mutex> lock(m_state->m_mutex);
|
unique_lock<mutex> lock(m_state->m_mutex);
|
||||||
return m_state->m_begin;
|
return m_state->m_begin;
|
||||||
@@ -59,7 +59,7 @@ namespace crucible {
|
|||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
typename ProgressTracker<T>::value_type
|
typename ProgressTracker<T>::value_type
|
||||||
ProgressTracker<T>::end() const
|
ProgressTracker<T>::end()
|
||||||
{
|
{
|
||||||
unique_lock<mutex> lock(m_state->m_mutex);
|
unique_lock<mutex> lock(m_state->m_mutex);
|
||||||
return m_state->m_end;
|
return m_state->m_end;
|
||||||
|
167
include/crucible/spanner.h
Normal file
167
include/crucible/spanner.h
Normal file
@@ -0,0 +1,167 @@
|
|||||||
|
#ifndef CRUCIBLE_SPANNER_H
|
||||||
|
#define CRUCIBLE_SPANNER_H
|
||||||
|
|
||||||
|
#include "crucible/error.h"
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
namespace crucible {
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
// C++20 is already using the name "span" for something similar.
|
||||||
|
template <class T, class Head = T*, class Iter = Head>
|
||||||
|
class Spanner {
|
||||||
|
public:
|
||||||
|
using iterator = Iter;
|
||||||
|
using head_pointer = Head;
|
||||||
|
using value_type = T;
|
||||||
|
|
||||||
|
template <class Container>
|
||||||
|
Spanner(Container& container);
|
||||||
|
|
||||||
|
Spanner(head_pointer begin, iterator end);
|
||||||
|
Spanner(size_t size, head_pointer begin);
|
||||||
|
Spanner() = default;
|
||||||
|
Spanner &operator=(const Spanner &that) = default;
|
||||||
|
iterator begin() const;
|
||||||
|
iterator end() const;
|
||||||
|
value_type *data() const;
|
||||||
|
value_type &at(size_t n) const;
|
||||||
|
size_t size() const;
|
||||||
|
bool empty() const;
|
||||||
|
void clear();
|
||||||
|
value_type &operator[](size_t n) const;
|
||||||
|
iterator erase(iterator first, iterator last);
|
||||||
|
iterator erase(iterator first);
|
||||||
|
private:
|
||||||
|
head_pointer m_begin;
|
||||||
|
size_t m_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class Container, class Head = typename Container::value_type *, class Iter = Head>
|
||||||
|
Spanner<typename Container::value_type, Head, Iter> make_spanner(Container &container)
|
||||||
|
{
|
||||||
|
return Spanner<typename Container::value_type, Head, Iter>(container);
|
||||||
|
}
|
||||||
|
|
||||||
|
// This template is an attempt to turn a shared_ptr to a container
|
||||||
|
// into a range view that can be cheaply passed around.
|
||||||
|
// It probably doesn't quite work in the general case.
|
||||||
|
template <class Container, class Head = shared_ptr<typename Container::value_type>, class Iter = typename Container::value_type *>
|
||||||
|
Spanner<typename Container::value_type, Head, Iter> make_spanner(shared_ptr<Container> &cont_ptr)
|
||||||
|
{
|
||||||
|
shared_ptr<typename Container::value_type> head(cont_ptr, cont_ptr->data());
|
||||||
|
size_t const size = cont_ptr->size();
|
||||||
|
return Spanner<typename Container::value_type, Head, Iter>(size, head);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T, class Head, class Iter>
|
||||||
|
template <class Container>
|
||||||
|
Spanner<T, Head, Iter>::Spanner(Container &container) :
|
||||||
|
m_begin(container.data()),
|
||||||
|
m_size(container.size())
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T, class Head, class Iter>
|
||||||
|
Spanner<T, Head, Iter>::Spanner(head_pointer begin, iterator end) :
|
||||||
|
m_begin(begin),
|
||||||
|
m_size(end - begin)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T, class Head, class Iter>
|
||||||
|
Spanner<T, Head, Iter>::Spanner(size_t size, head_pointer begin) :
|
||||||
|
m_begin(begin),
|
||||||
|
m_size(size)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T, class Head, class Iter>
|
||||||
|
typename Spanner<T, Head, Iter>::iterator
|
||||||
|
Spanner<T, Head, Iter>::erase(iterator first, iterator last)
|
||||||
|
{
|
||||||
|
auto end = m_begin + m_size;
|
||||||
|
if (first == m_begin) {
|
||||||
|
THROW_CHECK0(invalid_argument, last <= end);
|
||||||
|
m_begin = last;
|
||||||
|
return last;
|
||||||
|
}
|
||||||
|
if (last == end) {
|
||||||
|
THROW_CHECK0(invalid_argument, m_begin <= first);
|
||||||
|
m_size = first - m_begin;
|
||||||
|
return first;
|
||||||
|
}
|
||||||
|
THROW_ERROR(invalid_argument, "first != begin() and last != end()");
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T, class Head, class Iter>
|
||||||
|
typename Spanner<T, Head, Iter>::iterator
|
||||||
|
Spanner<T, Head, Iter>::erase(iterator first)
|
||||||
|
{
|
||||||
|
return erase(first, first + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T, class Head, class Iter>
|
||||||
|
typename Spanner<T, Head, Iter>::value_type &
|
||||||
|
Spanner<T, Head, Iter>::operator[](size_t n) const
|
||||||
|
{
|
||||||
|
return at(n);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T, class Head, class Iter>
|
||||||
|
void
|
||||||
|
Spanner<T, Head, Iter>::clear()
|
||||||
|
{
|
||||||
|
m_begin = head_pointer();
|
||||||
|
m_size = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T, class Head, class Iter>
|
||||||
|
bool
|
||||||
|
Spanner<T, Head, Iter>::empty() const
|
||||||
|
{
|
||||||
|
return m_size == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T, class Head, class Iter>
|
||||||
|
size_t
|
||||||
|
Spanner<T, Head, Iter>::size() const
|
||||||
|
{
|
||||||
|
return m_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T, class Head, class Iter>
|
||||||
|
typename Spanner<T, Head, Iter>::value_type *
|
||||||
|
Spanner<T, Head, Iter>::data() const
|
||||||
|
{
|
||||||
|
return &(*m_begin);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T, class Head, class Iter>
|
||||||
|
typename Spanner<T, Head, Iter>::iterator
|
||||||
|
Spanner<T, Head, Iter>::begin() const
|
||||||
|
{
|
||||||
|
return data();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T, class Head, class Iter>
|
||||||
|
typename Spanner<T, Head, Iter>::iterator
|
||||||
|
Spanner<T, Head, Iter>::end() const
|
||||||
|
{
|
||||||
|
return data() + m_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T, class Head, class Iter>
|
||||||
|
typename Spanner<T, Head, Iter>::value_type &
|
||||||
|
Spanner<T, Head, Iter>::at(size_t n) const
|
||||||
|
{
|
||||||
|
THROW_CHECK2(out_of_range, n, size(), n < size());
|
||||||
|
return *(data() + n);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif // CRUCIBLE_SPANNER_H
|
@@ -11,6 +11,23 @@
|
|||||||
namespace crucible {
|
namespace crucible {
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
// Zero-initialize a base class object (usually a C struct)
|
||||||
|
template <class Base>
|
||||||
|
void
|
||||||
|
memset_zero(Base *that)
|
||||||
|
{
|
||||||
|
memset(that, 0, sizeof(Base));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy a base class object (usually a C struct) into a vector<uint8_t>
|
||||||
|
template <class Base>
|
||||||
|
vector<uint8_t>
|
||||||
|
vector_copy_struct(Base *that)
|
||||||
|
{
|
||||||
|
const uint8_t *begin_that = reinterpret_cast<const uint8_t *>(static_cast<const Base *>(that));
|
||||||
|
return vector<uint8_t>(begin_that, begin_that + sizeof(Base));
|
||||||
|
}
|
||||||
|
|
||||||
// int->hex conversion with sprintf
|
// int->hex conversion with sprintf
|
||||||
string to_hex(uint64_t i);
|
string to_hex(uint64_t i);
|
||||||
|
|
||||||
|
@@ -1,14 +0,0 @@
|
|||||||
#ifndef CRUCIBLE_UNAME_H
|
|
||||||
#define CRUCIBLE_UNAME_H
|
|
||||||
|
|
||||||
#include <sys/utsname.h>
|
|
||||||
|
|
||||||
namespace crucible {
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
struct Uname : public utsname {
|
|
||||||
Uname();
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
@@ -4,7 +4,6 @@ default: libcrucible.a
|
|||||||
%.a: Makefile
|
%.a: Makefile
|
||||||
|
|
||||||
CRUCIBLE_OBJS = \
|
CRUCIBLE_OBJS = \
|
||||||
bytevector.o \
|
|
||||||
chatter.o \
|
chatter.o \
|
||||||
city.o \
|
city.o \
|
||||||
cleanup.o \
|
cleanup.o \
|
||||||
@@ -19,7 +18,6 @@ CRUCIBLE_OBJS = \
|
|||||||
string.o \
|
string.o \
|
||||||
task.o \
|
task.o \
|
||||||
time.o \
|
time.o \
|
||||||
uname.o \
|
|
||||||
|
|
||||||
include ../makeflags
|
include ../makeflags
|
||||||
-include ../localconf
|
-include ../localconf
|
||||||
|
@@ -1,147 +0,0 @@
|
|||||||
#include "crucible/bytevector.h"
|
|
||||||
|
|
||||||
#include "crucible/error.h"
|
|
||||||
|
|
||||||
namespace crucible {
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
ByteVector::iterator
|
|
||||||
ByteVector::begin() const
|
|
||||||
{
|
|
||||||
return m_ptr.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
ByteVector::iterator
|
|
||||||
ByteVector::end() const
|
|
||||||
{
|
|
||||||
return m_ptr.get() + m_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t
|
|
||||||
ByteVector::size() const
|
|
||||||
{
|
|
||||||
return m_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
ByteVector::empty() const
|
|
||||||
{
|
|
||||||
return !m_ptr || !m_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
ByteVector::clear()
|
|
||||||
{
|
|
||||||
m_ptr.reset();
|
|
||||||
m_size = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
ByteVector::value_type&
|
|
||||||
ByteVector::operator[](size_t size) const
|
|
||||||
{
|
|
||||||
return m_ptr.get()[size];
|
|
||||||
}
|
|
||||||
|
|
||||||
ByteVector::ByteVector(const ByteVector &that, size_t start, size_t length)
|
|
||||||
{
|
|
||||||
THROW_CHECK0(out_of_range, that.m_ptr);
|
|
||||||
THROW_CHECK2(out_of_range, start, that.m_size, start <= that.m_size);
|
|
||||||
THROW_CHECK2(out_of_range, start + length, that.m_size + length, start + length <= that.m_size + length);
|
|
||||||
m_ptr = Pointer(that.m_ptr, that.m_ptr.get() + start);
|
|
||||||
m_size = length;
|
|
||||||
}
|
|
||||||
|
|
||||||
ByteVector
|
|
||||||
ByteVector::at(size_t start, size_t length) const
|
|
||||||
{
|
|
||||||
return ByteVector(*this, start, length);
|
|
||||||
}
|
|
||||||
|
|
||||||
ByteVector::value_type&
|
|
||||||
ByteVector::at(size_t size) const
|
|
||||||
{
|
|
||||||
THROW_CHECK0(out_of_range, m_ptr);
|
|
||||||
THROW_CHECK2(out_of_range, size, m_size, size < m_size);
|
|
||||||
return m_ptr.get()[size];
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
void *
|
|
||||||
bv_allocate(size_t size)
|
|
||||||
{
|
|
||||||
#ifdef BEES_VALGRIND
|
|
||||||
// XXX: only do this to shut up valgrind
|
|
||||||
return calloc(1, size);
|
|
||||||
#else
|
|
||||||
return malloc(size);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
ByteVector::ByteVector(size_t size)
|
|
||||||
{
|
|
||||||
m_ptr = Pointer(static_cast<value_type*>(bv_allocate(size)), free);
|
|
||||||
// bad_alloc doesn't fit THROW_CHECK's template
|
|
||||||
THROW_CHECK0(runtime_error, m_ptr);
|
|
||||||
m_size = size;
|
|
||||||
}
|
|
||||||
|
|
||||||
ByteVector::ByteVector(iterator begin, iterator end, size_t min_size)
|
|
||||||
{
|
|
||||||
const size_t size = end - begin;
|
|
||||||
const size_t alloc_size = max(size, min_size);
|
|
||||||
m_ptr = Pointer(static_cast<value_type*>(bv_allocate(alloc_size)), free);
|
|
||||||
THROW_CHECK0(runtime_error, m_ptr);
|
|
||||||
m_size = alloc_size;
|
|
||||||
memcpy(m_ptr.get(), begin, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
ByteVector::operator==(const ByteVector &that) const
|
|
||||||
{
|
|
||||||
if (!m_ptr) {
|
|
||||||
return !that.m_ptr;
|
|
||||||
}
|
|
||||||
if (!that.m_ptr) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (m_size != that.m_size) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (m_ptr.get() == that.m_ptr.get()) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return !memcmp(m_ptr.get(), that.m_ptr.get(), m_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
ByteVector::erase(iterator begin, iterator end)
|
|
||||||
{
|
|
||||||
const size_t size = end - begin;
|
|
||||||
if (!size) return;
|
|
||||||
THROW_CHECK0(out_of_range, m_ptr);
|
|
||||||
const iterator my_begin = m_ptr.get();
|
|
||||||
const iterator my_end = my_begin + m_size;
|
|
||||||
THROW_CHECK4(out_of_range, my_begin, begin, my_end, end, my_begin == begin || my_end == end);
|
|
||||||
if (begin == my_begin) {
|
|
||||||
if (end == my_end) {
|
|
||||||
m_size = 0;
|
|
||||||
m_ptr.reset();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
m_ptr = Pointer(m_ptr, end);
|
|
||||||
}
|
|
||||||
m_size -= size;
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
ByteVector::erase(iterator begin)
|
|
||||||
{
|
|
||||||
erase(begin, begin + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
ByteVector::value_type*
|
|
||||||
ByteVector::data() const
|
|
||||||
{
|
|
||||||
return m_ptr.get();
|
|
||||||
}
|
|
||||||
}
|
|
@@ -496,7 +496,7 @@ namespace crucible {
|
|||||||
BtrfsExtentWalker::Vec
|
BtrfsExtentWalker::Vec
|
||||||
BtrfsExtentWalker::get_extent_map(off_t pos)
|
BtrfsExtentWalker::get_extent_map(off_t pos)
|
||||||
{
|
{
|
||||||
BtrfsIoctlSearchKey sk;
|
BtrfsIoctlSearchKey sk(65536);
|
||||||
if (!m_root_fd) {
|
if (!m_root_fd) {
|
||||||
m_root_fd = m_fd;
|
m_root_fd = m_fd;
|
||||||
}
|
}
|
||||||
@@ -640,7 +640,9 @@ namespace crucible {
|
|||||||
ExtentWalker::get_extent_map(off_t pos)
|
ExtentWalker::get_extent_map(off_t pos)
|
||||||
{
|
{
|
||||||
EWLOG("get_extent_map(" << to_hex(pos) << ")");
|
EWLOG("get_extent_map(" << to_hex(pos) << ")");
|
||||||
Fiemap fm(ranged_cast<uint64_t>(pos), ranged_cast<uint64_t>(numeric_limits<off_t>::max() - pos));
|
Fiemap fm;
|
||||||
|
fm.fm_start = ranged_cast<uint64_t>(pos);
|
||||||
|
fm.fm_length = ranged_cast<uint64_t>(numeric_limits<off_t>::max() - pos);
|
||||||
fm.m_max_count = fm.m_min_count = sc_extent_fetch_max;
|
fm.m_max_count = fm.m_min_count = sc_extent_fetch_max;
|
||||||
fm.do_ioctl(m_fd);
|
fm.do_ioctl(m_fd);
|
||||||
Vec rv;
|
Vec rv;
|
||||||
|
34
lib/fd.cc
34
lib/fd.cc
@@ -362,7 +362,7 @@ namespace crucible {
|
|||||||
}
|
}
|
||||||
int rv = ::pwrite(fd, buf, size, offset);
|
int rv = ::pwrite(fd, buf, size, offset);
|
||||||
if (rv != static_cast<int>(size)) {
|
if (rv != static_cast<int>(size)) {
|
||||||
THROW_ERROR(runtime_error, "pwrite: only " << rv << " of " << size << " bytes written at fd " << name_fd(fd) << " offset " << offset);
|
THROW_ERROR(runtime_error, "pwrite: only " << rv << " of " << size << " bytes written at offset " << offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -442,7 +442,7 @@ namespace crucible {
|
|||||||
THROW_ERRNO("pread: " << size << " bytes");
|
THROW_ERRNO("pread: " << size << " bytes");
|
||||||
}
|
}
|
||||||
if (rv != static_cast<int>(size)) {
|
if (rv != static_cast<int>(size)) {
|
||||||
THROW_ERROR(runtime_error, "pread: " << size << " bytes at fd " << name_fd(fd) << " offset " << offset << " returned " << rv);
|
THROW_ERROR(runtime_error, "pread: " << size << " bytes at offset " << offset << " returned " << rv);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -458,14 +458,28 @@ namespace crucible {
|
|||||||
|
|
||||||
template<>
|
template<>
|
||||||
void
|
void
|
||||||
pread_or_die<ByteVector>(int fd, ByteVector &text, off_t offset)
|
pread_or_die<vector<char>>(int fd, vector<char> &text, off_t offset)
|
||||||
{
|
{
|
||||||
return pread_or_die(fd, text.data(), text.size(), offset);
|
return pread_or_die(fd, text.data(), text.size(), offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
void
|
void
|
||||||
pwrite_or_die<ByteVector>(int fd, const ByteVector &text, off_t offset)
|
pread_or_die<vector<uint8_t>>(int fd, vector<uint8_t> &text, off_t offset)
|
||||||
|
{
|
||||||
|
return pread_or_die(fd, text.data(), text.size(), offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
void
|
||||||
|
pwrite_or_die<vector<uint8_t>>(int fd, const vector<uint8_t> &text, off_t offset)
|
||||||
|
{
|
||||||
|
return pwrite_or_die(fd, text.data(), text.size(), offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
void
|
||||||
|
pwrite_or_die<vector<char>>(int fd, const vector<char> &text, off_t offset)
|
||||||
{
|
{
|
||||||
return pwrite_or_die(fd, text.data(), text.size(), offset);
|
return pwrite_or_die(fd, text.data(), text.size(), offset);
|
||||||
}
|
}
|
||||||
@@ -477,9 +491,9 @@ namespace crucible {
|
|||||||
return pwrite_or_die(fd, text.data(), text.size(), offset);
|
return pwrite_or_die(fd, text.data(), text.size(), offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
Stat::Stat() :
|
Stat::Stat()
|
||||||
stat( (stat) { } )
|
|
||||||
{
|
{
|
||||||
|
memset_zero<stat>(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
Stat &
|
Stat &
|
||||||
@@ -498,15 +512,15 @@ namespace crucible {
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
Stat::Stat(int fd) :
|
Stat::Stat(int fd)
|
||||||
stat( (stat) { } )
|
|
||||||
{
|
{
|
||||||
|
memset_zero<stat>(this);
|
||||||
fstat(fd);
|
fstat(fd);
|
||||||
}
|
}
|
||||||
|
|
||||||
Stat::Stat(const string &filename) :
|
Stat::Stat(const string &filename)
|
||||||
stat( (stat) { } )
|
|
||||||
{
|
{
|
||||||
|
memset_zero<stat>(this);
|
||||||
lstat(filename);
|
lstat(filename);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
306
lib/fs.cc
306
lib/fs.cc
@@ -32,23 +32,19 @@ namespace crucible {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
BtrfsExtentInfo::BtrfsExtentInfo(int dst_fd, off_t dst_offset) :
|
BtrfsExtentInfo::BtrfsExtentInfo(int dst_fd, off_t dst_offset)
|
||||||
btrfs_ioctl_same_extent_info( (btrfs_ioctl_same_extent_info) { } )
|
|
||||||
{
|
{
|
||||||
assert(fd == 0);
|
memset_zero<btrfs_ioctl_same_extent_info>(this);
|
||||||
assert(logical_offset == 0);
|
|
||||||
assert(bytes_deduped == 0);
|
|
||||||
assert(status == 0);
|
|
||||||
assert(reserved == 0);
|
|
||||||
fd = dst_fd;
|
fd = dst_fd;
|
||||||
logical_offset = dst_offset;
|
logical_offset = dst_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
BtrfsExtentSame::BtrfsExtentSame(int src_fd, off_t src_offset, off_t src_length) :
|
BtrfsExtentSame::BtrfsExtentSame(int src_fd, off_t src_offset, off_t src_length) :
|
||||||
m_logical_offset(src_offset),
|
|
||||||
m_length(src_length),
|
|
||||||
m_fd(src_fd)
|
m_fd(src_fd)
|
||||||
{
|
{
|
||||||
|
memset_zero<btrfs_ioctl_same_args>(this);
|
||||||
|
logical_offset = src_offset;
|
||||||
|
length = src_length;
|
||||||
}
|
}
|
||||||
|
|
||||||
BtrfsExtentSame::~BtrfsExtentSame()
|
BtrfsExtentSame::~BtrfsExtentSame()
|
||||||
@@ -115,8 +111,11 @@ namespace crucible {
|
|||||||
os << " '" << fd_name << "'";
|
os << " '" << fd_name << "'";
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
os << ", .logical_offset = " << to_hex(bes.m_logical_offset);
|
os << ", .logical_offset = " << to_hex(bes.logical_offset);
|
||||||
os << ", .length = " << to_hex(bes.m_length);
|
os << ", .length = " << to_hex(bes.length);
|
||||||
|
os << ", .dest_count = " << bes.dest_count;
|
||||||
|
os << ", .reserved1 = " << bes.reserved1;
|
||||||
|
os << ", .reserved2 = " << bes.reserved2;
|
||||||
os << ", .info[] = {";
|
os << ", .info[] = {";
|
||||||
for (size_t i = 0; i < bes.m_info.size(); ++i) {
|
for (size_t i = 0; i < bes.m_info.size(); ++i) {
|
||||||
os << " [" << i << "] = " << &(bes.m_info[i]) << ",";
|
os << " [" << i << "] = " << &(bes.m_info[i]) << ",";
|
||||||
@@ -127,25 +126,22 @@ namespace crucible {
|
|||||||
void
|
void
|
||||||
btrfs_clone_range(int src_fd, off_t src_offset, off_t src_length, int dst_fd, off_t dst_offset)
|
btrfs_clone_range(int src_fd, off_t src_offset, off_t src_length, int dst_fd, off_t dst_offset)
|
||||||
{
|
{
|
||||||
btrfs_ioctl_clone_range_args args ( (btrfs_ioctl_clone_range_args) {
|
struct btrfs_ioctl_clone_range_args args;
|
||||||
.src_fd = src_fd,
|
memset_zero(&args);
|
||||||
.src_offset = ranged_cast<uint64_t, off_t>(src_offset),
|
args.src_fd = src_fd;
|
||||||
.src_length = ranged_cast<uint64_t, off_t>(src_length),
|
args.src_offset = src_offset;
|
||||||
.dest_offset = ranged_cast<uint64_t, off_t>(dst_offset),
|
args.src_length = src_length;
|
||||||
} );
|
args.dest_offset = dst_offset;
|
||||||
DIE_IF_MINUS_ONE(ioctl(dst_fd, BTRFS_IOC_CLONE_RANGE, &args));
|
DIE_IF_MINUS_ONE(ioctl(dst_fd, BTRFS_IOC_CLONE_RANGE, &args));
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
BtrfsExtentSame::do_ioctl()
|
BtrfsExtentSame::do_ioctl()
|
||||||
{
|
{
|
||||||
const size_t buf_size = sizeof(btrfs_ioctl_same_args) + m_info.size() * sizeof(btrfs_ioctl_same_extent_info);
|
dest_count = m_info.size();
|
||||||
ByteVector ioctl_arg( (btrfs_ioctl_same_args) {
|
vector<uint8_t> ioctl_arg = vector_copy_struct<btrfs_ioctl_same_args>(this);
|
||||||
.logical_offset = m_logical_offset,
|
ioctl_arg.resize(sizeof(btrfs_ioctl_same_args) + dest_count * sizeof(btrfs_ioctl_same_extent_info), 0);
|
||||||
.length = m_length,
|
btrfs_ioctl_same_args *ioctl_ptr = reinterpret_cast<btrfs_ioctl_same_args *>(ioctl_arg.data());
|
||||||
.dest_count = ranged_cast<decltype(btrfs_ioctl_same_args::dest_count)>(m_info.size()),
|
|
||||||
}, buf_size);
|
|
||||||
btrfs_ioctl_same_args *const ioctl_ptr = ioctl_arg.get<btrfs_ioctl_same_args>();
|
|
||||||
size_t count = 0;
|
size_t count = 0;
|
||||||
for (auto i = m_info.cbegin(); i != m_info.cend(); ++i) {
|
for (auto i = m_info.cbegin(); i != m_info.cend(); ++i) {
|
||||||
ioctl_ptr->info[count] = static_cast<const btrfs_ioctl_same_extent_info &>(m_info[count]);
|
ioctl_ptr->info[count] = static_cast<const btrfs_ioctl_same_extent_info &>(m_info[count]);
|
||||||
@@ -198,15 +194,18 @@ namespace crucible {
|
|||||||
void *
|
void *
|
||||||
BtrfsDataContainer::prepare(size_t container_size)
|
BtrfsDataContainer::prepare(size_t container_size)
|
||||||
{
|
{
|
||||||
|
if (m_data.size() < container_size) {
|
||||||
|
m_data.resize(container_size);
|
||||||
|
}
|
||||||
|
btrfs_data_container *p = reinterpret_cast<btrfs_data_container *>(m_data.data());
|
||||||
const size_t min_size = offsetof(btrfs_data_container, val);
|
const size_t min_size = offsetof(btrfs_data_container, val);
|
||||||
if (container_size < min_size) {
|
if (container_size < min_size) {
|
||||||
THROW_ERROR(out_of_range, "container size " << container_size << " smaller than minimum " << min_size);
|
THROW_ERROR(out_of_range, "container size " << container_size << " smaller than minimum " << min_size);
|
||||||
}
|
}
|
||||||
if (m_data.size() < container_size) {
|
p->bytes_left = 0;
|
||||||
m_data = ByteVector(container_size);
|
p->bytes_missing = 0;
|
||||||
}
|
p->elem_cnt = 0;
|
||||||
const auto p = m_data.get<btrfs_data_container>();
|
p->elem_missed = 0;
|
||||||
*p = (btrfs_data_container) { };
|
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -219,29 +218,25 @@ namespace crucible {
|
|||||||
decltype(btrfs_data_container::bytes_left)
|
decltype(btrfs_data_container::bytes_left)
|
||||||
BtrfsDataContainer::get_bytes_left() const
|
BtrfsDataContainer::get_bytes_left() const
|
||||||
{
|
{
|
||||||
const auto p = m_data.get<btrfs_data_container>();
|
return bytes_left;
|
||||||
return p->bytes_left;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
decltype(btrfs_data_container::bytes_missing)
|
decltype(btrfs_data_container::bytes_missing)
|
||||||
BtrfsDataContainer::get_bytes_missing() const
|
BtrfsDataContainer::get_bytes_missing() const
|
||||||
{
|
{
|
||||||
const auto p = m_data.get<btrfs_data_container>();
|
return bytes_missing;
|
||||||
return p->bytes_missing;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
decltype(btrfs_data_container::elem_cnt)
|
decltype(btrfs_data_container::elem_cnt)
|
||||||
BtrfsDataContainer::get_elem_cnt() const
|
BtrfsDataContainer::get_elem_cnt() const
|
||||||
{
|
{
|
||||||
const auto p = m_data.get<btrfs_data_container>();
|
return elem_cnt;
|
||||||
return p->elem_cnt;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
decltype(btrfs_data_container::elem_missed)
|
decltype(btrfs_data_container::elem_missed)
|
||||||
BtrfsDataContainer::get_elem_missed() const
|
BtrfsDataContainer::get_elem_missed() const
|
||||||
{
|
{
|
||||||
const auto p = m_data.get<btrfs_data_container>();
|
return elem_missed;
|
||||||
return p->elem_missed;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ostream &
|
ostream &
|
||||||
@@ -262,13 +257,10 @@ namespace crucible {
|
|||||||
}
|
}
|
||||||
|
|
||||||
BtrfsIoctlLogicalInoArgs::BtrfsIoctlLogicalInoArgs(uint64_t new_logical, size_t new_size) :
|
BtrfsIoctlLogicalInoArgs::BtrfsIoctlLogicalInoArgs(uint64_t new_logical, size_t new_size) :
|
||||||
btrfs_ioctl_logical_ino_args( (btrfs_ioctl_logical_ino_args) { } ),
|
|
||||||
m_container_size(new_size),
|
m_container_size(new_size),
|
||||||
m_container(new_size)
|
m_container(new_size)
|
||||||
{
|
{
|
||||||
assert(logical == 0);
|
memset_zero<btrfs_ioctl_logical_ino_args>(this);
|
||||||
assert(size == 0);
|
|
||||||
assert(flags == 0);
|
|
||||||
logical = new_logical;
|
logical = new_logical;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -336,7 +328,7 @@ namespace crucible {
|
|||||||
bool
|
bool
|
||||||
BtrfsIoctlLogicalInoArgs::do_ioctl_nothrow(int fd)
|
BtrfsIoctlLogicalInoArgs::do_ioctl_nothrow(int fd)
|
||||||
{
|
{
|
||||||
btrfs_ioctl_logical_ino_args *const p = static_cast<btrfs_ioctl_logical_ino_args *>(this);
|
btrfs_ioctl_logical_ino_args *p = static_cast<btrfs_ioctl_logical_ino_args *>(this);
|
||||||
inodes = reinterpret_cast<uint64_t>(m_container.prepare(m_container_size));
|
inodes = reinterpret_cast<uint64_t>(m_container.prepare(m_container_size));
|
||||||
size = m_container.get_size();
|
size = m_container.get_size();
|
||||||
|
|
||||||
@@ -375,8 +367,8 @@ namespace crucible {
|
|||||||
bili_version = BTRFS_IOC_LOGICAL_INO_V2;
|
bili_version = BTRFS_IOC_LOGICAL_INO_V2;
|
||||||
}
|
}
|
||||||
|
|
||||||
btrfs_data_container *const bdc = reinterpret_cast<btrfs_data_container *>(p->inodes);
|
btrfs_data_container *bdc = reinterpret_cast<btrfs_data_container *>(p->inodes);
|
||||||
BtrfsInodeOffsetRoot *const input_iter = reinterpret_cast<BtrfsInodeOffsetRoot *>(bdc->val);
|
BtrfsInodeOffsetRoot *input_iter = reinterpret_cast<BtrfsInodeOffsetRoot *>(bdc->val);
|
||||||
|
|
||||||
// elem_cnt counts uint64_t, but BtrfsInodeOffsetRoot is 3x uint64_t
|
// elem_cnt counts uint64_t, but BtrfsInodeOffsetRoot is 3x uint64_t
|
||||||
THROW_CHECK1(runtime_error, bdc->elem_cnt, bdc->elem_cnt % 3 == 0);
|
THROW_CHECK1(runtime_error, bdc->elem_cnt, bdc->elem_cnt % 3 == 0);
|
||||||
@@ -404,10 +396,9 @@ namespace crucible {
|
|||||||
}
|
}
|
||||||
|
|
||||||
BtrfsIoctlInoPathArgs::BtrfsIoctlInoPathArgs(uint64_t inode, size_t new_size) :
|
BtrfsIoctlInoPathArgs::BtrfsIoctlInoPathArgs(uint64_t inode, size_t new_size) :
|
||||||
btrfs_ioctl_ino_path_args( (btrfs_ioctl_ino_path_args) { } ),
|
|
||||||
m_container_size(new_size)
|
m_container_size(new_size)
|
||||||
{
|
{
|
||||||
assert(inum == 0);
|
memset_zero<btrfs_ioctl_ino_path_args>(this);
|
||||||
inum = inode;
|
inum = inode;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -425,14 +416,14 @@ namespace crucible {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
btrfs_data_container *const bdc = reinterpret_cast<btrfs_data_container *>(p->fspath);
|
btrfs_data_container *bdc = reinterpret_cast<btrfs_data_container *>(p->fspath);
|
||||||
m_paths.reserve(bdc->elem_cnt);
|
m_paths.reserve(bdc->elem_cnt);
|
||||||
|
|
||||||
const uint64_t *up = reinterpret_cast<const uint64_t *>(bdc->val);
|
const uint64_t *up = reinterpret_cast<const uint64_t *>(bdc->val);
|
||||||
const char *const cp = reinterpret_cast<const char *>(bdc->val);
|
const char *cp = reinterpret_cast<const char *>(bdc->val);
|
||||||
|
|
||||||
for (auto count = bdc->elem_cnt; count > 0; --count) {
|
for (auto count = bdc->elem_cnt; count > 0; --count) {
|
||||||
const char *const path = cp + *up++;
|
const char *path = cp + *up++;
|
||||||
if (static_cast<size_t>(path - cp) > container.get_size()) {
|
if (static_cast<size_t>(path - cp) > container.get_size()) {
|
||||||
THROW_ERROR(out_of_range, "offset " << (path - cp) << " > size " << container.get_size() << " in " << __PRETTY_FUNCTION__);
|
THROW_ERROR(out_of_range, "offset " << (path - cp) << " > size " << container.get_size() << " in " << __PRETTY_FUNCTION__);
|
||||||
}
|
}
|
||||||
@@ -467,10 +458,9 @@ namespace crucible {
|
|||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
BtrfsIoctlInoLookupArgs::BtrfsIoctlInoLookupArgs(uint64_t new_objectid) :
|
BtrfsIoctlInoLookupArgs::BtrfsIoctlInoLookupArgs(uint64_t new_objectid)
|
||||||
btrfs_ioctl_ino_lookup_args( (btrfs_ioctl_ino_lookup_args) { } )
|
|
||||||
{
|
{
|
||||||
assert(objectid == 0);
|
memset_zero<btrfs_ioctl_ino_lookup_args>(this);
|
||||||
objectid = new_objectid;
|
objectid = new_objectid;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -488,9 +478,9 @@ namespace crucible {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
BtrfsIoctlDefragRangeArgs::BtrfsIoctlDefragRangeArgs() :
|
BtrfsIoctlDefragRangeArgs::BtrfsIoctlDefragRangeArgs()
|
||||||
btrfs_ioctl_defrag_range_args( (btrfs_ioctl_defrag_range_args) { } )
|
|
||||||
{
|
{
|
||||||
|
memset_zero<btrfs_ioctl_defrag_range_args>(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
@@ -547,9 +537,9 @@ namespace crucible {
|
|||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
FiemapExtent::FiemapExtent() :
|
FiemapExtent::FiemapExtent()
|
||||||
fiemap_extent( (fiemap_extent) { } )
|
|
||||||
{
|
{
|
||||||
|
memset_zero<fiemap_extent>(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
FiemapExtent::FiemapExtent(const fiemap_extent &that)
|
FiemapExtent::FiemapExtent(const fiemap_extent &that)
|
||||||
@@ -656,10 +646,13 @@ namespace crucible {
|
|||||||
operator<<(ostream &os, const Fiemap &args)
|
operator<<(ostream &os, const Fiemap &args)
|
||||||
{
|
{
|
||||||
os << "Fiemap {";
|
os << "Fiemap {";
|
||||||
os << " .m_start = " << to_hex(args.m_start) << ".." << to_hex(args.m_start + args.m_length);
|
os << " .fm_start = " << to_hex(args.fm_start) << ".." << to_hex(args.fm_start + args.fm_length);
|
||||||
os << ", .m_length = " << to_hex(args.m_length);
|
os << ", .fm_length = " << to_hex(args.fm_length);
|
||||||
os << ", .m_flags = " << fiemap_flags_ntoa(args.m_flags);
|
if (args.fm_flags) os << ", .fm_flags = " << fiemap_flags_ntoa(args.fm_flags);
|
||||||
os << ", .fm_extents[" << args.m_extents.size() << "] = {";
|
os << ", .fm_mapped_extents = " << args.fm_mapped_extents;
|
||||||
|
os << ", .fm_extent_count = " << args.fm_extent_count;
|
||||||
|
if (args.fm_reserved) os << ", .fm_reserved = " << args.fm_reserved;
|
||||||
|
os << ", .fm_extents[] = {";
|
||||||
size_t count = 0;
|
size_t count = 0;
|
||||||
for (auto i = args.m_extents.cbegin(); i != args.m_extents.cend(); ++i) {
|
for (auto i = args.m_extents.cbegin(); i != args.m_extents.cend(); ++i) {
|
||||||
os << "\n\t[" << count++ << "] = " << &(*i) << ",";
|
os << "\n\t[" << count++ << "] = " << &(*i) << ",";
|
||||||
@@ -667,35 +660,41 @@ namespace crucible {
|
|||||||
return os << "\n}";
|
return os << "\n}";
|
||||||
}
|
}
|
||||||
|
|
||||||
Fiemap::Fiemap(uint64_t start, uint64_t length) :
|
Fiemap::Fiemap(uint64_t start, uint64_t length)
|
||||||
m_start(start),
|
|
||||||
m_length(length)
|
|
||||||
{
|
{
|
||||||
|
memset_zero<fiemap>(this);
|
||||||
|
fm_start = start;
|
||||||
|
fm_length = length;
|
||||||
|
// FIEMAP is slow and full of lies.
|
||||||
|
// This makes FIEMAP even slower, but reduces the lies a little.
|
||||||
|
fm_flags = FIEMAP_FLAG_SYNC;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
Fiemap::do_ioctl(int fd)
|
Fiemap::do_ioctl(int fd)
|
||||||
{
|
{
|
||||||
THROW_CHECK1(out_of_range, m_min_count, m_min_count <= m_max_count);
|
THROW_CHECK1(out_of_range, m_min_count, m_min_count <= m_max_count);
|
||||||
THROW_CHECK1(out_of_range, m_min_count, m_min_count > 0);
|
|
||||||
|
|
||||||
const auto extent_count = m_min_count;
|
auto extent_count = m_min_count;
|
||||||
ByteVector ioctl_arg(sizeof(fiemap) + extent_count * sizeof(fiemap_extent));
|
vector<uint8_t> ioctl_arg = vector_copy_struct<fiemap>(this);
|
||||||
|
|
||||||
fiemap *const ioctl_ptr = ioctl_arg.get<fiemap>();
|
ioctl_arg.resize(sizeof(fiemap) + extent_count * sizeof(fiemap_extent), 0);
|
||||||
|
|
||||||
auto start = m_start;
|
fiemap *ioctl_ptr = reinterpret_cast<fiemap *>(ioctl_arg.data());
|
||||||
const auto end = m_start + m_length;
|
|
||||||
|
auto start = fm_start;
|
||||||
|
auto end = fm_start + fm_length;
|
||||||
|
|
||||||
|
auto orig_start = fm_start;
|
||||||
|
auto orig_length = fm_length;
|
||||||
|
|
||||||
vector<FiemapExtent> extents;
|
vector<FiemapExtent> extents;
|
||||||
|
|
||||||
while (start < end && extents.size() < m_max_count) {
|
while (start < end && extents.size() < m_max_count) {
|
||||||
*ioctl_ptr = (fiemap) {
|
ioctl_ptr->fm_start = start;
|
||||||
.fm_start = start,
|
ioctl_ptr->fm_length = end - start;
|
||||||
.fm_length = end - start,
|
ioctl_ptr->fm_extent_count = extent_count;
|
||||||
.fm_flags = m_flags,
|
ioctl_ptr->fm_mapped_extents = 0;
|
||||||
.fm_extent_count = extent_count,
|
|
||||||
};
|
|
||||||
|
|
||||||
// cerr << "Before (fd = " << fd << ") : " << ioctl_ptr << endl;
|
// cerr << "Before (fd = " << fd << ") : " << ioctl_ptr << endl;
|
||||||
DIE_IF_MINUS_ONE(ioctl(fd, FS_IOC_FIEMAP, ioctl_ptr));
|
DIE_IF_MINUS_ONE(ioctl(fd, FS_IOC_FIEMAP, ioctl_ptr));
|
||||||
@@ -721,89 +720,74 @@ namespace crucible {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fiemap *this_ptr = static_cast<fiemap *>(this);
|
||||||
|
*this_ptr = *ioctl_ptr;
|
||||||
|
fm_start = orig_start;
|
||||||
|
fm_length = orig_length;
|
||||||
|
fm_extent_count = extents.size();
|
||||||
m_extents = extents;
|
m_extents = extents;
|
||||||
}
|
}
|
||||||
|
|
||||||
BtrfsIoctlSearchKey::BtrfsIoctlSearchKey(size_t buf_size) :
|
BtrfsIoctlSearchKey::BtrfsIoctlSearchKey(size_t buf_size) :
|
||||||
btrfs_ioctl_search_key( (btrfs_ioctl_search_key) {
|
|
||||||
.max_objectid = numeric_limits<decltype(max_objectid)>::max(),
|
|
||||||
.max_offset = numeric_limits<decltype(max_offset)>::max(),
|
|
||||||
.max_transid = numeric_limits<decltype(max_transid)>::max(),
|
|
||||||
.max_type = numeric_limits<decltype(max_type)>::max(),
|
|
||||||
.nr_items = 1,
|
|
||||||
}),
|
|
||||||
m_buf_size(buf_size)
|
m_buf_size(buf_size)
|
||||||
{
|
{
|
||||||
|
memset_zero<btrfs_ioctl_search_key>(this);
|
||||||
|
max_objectid = numeric_limits<decltype(max_objectid)>::max();
|
||||||
|
max_offset = numeric_limits<decltype(max_offset)>::max();
|
||||||
|
max_transid = numeric_limits<decltype(max_transid)>::max();
|
||||||
|
max_type = numeric_limits<decltype(max_type)>::max();
|
||||||
|
nr_items = numeric_limits<decltype(nr_items)>::max();
|
||||||
}
|
}
|
||||||
|
|
||||||
BtrfsIoctlSearchHeader::BtrfsIoctlSearchHeader() :
|
BtrfsIoctlSearchHeader::BtrfsIoctlSearchHeader()
|
||||||
btrfs_ioctl_search_header( (btrfs_ioctl_search_header) { } )
|
|
||||||
{
|
{
|
||||||
|
memset_zero<btrfs_ioctl_search_header>(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t
|
size_t
|
||||||
BtrfsIoctlSearchHeader::set_data(const ByteVector &v, size_t offset)
|
BtrfsIoctlSearchHeader::set_data(const vector<uint8_t> &v, size_t offset)
|
||||||
{
|
{
|
||||||
THROW_CHECK2(invalid_argument, offset, v.size(), offset + sizeof(btrfs_ioctl_search_header) <= v.size());
|
THROW_CHECK2(invalid_argument, offset, v.size(), offset + sizeof(btrfs_ioctl_search_header) <= v.size());
|
||||||
memcpy(static_cast<btrfs_ioctl_search_header *>(this), &v[offset], sizeof(btrfs_ioctl_search_header));
|
memcpy(static_cast<btrfs_ioctl_search_header *>(this), &v[offset], sizeof(btrfs_ioctl_search_header));
|
||||||
offset += sizeof(btrfs_ioctl_search_header);
|
offset += sizeof(btrfs_ioctl_search_header);
|
||||||
THROW_CHECK2(invalid_argument, offset + len, v.size(), offset + len <= v.size());
|
THROW_CHECK2(invalid_argument, offset + len, v.size(), offset + len <= v.size());
|
||||||
m_data = ByteVector(v, offset, len);
|
m_data = Spanner<const uint8_t>(&v[offset], &v[offset + len]);
|
||||||
return offset + len;
|
return offset + len;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
BtrfsIoctlSearchKey::do_ioctl_nothrow(int fd)
|
BtrfsIoctlSearchKey::do_ioctl_nothrow(int fd)
|
||||||
{
|
{
|
||||||
// It would be really nice if the kernel tells us whether our
|
// Normally we like to be paranoid and fill empty bytes with zero,
|
||||||
// buffer overflowed or how big the overflowing object
|
// but these buffers can be huge. 80% of a 4GHz CPU huge.
|
||||||
// was; instead, we have to guess.
|
|
||||||
|
// Keep the ioctl buffer from one run to the next to save on malloc costs
|
||||||
|
size_t target_buf_size = sizeof(btrfs_ioctl_search_args_v2) + m_buf_size;
|
||||||
|
|
||||||
|
m_ioctl_arg = vector_copy_struct<btrfs_ioctl_search_key>(this);
|
||||||
|
m_ioctl_arg.resize(target_buf_size);
|
||||||
|
|
||||||
m_result.clear();
|
m_result.clear();
|
||||||
// Make sure there is space for at least the search key and one (empty) header
|
|
||||||
size_t buf_size = max(m_buf_size, sizeof(btrfs_ioctl_search_args_v2) + sizeof(btrfs_ioctl_search_header));
|
|
||||||
ByteVector ioctl_arg;
|
|
||||||
btrfs_ioctl_search_args_v2 *ioctl_ptr;
|
|
||||||
do {
|
|
||||||
// ioctl buffer size does not include search key header or buffer size
|
|
||||||
ioctl_arg = ByteVector(buf_size + sizeof(btrfs_ioctl_search_args_v2));
|
|
||||||
ioctl_ptr = ioctl_arg.get<btrfs_ioctl_search_args_v2>();
|
|
||||||
ioctl_ptr->key = static_cast<const btrfs_ioctl_search_key&>(*this);
|
|
||||||
ioctl_ptr->buf_size = buf_size;
|
|
||||||
// Don't bother supporting V1. Kernels that old have other problems.
|
|
||||||
int rv = ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, ioctl_arg.data());
|
|
||||||
if (rv != 0 && errno != EOVERFLOW) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (rv == 0 && nr_items <= ioctl_ptr->key.nr_items) {
|
|
||||||
// got all the items we wanted, thanks
|
|
||||||
m_buf_size = max(m_buf_size, buf_size);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// Didn't get all the items we wanted. Increase the buf size and try again.
|
|
||||||
// These sizes are very common on default-formatted btrfs, so use these
|
|
||||||
// instead of naive doubling.
|
|
||||||
if (buf_size < 4096) {
|
|
||||||
buf_size = 4096;
|
|
||||||
} else if (buf_size < 16384) {
|
|
||||||
buf_size = 16384;
|
|
||||||
} else if (buf_size < 65536) {
|
|
||||||
buf_size = 65536;
|
|
||||||
} else {
|
|
||||||
buf_size *= 2;
|
|
||||||
}
|
|
||||||
// don't automatically raise the buf size higher than 64K, the largest possible btrfs item
|
|
||||||
} while (buf_size < 65536);
|
|
||||||
|
|
||||||
// ioctl changes nr_items, this has to be copied back
|
btrfs_ioctl_search_args_v2 *ioctl_ptr = reinterpret_cast<btrfs_ioctl_search_args_v2 *>(m_ioctl_arg.data());
|
||||||
|
|
||||||
|
ioctl_ptr->buf_size = m_buf_size;
|
||||||
|
|
||||||
|
// Don't bother supporting V1. Kernels that old have other problems.
|
||||||
|
int rv = ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, ioctl_ptr);
|
||||||
|
if (rv != 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static_cast<btrfs_ioctl_search_key&>(*this) = ioctl_ptr->key;
|
static_cast<btrfs_ioctl_search_key&>(*this) = ioctl_ptr->key;
|
||||||
|
|
||||||
size_t offset = pointer_distance(ioctl_ptr->buf, ioctl_ptr);
|
size_t offset = pointer_distance(ioctl_ptr->buf, ioctl_ptr);
|
||||||
for (decltype(nr_items) i = 0; i < nr_items; ++i) {
|
for (decltype(nr_items) i = 0; i < nr_items; ++i) {
|
||||||
BtrfsIoctlSearchHeader item;
|
BtrfsIoctlSearchHeader item;
|
||||||
offset = item.set_data(ioctl_arg, offset);
|
offset = item.set_data(m_ioctl_arg, offset);
|
||||||
m_result.insert(item);
|
m_result.insert(item);
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -811,7 +795,7 @@ namespace crucible {
|
|||||||
BtrfsIoctlSearchKey::do_ioctl(int fd)
|
BtrfsIoctlSearchKey::do_ioctl(int fd)
|
||||||
{
|
{
|
||||||
if (!do_ioctl_nothrow(fd)) {
|
if (!do_ioctl_nothrow(fd)) {
|
||||||
THROW_ERRNO("BTRFS_IOC_TREE_SEARCH_V2: " << name_fd(fd) << ": " << *this);
|
THROW_ERRNO("BTRFS_IOC_TREE_SEARCH_V2: " << name_fd(fd));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -822,46 +806,8 @@ namespace crucible {
|
|||||||
min_type = ref.type;
|
min_type = ref.type;
|
||||||
min_offset = ref.offset + 1;
|
min_offset = ref.offset + 1;
|
||||||
if (min_offset < ref.offset) {
|
if (min_offset < ref.offset) {
|
||||||
// We wrapped, try the next type
|
// We wrapped, try the next objectid
|
||||||
++min_type;
|
++min_objectid;
|
||||||
assert(min_offset == 0);
|
|
||||||
if (min_type < ref.type) {
|
|
||||||
assert(min_type == 0);
|
|
||||||
// We wrapped, try the next objectid
|
|
||||||
++min_objectid;
|
|
||||||
// no advancement possible at end
|
|
||||||
THROW_CHECK1(runtime_error, min_type, min_type == 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
BtrfsIoctlSearchKey::next_min(const BtrfsIoctlSearchHeader &ref, const uint8_t type)
|
|
||||||
{
|
|
||||||
if (ref.type < type) {
|
|
||||||
// forward to type in same object with zero offset
|
|
||||||
min_objectid = ref.objectid;
|
|
||||||
min_type = type;
|
|
||||||
min_offset = 0;
|
|
||||||
} else if (ref.type > type) {
|
|
||||||
// skip directly to start of next objectid with target type
|
|
||||||
min_objectid = ref.objectid + 1;
|
|
||||||
// no advancement possible at end
|
|
||||||
THROW_CHECK2(out_of_range, min_objectid, ref.objectid, min_objectid > ref.objectid);
|
|
||||||
min_type = type;
|
|
||||||
min_offset = 0;
|
|
||||||
} else {
|
|
||||||
// advance within this type
|
|
||||||
min_objectid = ref.objectid;
|
|
||||||
min_type = ref.type;
|
|
||||||
min_offset = ref.offset + 1;
|
|
||||||
if (min_offset < ref.offset) {
|
|
||||||
// We wrapped, try the next objectid, same type
|
|
||||||
++min_objectid;
|
|
||||||
THROW_CHECK2(out_of_range, min_objectid, ref.objectid, min_objectid > ref.objectid);
|
|
||||||
min_type = type;
|
|
||||||
assert(min_offset == 0);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -869,7 +815,7 @@ namespace crucible {
|
|||||||
ostream &
|
ostream &
|
||||||
hexdump(ostream &os, const V &v)
|
hexdump(ostream &os, const V &v)
|
||||||
{
|
{
|
||||||
os << "V { size = " << v.size() << ", data:\n";
|
os << "vector<uint8_t> { size = " << v.size() << ", data:\n";
|
||||||
for (size_t i = 0; i < v.size(); i += 8) {
|
for (size_t i = 0; i < v.size(); i += 8) {
|
||||||
string hex, ascii;
|
string hex, ascii;
|
||||||
for (size_t j = i; j < i + 8; ++j) {
|
for (size_t j = i; j < i + 8; ++j) {
|
||||||
@@ -1083,9 +1029,9 @@ namespace crucible {
|
|||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
Statvfs::Statvfs() :
|
Statvfs::Statvfs()
|
||||||
statvfs( (statvfs) { } )
|
|
||||||
{
|
{
|
||||||
|
memset_zero<statvfs>(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
Statvfs::Statvfs(int fd) :
|
Statvfs::Statvfs(int fd) :
|
||||||
@@ -1136,20 +1082,16 @@ namespace crucible {
|
|||||||
return os << " }";
|
return os << " }";
|
||||||
};
|
};
|
||||||
|
|
||||||
BtrfsIoctlFsInfoArgs::BtrfsIoctlFsInfoArgs() :
|
BtrfsIoctlFsInfoArgs::BtrfsIoctlFsInfoArgs()
|
||||||
btrfs_ioctl_fs_info_args_v3( (btrfs_ioctl_fs_info_args_v3) {
|
|
||||||
.flags = 0
|
|
||||||
| BTRFS_FS_INFO_FLAG_CSUM_INFO
|
|
||||||
| BTRFS_FS_INFO_FLAG_GENERATION
|
|
||||||
,
|
|
||||||
})
|
|
||||||
{
|
{
|
||||||
|
memset_zero<btrfs_ioctl_fs_info_args_v2>(this);
|
||||||
|
flags = BTRFS_FS_INFO_FLAG_CSUM_INFO;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
BtrfsIoctlFsInfoArgs::do_ioctl(int fd)
|
BtrfsIoctlFsInfoArgs::do_ioctl(int fd)
|
||||||
{
|
{
|
||||||
btrfs_ioctl_fs_info_args_v3 *p = static_cast<btrfs_ioctl_fs_info_args_v3 *>(this);
|
btrfs_ioctl_fs_info_args_v2 *p = static_cast<btrfs_ioctl_fs_info_args_v2 *>(this);
|
||||||
if (ioctl(fd, BTRFS_IOC_FS_INFO, p)) {
|
if (ioctl(fd, BTRFS_IOC_FS_INFO, p)) {
|
||||||
THROW_ERRNO("BTRFS_IOC_FS_INFO: fd " << fd);
|
THROW_ERRNO("BTRFS_IOC_FS_INFO: fd " << fd);
|
||||||
}
|
}
|
||||||
@@ -1158,19 +1100,13 @@ namespace crucible {
|
|||||||
uint16_t
|
uint16_t
|
||||||
BtrfsIoctlFsInfoArgs::csum_type() const
|
BtrfsIoctlFsInfoArgs::csum_type() const
|
||||||
{
|
{
|
||||||
return this->btrfs_ioctl_fs_info_args_v3::csum_type;
|
return this->btrfs_ioctl_fs_info_args_v2::csum_type;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint16_t
|
uint16_t
|
||||||
BtrfsIoctlFsInfoArgs::csum_size() const
|
BtrfsIoctlFsInfoArgs::csum_size() const
|
||||||
{
|
{
|
||||||
return this->btrfs_ioctl_fs_info_args_v3::csum_size;
|
return this->btrfs_ioctl_fs_info_args_v2::csum_size;
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t
|
|
||||||
BtrfsIoctlFsInfoArgs::generation() const
|
|
||||||
{
|
|
||||||
return this->btrfs_ioctl_fs_info_args_v3::generation;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
28
lib/task.cc
28
lib/task.cc
@@ -89,7 +89,6 @@ namespace crucible {
|
|||||||
|
|
||||||
TaskState &operator=(const TaskState &) = delete;
|
TaskState &operator=(const TaskState &) = delete;
|
||||||
TaskState(const TaskState &) = delete;
|
TaskState(const TaskState &) = delete;
|
||||||
TaskState(TaskState &&) = delete;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
~TaskState();
|
~TaskState();
|
||||||
@@ -200,19 +199,13 @@ namespace crucible {
|
|||||||
tlcc->m_local_queue.splice(tlcc->m_local_queue.begin(), queue);
|
tlcc->m_local_queue.splice(tlcc->m_local_queue.begin(), queue);
|
||||||
} else {
|
} else {
|
||||||
// We are not executing under a TaskConsumer.
|
// We are not executing under a TaskConsumer.
|
||||||
// If there is only one task, then just insert it at the front of the queue.
|
// Create a new task to wrap our post-exec queue,
|
||||||
if (queue.size() == 1) {
|
// then push it to the front of the global queue using normal locking methods.
|
||||||
TaskMasterState::push_front(queue);
|
TaskStatePtr rescue_task(make_shared<TaskState>("rescue_task", [](){}));
|
||||||
} else {
|
swap(rescue_task->m_post_exec_queue, queue);
|
||||||
// If there are multiple tasks, create a new task to wrap our post-exec queue,
|
TaskQueue tq_one { rescue_task };
|
||||||
// then push it to the front of the global queue using normal locking methods.
|
TaskMasterState::push_front(tq_one);
|
||||||
TaskStatePtr rescue_task(make_shared<TaskState>("rescue_task", [](){}));
|
|
||||||
swap(rescue_task->m_post_exec_queue, queue);
|
|
||||||
TaskQueue tq_one { rescue_task };
|
|
||||||
TaskMasterState::push_front(tq_one);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
assert(queue.empty());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TaskState::~TaskState()
|
TaskState::~TaskState()
|
||||||
@@ -293,23 +286,23 @@ namespace crucible {
|
|||||||
--m_run_count;
|
--m_run_count;
|
||||||
m_is_running = true;
|
m_is_running = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
TaskStatePtr this_task = shared_from_this();
|
|
||||||
swap(this_task, tl_current_task);
|
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
|
|
||||||
char buf[24] = { 0 };
|
char buf[24] = { 0 };
|
||||||
DIE_IF_MINUS_ERRNO(pthread_getname_np(pthread_self(), buf, sizeof(buf)));
|
DIE_IF_MINUS_ERRNO(pthread_getname_np(pthread_self(), buf, sizeof(buf)));
|
||||||
DIE_IF_MINUS_ERRNO(pthread_setname_np(pthread_self(), m_title.c_str()));
|
DIE_IF_MINUS_ERRNO(pthread_setname_np(pthread_self(), m_title.c_str()));
|
||||||
|
|
||||||
|
TaskStatePtr this_task = shared_from_this();
|
||||||
|
swap(this_task, tl_current_task);
|
||||||
|
|
||||||
catch_all([&]() {
|
catch_all([&]() {
|
||||||
m_exec_fn();
|
m_exec_fn();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
swap(this_task, tl_current_task);
|
||||||
pthread_setname_np(pthread_self(), buf);
|
pthread_setname_np(pthread_self(), buf);
|
||||||
|
|
||||||
lock.lock();
|
lock.lock();
|
||||||
swap(this_task, tl_current_task);
|
|
||||||
m_is_running = false;
|
m_is_running = false;
|
||||||
|
|
||||||
// Splice task post_exec queue at front of local queue
|
// Splice task post_exec queue at front of local queue
|
||||||
@@ -756,7 +749,6 @@ namespace crucible {
|
|||||||
// There is no longer a current consumer, but hold our own shared
|
// There is no longer a current consumer, but hold our own shared
|
||||||
// state so it's still there in the destructor
|
// state so it's still there in the destructor
|
||||||
swap(this_consumer, tl_current_consumer);
|
swap(this_consumer, tl_current_consumer);
|
||||||
assert(!tl_current_consumer);
|
|
||||||
|
|
||||||
// Release lock to rescue queue (may attempt to queue a new task at TaskMaster).
|
// Release lock to rescue queue (may attempt to queue a new task at TaskMaster).
|
||||||
// rescue_queue normally sends tasks to the local queue of the current TaskConsumer thread,
|
// rescue_queue normally sends tasks to the local queue of the current TaskConsumer thread,
|
||||||
|
11
lib/uname.cc
11
lib/uname.cc
@@ -1,11 +0,0 @@
|
|||||||
#include "crucible/error.h"
|
|
||||||
#include "crucible/uname.h"
|
|
||||||
|
|
||||||
namespace crucible {
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
Uname::Uname()
|
|
||||||
{
|
|
||||||
DIE_IF_NON_ZERO(uname(static_cast<utsname*>(this)));
|
|
||||||
}
|
|
||||||
}
|
|
@@ -10,4 +10,4 @@ CCFLAGS = -Wall -Wextra -Werror -O3
|
|||||||
CCFLAGS += -I../include -D_FILE_OFFSET_BITS=64
|
CCFLAGS += -I../include -D_FILE_OFFSET_BITS=64
|
||||||
|
|
||||||
BEES_CFLAGS = $(CCFLAGS) -std=c99 $(CFLAGS)
|
BEES_CFLAGS = $(CCFLAGS) -std=c99 $(CFLAGS)
|
||||||
BEES_CXXFLAGS = $(CCFLAGS) -std=c++11 -Wold-style-cast -Wno-missing-field-initializers $(CXXFLAGS)
|
BEES_CXXFLAGS = $(CCFLAGS) -std=c++11 -Wold-style-cast $(CXXFLAGS)
|
||||||
|
@@ -31,18 +31,20 @@ help(){
|
|||||||
exec "$bees_bin" --help
|
exec "$bees_bin" --help
|
||||||
}
|
}
|
||||||
|
|
||||||
for i in $("$bees_bin" --help 2>&1 | grep -E " --" | sed -e "s/^[^-]*-/-/" -e "s/,[^-]*--/ --/" -e "s/ [^-]*$//")
|
get_bees_supp_opts(){
|
||||||
do
|
"$bees_bin" --help |& awk '/--../ { gsub( ",", "" ); print $1 " " $2}'
|
||||||
TMP_ARGS="$TMP_ARGS $i"
|
}
|
||||||
done
|
|
||||||
IFS=" " read -r -a SUPPORTED_ARGS <<< $TMP_ARGS
|
SUPPORTED_ARGS=(
|
||||||
|
$(get_bees_supp_opts)
|
||||||
|
)
|
||||||
NOT_SUPPORTED_ARGS=()
|
NOT_SUPPORTED_ARGS=()
|
||||||
ARGUMENTS=()
|
ARGUMENTS=()
|
||||||
|
|
||||||
for arg in "${@}"; do
|
for arg in "${@}"; do
|
||||||
supp=false
|
supp=false
|
||||||
for supp_arg in "${SUPPORTED_ARGS[@]}"; do
|
for supp_arg in "${SUPPORTED_ARGS[@]}"; do
|
||||||
if [[ "$arg" == ${supp_arg}* ]]; then
|
if [ "$arg" == "$supp_arg" ]; then
|
||||||
supp=true
|
supp=true
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
@@ -71,7 +73,7 @@ done
|
|||||||
[ -z "$UUID" ] && help
|
[ -z "$UUID" ] && help
|
||||||
|
|
||||||
|
|
||||||
FILE_CONFIG="$(grep -E -l '^[^#]*UUID\s*=\s*"?'"$UUID" "$CONFIG_DIR"/*.conf | head -1)"
|
FILE_CONFIG="$(egrep -l '^[^#]*UUID\s*=\s*"?'"$UUID" "$CONFIG_DIR"/*.conf | head -1)"
|
||||||
[ ! -f "$FILE_CONFIG" ] && ERRO "No config for $UUID"
|
[ ! -f "$FILE_CONFIG" ] && ERRO "No config for $UUID"
|
||||||
INFO "Find $UUID in $FILE_CONFIG, use as conf"
|
INFO "Find $UUID in $FILE_CONFIG, use as conf"
|
||||||
source "$FILE_CONFIG"
|
source "$FILE_CONFIG"
|
||||||
|
@@ -17,7 +17,6 @@ KillSignal=SIGTERM
|
|||||||
MemoryAccounting=true
|
MemoryAccounting=true
|
||||||
Nice=19
|
Nice=19
|
||||||
Restart=on-abnormal
|
Restart=on-abnormal
|
||||||
RuntimeDirectory=bees
|
|
||||||
StartupCPUWeight=25
|
StartupCPUWeight=25
|
||||||
StartupIOWeight=25
|
StartupIOWeight=25
|
||||||
|
|
||||||
|
13
src/Makefile
13
src/Makefile
@@ -1,5 +1,9 @@
|
|||||||
BEES = ../bin/bees
|
BEES = ../bin/bees
|
||||||
PROGRAMS = \
|
PROGRAMS = \
|
||||||
|
../bin/bees \
|
||||||
|
../bin/btrsame \
|
||||||
|
../bin/clone-cat \
|
||||||
|
../bin/clone-split \
|
||||||
../bin/fiemap \
|
../bin/fiemap \
|
||||||
../bin/fiewalk \
|
../bin/fiewalk \
|
||||||
|
|
||||||
@@ -59,5 +63,14 @@ $(PROGRAMS): ../lib/libcrucible.a
|
|||||||
$(BEES): $(BEES_OBJS) bees-version.o bees-usage.o ../lib/libcrucible.a
|
$(BEES): $(BEES_OBJS) bees-version.o bees-usage.o ../lib/libcrucible.a
|
||||||
$(CXX) $(BEES_CXXFLAGS) $(BEES_LDFLAGS) -o $@ $^ $(LIBS)
|
$(CXX) $(BEES_CXXFLAGS) $(BEES_LDFLAGS) -o $@ $^ $(LIBS)
|
||||||
|
|
||||||
|
../bin/btrsame: btrsame.o
|
||||||
|
$(CXX) $(BEES_CXXFLAGS) $(BEES_LDFLAGS) -o $@ $^ $(LIBS)
|
||||||
|
|
||||||
|
../bin/clone-cat: clone-cat.o
|
||||||
|
$(CXX) $(BEES_CXXFLAGS) $(BEES_LDFLAGS) -o $@ $^ $(LIBS)
|
||||||
|
|
||||||
|
../bin/clone-split: clone-split.o
|
||||||
|
$(CXX) $(BEES_CXXFLAGS) $(BEES_LDFLAGS) -o $@ $^ $(LIBS)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -fv *.o bees-version.c
|
rm -fv *.o bees-version.c
|
||||||
|
@@ -187,20 +187,20 @@ BeesContext::is_root_ro(uint64_t root)
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
BeesContext::dedup(const BeesRangePair &brp_in)
|
BeesContext::dedup(const BeesRangePair &brp)
|
||||||
{
|
{
|
||||||
// TOOLONG and NOTE can retroactively fill in the filename details, but LOG can't
|
// TOOLONG and NOTE can retroactively fill in the filename details, but LOG can't
|
||||||
BEESNOTE("dedup " << brp_in);
|
BEESNOTE("dedup " << brp);
|
||||||
|
|
||||||
if (is_root_ro(brp_in.second.fid().root())) {
|
brp.second.fd(shared_from_this());
|
||||||
// BEESLOGDEBUG("WORKAROUND: dst root " << (brp_in.second.fid().root()) << " is read-only");
|
|
||||||
|
if (is_root_ro(brp.second.fid().root())) {
|
||||||
|
// BEESLOGDEBUG("WORKAROUND: dst root is read-only in " << name_fd(brp.second.fd()));
|
||||||
BEESCOUNT(dedup_workaround_btrfs_send);
|
BEESCOUNT(dedup_workaround_btrfs_send);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto brp = brp_in;
|
|
||||||
brp.first.fd(shared_from_this());
|
brp.first.fd(shared_from_this());
|
||||||
brp.second.fd(shared_from_this());
|
|
||||||
|
|
||||||
BEESTOOLONG("dedup " << brp);
|
BEESTOOLONG("dedup " << brp);
|
||||||
|
|
||||||
@@ -209,8 +209,6 @@ BeesContext::dedup(const BeesRangePair &brp_in)
|
|||||||
|
|
||||||
BEESLOGINFO("dedup: src " << pretty(brp.first.size()) << " [" << to_hex(brp.first.begin()) << ".." << to_hex(brp.first.end()) << "] {" << first_addr << "} " << name_fd(brp.first.fd()) << "\n"
|
BEESLOGINFO("dedup: src " << pretty(brp.first.size()) << " [" << to_hex(brp.first.begin()) << ".." << to_hex(brp.first.end()) << "] {" << first_addr << "} " << name_fd(brp.first.fd()) << "\n"
|
||||||
<< " dst " << pretty(brp.second.size()) << " [" << to_hex(brp.second.begin()) << ".." << to_hex(brp.second.end()) << "] {" << second_addr << "} " << name_fd(brp.second.fd()));
|
<< " dst " << pretty(brp.second.size()) << " [" << to_hex(brp.second.begin()) << ".." << to_hex(brp.second.end()) << "] {" << second_addr << "} " << name_fd(brp.second.fd()));
|
||||||
BEESNOTE("dedup: src " << pretty(brp.first.size()) << " [" << to_hex(brp.first.begin()) << ".." << to_hex(brp.first.end()) << "] {" << first_addr << "} " << name_fd(brp.first.fd()) << "\n"
|
|
||||||
<< " dst " << pretty(brp.second.size()) << " [" << to_hex(brp.second.begin()) << ".." << to_hex(brp.second.end()) << "] {" << second_addr << "} " << name_fd(brp.second.fd()));
|
|
||||||
|
|
||||||
if (first_addr.get_physical_or_zero() == second_addr.get_physical_or_zero()) {
|
if (first_addr.get_physical_or_zero() == second_addr.get_physical_or_zero()) {
|
||||||
BEESLOGTRACE("equal physical addresses in dedup");
|
BEESLOGTRACE("equal physical addresses in dedup");
|
||||||
@@ -294,15 +292,6 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
|
|||||||
BEESTRACE("scan extent " << e);
|
BEESTRACE("scan extent " << e);
|
||||||
BEESCOUNT(scan_extent);
|
BEESCOUNT(scan_extent);
|
||||||
|
|
||||||
// EXPERIMENT: Don't bother with tiny extents unless they are the entire file.
|
|
||||||
// We'll take a tiny extent at BOF or EOF but not in between.
|
|
||||||
if (e.begin() && e.size() < 128 * 1024 && e.end() != Stat(bfr.fd()).st_size) {
|
|
||||||
BEESCOUNT(scan_extent_tiny);
|
|
||||||
// This doesn't work properly with the current architecture,
|
|
||||||
// so we don't do an early return here.
|
|
||||||
// return bfr;
|
|
||||||
}
|
|
||||||
|
|
||||||
// We keep moving this method around
|
// We keep moving this method around
|
||||||
auto m_ctx = shared_from_this();
|
auto m_ctx = shared_from_this();
|
||||||
|
|
||||||
@@ -719,28 +708,27 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
|
|||||||
BEESLOGINFO("scan: " << pretty(e.size()) << " " << to_hex(e.begin()) << " [" << bar << "] " << to_hex(e.end()) << ' ' << name_fd(bfr.fd()));
|
BEESLOGINFO("scan: " << pretty(e.size()) << " " << to_hex(e.begin()) << " [" << bar << "] " << to_hex(e.end()) << ' ' << name_fd(bfr.fd()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Costs 10% on benchmarks
|
|
||||||
// bees_unreadahead(bfr.fd(), bfr.begin(), bfr.size());
|
|
||||||
return bfr;
|
return bfr;
|
||||||
}
|
}
|
||||||
|
|
||||||
BeesFileRange
|
BeesFileRange
|
||||||
BeesContext::scan_forward(const BeesFileRange &bfr_in)
|
BeesContext::scan_forward(const BeesFileRange &bfr)
|
||||||
{
|
{
|
||||||
BEESTRACE("scan_forward " << bfr_in);
|
// What are we doing here?
|
||||||
|
BEESTRACE("scan_forward " << bfr);
|
||||||
BEESCOUNT(scan_forward);
|
BEESCOUNT(scan_forward);
|
||||||
|
|
||||||
Timer scan_timer;
|
Timer scan_timer;
|
||||||
|
|
||||||
// Silently filter out blacklisted files
|
// Silently filter out blacklisted files
|
||||||
if (is_blacklisted(bfr_in.fid())) {
|
if (is_blacklisted(bfr.fid())) {
|
||||||
BEESCOUNT(scan_blacklisted);
|
BEESCOUNT(scan_blacklisted);
|
||||||
return bfr_in;
|
return bfr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BEESNOTE("scan open " << bfr);
|
||||||
|
|
||||||
// Reconstitute FD
|
// Reconstitute FD
|
||||||
BEESNOTE("scan open " << bfr_in);
|
|
||||||
auto bfr = bfr_in;
|
|
||||||
bfr.fd(shared_from_this());
|
bfr.fd(shared_from_this());
|
||||||
|
|
||||||
BEESNOTE("scan extent " << bfr);
|
BEESNOTE("scan extent " << bfr);
|
||||||
@@ -808,7 +796,8 @@ BeesContext::wait_for_balance()
|
|||||||
Timer balance_timer;
|
Timer balance_timer;
|
||||||
BEESNOTE("WORKAROUND: waiting for balance to stop");
|
BEESNOTE("WORKAROUND: waiting for balance to stop");
|
||||||
while (true) {
|
while (true) {
|
||||||
btrfs_ioctl_balance_args args {};
|
btrfs_ioctl_balance_args args;
|
||||||
|
memset_zero<btrfs_ioctl_balance_args>(&args);
|
||||||
const int ret = ioctl(root_fd(), BTRFS_IOC_BALANCE_PROGRESS, &args);
|
const int ret = ioctl(root_fd(), BTRFS_IOC_BALANCE_PROGRESS, &args);
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
// Either can't get balance status or not running, exit either way
|
// Either can't get balance status or not running, exit either way
|
||||||
@@ -846,6 +835,24 @@ BeesContext::resolve_addr_uncached(BeesAddress addr)
|
|||||||
// transaction latency, competing threads, and freeze/SIGSTOP
|
// transaction latency, competing threads, and freeze/SIGSTOP
|
||||||
// pausing the bees process.
|
// pausing the bees process.
|
||||||
|
|
||||||
|
// There can be only one of these running at a time, or some lingering
|
||||||
|
// backref bug will kill the whole system. Also it looks like there
|
||||||
|
// are so many locks held while LOGICAL_INO runs that there is no
|
||||||
|
// point in trying to run two of them on the same filesystem.
|
||||||
|
// ...but it works most of the time, and the performance hit from
|
||||||
|
// not running resolve in multiple threads is significant.
|
||||||
|
// But "most of the time" really just means "between forced reboots",
|
||||||
|
// and with recent improvements in kernel uptime, this is now in the
|
||||||
|
// top 3 crash causes.
|
||||||
|
static mutex s_resolve_mutex;
|
||||||
|
unique_lock<mutex> lock(s_resolve_mutex, defer_lock);
|
||||||
|
if (BEES_SERIALIZE_RESOLVE) {
|
||||||
|
BEESNOTE("waiting to resolve addr " << addr);
|
||||||
|
lock.lock();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Is there a bug where resolve and balance cause a crash (BUG_ON at fs/btrfs/ctree.c:1227)?
|
||||||
|
// Apparently yes, and more than one.
|
||||||
// Wait for the balance to finish before we run LOGICAL_INO
|
// Wait for the balance to finish before we run LOGICAL_INO
|
||||||
wait_for_balance();
|
wait_for_balance();
|
||||||
|
|
||||||
@@ -873,15 +880,15 @@ BeesContext::resolve_addr_uncached(BeesAddress addr)
|
|||||||
struct rusage usage_after;
|
struct rusage usage_after;
|
||||||
DIE_IF_MINUS_ONE(getrusage(RUSAGE_THREAD, &usage_after));
|
DIE_IF_MINUS_ONE(getrusage(RUSAGE_THREAD, &usage_after));
|
||||||
|
|
||||||
const double sys_usage_delta =
|
double sys_usage_delta =
|
||||||
(usage_after.ru_stime.tv_sec + usage_after.ru_stime.tv_usec / 1000000.0) -
|
(usage_after.ru_stime.tv_sec + usage_after.ru_stime.tv_usec / 1000000.0) -
|
||||||
(usage_before.ru_stime.tv_sec + usage_before.ru_stime.tv_usec / 1000000.0);
|
(usage_before.ru_stime.tv_sec + usage_before.ru_stime.tv_usec / 1000000.0);
|
||||||
|
|
||||||
const double user_usage_delta =
|
double user_usage_delta =
|
||||||
(usage_after.ru_utime.tv_sec + usage_after.ru_utime.tv_usec / 1000000.0) -
|
(usage_after.ru_utime.tv_sec + usage_after.ru_utime.tv_usec / 1000000.0) -
|
||||||
(usage_before.ru_utime.tv_sec + usage_before.ru_utime.tv_usec / 1000000.0);
|
(usage_before.ru_utime.tv_sec + usage_before.ru_utime.tv_usec / 1000000.0);
|
||||||
|
|
||||||
const auto rt_age = resolve_timer.age();
|
auto rt_age = resolve_timer.age();
|
||||||
|
|
||||||
BeesResolveAddrResult rv;
|
BeesResolveAddrResult rv;
|
||||||
|
|
||||||
@@ -905,13 +912,12 @@ BeesContext::resolve_addr_uncached(BeesAddress addr)
|
|||||||
|
|
||||||
// Count how many times this happens so we can figure out how
|
// Count how many times this happens so we can figure out how
|
||||||
// important this case is
|
// important this case is
|
||||||
static const size_t max_logical_ino_v1_refs = 2730; // (65536 - header_len) / (sizeof(uint64_t) * 3)
|
static size_t most_refs_ever = 2730;
|
||||||
static size_t most_refs_ever = max_logical_ino_v1_refs;
|
|
||||||
if (rv_count > most_refs_ever) {
|
if (rv_count > most_refs_ever) {
|
||||||
BEESLOGINFO("addr " << addr << " refs " << rv_count << " beats previous record " << most_refs_ever);
|
BEESLOGINFO("addr " << addr << " refs " << rv_count << " beats previous record " << most_refs_ever);
|
||||||
most_refs_ever = rv_count;
|
most_refs_ever = rv_count;
|
||||||
}
|
}
|
||||||
if (rv_count > max_logical_ino_v1_refs) {
|
if (rv_count > 2730) {
|
||||||
BEESCOUNT(resolve_large);
|
BEESCOUNT(resolve_large);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1054,13 +1060,9 @@ BeesContext::stop()
|
|||||||
BEESLOGDEBUG("Waiting for progress thread");
|
BEESLOGDEBUG("Waiting for progress thread");
|
||||||
m_progress_thread->join();
|
m_progress_thread->join();
|
||||||
|
|
||||||
// Write status once with this message...
|
// XXX: nobody can see this BEESNOTE because we are killing the
|
||||||
BEESNOTE("stopping status thread at " << stop_timer << " sec");
|
// thread that publishes it
|
||||||
lock.lock();
|
BEESNOTE("waiting for status thread");
|
||||||
m_stop_condvar.notify_all();
|
|
||||||
lock.unlock();
|
|
||||||
|
|
||||||
// then wake the thread up one more time to exit the while loop
|
|
||||||
BEESLOGDEBUG("Waiting for status thread");
|
BEESLOGDEBUG("Waiting for status thread");
|
||||||
lock.lock();
|
lock.lock();
|
||||||
m_stop_status = true;
|
m_stop_status = true;
|
||||||
|
@@ -3,9 +3,9 @@
|
|||||||
#include "crucible/city.h"
|
#include "crucible/city.h"
|
||||||
#include "crucible/crc64.h"
|
#include "crucible/crc64.h"
|
||||||
#include "crucible/string.h"
|
#include "crucible/string.h"
|
||||||
#include "crucible/uname.h"
|
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <random>
|
||||||
|
|
||||||
#include <sys/mman.h>
|
#include <sys/mman.h>
|
||||||
|
|
||||||
@@ -123,7 +123,7 @@ BeesHashTable::flush_dirty_extent(uint64_t extent_index)
|
|||||||
THROW_CHECK2(out_of_range, dirty_extent_end, dirty_extent, dirty_extent_end - dirty_extent == BLOCK_SIZE_HASHTAB_EXTENT);
|
THROW_CHECK2(out_of_range, dirty_extent_end, dirty_extent, dirty_extent_end - dirty_extent == BLOCK_SIZE_HASHTAB_EXTENT);
|
||||||
BEESTOOLONG("pwrite(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
|
BEESTOOLONG("pwrite(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
|
||||||
// Copy the extent because we might be stuck writing for a while
|
// Copy the extent because we might be stuck writing for a while
|
||||||
ByteVector extent_copy(dirty_extent, dirty_extent_end);
|
vector<uint8_t> extent_copy(dirty_extent, dirty_extent_end);
|
||||||
|
|
||||||
// Mark extent non-dirty while we still hold the lock
|
// Mark extent non-dirty while we still hold the lock
|
||||||
m_extent_metadata.at(extent_index).m_dirty = false;
|
m_extent_metadata.at(extent_index).m_dirty = false;
|
||||||
@@ -206,11 +206,8 @@ BeesHashTable::writeback_loop()
|
|||||||
}
|
}
|
||||||
catch_all([&]() {
|
catch_all([&]() {
|
||||||
// trigger writeback on our way out
|
// trigger writeback on our way out
|
||||||
#if 0
|
|
||||||
// seems to trigger huge latency spikes
|
|
||||||
BEESTOOLONG("unreadahead hash table size " << pretty(m_size));
|
BEESTOOLONG("unreadahead hash table size " << pretty(m_size));
|
||||||
bees_unreadahead(m_fd, 0, m_size);
|
bees_unreadahead(m_fd, 0, m_size);
|
||||||
#endif
|
|
||||||
});
|
});
|
||||||
BEESLOGDEBUG("Exited hash table writeback_loop");
|
BEESLOGDEBUG("Exited hash table writeback_loop");
|
||||||
}
|
}
|
||||||
@@ -229,7 +226,6 @@ percent(size_t num, size_t den)
|
|||||||
void
|
void
|
||||||
BeesHashTable::prefetch_loop()
|
BeesHashTable::prefetch_loop()
|
||||||
{
|
{
|
||||||
Uname uname;
|
|
||||||
bool not_locked = true;
|
bool not_locked = true;
|
||||||
while (!m_stop_requested) {
|
while (!m_stop_requested) {
|
||||||
size_t width = 64;
|
size_t width = 64;
|
||||||
@@ -323,7 +319,6 @@ BeesHashTable::prefetch_loop()
|
|||||||
graph_blob << "Now: " << format_time(time(NULL)) << "\n";
|
graph_blob << "Now: " << format_time(time(NULL)) << "\n";
|
||||||
graph_blob << "Uptime: " << m_ctx->total_timer().age() << " seconds\n";
|
graph_blob << "Uptime: " << m_ctx->total_timer().age() << " seconds\n";
|
||||||
graph_blob << "Version: " << BEES_VERSION << "\n";
|
graph_blob << "Version: " << BEES_VERSION << "\n";
|
||||||
graph_blob << "Kernel: " << uname.sysname << " " << uname.release << " " << uname.machine << " " << uname.version << "\n";
|
|
||||||
|
|
||||||
graph_blob
|
graph_blob
|
||||||
<< "\nHash table page occupancy histogram (" << occupied_count << "/" << total_count << " cells occupied, " << (occupied_count * 100 / total_count) << "%)\n"
|
<< "\nHash table page occupancy histogram (" << occupied_count << "/" << total_count << " cells occupied, " << (occupied_count * 100 / total_count) << "%)\n"
|
||||||
@@ -543,8 +538,6 @@ BeesHashTable::push_front_hash_addr(HashType hash, AddrType addr)
|
|||||||
return found;
|
return found;
|
||||||
}
|
}
|
||||||
|
|
||||||
thread_local uniform_int_distribution<size_t> BeesHashTable::tl_distribution(0, c_cells_per_bucket - 1);
|
|
||||||
|
|
||||||
/// Insert a hash entry at some unspecified point in the list.
|
/// Insert a hash entry at some unspecified point in the list.
|
||||||
/// If entry is already present in list, returns true and does not
|
/// If entry is already present in list, returns true and does not
|
||||||
/// modify list. If entry is not present in list, returns false and
|
/// modify list. If entry is not present in list, returns false and
|
||||||
@@ -562,7 +555,9 @@ BeesHashTable::push_random_hash_addr(HashType hash, AddrType addr)
|
|||||||
Cell *ip = find(er.first, er.second, mv);
|
Cell *ip = find(er.first, er.second, mv);
|
||||||
bool found = (ip < er.second);
|
bool found = (ip < er.second);
|
||||||
|
|
||||||
const auto pos = tl_distribution(bees_generator);
|
thread_local default_random_engine generator;
|
||||||
|
thread_local uniform_int_distribution<int> distribution(0, c_cells_per_bucket - 1);
|
||||||
|
auto pos = distribution(generator);
|
||||||
|
|
||||||
int case_cond = 0;
|
int case_cond = 0;
|
||||||
#if 0
|
#if 0
|
||||||
|
@@ -385,15 +385,14 @@ BeesResolver::for_each_extent_ref(BeesBlockData bbd, function<bool(const BeesFil
|
|||||||
}
|
}
|
||||||
|
|
||||||
BeesFileRange
|
BeesFileRange
|
||||||
BeesResolver::replace_dst(const BeesFileRange &dst_bfr_in)
|
BeesResolver::replace_dst(const BeesFileRange &dst_bfr)
|
||||||
{
|
{
|
||||||
BEESTRACE("replace_dst dst_bfr " << dst_bfr_in);
|
BEESTRACE("replace_dst dst_bfr " << dst_bfr);
|
||||||
BEESCOUNT(replacedst_try);
|
BEESCOUNT(replacedst_try);
|
||||||
|
|
||||||
// Open dst, reuse it for all src
|
// Open dst, reuse it for all src
|
||||||
BEESNOTE("Opening dst bfr " << dst_bfr_in);
|
BEESNOTE("Opening dst bfr " << dst_bfr);
|
||||||
BEESTRACE("Opening dst bfr " << dst_bfr_in);
|
BEESTRACE("Opening dst bfr " << dst_bfr);
|
||||||
auto dst_bfr = dst_bfr_in;
|
|
||||||
dst_bfr.fd(m_ctx);
|
dst_bfr.fd(m_ctx);
|
||||||
|
|
||||||
BeesFileRange overlap_bfr;
|
BeesFileRange overlap_bfr;
|
||||||
@@ -401,11 +400,10 @@ BeesResolver::replace_dst(const BeesFileRange &dst_bfr_in)
|
|||||||
|
|
||||||
BeesBlockData bbd(dst_bfr);
|
BeesBlockData bbd(dst_bfr);
|
||||||
|
|
||||||
for_each_extent_ref(bbd, [&](const BeesFileRange &src_bfr_in) -> bool {
|
for_each_extent_ref(bbd, [&](const BeesFileRange &src_bfr) -> bool {
|
||||||
// Open src
|
// Open src
|
||||||
BEESNOTE("Opening src bfr " << src_bfr_in);
|
BEESNOTE("Opening src bfr " << src_bfr);
|
||||||
BEESTRACE("Opening src bfr " << src_bfr_in);
|
BEESTRACE("Opening src bfr " << src_bfr);
|
||||||
auto src_bfr = src_bfr_in;
|
|
||||||
src_bfr.fd(m_ctx);
|
src_bfr.fd(m_ctx);
|
||||||
|
|
||||||
if (dst_bfr.overlaps(src_bfr)) {
|
if (dst_bfr.overlaps(src_bfr)) {
|
||||||
@@ -420,7 +418,7 @@ BeesResolver::replace_dst(const BeesFileRange &dst_bfr_in)
|
|||||||
BEESCOUNT(replacedst_same);
|
BEESCOUNT(replacedst_same);
|
||||||
// stop looping here, all the other srcs will probably fail this test too
|
// stop looping here, all the other srcs will probably fail this test too
|
||||||
BeesTracer::set_silent();
|
BeesTracer::set_silent();
|
||||||
throw runtime_error("FIXME: too many duplicate candidates, bailing out here");
|
throw runtime_error("FIXME: bailing out here, need to fix this further up the call stack");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make pair(src, dst)
|
// Make pair(src, dst)
|
||||||
|
@@ -171,23 +171,15 @@ BeesRoots::crawl_state_erase(const BeesCrawlState &bcs)
|
|||||||
uint64_t
|
uint64_t
|
||||||
BeesRoots::transid_min()
|
BeesRoots::transid_min()
|
||||||
{
|
{
|
||||||
uint64_t rv = numeric_limits<uint64_t>::max();
|
BEESNOTE("Calculating transid_min");
|
||||||
uint64_t last_root = 0;
|
|
||||||
BEESNOTE("Calculating transid_min (" << rv << " so far, last_root " << last_root << ")");
|
|
||||||
unique_lock<mutex> lock(m_mutex);
|
unique_lock<mutex> lock(m_mutex);
|
||||||
if (m_root_crawl_map.empty()) {
|
if (m_root_crawl_map.empty()) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
uint64_t rv = numeric_limits<uint64_t>::max();
|
||||||
const uint64_t max_rv = rv;
|
const uint64_t max_rv = rv;
|
||||||
for (auto i : m_root_crawl_map) {
|
for (auto i : m_root_crawl_map) {
|
||||||
// Do not count subvols that are isolated by btrfs send workaround.
|
rv = min(rv, i.second->get_state_end().m_min_transid);
|
||||||
// They will not advance until the workaround is removed or they are set read-write.
|
|
||||||
catch_all([&](){
|
|
||||||
if (!is_root_ro(i.first)) {
|
|
||||||
rv = min(rv, i.second->get_state_end().m_min_transid);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
last_root = i.first;
|
|
||||||
}
|
}
|
||||||
// If we get through this loop without setting rv, we'll create broken crawlers due to integer overflow.
|
// If we get through this loop without setting rv, we'll create broken crawlers due to integer overflow.
|
||||||
THROW_CHECK2(runtime_error, rv, max_rv, max_rv > rv);
|
THROW_CHECK2(runtime_error, rv, max_rv, max_rv > rv);
|
||||||
@@ -209,7 +201,7 @@ BeesRoots::transid_max_nocache()
|
|||||||
sk.min_objectid = sk.max_objectid = BTRFS_EXTENT_TREE_OBJECTID;
|
sk.min_objectid = sk.max_objectid = BTRFS_EXTENT_TREE_OBJECTID;
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
sk.nr_items = 4;
|
sk.nr_items = 1024;
|
||||||
BEESTRACE("transid_max search sk " << sk);
|
BEESTRACE("transid_max search sk " << sk);
|
||||||
sk.do_ioctl(m_ctx->root_fd());
|
sk.do_ioctl(m_ctx->root_fd());
|
||||||
|
|
||||||
@@ -220,7 +212,7 @@ BeesRoots::transid_max_nocache()
|
|||||||
// We are just looking for the highest transid on the filesystem.
|
// We are just looking for the highest transid on the filesystem.
|
||||||
// We don't care which object it comes from.
|
// We don't care which object it comes from.
|
||||||
for (auto i : sk.m_result) {
|
for (auto i : sk.m_result) {
|
||||||
sk.next_min(i, BTRFS_ROOT_ITEM_KEY);
|
sk.next_min(i);
|
||||||
if (i.transid > rv) {
|
if (i.transid > rv) {
|
||||||
rv = i.transid;
|
rv = i.transid;
|
||||||
}
|
}
|
||||||
@@ -229,8 +221,6 @@ BeesRoots::transid_max_nocache()
|
|||||||
|
|
||||||
// transid must be greater than zero, or we did something very wrong
|
// transid must be greater than zero, or we did something very wrong
|
||||||
THROW_CHECK1(runtime_error, rv, rv > 0);
|
THROW_CHECK1(runtime_error, rv, rv > 0);
|
||||||
// transid must be less than max, or we did something very wrong
|
|
||||||
THROW_CHECK1(runtime_error, rv, rv < numeric_limits<uint64_t>::max());
|
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -634,6 +624,7 @@ BeesRoots::open_root_nocache(uint64_t rootid)
|
|||||||
|
|
||||||
BEESTRACE("sk " << sk);
|
BEESTRACE("sk " << sk);
|
||||||
while (sk.min_objectid <= rootid) {
|
while (sk.min_objectid <= rootid) {
|
||||||
|
sk.nr_items = 1024;
|
||||||
sk.do_ioctl(m_ctx->root_fd());
|
sk.do_ioctl(m_ctx->root_fd());
|
||||||
|
|
||||||
if (sk.m_result.empty()) {
|
if (sk.m_result.empty()) {
|
||||||
@@ -641,16 +632,16 @@ BeesRoots::open_root_nocache(uint64_t rootid)
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (auto i : sk.m_result) {
|
for (auto i : sk.m_result) {
|
||||||
sk.next_min(i, BTRFS_ROOT_BACKREF_KEY);
|
sk.next_min(i);
|
||||||
if (i.type == BTRFS_ROOT_BACKREF_KEY && i.objectid == rootid) {
|
if (i.type == BTRFS_ROOT_BACKREF_KEY && i.objectid == rootid) {
|
||||||
const auto dirid = btrfs_get_member(&btrfs_root_ref::dirid, i.m_data);
|
auto dirid = btrfs_get_member(&btrfs_root_ref::dirid, i.m_data);
|
||||||
const auto name_len = btrfs_get_member(&btrfs_root_ref::name_len, i.m_data);
|
auto name_len = btrfs_get_member(&btrfs_root_ref::name_len, i.m_data);
|
||||||
const auto name_start = sizeof(struct btrfs_root_ref);
|
auto name_start = sizeof(struct btrfs_root_ref);
|
||||||
const auto name_end = name_len + name_start;
|
auto name_end = name_len + name_start;
|
||||||
THROW_CHECK2(runtime_error, i.m_data.size(), name_end, i.m_data.size() >= name_end);
|
THROW_CHECK2(runtime_error, i.m_data.size(), name_end, i.m_data.size() >= name_end);
|
||||||
const string name(i.m_data.data() + name_start, i.m_data.data() + name_end);
|
string name(i.m_data.data() + name_start, i.m_data.data() + name_end);
|
||||||
|
|
||||||
const auto parent_rootid = i.offset;
|
auto parent_rootid = i.offset;
|
||||||
// BEESLOG("parent_rootid " << parent_rootid << " dirid " << dirid << " name " << name);
|
// BEESLOG("parent_rootid " << parent_rootid << " dirid " << dirid << " name " << name);
|
||||||
BEESTRACE("parent_rootid " << parent_rootid << " dirid " << dirid << " name " << name);
|
BEESTRACE("parent_rootid " << parent_rootid << " dirid " << dirid << " name " << name);
|
||||||
BEESCOUNT(root_parent_open_try);
|
BEESCOUNT(root_parent_open_try);
|
||||||
@@ -770,6 +761,7 @@ BeesRoots::next_root(uint64_t root)
|
|||||||
sk.min_objectid = root + 1;
|
sk.min_objectid = root + 1;
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
|
sk.nr_items = 1024;
|
||||||
sk.do_ioctl(m_ctx->root_fd());
|
sk.do_ioctl(m_ctx->root_fd());
|
||||||
|
|
||||||
if (sk.m_result.empty()) {
|
if (sk.m_result.empty()) {
|
||||||
@@ -777,7 +769,7 @@ BeesRoots::next_root(uint64_t root)
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (auto i : sk.m_result) {
|
for (auto i : sk.m_result) {
|
||||||
sk.next_min(i, BTRFS_ROOT_BACKREF_KEY);
|
sk.next_min(i);
|
||||||
if (i.type == BTRFS_ROOT_BACKREF_KEY) {
|
if (i.type == BTRFS_ROOT_BACKREF_KEY) {
|
||||||
// BEESLOGDEBUG("Found root " << i.objectid << " parent " << i.offset << " transid " << i.transid);
|
// BEESLOGDEBUG("Found root " << i.objectid << " parent " << i.offset << " transid " << i.transid);
|
||||||
return i.objectid;
|
return i.objectid;
|
||||||
@@ -955,8 +947,8 @@ BeesCrawl::BeesCrawl(shared_ptr<BeesContext> ctx, BeesCrawlState initial_state)
|
|||||||
bool
|
bool
|
||||||
BeesCrawl::next_transid()
|
BeesCrawl::next_transid()
|
||||||
{
|
{
|
||||||
const auto roots = m_ctx->roots();
|
auto roots = m_ctx->roots();
|
||||||
const auto next_transid = roots->transid_max();
|
auto next_transid = roots->transid_max();
|
||||||
auto crawl_state = get_state_end();
|
auto crawl_state = get_state_end();
|
||||||
|
|
||||||
// If we are already at transid_max then we are still finished
|
// If we are already at transid_max then we are still finished
|
||||||
@@ -966,7 +958,7 @@ BeesCrawl::next_transid()
|
|||||||
m_deferred = true;
|
m_deferred = true;
|
||||||
} else {
|
} else {
|
||||||
// Log performance stats from the old crawl
|
// Log performance stats from the old crawl
|
||||||
const auto current_time = time(NULL);
|
auto current_time = time(NULL);
|
||||||
|
|
||||||
// Start new crawl
|
// Start new crawl
|
||||||
crawl_state.m_min_transid = crawl_state.m_max_transid;
|
crawl_state.m_min_transid = crawl_state.m_max_transid;
|
||||||
@@ -1001,11 +993,25 @@ BeesCrawl::fetch_extents()
|
|||||||
return next_transid();
|
return next_transid();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check for btrfs send workaround: don't scan RO roots at all, pretend
|
||||||
|
// they are just empty. We can't free any space there, and we
|
||||||
|
// don't have the necessary analysis logic to be able to use
|
||||||
|
// them as dedupe src extents (yet).
|
||||||
|
//
|
||||||
|
// This will keep the max_transid up to date so if the root
|
||||||
|
// is ever switched back to read-write, it won't trigger big
|
||||||
|
// expensive in-kernel searches for ancient transids.
|
||||||
|
if (m_ctx->is_root_ro(old_state.m_root)) {
|
||||||
|
BEESLOGDEBUG("WORKAROUND: skipping scan of RO root " << old_state.m_root);
|
||||||
|
BEESCOUNT(root_workaround_btrfs_send);
|
||||||
|
return next_transid();
|
||||||
|
}
|
||||||
|
|
||||||
BEESNOTE("crawling " << get_state_end());
|
BEESNOTE("crawling " << get_state_end());
|
||||||
|
|
||||||
Timer crawl_timer;
|
Timer crawl_timer;
|
||||||
|
|
||||||
BtrfsIoctlSearchKey sk;
|
BtrfsIoctlSearchKey sk(BEES_MAX_CRAWL_BYTES);
|
||||||
sk.tree_id = old_state.m_root;
|
sk.tree_id = old_state.m_root;
|
||||||
sk.min_objectid = old_state.m_objectid;
|
sk.min_objectid = old_state.m_objectid;
|
||||||
sk.min_type = sk.max_type = BTRFS_EXTENT_DATA_KEY;
|
sk.min_type = sk.max_type = BTRFS_EXTENT_DATA_KEY;
|
||||||
@@ -1013,7 +1019,7 @@ BeesCrawl::fetch_extents()
|
|||||||
sk.min_transid = old_state.m_min_transid;
|
sk.min_transid = old_state.m_min_transid;
|
||||||
// Don't set max_transid to m_max_transid here. See below.
|
// Don't set max_transid to m_max_transid here. See below.
|
||||||
sk.max_transid = numeric_limits<uint64_t>::max();
|
sk.max_transid = numeric_limits<uint64_t>::max();
|
||||||
sk.nr_items = 4;
|
sk.nr_items = BEES_MAX_CRAWL_ITEMS;
|
||||||
|
|
||||||
// Lock in the old state
|
// Lock in the old state
|
||||||
set_state(old_state);
|
set_state(old_state);
|
||||||
@@ -1041,43 +1047,6 @@ BeesCrawl::fetch_extents()
|
|||||||
return next_transid();
|
return next_transid();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for btrfs send workaround: don't scan RO roots at all, pretend
|
|
||||||
// they are just empty. We can't free any space there, and we
|
|
||||||
// don't have the necessary analysis logic to be able to use
|
|
||||||
// them as dedupe src extents (yet).
|
|
||||||
bool ro_root = true;
|
|
||||||
catch_all([&](){
|
|
||||||
ro_root = m_ctx->is_root_ro(old_state.m_root);
|
|
||||||
});
|
|
||||||
if (ro_root) {
|
|
||||||
BEESLOGDEBUG("WORKAROUND: skipping scan of RO root " << old_state.m_root);
|
|
||||||
BEESCOUNT(root_workaround_btrfs_send);
|
|
||||||
// We would call next_transid() here, but we want to do a few things differently.
|
|
||||||
// We immediately defer further crawling on this subvol.
|
|
||||||
// We track max_transid if the subvol scan has never started.
|
|
||||||
// We postpone the started timestamp since we haven't started.
|
|
||||||
auto crawl_state = get_state_end();
|
|
||||||
if (crawl_state.m_objectid == 0) {
|
|
||||||
// This will keep the max_transid up to date so if the root
|
|
||||||
// is ever switched back to read-write, it won't trigger big
|
|
||||||
// expensive in-kernel searches for ancient transids.
|
|
||||||
// If the root is made RO while crawling is in progress, we will
|
|
||||||
// have the big expensive in-kernel searches (same as if we have
|
|
||||||
// been not running for a long time).
|
|
||||||
// Don't allow transid_max to ever move backwards.
|
|
||||||
const auto roots = m_ctx->roots();
|
|
||||||
const auto next_transid = roots->transid_max();
|
|
||||||
const auto current_time = time(NULL);
|
|
||||||
crawl_state.m_max_transid = max(next_transid, crawl_state.m_max_transid);
|
|
||||||
// Move the start time forward too, since we have not started crawling yet.
|
|
||||||
crawl_state.m_started = current_time;
|
|
||||||
set_state(crawl_state);
|
|
||||||
}
|
|
||||||
// Mark this root deferred so we won't see it until the next transid cycle
|
|
||||||
m_deferred = true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// BEESLOGINFO("Crawling " << sk.m_result.size() << " results from " << get_state_end());
|
// BEESLOGINFO("Crawling " << sk.m_result.size() << " results from " << get_state_end());
|
||||||
auto results_left = sk.m_result.size();
|
auto results_left = sk.m_result.size();
|
||||||
BEESNOTE("crawling " << results_left << " results from " << get_state_end());
|
BEESNOTE("crawling " << results_left << " results from " << get_state_end());
|
||||||
@@ -1089,7 +1058,7 @@ BeesCrawl::fetch_extents()
|
|||||||
size_t count_high = 0;
|
size_t count_high = 0;
|
||||||
BeesFileRange last_bfr;
|
BeesFileRange last_bfr;
|
||||||
for (auto i : sk.m_result) {
|
for (auto i : sk.m_result) {
|
||||||
sk.next_min(i, BTRFS_EXTENT_DATA_KEY);
|
sk.next_min(i);
|
||||||
--results_left;
|
--results_left;
|
||||||
BEESCOUNT(crawl_items);
|
BEESCOUNT(crawl_items);
|
||||||
|
|
||||||
|
@@ -287,7 +287,7 @@ BeesFileRange::fd() const
|
|||||||
}
|
}
|
||||||
|
|
||||||
Fd
|
Fd
|
||||||
BeesFileRange::fd(const shared_ptr<BeesContext> &ctx)
|
BeesFileRange::fd(const shared_ptr<BeesContext> &ctx) const
|
||||||
{
|
{
|
||||||
// If we don't have a fid we can't do much here
|
// If we don't have a fid we can't do much here
|
||||||
if (m_fid) {
|
if (m_fid) {
|
||||||
|
21
src/bees.cc
21
src/bees.cc
@@ -231,23 +231,17 @@ bees_readahead(int const fd, off_t offset, size_t size)
|
|||||||
Timer readahead_timer;
|
Timer readahead_timer;
|
||||||
BEESNOTE("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
BEESNOTE("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
||||||
BEESTOOLONG("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
BEESTOOLONG("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
||||||
#if 1
|
|
||||||
// In the kernel, readahead() is identical to posix_fadvise(..., POSIX_FADV_DONTNEED)
|
// In the kernel, readahead() is identical to posix_fadvise(..., POSIX_FADV_DONTNEED)
|
||||||
DIE_IF_NON_ZERO(readahead(fd, offset, size));
|
DIE_IF_NON_ZERO(readahead(fd, offset, size));
|
||||||
#else
|
#if 0
|
||||||
// Make sure this data is in page cache by brute force
|
// Make sure this data is in page cache by brute force
|
||||||
// This isn't necessary and it might even be slower,
|
// This isn't necessary and it might even be slower
|
||||||
// but the btrfs kernel code does readahead with lower ioprio
|
|
||||||
// and might discard the readahead request entirely,
|
|
||||||
// so it's maybe, *maybe*, worth doing both.
|
|
||||||
BEESNOTE("emulating readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
BEESNOTE("emulating readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
||||||
while (size) {
|
while (size) {
|
||||||
// don't care about multithreaded writes to this buffer--it is garbage anyway
|
|
||||||
static uint8_t dummy[BEES_READAHEAD_SIZE];
|
static uint8_t dummy[BEES_READAHEAD_SIZE];
|
||||||
size_t this_read_size = min(size, sizeof(dummy));
|
size_t this_read_size = min(size, sizeof(dummy));
|
||||||
// Ignore errors and short reads. It turns out our size
|
// Ignore errors and short reads.
|
||||||
// parameter isn't all that accurate, so we can't use
|
// It turns out our size parameter isn't all that accurate.
|
||||||
// the pread_or_die template.
|
|
||||||
(void)!pread(fd, dummy, this_read_size, offset);
|
(void)!pread(fd, dummy, this_read_size, offset);
|
||||||
BEESCOUNT(readahead_count);
|
BEESCOUNT(readahead_count);
|
||||||
BEESCOUNTADD(readahead_bytes, this_read_size);
|
BEESCOUNTADD(readahead_bytes, this_read_size);
|
||||||
@@ -268,13 +262,6 @@ bees_unreadahead(int const fd, off_t offset, size_t size)
|
|||||||
BEESCOUNTADD(readahead_unread_ms, unreadahead_timer.age() * 1000);
|
BEESCOUNTADD(readahead_unread_ms, unreadahead_timer.age() * 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
thread_local random_device bees_random_device;
|
|
||||||
thread_local uniform_int_distribution<default_random_engine::result_type> bees_random_seed_dist(
|
|
||||||
numeric_limits<default_random_engine::result_type>::min(),
|
|
||||||
numeric_limits<default_random_engine::result_type>::max()
|
|
||||||
);
|
|
||||||
thread_local default_random_engine bees_generator(bees_random_seed_dist(bees_random_device));
|
|
||||||
|
|
||||||
BeesStringFile::BeesStringFile(Fd dir_fd, string name, size_t limit) :
|
BeesStringFile::BeesStringFile(Fd dir_fd, string name, size_t limit) :
|
||||||
m_dir_fd(dir_fd),
|
m_dir_fd(dir_fd),
|
||||||
m_name(name),
|
m_name(name),
|
||||||
|
28
src/bees.h
28
src/bees.h
@@ -13,15 +13,15 @@
|
|||||||
#include "crucible/time.h"
|
#include "crucible/time.h"
|
||||||
#include "crucible/task.h"
|
#include "crucible/task.h"
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <random>
|
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
|
||||||
#include <endian.h>
|
|
||||||
#include <syslog.h>
|
#include <syslog.h>
|
||||||
|
#include <endian.h>
|
||||||
|
|
||||||
using namespace crucible;
|
using namespace crucible;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
@@ -101,6 +101,12 @@ const double BEES_HASH_TABLE_ANALYZE_INTERVAL = BEES_STATS_INTERVAL;
|
|||||||
// Stop growing the work queue after we have this many tasks queued
|
// Stop growing the work queue after we have this many tasks queued
|
||||||
const size_t BEES_MAX_QUEUE_SIZE = 128;
|
const size_t BEES_MAX_QUEUE_SIZE = 128;
|
||||||
|
|
||||||
|
// Read this many items at a time in SEARCHv2
|
||||||
|
const size_t BEES_MAX_CRAWL_ITEMS = 8;
|
||||||
|
|
||||||
|
// Read this many bytes at a time in SEARCHv2 (one maximum-sized metadata page)
|
||||||
|
const size_t BEES_MAX_CRAWL_BYTES = 64 * 1024;
|
||||||
|
|
||||||
// Insert this many items before switching to a new subvol
|
// Insert this many items before switching to a new subvol
|
||||||
const size_t BEES_MAX_CRAWL_BATCH = 128;
|
const size_t BEES_MAX_CRAWL_BATCH = 128;
|
||||||
|
|
||||||
@@ -110,6 +116,9 @@ const size_t BEES_TRANSID_FACTOR = 10;
|
|||||||
// Wait this long for a balance to stop
|
// Wait this long for a balance to stop
|
||||||
const double BEES_BALANCE_POLL_INTERVAL = 60.0;
|
const double BEES_BALANCE_POLL_INTERVAL = 60.0;
|
||||||
|
|
||||||
|
// Workaround for backref bugs
|
||||||
|
const bool BEES_SERIALIZE_RESOLVE = false;
|
||||||
|
|
||||||
// Workaround for tree mod log bugs
|
// Workaround for tree mod log bugs
|
||||||
const bool BEES_SERIALIZE_BALANCE = false;
|
const bool BEES_SERIALIZE_BALANCE = false;
|
||||||
|
|
||||||
@@ -260,7 +269,7 @@ ostream& operator<<(ostream &os, const BeesFileId &bfi);
|
|||||||
|
|
||||||
class BeesFileRange {
|
class BeesFileRange {
|
||||||
protected:
|
protected:
|
||||||
Fd m_fd;
|
mutable Fd m_fd;
|
||||||
mutable BeesFileId m_fid;
|
mutable BeesFileId m_fid;
|
||||||
off_t m_begin = 0, m_end = 0;
|
off_t m_begin = 0, m_end = 0;
|
||||||
mutable off_t m_file_size = -1;
|
mutable off_t m_file_size = -1;
|
||||||
@@ -301,7 +310,7 @@ public:
|
|||||||
Fd fd() const;
|
Fd fd() const;
|
||||||
|
|
||||||
// Get the fd, opening it if necessary
|
// Get the fd, opening it if necessary
|
||||||
Fd fd(const shared_ptr<BeesContext> &ctx);
|
Fd fd(const shared_ptr<BeesContext> &ctx) const;
|
||||||
|
|
||||||
BeesFileRange copy_closed() const;
|
BeesFileRange copy_closed() const;
|
||||||
|
|
||||||
@@ -336,7 +345,6 @@ public:
|
|||||||
BeesAddress(Type addr = ZERO) : m_addr(addr) {}
|
BeesAddress(Type addr = ZERO) : m_addr(addr) {}
|
||||||
BeesAddress(MagicValue addr) : m_addr(addr) {}
|
BeesAddress(MagicValue addr) : m_addr(addr) {}
|
||||||
BeesAddress& operator=(const BeesAddress &that) = default;
|
BeesAddress& operator=(const BeesAddress &that) = default;
|
||||||
BeesAddress(const BeesAddress &that) = default;
|
|
||||||
operator Type() const { return m_addr; }
|
operator Type() const { return m_addr; }
|
||||||
bool operator==(const BeesAddress &that) const;
|
bool operator==(const BeesAddress &that) const;
|
||||||
bool operator==(const MagicValue that) const { return *this == BeesAddress(that); }
|
bool operator==(const MagicValue that) const { return *this == BeesAddress(that); }
|
||||||
@@ -397,7 +405,6 @@ public:
|
|||||||
HashType e_hash;
|
HashType e_hash;
|
||||||
AddrType e_addr;
|
AddrType e_addr;
|
||||||
Cell(const Cell &) = default;
|
Cell(const Cell &) = default;
|
||||||
Cell &operator=(const Cell &) = default;
|
|
||||||
Cell(HashType hash, AddrType addr) : e_hash(hash), e_addr(addr) { }
|
Cell(HashType hash, AddrType addr) : e_hash(hash), e_addr(addr) { }
|
||||||
bool operator==(const Cell &e) const { return tie(e_hash, e_addr) == tie(e.e_hash, e.e_addr); }
|
bool operator==(const Cell &e) const { return tie(e_hash, e_addr) == tie(e.e_hash, e.e_addr); }
|
||||||
bool operator!=(const Cell &e) const { return tie(e_hash, e_addr) != tie(e.e_hash, e.e_addr); }
|
bool operator!=(const Cell &e) const { return tie(e_hash, e_addr) != tie(e.e_hash, e.e_addr); }
|
||||||
@@ -461,7 +468,7 @@ private:
|
|||||||
// Mutex/condvar for the writeback thread
|
// Mutex/condvar for the writeback thread
|
||||||
mutex m_dirty_mutex;
|
mutex m_dirty_mutex;
|
||||||
condition_variable m_dirty_condvar;
|
condition_variable m_dirty_condvar;
|
||||||
bool m_dirty = false;
|
bool m_dirty;
|
||||||
|
|
||||||
// Mutex/condvar to stop
|
// Mutex/condvar to stop
|
||||||
mutex m_stop_mutex;
|
mutex m_stop_mutex;
|
||||||
@@ -495,8 +502,6 @@ private:
|
|||||||
|
|
||||||
BeesHashTable(const BeesHashTable &) = delete;
|
BeesHashTable(const BeesHashTable &) = delete;
|
||||||
BeesHashTable &operator=(const BeesHashTable &) = delete;
|
BeesHashTable &operator=(const BeesHashTable &) = delete;
|
||||||
|
|
||||||
static thread_local uniform_int_distribution<size_t> tl_distribution;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
ostream &operator<<(ostream &os, const BeesHashTable::Cell &bhte);
|
ostream &operator<<(ostream &os, const BeesHashTable::Cell &bhte);
|
||||||
@@ -634,7 +639,7 @@ private:
|
|||||||
ostream & operator<<(ostream &os, const BeesHash &bh);
|
ostream & operator<<(ostream &os, const BeesHash &bh);
|
||||||
|
|
||||||
class BeesBlockData {
|
class BeesBlockData {
|
||||||
using Blob = ByteVector;
|
using Blob = vector<uint8_t>;
|
||||||
|
|
||||||
mutable Fd m_fd;
|
mutable Fd m_fd;
|
||||||
off_t m_offset;
|
off_t m_offset;
|
||||||
@@ -807,7 +812,7 @@ class BeesResolver {
|
|||||||
BeesAddress m_addr;
|
BeesAddress m_addr;
|
||||||
vector<BtrfsInodeOffsetRoot> m_biors;
|
vector<BtrfsInodeOffsetRoot> m_biors;
|
||||||
set<BeesFileRange> m_ranges;
|
set<BeesFileRange> m_ranges;
|
||||||
size_t m_bior_count;
|
unsigned m_bior_count;
|
||||||
|
|
||||||
// We found matching data, so we can dedupe
|
// We found matching data, so we can dedupe
|
||||||
bool m_found_data = false;
|
bool m_found_data = false;
|
||||||
@@ -882,7 +887,6 @@ public:
|
|||||||
extern int bees_log_level;
|
extern int bees_log_level;
|
||||||
extern const char *BEES_USAGE;
|
extern const char *BEES_USAGE;
|
||||||
extern const char *BEES_VERSION;
|
extern const char *BEES_VERSION;
|
||||||
extern thread_local default_random_engine bees_generator;
|
|
||||||
string pretty(double d);
|
string pretty(double d);
|
||||||
void bees_sync(int fd);
|
void bees_sync(int fd);
|
||||||
void bees_readahead(int fd, off_t offset, size_t size);
|
void bees_readahead(int fd, off_t offset, size_t size);
|
||||||
|
263
src/btrsame.cc
Normal file
263
src/btrsame.cc
Normal file
@@ -0,0 +1,263 @@
|
|||||||
|
#include "crucible/error.h"
|
||||||
|
#include "crucible/fd.h"
|
||||||
|
#include "crucible/fs.h"
|
||||||
|
#include "crucible/string.h"
|
||||||
|
#include "crucible/time.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cassert>
|
||||||
|
#include <cmath>
|
||||||
|
#include <cstring>
|
||||||
|
#include <iostream>
|
||||||
|
#include <set>
|
||||||
|
|
||||||
|
#ifndef _GNU_SOURCE
|
||||||
|
#define _GNU_SOURCE /* for readahead() */
|
||||||
|
#endif
|
||||||
|
#include <fcntl.h>
|
||||||
|
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
using namespace crucible;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
// Ioctl wrapper class instantiated by bees_same_file() for each dedupe request.
#define EXTENT_SAME_CLASS BtrfsExtentSame

// When true, bees_same_file() always truncates the compared size down to a
// multiple of EXTENT_ALIGNMENT, even if both files have the same size.
static const bool ALWAYS_ALIGN = false;

// When true, main() opens the second file with O_RDONLY instead of O_RDWR.
static const bool OPEN_RDONLY = true;

// Alignment (in bytes) applied to the common size of the two files.
static const int EXTENT_ALIGNMENT = 4 * 1024;

// Smallest request length the step-size backoff will use.
// Not a good idea to go below 4K.
const off_t min_step_size = 4 * 1024;

// Largest request length.  (An alternative would be BTRFS_MAX_DEDUPE_LEN.)
// btrfs maximum extent size is 128M, there is nothing to gain by going larger;
// however, going smaller will create a bunch of adjacent split extent refs.
const off_t max_step_size = 128 * 1024 * 1024;
|
||||||
|
|
||||||
|
// Half-open range [m_start, m_end) of physical byte addresses that back a
// position in a file.  The constructor fills both fields in.
struct PhysicalBlockRange {
	uint64_t m_start;
	uint64_t m_end;

	// Look up the range for `offset` in the file open on `fd`, examining at
	// most `len` bytes (one 4K block by default).
	PhysicalBlockRange(int fd, uint64_t offset, uint64_t len = 4096);
};
|
||||||
|
|
||||||
|
// Query the extent map of `fd` for [offset, offset + len) and record the
// physical range behind `offset`, taken from the first extent returned.
// A hole is reported with m_start == 0 and m_end equal to the size of the
// hole as seen from `offset` (or `len` when no extent was returned at all).
PhysicalBlockRange::PhysicalBlockRange(int fd, uint64_t offset, uint64_t len) :
	m_start(0), m_end(0)
{
	Fiemap extent_map(offset, len);
	extent_map.do_ioctl(fd);

	// No extents in range, we are in a hole after the last extent
	if (extent_map.m_extents.empty()) {
		m_end = len;
		return;
	}

	const FiemapExtent &first = extent_map.m_extents.at(0);

	// Extent begins after offset, we are in a hole before the extent
	if (first.fe_logical > offset) {
		m_end = first.fe_logical - offset;
		return;
	}

	// TODO: reject preallocated and delallocated extents too
	// TODO: well preallocated might be OK for dedup

	// Distance of `offset` from the beginning of the extent; the same
	// distance applies on the physical side.
	const uint64_t skip = offset - first.fe_logical;
	const uint64_t remaining = first.fe_length - skip;

	m_start = first.fe_physical + skip;
	m_end = m_start + remaining;
}
|
||||||
|
|
||||||
|
// Report whether verbose output was requested through the BTRSAME_VERBOSE
// environment variable.  The environment is consulted on the first call only;
// later calls return the remembered answer.
static
bool
verbose()
{
	static const bool enabled = (getenv("BTRSAME_VERBOSE") != nullptr);
	return enabled;
}
|
||||||
|
|
||||||
|
// Format a quantity with a binary-magnitude suffix: the value is divided by
// 1024 until it drops below 1024 or the largest suffix ("E") is reached, then
// rounded to three decimal places and printed with the suffix appended.
static
std::string
pretty(double d)
{
	static const char *const suffixes[] = { "", "K", "M", "G", "T", "P", "E" };
	const size_t last_suffix = sizeof(suffixes) / sizeof(suffixes[0]) - 1;

	size_t which = 0;
	for (; d >= 1024 && which < last_suffix; ++which) {
		d /= 1024;
	}

	std::ostringstream out;
	out << (std::round(d * 1000.0) / 1000.0) << suffixes[which];
	return out.str();
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void
|
||||||
|
bees_same_file(Fd incumbent_fd, Fd candidate_fd)
|
||||||
|
{
|
||||||
|
Stat incumbent_stat(incumbent_fd);
|
||||||
|
Stat candidate_stat(candidate_fd);
|
||||||
|
off_t common_size = min(incumbent_stat.st_size, candidate_stat.st_size);
|
||||||
|
|
||||||
|
// If we are using clone instead of extent-same then we can ignore
|
||||||
|
// the alignment restriction for the last block of the dest file.
|
||||||
|
// This only works when both files are the same size.
|
||||||
|
if (ALWAYS_ALIGN || candidate_stat.st_size != incumbent_stat.st_size) {
|
||||||
|
common_size &= ~(EXTENT_ALIGNMENT - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (verbose()) {
|
||||||
|
cerr << "A size " << incumbent_stat.st_size << ", B size " << candidate_stat.st_size << ", common size " << common_size << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
off_t total_deduped = 0;
|
||||||
|
int status_ok = 0, status_err = 0, status_different = 0;
|
||||||
|
off_t step_size = max_step_size;
|
||||||
|
uint64_t total_differences = 0;
|
||||||
|
uint64_t total_shared = 0;
|
||||||
|
uint64_t total_holes = 0;
|
||||||
|
|
||||||
|
bool fatal_error = false;
|
||||||
|
|
||||||
|
vector<uint8_t> silly_buffer(max_step_size);
|
||||||
|
|
||||||
|
off_t p;
|
||||||
|
off_t len = 0;
|
||||||
|
ostringstream oss;
|
||||||
|
Timer elapsed;
|
||||||
|
Timer timer;
|
||||||
|
for (p = 0; p < common_size && !fatal_error; ) {
|
||||||
|
off_t this_step_size = step_size;
|
||||||
|
len = min(common_size - p, step_size);
|
||||||
|
|
||||||
|
if (timer > 1.0) {
|
||||||
|
cerr << oss.str() << flush;
|
||||||
|
timer.reset();
|
||||||
|
}
|
||||||
|
oss.str("");
|
||||||
|
oss << "\r"
|
||||||
|
<< "total " << pretty(common_size)
|
||||||
|
<< (total_deduped ? " **DUP** " : " dup ") << pretty(total_deduped)
|
||||||
|
<< " diff " << pretty(total_differences)
|
||||||
|
<< " shared " << pretty(total_shared)
|
||||||
|
<< " holes " << pretty(total_holes)
|
||||||
|
<< " off " << pretty(p)
|
||||||
|
<< " len " << pretty(len)
|
||||||
|
<< " elapsed " << elapsed
|
||||||
|
<< " \b\b\b";
|
||||||
|
|
||||||
|
PhysicalBlockRange incumbent_pbr(incumbent_fd, p, len);
|
||||||
|
PhysicalBlockRange candidate_pbr(candidate_fd, p, len);
|
||||||
|
|
||||||
|
if (incumbent_pbr.m_start == candidate_pbr.m_start) {
|
||||||
|
off_t shared_len = min(incumbent_pbr.m_end - incumbent_pbr.m_start, candidate_pbr.m_end - candidate_pbr.m_start);
|
||||||
|
this_step_size = max(min_step_size, min(shared_len, common_size - p));
|
||||||
|
total_shared += this_step_size;
|
||||||
|
len = shared_len;
|
||||||
|
// At this point, if we see anything shared, it's because we already deduped the whole thing
|
||||||
|
// unless it's a hole. We do have those.
|
||||||
|
if (!incumbent_pbr.m_start) {
|
||||||
|
total_holes += shared_len;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
|
||||||
|
DIE_IF_MINUS_ONE(readahead(incumbent_fd, p, len));
|
||||||
|
DIE_IF_MINUS_ONE(readahead(candidate_fd, p, len));
|
||||||
|
// The above kernel calls request readahead, same as posix_fadvise ... MADV_WILLNEED
|
||||||
|
// but in btrfs the readahead iops are scheduled at idle priority.
|
||||||
|
// This is not what we want, so here we can use read to force non-idle priority
|
||||||
|
// (or just use a scheduler that doesn't support io priority).
|
||||||
|
// DIE_IF_MINUS_ONE(pread(incumbent_fd, silly_buffer.data(), len, p));
|
||||||
|
// DIE_IF_MINUS_ONE(pread(candidate_fd, silly_buffer.data(), len, p));
|
||||||
|
|
||||||
|
EXTENT_SAME_CLASS bes(incumbent_fd, p, len);
|
||||||
|
bes.add(candidate_fd, p);
|
||||||
|
bes.do_ioctl();
|
||||||
|
|
||||||
|
// Don't need it any more, might either speed up page reclaim, or
|
||||||
|
// make us block waiting for writeback.
|
||||||
|
DIE_IF_MINUS_ONE(posix_fadvise(incumbent_fd, p, len, POSIX_FADV_DONTNEED));
|
||||||
|
DIE_IF_MINUS_ONE(posix_fadvise(candidate_fd, p, len, POSIX_FADV_DONTNEED));
|
||||||
|
|
||||||
|
int status = bes.m_info[0].status;
|
||||||
|
|
||||||
|
if (status == 0) {
|
||||||
|
++status_ok;
|
||||||
|
total_deduped += bes.m_info[0].bytes_deduped;
|
||||||
|
if (step_size * 2 <= max_step_size) {
|
||||||
|
step_size *= 2;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (status < 0) {
|
||||||
|
oss << " (" << strerror(-status) << ", errno = " << -status << ")" << endl;
|
||||||
|
++status_err;
|
||||||
|
switch (-status) {
|
||||||
|
case EXDEV:
|
||||||
|
oss << " (fatal error, paths are not on the same mount point?)" << endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else if (status == BTRFS_SAME_DATA_DIFFERS) {
|
||||||
|
++status_different;
|
||||||
|
} else {
|
||||||
|
++status_err;
|
||||||
|
}
|
||||||
|
if (step_size > min_step_size) {
|
||||||
|
step_size = min_step_size;
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
total_differences += step_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
p += len;
|
||||||
|
}
|
||||||
|
cerr << oss.str() << "\r"
|
||||||
|
<< "total " << pretty(common_size)
|
||||||
|
<< (total_deduped ? " **DUP** " : " dup ") << pretty(total_deduped)
|
||||||
|
<< " diff " << pretty(total_differences)
|
||||||
|
<< " shared " << pretty(total_shared)
|
||||||
|
<< " holes " << pretty(total_holes)
|
||||||
|
<< " off " << pretty(p)
|
||||||
|
<< " len " << pretty(len)
|
||||||
|
<< " elapsed " << elapsed
|
||||||
|
<< " "
|
||||||
|
<< endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
if (argc != 3) {
|
||||||
|
cerr << "Usage: " << argv[0] << " file1 file2" << endl;
|
||||||
|
cerr << "Uses the BTRFS_EXTENT_SAME ioctl to deduplicate file1 and file2" << endl;
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (verbose()) {
|
||||||
|
cerr << "A: " << argv[1] << endl;
|
||||||
|
}
|
||||||
|
Fd incumbent_fd = open_or_die(argv[1], O_RDONLY);
|
||||||
|
|
||||||
|
if (verbose()) {
|
||||||
|
cerr << "B: " << argv[2] << endl;
|
||||||
|
}
|
||||||
|
Fd candidate_fd = open_or_die(argv[2], OPEN_RDONLY ? O_RDONLY : O_RDWR);
|
||||||
|
|
||||||
|
bees_same_file(incumbent_fd, candidate_fd);
|
||||||
|
|
||||||
|
// any run that doesn't end with terminate() is success,
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
35
src/clone-cat.cc
Normal file
35
src/clone-cat.cc
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
#include "crucible/error.h"
|
||||||
|
#include "crucible/fd.h"
|
||||||
|
#include "crucible/fs.h"
|
||||||
|
|
||||||
|
using namespace crucible;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
int
|
||||||
|
main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
if (argc <= 2) {
|
||||||
|
cerr << "Usage: " << argv[0] << " FILE1 FILE2 [...FILEn] > OUTFILE" << endl;
|
||||||
|
cerr << "Catenates FILE1..FILEN using copy_file_range" << endl;
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
off_t out_pos = 0;
|
||||||
|
|
||||||
|
while (*++argv) {
|
||||||
|
string filename(*argv);
|
||||||
|
|
||||||
|
Fd input_fd = open_or_die(filename, O_RDONLY);
|
||||||
|
|
||||||
|
Stat st(input_fd);
|
||||||
|
|
||||||
|
off_t len = st.st_size;
|
||||||
|
|
||||||
|
cerr << "clone_range(" << filename << ", 0, " << len << ", STDOUT_FILENO, " << out_pos << ")" << flush;
|
||||||
|
btrfs_clone_range(input_fd, 0, len, STDOUT_FILENO, out_pos);
|
||||||
|
out_pos += len;
|
||||||
|
cerr << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
35
src/clone-split.cc
Normal file
35
src/clone-split.cc
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
#include "crucible/error.h"
|
||||||
|
#include "crucible/fd.h"
|
||||||
|
#include "crucible/fs.h"
|
||||||
|
|
||||||
|
using namespace crucible;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
int
|
||||||
|
main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
if (argc != 3) {
|
||||||
|
cerr << "Usage: " << argv[0] << " FILE SIZE" << endl;
|
||||||
|
cerr << "Splits FILE into SIZE-byte pieces using copy_file_range" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
string filename(argv[1]);
|
||||||
|
off_t out_size(stoull(argv[2], 0, 0));
|
||||||
|
|
||||||
|
Fd input_fd = open_or_die(filename, O_RDONLY);
|
||||||
|
|
||||||
|
Stat st(input_fd);
|
||||||
|
|
||||||
|
for (off_t pos = 0; pos < st.st_size; pos += out_size) {
|
||||||
|
char pos_name[64];
|
||||||
|
off_t len = min(st.st_size - pos, out_size);
|
||||||
|
snprintf(pos_name, sizeof(pos_name), "0x%016llx", static_cast<long long>(pos));
|
||||||
|
string out_name = filename + '.' + pos_name;
|
||||||
|
cout << out_name << flush;
|
||||||
|
Fd output_fd = open_or_die(out_name, O_WRONLY | O_EXCL | O_CREAT);
|
||||||
|
btrfs_clone_range(input_fd, pos, len, output_fd, 0);
|
||||||
|
cout << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
@@ -22,21 +22,19 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
cout << "File: " << filename << endl;
|
cout << "File: " << filename << endl;
|
||||||
Fd fd = open_or_die(filename, O_RDONLY);
|
Fd fd = open_or_die(filename, O_RDONLY);
|
||||||
uint64_t start = 0;
|
Fiemap fm;
|
||||||
uint64_t length = Fiemap::s_fiemap_max_offset;
|
fm.fm_flags &= ~(FIEMAP_FLAG_SYNC);
|
||||||
if (argc > 2) { start = stoull(argv[2], nullptr, 0); }
|
|
||||||
if (argc > 3) { length = stoull(argv[3], nullptr, 0); }
|
|
||||||
length = min(length, Fiemap::s_fiemap_max_offset - start);
|
|
||||||
Fiemap fm(start, length);
|
|
||||||
fm.m_flags &= ~(FIEMAP_FLAG_SYNC);
|
|
||||||
fm.m_max_count = 100;
|
fm.m_max_count = 100;
|
||||||
if (argc > 4) { fm.m_flags = stoull(argv[4], nullptr, 0); }
|
if (argc > 2) { fm.fm_start = stoull(argv[2], nullptr, 0); }
|
||||||
uint64_t stop_at = start + length;
|
if (argc > 3) { fm.fm_length = stoull(argv[3], nullptr, 0); }
|
||||||
uint64_t last_byte = start;
|
if (argc > 4) { fm.fm_flags = stoull(argv[4], nullptr, 0); }
|
||||||
|
fm.fm_length = min(fm.fm_length, FIEMAP_MAX_OFFSET - fm.fm_start);
|
||||||
|
uint64_t stop_at = fm.fm_start + fm.fm_length;
|
||||||
|
uint64_t last_byte = fm.fm_start;
|
||||||
do {
|
do {
|
||||||
fm.do_ioctl(fd);
|
fm.do_ioctl(fd);
|
||||||
// cerr << fm;
|
// cerr << fm;
|
||||||
uint64_t last_logical = Fiemap::s_fiemap_max_offset;
|
uint64_t last_logical = FIEMAP_MAX_OFFSET;
|
||||||
for (auto &extent : fm.m_extents) {
|
for (auto &extent : fm.m_extents) {
|
||||||
if (extent.fe_logical > last_byte) {
|
if (extent.fe_logical > last_byte) {
|
||||||
cout << "Log " << to_hex(last_byte) << ".." << to_hex(extent.fe_logical) << " Hole" << endl;
|
cout << "Log " << to_hex(last_byte) << ".." << to_hex(extent.fe_logical) << " Hole" << endl;
|
||||||
@@ -47,8 +45,8 @@ main(int argc, char **argv)
|
|||||||
last_logical = extent.fe_logical + extent.fe_length;
|
last_logical = extent.fe_logical + extent.fe_length;
|
||||||
last_byte = last_logical;
|
last_byte = last_logical;
|
||||||
}
|
}
|
||||||
fm.m_start = last_logical;
|
fm.fm_start = last_logical;
|
||||||
} while (fm.m_start < stop_at);
|
} while (fm.fm_start < stop_at);
|
||||||
});
|
});
|
||||||
exit(EXIT_SUCCESS);
|
exit(EXIT_SUCCESS);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user