Mirror of https://github.com/Zygo/bees.git, synced 2025-08-03 14:23:29 +02:00.

Compare commits: 50 commits.

69e9bdfb0f, 7a197e2f33, 43d38ca536, 7b0ed6a411, 8d4d153d1d, d5a6c30623, 25f7ced27b, c1af219246, 9c183c2c22, 59f8a467c3, 9987aa8583, da32667e02, 8080abac97, 1e139d0ccc, 6542917ffa, b99d80b40f, 099ad2ce7c, a59a02174f, e22653e2c6, 44810d6df8, 8f92b1dacc, 0b974b5485, ce0367dafe, 54ed6e1cff, 24b08ef7b7, 97eab9655c, 05bf1ebf76, 606ac01d56, 72c3bf8438, 72958a5e47, f25b4c81ba, a64603568b, 33cde5de97, 5414c7344f, 8bac00433d, 088cbc951a, e78e05e212, 8d08a3c06f, cdcdf8e218, 37f5b1bfa8, abe2afaeb2, 792fdbbb13, 30a4fb52cb, 90d7075358, faac895568, a7baa565e4, b408eac98e, 75131f396f, cfb7592859, 3839690ba3
docs/btrfs-kernel.md

```diff
@@ -67,8 +67,10 @@ These bugs are particularly popular among bees users, though not all are specifi
 | - | 5.18 | wrong superblock num_devices makes filesystem unmountable | 4.14.283, 4.19.247, 5.4.198, 5.10.121, 5.15.46, 5.17.14, 5.18.3, 5.19 and later | d201238ccd2f btrfs: repair super block num_devices automatically
 | 5.18 | 5.19 | parent transid verify failed during log tree replay after a crash during a rename operation | 5.18.18, 5.19.2, 6.0 and later | 723df2bcc9e1 btrfs: join running log transaction when logging new name
 | 5.12 | 6.0 | space cache corruption and potential double allocations | 5.15.65, 5.19.6, 6.0 and later | ced8ecf026fd btrfs: fix space cache corruption and potential double allocations
+| 6.0 | 6.5 | suboptimal allocation in multi-device filesystems due to chunk allocator regression | 6.1.60, 6.5.9, 6.6 and later | 8a540e990d7d btrfs: fix stripe length calculation for non-zoned data chunk allocation
 | 6.3, backported to 5.15.107, 6.1.24, 6.2.11 | 6.3 | vmalloc error, failed to allocate pages | 6.3.10, 6.4 and later. Bug (f349b15e183d "mm: vmalloc: avoid warn_alloc noise caused by fatal signal" in v6.3-rc6) backported to 6.1.24, 6.2.11, and 5.15.107. | 95a301eefa82 mm/vmalloc: do not output a spurious warning when huge vmalloc() fails
 | 6.2 | 6.3 | `IGNORE_OFFSET` flag ignored in `LOGICAL_INO` ioctl | 6.2.16, 6.3.3, 6.4 and later | 0cad8f14d70c btrfs: fix backref walking not returning all inode refs
+| 6.10 | 6.11 | `adding refs to an existing tree ref`, `failed to run delayed ref`, then read-only | 6.11.10, 6.12 and later | 7d493a5ecc26 btrfs: fix incorrect comparison for delayed refs
 | 5.4 | - | kernel hang when multiple threads are running `LOGICAL_INO` and dedupe ioctl on the same extent | - | workaround: avoid doing that
 
 "Last bad kernel" refers to that version's last stable update from
```
docs/config.md (201 lines changed)

```diff
@@ -98,27 +98,72 @@ code files over and over, so it will need a smaller hash table than a
 backup server which has to refer to the oldest data on the filesystem
 every time a new client machine's data is added to the server.
 
-Scanning modes for multiple subvols
------------------------------------
+Scanning modes
+--------------
 
-The `--scan-mode` option affects how bees schedules worker threads
-between subvolumes. Scan modes are an experimental feature and will
-likely be deprecated in favor of a better solution.
+The `--scan-mode` option affects how bees iterates over the filesystem,
+schedules extents for scanning, and tracks progress.
 
-Scan mode can be changed at any time by restarting bees with a different
-mode option. Scan state tracking is the same for all of the currently
-implemented modes. The difference between the modes is the order in
-which subvols are selected.
+There are now two kinds of scan mode: the legacy **subvol** scan modes,
+and the new **extent** scan mode.
 
-If a filesystem has only one subvolume with data in it, then the
-`--scan-mode` option has no effect. In this case, there is only one
-subvolume to scan, so worker threads will all scan that one.
+Scan mode can be changed by restarting bees with a different scan mode
+option.
 
-Within a subvol, there is a single optimal scan order: files are scanned
-in ascending numerical inode order. Each worker will scan a different
-inode to avoid having the threads contend with each other for locks.
-File data is read sequentially and in order, but old blocks from earlier
-scans are skipped.
+Extent scan mode:
+
+* Works with 4.15 and later kernels.
+* Can estimate progress and provide an ETA.
+* Can optimize scanning order to dedupe large extents first.
+* Cannot avoid modifying read-only subvols.
+* Can keep up with frequent creation and deletion of snapshots.
+
+Subvol scan modes:
+
+* Work with 4.14 and earlier kernels.
+* Cannot estimate or report progress.
+* Cannot optimize scanning order by extent size.
+* Can avoid modifying read-only subvols (for `btrfs send` workaround).
+* Have problems keeping up with snapshots created during a scan.
+
+The default scan mode is 4, "extent".
+
+If you are using bees for the first time on a filesystem with many
+existing snapshots, you should read about [snapshot gotchas](gotchas.md).
+
+Subvol scan modes
+-----------------
+
+Subvol scan modes are maintained for compatibility with existing
+installations, but will not be developed further. New installations
+should use extent scan mode instead.
+
+The _quantity_ of text below detailing the shortcomings of each subvol
+scan mode should be informative all by itself.
+
+Subvol scan modes work on any kernel version supported by bees. They
+are the only scan modes usable on kernel 4.14 and earlier.
+
+The difference between the subvol scan modes is the order in which the
+files from different subvols are fed into the scanner. They all scan
+files in inode number order, from low to high offset within each inode,
+the same way that a program like `cat` would read files (but skipping
+over old data from earlier btrfs transactions).
+
+If a filesystem has only one subvolume with data in it, then all of
+the subvol scan modes are equivalent. In this case, there is only one
+subvolume to scan, so every possible ordering of subvols is the same.
+
+The `--workaround-btrfs-send` option pauses scanning subvols that are
+read-only. If the subvol is made read-write (e.g. with `btrfs prop set
+$subvol ro false`), or if the `--workaround-btrfs-send` option is removed,
+then the scan of that subvol is unpaused and dedupe proceeds normally.
+Space will only be recovered when the last read-only subvol is deleted.
+
+Subvol scan modes cannot efficiently or accurately calculate an ETA for
+completion or estimate progress through the data. They simply request
+"the next new inode" from btrfs, and they are completed when btrfs says
+there is no next new inode.
 
 Between subvols, there are several scheduling algorithms with different
 trade-offs:
```
```diff
@@ -126,53 +171,99 @@ trade-offs:
 Scan mode 0, "lockstep", scans the same inode number in each subvol at
 close to the same time. This is useful if the subvols are snapshots
 with a common ancestor, since the same inode number in each subvol will
-have similar or identical contents. This maximizes the likelihood
-that all of the references to a snapshot of a file are scanned at
-close to the same time, improving dedupe hit rate and possibly taking
-advantage of VFS caching in the Linux kernel. If the subvols are
-unrelated (i.e. not snapshots of a single subvol) then this mode does
-not provide significant benefit over random selection. This mode uses
-smaller amounts of temporary space for shorter periods of time when most
-subvols are snapshots. When a new snapshot is created, this mode will
-stop scanning other subvols and scan the new snapshot until the same
-inode number is reached in each subvol, which will effectively stop
-dedupe temporarily as this data has already been scanned and deduped
-in the other snapshots.
+have similar or identical contents. This maximizes the likelihood that
+all of the references to a snapshot of a file are scanned at close to
+the same time, improving dedupe hit rate. If the subvols are unrelated
+(i.e. not snapshots of a single subvol) then this mode does not provide
+any significant advantage. This mode uses smaller amounts of temporary
+space for shorter periods of time when most subvols are snapshots. When a
+new snapshot is created, this mode will stop scanning other subvols and
+scan the new snapshot until the same inode number is reached in each
+subvol, which will effectively stop dedupe temporarily as this data has
+already been scanned and deduped in the other snapshots.
 
-Scan mode 1, "independent", scans the next inode with new data in each
-subvol. Each subvol's scanner shares inodes uniformly with all other
-subvol scanners until the subvol has no new inodes left. This mode makes
-continuous forward progress across the filesystem and provides average
-performance across a variety of workloads, but is slow to respond to new
-data, and may spend a lot of time deduping short-lived subvols that will
-soon be deleted when it is preferable to dedupe long-lived subvols that
-will be the origin of future snapshots. When a new snapshot is created,
-previous subvol scans continue as before, but the time is now divided
-among one more subvol.
+Scan mode 1, "independent", scans the next inode with new data in
+each subvol. There is no coordination between the subvols, other than
+round-robin distribution of files from each subvol to each worker thread.
+This mode makes continuous forward progress in all subvols. When a new
+snapshot is created, previous subvol scans continue as before, but the
+worker threads are now divided among one more subvol.
 
 Scan mode 2, "sequential", scans one subvol at a time, in numerical subvol
-ID order, processing each subvol completely before proceeding to the
-next subvol. This avoids spending time scanning short-lived snapshots
-that will be deleted before they can be fully deduped (e.g. those used
-for `btrfs send`). Scanning is concentrated on older subvols that are
-more likely to be origin subvols for future snapshots, eliminating the
-need to dedupe future snapshots separately. This mode uses the largest
-amount of temporary space for the longest time, and typically requires
-a larger hash table to maintain dedupe hit rate.
+ID order, processing each subvol completely before proceeding to the next
+subvol. This avoids spending time scanning short-lived snapshots that
+will be deleted before they can be fully deduped (e.g. those used for
+`btrfs send`). Scanning starts on older subvols that are more likely
+to be origin subvols for future snapshots, eliminating the need to
+dedupe future snapshots separately. This mode uses the largest amount
+of temporary space for the longest time, and typically requires a larger
+hash table to maintain dedupe hit rate.
 
 Scan mode 3, "recent", scans the subvols with the highest `min_transid`
 value first (i.e. the ones that were most recently completely scanned),
 then falls back to "independent" mode to break ties. This interrupts
-long scans of old subvols to give a rapid dedupe response to new data,
-then returns to the old subvols after the new data is scanned. It is
-useful for large filesystems with multiple active subvols and rotating
-snapshots, where the first-pass scan can take months, but new duplicate
-data appears every day.
+long scans of old subvols to give a rapid dedupe response to new data
+in previously scanned subvols, then returns to the old subvols after
+the new data is scanned.
 
-The default scan mode is 1, "independent".
+Extent scan mode
+----------------
 
-If you are using bees for the first time on a filesystem with many
-existing snapshots, you should read about [snapshot gotchas](gotchas.md).
+Scan mode 4, "extent", scans the extent tree instead of the subvol trees.
+Extent scan mode reads each extent once, regardless of the number of
+reflinks or snapshots. It adapts to the creation of new snapshots
+immediately, without having to revisit old data.
+
+In the extent scan mode, extents are separated into multiple size tiers
+to prioritize large extents over small ones. Deduping large extents
+keeps the metadata update cost low per block saved, resulting in faster
+dedupe at the start of a scan cycle. This is important for maximizing
+performance in use cases where bees runs for a limited time, such as
+during an overnight maintenance window.
+
+Once the larger size tiers are completed, dedupe space recovery speeds
+slow down significantly. It may be desirable to stop bees running once
+the larger size tiers are finished, then start bees running some time
+later after new data has appeared.
+
+Each extent is mapped in physical address order, and all extent references
+are submitted to the scanner at the same time, resulting in much better
+cache behavior and dedupe performance compared to the subvol scan modes.
+
+The "extent" scan mode is not usable on kernels before 4.15 because
+it relies on the `LOGICAL_INO_V2` ioctl added in that kernel release.
+When using bees with an older kernel, only subvol scan modes will work.
+
+Extents are divided into virtual subvols by size, using reserved btrfs
+subvol IDs 250..255. The size tier groups are:
+* 250: 32M+1 and larger
+* 251: 8M+1..32M
+* 252: 2M+1..8M
+* 253: 512K+1..2M
+* 254: 128K+1..512K
+* 255: 128K and smaller (includes all compressed extents)
+
+Extent scan mode can efficiently calculate dedupe progress within
+the filesystem and estimate an ETA for completion within each size
+tier; however, the accuracy of the ETA can be questionable due to the
+non-uniform distribution of block addresses in a typical user filesystem.
+
+Older versions of bees do not recognize the virtual subvols, so running
+an old bees version after running a new bees version will reset the
+"extent" scan mode's progress in `beescrawl.dat` to the beginning.
+This may change in future bees releases, i.e. extent scans will store
+their checkpoint data somewhere else.
+
+The `--workaround-btrfs-send` option behaves differently in extent
+scan modes: In extent scan mode, dedupe proceeds on all subvols that are
+read-write, but all subvols that are read-only are excluded from dedupe.
+Space will only be recovered when the last read-only subvol is deleted.
+
+During `btrfs send` all duplicate extents in the sent subvol will not be
+removed (the kernel will reject dedupe commands while send is active,
+and bees currently will not re-issue them after the send is complete).
+It may be preferable to terminate the bees process while running `btrfs
+send` in extent scan mode, and restart bees after the `send` is complete.
 
 Threads and load management
 ---------------------------
```
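An aside on the size tier table added above: the tier boundaries translate directly into a lookup from extent size to reserved virtual subvol ID. A minimal sketch in C++ (illustrative only, not bees source; the function name is made up):

```cpp
// Sketch only: maps an extent's byte count to the virtual subvol ID
// ("size tier") listed in the config.md hunk above.
#include <cstdint>

uint64_t size_tier_subvol(uint64_t extent_bytes)
{
	const uint64_t K = 1024, M = 1024 * K;
	if (extent_bytes > 32 * M)  return 250; // 32M+1 and larger
	if (extent_bytes > 8 * M)   return 251; // 8M+1..32M
	if (extent_bytes > 2 * M)   return 252; // 2M+1..8M
	if (extent_bytes > 512 * K) return 253; // 512K+1..2M
	if (extent_bytes > 128 * K) return 254; // 128K+1..512K
	return 255; // 128K and smaller, includes all compressed extents
}
```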
docs/event-counters.md

```diff
@@ -120,9 +120,12 @@ The `crawl` event group consists of operations related to scanning btrfs trees t
 
 * `crawl_again`: An inode crawl was restarted because the extent was already locked by another running crawl.
 * `crawl_blacklisted`: An extent was not scanned because it belongs to a blacklisted file.
-* `crawl_create`: A new subvol crawler was created.
-* `crawl_done`: One pass over all subvols on the filesystem was completed.
+* `crawl_create`: A new subvol or extent crawler was created.
+* `crawl_deferred_inode`: Two tasks attempted to scan the same inode at the same time, so one was deferred.
+* `crawl_done`: One pass over a subvol was completed.
+* `crawl_discard`: An extent that didn't match the crawler's size tier was discarded.
 * `crawl_empty`: A `TREE_SEARCH_V2` ioctl call failed or returned an empty set (usually because all data in the subvol was scanned).
+* `crawl_extent`: The extent crawler queued all references to an extent for processing.
 * `crawl_fail`: A `TREE_SEARCH_V2` ioctl call failed.
 * `crawl_gen_high`: An extent item in the search results refers to an extent that is newer than the current crawl's `max_transid` allows.
 * `crawl_gen_low`: An extent item in the search results refers to an extent that is older than the current crawl's `min_transid` allows.
@@ -136,7 +139,10 @@ The `crawl` event group consists of operations related to scanning btrfs trees t
 * `crawl_push`: An extent item in the search results is suitable for scanning and deduplication.
 * `crawl_scan`: An extent item in the search results is submitted to `BeesContext::scan_forward` for scanning and deduplication.
 * `crawl_search`: A `TREE_SEARCH_V2` ioctl call was successful.
+* `crawl_throttled`: Extent scan created too many work queue items and was prevented from creating any more.
+* `crawl_tree_block`: Extent scan found and skipped a metadata tree block.
 * `crawl_unknown`: An extent item in the search results has an unrecognized type.
+* `crawl_unthrottled`: Extent scan allowed to create work queue items again.
 
 dedup
 -----
@@ -162,6 +168,25 @@ The `exception` event group consists of C++ exceptions. C++ exceptions are thro
 * `exception_caught`: Total number of C++ exceptions thrown and caught by a generic exception handler.
 * `exception_caught_silent`: Total number of "silent" C++ exceptions thrown and caught by a generic exception handler. These are exceptions which are part of the correct and normal operation of bees. The exceptions are logged at a lower log level.
 
+extent
+------
+
+The `extent` event group consists of events that occur within the extent scanner.
+
+* `extent_deferred_inode`: A lock conflict was detected when two worker threads attempted to manipulate the same inode at the same time.
+* `extent_empty`: A complete list of references to an extent was created but the list was empty, e.g. because all refs are in deleted inodes or snapshots.
+* `extent_fail`: An ioctl call to `LOGICAL_INO` failed.
+* `extent_forward`: An extent reference was submitted for scanning.
+* `extent_mapped`: A complete map of references to an extent was created and added to the crawl queue.
+* `extent_ok`: An ioctl call to `LOGICAL_INO` completed successfully.
+* `extent_overflow`: A complete map of references to an extent exceeded `BEES_MAX_EXTENT_REF_COUNT`, so the extent was dropped.
+* `extent_ref_missing`: An extent reference reported by `LOGICAL_INO` was not found by later `TREE_SEARCH_V2` calls.
+* `extent_ref_ok`: One extent reference was queued for scanning.
+* `extent_restart`: An extent reference was requeued to be scanned again after an active extent lock is released.
+* `extent_retry`: An extent reference was requeued to be scanned again after an active inode lock is released.
+* `extent_skip`: A 4K extent with more than 1000 refs was skipped.
+* `extent_zero`: An ioctl call to `LOGICAL_INO` succeeded, but reported an empty list of extents.
+
 hash
 ----
 
@@ -180,24 +205,6 @@ The `hash` event group consists of operations related to the bees hash table.
 * `hash_insert`: A `(hash, address)` pair was inserted by `BeesHashTable::push_random_hash_addr`.
 * `hash_lookup`: The hash table was searched for `(hash, address)` pairs matching a given `hash`.
 
-inserted
---------
-
-The `inserted` event group consists of operations related to storing hash and address data in the hash table (i.e. the hash table client).
-
-* `inserted_block`: Total number of data block references scanned and inserted into the hash table.
-* `inserted_clobbered`: Total number of data block references scanned and eliminated from the filesystem.
-
-matched
--------
-
-The `matched` event group consists of events related to matching incoming data blocks against existing hash table entries.
-
-* `matched_0`: A data block was scanned, hash table entries found, but no matching data blocks on the filesytem located.
-* `matched_1_or_more`: A data block was scanned, hash table entries found, and one or more matching data blocks on the filesystem located.
-* `matched_2_or_more`: A data block was scanned, hash table entries found, and two or more matching data blocks on the filesystem located.
-* `matched_3_or_more`: A data block was scanned, hash table entries found, and three or more matching data blocks on the filesystem located.
-
 open
 ----
 
@@ -259,12 +266,26 @@ The `pairforward` event group consists of events related to extending matching b
 * `pairforward_try`: Started extending a pair of matching block ranges forward.
 * `pairforward_zero`: A pair of matching block ranges could not be extended backward by one block because the src block contained all zeros and was not compressed.
 
+progress
+--------
+
+The `progress` event group consists of events related to progress estimation.
+
+* `progress_no_data_bg`: Failed to retrieve any data block groups from the filesystem.
+* `progress_not_created`: A crawler for one size tier had not been created for the extent scanner.
+* `progress_complete`: A crawler for one size tier has completed a scan.
+* `progress_not_found`: The extent position for a crawler does not correspond to any block group.
+* `progress_out_of_bg`: The extent position for a crawler does not correspond to any data block group.
+* `progress_ok`: Table of progress and ETA created successfully.
+
 readahead
 ---------
 
 The `readahead` event group consists of events related to calls to `posix_fadvise`.
 
-* `readahead_ms`: Total time spent running `posix_fadvise(..., POSIX_FADV_WILLNEED)` aka `readahead()`.
+* `readahead_clear`: Number of times the duplicate read cache was cleared.
+* `readahead_skip`: Number of times a duplicate read was identified in the cache and skipped.
+* `readahead_ms`: Total time spent emulating readahead in user-space (kernel readahead is not measured).
 * `readahead_unread_ms`: Total time spent running `posix_fadvise(..., POSIX_FADV_DONTNEED)`.
 
 replacedst
@@ -301,7 +322,7 @@ The `resolve` event group consists of operations related to translating a btrfs
 * `resolve_large`: The `LOGICAL_INO` ioctl returned more than 2730 results (the limit of the v1 ioctl).
 * `resolve_ms`: Total time spent in the `LOGICAL_INO` ioctl (i.e. wallclock time, not kernel CPU time).
 * `resolve_ok`: The `LOGICAL_INO` ioctl returned success.
-* `resolve_overflow`: The `LOGICAL_INO` ioctl returned more than 655050 extents (the limit of the v2 ioctl).
+* `resolve_overflow`: The `LOGICAL_INO` ioctl returned 9999 or more extents (the limit configured in `bees.h`).
 * `resolve_toxic`: The `LOGICAL_INO` ioctl took more than 0.1 seconds of kernel CPU time.
 
 root
@@ -329,35 +350,38 @@ The `scan` event group consists of operations related to scanning incoming data.
 
 * `scan_blacklisted`: A blacklisted extent was passed to `scan_forward` and dropped.
 * `scan_block`: A block of data was scanned.
-* `scan_bump`: After deduping a block range, the scan pointer had to be moved past the end of the deduped byte range.
-* `scan_dup_block`: Number of duplicate blocks deduped.
-* `scan_dup_hit`: A pair of duplicate block ranges was found and removed.
+* `scan_compressed_no_dedup`: An extent that was compressed contained non-zero, non-duplicate data.
+* `scan_dup_block`: Number of duplicate block references deduped.
+* `scan_dup_hit`: A pair of duplicate block ranges was found.
 * `scan_dup_miss`: A pair of duplicate blocks was found in the hash table but not in the filesystem.
-* `scan_eof`: Scan past EOF was attempted.
-* `scan_erase_redundant`: Blocks in the hash table were removed because they were removed from the filesystem by dedupe.
 * `scan_extent`: An extent was scanned (`scan_one_extent`).
-* `scan_extent_tiny`: An extent below 128K that was not the beginning or end of a file was scanned. No action is currently taken for these--they are merely counted.
 * `scan_forward`: A logical byte range was scanned (`scan_forward`).
 * `scan_found`: An entry was found in the hash table matching a scanned block from the filesystem.
 * `scan_hash_hit`: A block was found on the filesystem corresponding to a block found in the hash table.
 * `scan_hash_miss`: A block was not found on the filesystem corresponding to a block found in the hash table.
-* `scan_hash_preinsert`: A block was prepared for insertion into the hash table.
+* `scan_hash_preinsert`: A non-zero data block's hash was prepared for possible insertion into the hash table.
+* `scan_hash_insert`: A non-zero data block's hash was inserted into the hash table.
 * `scan_hole`: A hole extent was found during scan and ignored.
 * `scan_interesting`: An extent had flags that were not recognized by bees and was ignored.
 * `scan_lookup`: A hash was looked up in the hash table.
 * `scan_malign`: A block being scanned matched a hash at EOF in the hash table, but the EOF was not aligned to a block boundary and the two blocks did not have the same length.
-* `scan_no_fd`: References to a block from the hash table were found, but a FD could not be opened.
-* `scan_no_rewrite`: All blocks in an extent were removed by dedupe (i.e. no copies).
 * `scan_push_front`: An entry in the hash table matched a duplicate block, so the entry was moved to the head of its LRU list.
 * `scan_reinsert`: A copied block's hash and block address was inserted into the hash table.
 * `scan_resolve_hit`: A block address in the hash table was successfully resolved to an open FD and offset pair.
 * `scan_resolve_zero`: A block address in the hash table was not resolved to any subvol/inode pair, so the corresponding hash table entry was removed.
 * `scan_rewrite`: A range of bytes in a file was copied, then the copy deduped over the original data.
+* `scan_root_dead`: A deleted subvol was detected.
+* `scan_seen_clear`: The list of recently scanned extents reached maximum size and was cleared.
+* `scan_seen_erase`: An extent reference was modified by scan, so all future references to the extent must be scanned.
+* `scan_seen_hit`: A scan was skipped because the same extent had recently been scanned.
+* `scan_seen_insert`: An extent reference was not modified by scan and its hashes have been inserted into the hash table, so all future references to the extent can be ignored.
+* `scan_seen_miss`: A scan was not skipped because the same extent had not recently been scanned (i.e. the extent was scanned normally).
+* `scan_skip_bytes`: Nuisance dedupe or hole-punching would save less than half of the data in an extent.
+* `scan_skip_ops`: Nuisance dedupe or hole-punching would require too many dedupe/copy/hole-punch operations in an extent.
 * `scan_toxic_hash`: A scanned block has the same hash as a hash table entry that is marked toxic.
 * `scan_toxic_match`: A hash table entry points to a block that is discovered to be toxic.
 * `scan_twice`: Two references to the same block have been found in the hash table.
-* `scan_zero_compressed`: An extent that was compressed and contained only zero bytes was found.
-* `scan_zero_uncompressed`: A block that contained only zero bytes was found in an uncompressed extent.
+* `scan_zero`: A data block containing only zero bytes was detected.
 
 scanf
 -----
@@ -365,9 +389,10 @@ scanf
 The `scanf` event group consists of operations related to `BeesContext::scan_forward`. This is the entry point where `crawl` schedules new data for scanning.
 
 * `scanf_deferred_extent`: Two tasks attempted to scan the same extent at the same time, so one was deferred.
-* `scanf_deferred_inode`: Two tasks attempted to scan the same inode at the same time, so one was deferred.
+* `scanf_eof`: Scan past EOF was attempted.
 * `scanf_extent`: A btrfs extent item was scanned.
 * `scanf_extent_ms`: Total thread-seconds spent scanning btrfs extent items.
+* `scanf_no_fd`: References to a block from the hash table were found, but a FD could not be opened.
 * `scanf_total`: A logical byte range of a file was scanned.
 * `scanf_total_ms`: Total thread-seconds spent scanning logical byte ranges.
 
```
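Several of the counters above (`extent_fail`, `extent_ok`, and the `resolve` group) wrap the `LOGICAL_INO` ioctl. For orientation, a minimal sketch of a raw `LOGICAL_INO_V2` call; this is kernel UAPI from 4.15, not bees code, and it assumes kernel headers new enough to declare the `.flags` member (the fs.cc hunk further below shows the workaround bees uses for older headers). Error handling is omitted:

```cpp
// Sketch: resolve a logical (physical-extent) address to the inodes
// that reference it.  fd is any fd on the filesystem; the output buffer
// receives a struct btrfs_data_container.
#include <linux/btrfs.h>
#include <sys/ioctl.h>
#include <cstdint>
#include <vector>

int logical_ino_v2(int fd, uint64_t logical)
{
	std::vector<char> buf(65536);
	btrfs_ioctl_logical_ino_args args = {};
	args.logical = logical;     // byte address in the extent tree
	args.size = buf.size();     // size of the output container
	args.flags = BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET; // v2-only: return all refs
	args.inodes = reinterpret_cast<uintptr_t>(buf.data());
	return ioctl(fd, BTRFS_IOC_LOGICAL_INO_V2, &args);
}
```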
docs/gotchas.md

```diff
@@ -205,7 +205,7 @@ Other Gotchas
 
 * bees avoids the [slow backrefs kernel bug](btrfs-kernel.md) by
 measuring the time required to perform `LOGICAL_INO` operations.
-If an extent requires over 0.1 kernel CPU seconds to perform a
+If an extent requires over 5.0 kernel CPU seconds to perform a
 `LOGICAL_INO` ioctl, then bees blacklists the extent and avoids
 referencing it in future operations. In most cases, fewer than 0.1%
 of extents in a filesystem must be avoided this way. This results
```
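The "kernel CPU seconds" in the paragraph above are per-thread system time, not wall-clock time. A sketch of that kind of measurement (illustrative; not the bees implementation):

```cpp
// Illustrative sketch: measure the kernel CPU time consumed by the
// current thread, e.g. around a LOGICAL_INO ioctl, so that slow
// ("toxic") extents can be blacklisted.
#include <sys/resource.h>

double thread_kernel_cpu_seconds()
{
	struct rusage ru;
	getrusage(RUSAGE_THREAD, &ru);  // Linux-specific scope
	return ru.ru_stime.tv_sec + ru.ru_stime.tv_usec / 1e6;
}
// Sample before and after the ioctl; if the difference exceeds the
// threshold (5.0 s in the updated text above), avoid the extent.
```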
docs/missing.md

```diff
@@ -15,16 +15,9 @@ specific files (patches welcome).
 * PREALLOC extents and extents containing blocks filled with zeros will
 be replaced by holes. There is no way to turn this off.
 
-* Consecutive runs of duplicate blocks that are less than 12K in length
-can take 30% of the processing time while saving only 3% of the disk
-space. There should be an option to just not bother with those, but it's
-complicated by the btrfs requirement to always dedupe complete extents.
-
-* There is a lot of duplicate reading of blocks in snapshots. bees will
-scan all snapshots at close to the same time to try to get better
-performance by caching, but really fixing this requires rewriting the
-crawler to scan the btrfs extent tree directly instead of the subvol
-FS trees.
+* The fundamental unit of deduplication is the extent _reference_, when
+it should be the _extent_ itself. This is an architectural limitation
+that results in excess reads of extent data, even in the Extent scan mode.
 
 * Block reads are currently more allocation- and CPU-intensive than they
 should be, especially for filesystems on SSD where the IO overhead is
@@ -33,8 +26,9 @@ much smaller. This is a problem for CPU-power-constrained environments
 
 * bees can currently fragment extents when required to remove duplicate
 blocks, but has no defragmentation capability yet. When possible, bees
-will attempt to work with existing extent boundaries, but it will not
-aggregate blocks together from multiple extents to create larger ones.
+will attempt to work with existing extent boundaries and choose the
+largest fragments available, but it will not aggregate blocks together
+from multiple extents to create larger ones.
 
 * When bees fragments an extent, the copied data is compressed. There
 is currently no way (other than by modifying the source) to select a
```
docs/options.md

```diff
@@ -47,6 +47,7 @@
 * Mode 1: independent
 * Mode 2: sequential
 * Mode 3: recent
+* Mode 4: extent
 
 For details of the different scanning modes and the default value of
 this option, see [bees configuration](config.md).
```
include/crucible/btrfs-tree.h

```diff
@@ -64,11 +64,13 @@ namespace crucible {
 		/// @{ Extent items (EXTENT_ITEM)
 		uint64_t extent_begin() const;
 		uint64_t extent_end() const;
+		uint64_t extent_flags() const;
 		uint64_t extent_generation() const;
 		/// @}
 
 		/// @{ Root items
 		uint64_t root_flags() const;
+		uint64_t root_refs() const;
 		/// @}
 
 		/// @{ Root backref items.
@@ -108,7 +110,9 @@ namespace crucible {
 		virtual ~BtrfsTreeFetcher() = default;
 		BtrfsTreeFetcher(Fd new_fd);
 		void type(uint8_t type);
+		uint8_t type();
 		void tree(uint64_t tree);
+		uint64_t tree();
 		void transid(uint64_t min_transid, uint64_t max_transid = numeric_limits<uint64_t>::max());
 		/// Block size (sectorsize) of filesystem
 		uint64_t block_size() const;
```
include/crucible/fs.h

```diff
@@ -197,6 +197,10 @@ namespace crucible {
 		size_t m_buf_size;
 		set<BtrfsIoctlSearchHeader> m_result;
 
+		static thread_local size_t s_calls;
+		static thread_local size_t s_loops;
+		static thread_local size_t s_loops_empty;
+
 	};
 
 	ostream & operator<<(ostream &os, const btrfs_ioctl_search_key &key);
```
include/crucible/multilock.h

```diff
@@ -14,6 +14,7 @@ namespace crucible {
 		mutex m_mutex;
 		condition_variable m_cv;
 		map<string, size_t> m_counters;
+		bool m_do_locking = true;
 
 		class LockHandle {
 			const string m_type;
@@ -33,6 +34,7 @@ namespace crucible {
 		shared_ptr<LockHandle> get_lock_private(const string &type);
 	public:
 		static shared_ptr<LockHandle> get_lock(const string &type);
+		static void enable_locking(bool enabled);
 	};
 
 }
```
include/crucible/table.h (new file, 106 lines)

```diff
@@ -0,0 +1,106 @@
+#ifndef CRUCIBLE_TABLE_H
+#define CRUCIBLE_TABLE_H
+
+#include <functional>
+#include <limits>
+#include <map>
+#include <memory>
+#include <ostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace crucible {
+	namespace Table {
+		using namespace std;
+
+		using Content = function<string(size_t width, size_t height)>;
+		const size_t endpos = numeric_limits<size_t>::max();
+
+		Content Fill(const char c);
+		Content Text(const string& s);
+
+		template <class T>
+		Content Number(const T& num)
+		{
+			ostringstream oss;
+			oss << num;
+			return Text(oss.str());
+		}
+
+		class Cell {
+			Content m_content;
+		public:
+			Cell(const Content &fn = [](size_t, size_t) { return string(); } );
+			Cell& operator=(const Content &fn);
+			string text(size_t width, size_t height) const;
+		};
+
+		class Dimension {
+			size_t m_next_pos = 0;
+			vector<size_t> m_elements;
+			friend class Table;
+			size_t at(size_t) const;
+		public:
+			size_t size() const;
+			size_t insert(size_t pos);
+			void erase(size_t pos);
+		};
+
+		class Table {
+			Dimension m_rows, m_cols;
+			map<pair<size_t, size_t>, Cell> m_cells;
+			string m_left = "|";
+			string m_mid = "|";
+			string m_right = "|";
+		public:
+			Dimension &rows();
+			const Dimension& rows() const;
+			Dimension &cols();
+			const Dimension& cols() const;
+			Cell& at(size_t row, size_t col);
+			const Cell& at(size_t row, size_t col) const;
+			template <class T> void insert_row(size_t pos, const T& container);
+			template <class T> void insert_col(size_t pos, const T& container);
+			void left(const string &s);
+			void mid(const string &s);
+			void right(const string &s);
+			const string& left() const;
+			const string& mid() const;
+			const string& right() const;
+		};
+
+		ostream& operator<<(ostream &os, const Table &table);
+
+		template <class T>
+		void
+		Table::insert_row(size_t pos, const T& container)
+		{
+			const auto new_pos = m_rows.insert(pos);
+			size_t col = 0;
+			for (const auto &i : container) {
+				if (col >= cols().size()) {
+					cols().insert(col);
+				}
+				at(new_pos, col++) = i;
+			}
+		}
+
+		template <class T>
+		void
+		Table::insert_col(size_t pos, const T& container)
+		{
+			const auto new_pos = m_cols.insert(pos);
+			size_t row = 0;
+			for (const auto &i : container) {
+				if (row >= rows().size()) {
+					rows().insert(row);
+				}
+				at(row++, new_pos) = i;
+			}
+		}
+
+	}
+}
+
+#endif // CRUCIBLE_TABLE_H
```
include/crucible/task.h

```diff
@@ -40,6 +40,9 @@ namespace crucible {
 		/// after the current instance exits.
 		void run() const;
 
+		/// Schedule task to run when no other Task is available.
+		void idle() const;
+
 		/// Schedule Task to run after this Task has run or
 		/// been destroyed.
 		void append(const Task &task) const;
@@ -163,9 +166,9 @@ namespace crucible {
 		/// (it is the ExclusionLock that owns the lock, so it can
 		/// be passed to other Tasks or threads, but this is not
 		/// recommended practice).
-		/// If not successful, current Task is appended to the
+		/// If not successful, the argument Task is appended to the
 		/// task that currently holds the lock. Current task is
-		/// expected to release any other ExclusionLock
+		/// expected to immediately release any other ExclusionLock
 		/// objects it holds, and exit its Task function.
 		ExclusionLock try_lock(const Task &task);
 
```
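A sketch of the new `idle()` in use. This assumes the usual `Task(title, function)` constructor from elsewhere in this header, which is not shown in the hunk above:

```cpp
// Sketch: scheduling low-priority work with the new Task::idle().
// Assumption: Task(title, function) constructor, not visible in the hunk.
#include "crucible/task.h"

void schedule_housekeeping()
{
	crucible::Task flush("housekeeping", [] {
		// ... deferred maintenance work ...
	});
	flush.idle(); // runs only when no other Task is available
}
```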
lib/Makefile

```diff
@@ -17,6 +17,7 @@ CRUCIBLE_OBJS = \
 	path.o \
 	process.o \
 	string.o \
+	table.o \
 	task.o \
 	time.o \
 	uname.o \
```
lib/btrfs-tree.cc

```diff
@@ -22,6 +22,13 @@ namespace crucible {
 		return m_objectid + m_offset;
 	}
 
+	uint64_t
+	BtrfsTreeItem::extent_flags() const
+	{
+		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_ITEM_KEY);
+		return btrfs_get_member(&btrfs_extent_item::flags, m_data);
+	}
+
 	uint64_t
 	BtrfsTreeItem::extent_generation() const
 	{
@@ -61,6 +68,13 @@ namespace crucible {
 		return btrfs_get_member(&btrfs_root_item::flags, m_data);
 	}
 
+	uint64_t
+	BtrfsTreeItem::root_refs() const
+	{
+		THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_ROOT_ITEM_KEY);
+		return btrfs_get_member(&btrfs_root_item::refs, m_data);
+	}
+
 	ostream &
 	operator<<(ostream &os, const BtrfsTreeItem &bti)
 	{
@@ -269,12 +283,24 @@ namespace crucible {
 		m_type = type;
 	}
 
+	uint8_t
+	BtrfsTreeFetcher::type()
+	{
+		return m_type;
+	}
+
 	void
 	BtrfsTreeFetcher::tree(uint64_t tree)
 	{
 		m_tree = tree;
 	}
 
+	uint64_t
+	BtrfsTreeFetcher::tree()
+	{
+		return m_tree;
+	}
+
 	void
 	BtrfsTreeFetcher::transid(uint64_t min_transid, uint64_t max_transid)
 	{
```
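The new `extent_flags()` accessor exposes `btrfs_extent_item::flags`. A hypothetical caller, using the kernel UAPI flag bits to separate data extents from metadata tree blocks (compare the `crawl_tree_block` counter documented above):

```cpp
// Hypothetical caller of the new extent_flags() accessor: the btrfs
// extent tree contains both data and metadata extents, and the
// EXTENT_ITEM flags distinguish them.
#include "crucible/btrfs-tree.h"
#include <linux/btrfs_tree.h>   // BTRFS_EXTENT_FLAG_*

bool is_data_extent(const crucible::BtrfsTreeItem &bti)
{
	const auto flags = bti.extent_flags();
	return (flags & BTRFS_EXTENT_FLAG_DATA)
		&& !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK);
}
```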
lib/fs.cc (25 lines changed)

```diff
@@ -159,12 +159,13 @@ namespace crucible {
 	{
 		THROW_CHECK1(invalid_argument, src_length, src_length > 0);
 		while (src_length > 0) {
-			off_t length = min(off_t(BTRFS_MAX_DEDUPE_LEN), src_length);
-			BtrfsExtentSame bes(src_fd, src_offset, length);
+			BtrfsExtentSame bes(src_fd, src_offset, src_length);
 			bes.add(dst_fd, dst_offset);
 			bes.do_ioctl();
-			auto status = bes.m_info.at(0).status;
+			const auto status = bes.m_info.at(0).status;
 			if (status == 0) {
+				const off_t length = bes.m_info.at(0).bytes_deduped;
+				THROW_CHECK0(invalid_argument, length > 0);
 				src_offset += length;
 				dst_offset += length;
 				src_length -= length;
```
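For context on the hunk above: the dedupe ioctl reports how many bytes it actually deduplicated, which may be less than requested, so the loop now advances by `bytes_deduped` instead of capping each request at a fixed chunk size. A sketch of one call through the generic kernel UAPI (bees wraps the btrfs-specific equivalent in `BtrfsExtentSame`; names here are illustrative):

```cpp
// Sketch: one dedupe-range call via the generic FIDEDUPERANGE UAPI.
// The kernel fills in bytes_deduped; the caller advances by that amount.
#include <linux/fs.h>
#include <sys/ioctl.h>
#include <cstdlib>

long dedupe_once(int src_fd, off_t src_off, off_t len, int dst_fd, off_t dst_off)
{
	auto *r = static_cast<file_dedupe_range *>(
		calloc(1, sizeof(file_dedupe_range) + sizeof(file_dedupe_range_info)));
	r->src_offset = src_off;
	r->src_length = len;
	r->dest_count = 1;
	r->info[0].dest_fd = dst_fd;
	r->info[0].dest_offset = dst_off;
	const int rv = ioctl(src_fd, FIDEDUPERANGE, r);
	const long done = (rv == 0 && r->info[0].status == 0)
		? static_cast<long>(r->info[0].bytes_deduped) : -1;
	free(r);
	return done; // caller advances offsets by this amount and repeats
}
```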
```diff
@@ -333,7 +334,7 @@ namespace crucible {
 		btrfs_ioctl_logical_ino_args args = (btrfs_ioctl_logical_ino_args) {
 			.logical = m_logical,
 			.size = m_container_size,
-			.inodes = reinterpret_cast<uint64_t>(m_container.prepare(m_container_size)),
+			.inodes = reinterpret_cast<uintptr_t>(m_container.prepare(m_container_size)),
 		};
 		// We are still supporting building with old headers that don't have .flags yet
 		*(&args.reserved[0] + 3) = m_flags;
@@ -416,7 +417,7 @@ namespace crucible {
 	{
 		btrfs_ioctl_ino_path_args *p = static_cast<btrfs_ioctl_ino_path_args *>(this);
 		BtrfsDataContainer container(m_container_size);
-		fspath = reinterpret_cast<uint64_t>(container.prepare(m_container_size));
+		fspath = reinterpret_cast<uintptr_t>(container.prepare(m_container_size));
 		size = container.get_size();
 
 		m_paths.clear();
@@ -753,6 +754,10 @@ namespace crucible {
 		return offset + len;
 	}
 
+	thread_local size_t BtrfsIoctlSearchKey::s_calls = 0;
+	thread_local size_t BtrfsIoctlSearchKey::s_loops = 0;
+	thread_local size_t BtrfsIoctlSearchKey::s_loops_empty = 0;
+
 	bool
 	BtrfsIoctlSearchKey::do_ioctl_nothrow(int fd)
 	{
@@ -773,6 +778,12 @@ namespace crucible {
 			ioctl_ptr->buf_size = buf_size;
 			// Don't bother supporting V1. Kernels that old have other problems.
 			int rv = ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, ioctl_arg.data());
+			++s_calls;
+			if (rv != 0 && errno == ENOENT) {
+				// If we are searching a tree that is deleted or no longer exists, just return an empty list
+				nr_items = 0;
+				break;
+			}
 			if (rv != 0 && errno != EOVERFLOW) {
 				return false;
 			}
@@ -794,6 +805,10 @@ namespace crucible {
 				buf_size *= 2;
 			}
 			// don't automatically raise the buf size higher than 64K, the largest possible btrfs item
+			++s_loops;
+			if (ioctl_ptr->key.nr_items == 0) {
+				++s_loops_empty;
+			}
 		} while (buf_size < 65536);
 
 		// ioctl changes nr_items, this has to be copied back
```
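The `do_ioctl_nothrow` hunks above add per-thread counters and `ENOENT` handling around `TREE_SEARCH_V2`. The surrounding retry pattern, simplified (kernel UAPI; not the crucible implementation):

```cpp
// Sketch of the retry pattern instrumented above: TREE_SEARCH_V2 fails
// with EOVERFLOW when a single item doesn't fit the buffer, so the
// buffer doubles (up to 64K, the largest possible btrfs item) and the
// call is retried; ENOENT now means the tree was deleted mid-search
// and is treated as an empty result.
#include <linux/btrfs.h>
#include <sys/ioctl.h>
#include <cerrno>
#include <vector>

bool tree_search(int fd, btrfs_ioctl_search_key &key)
{
	size_t buf_size = 4096;
	while (true) {
		std::vector<char> arg(sizeof(btrfs_ioctl_search_args_v2) + buf_size);
		auto *p = reinterpret_cast<btrfs_ioctl_search_args_v2 *>(arg.data());
		p->key = key;
		p->buf_size = buf_size;
		if (ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, p) == 0) {
			key = p->key;  // kernel updates nr_items; results are in p->buf
			return true;
		}
		if (errno == ENOENT) {   // tree deleted or gone: empty result
			key.nr_items = 0;
			return true;
		}
		if (errno != EOVERFLOW || buf_size >= 65536)
			return false;
		buf_size *= 2;           // item didn't fit; grow and retry
	}
}
```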
@@ -62,11 +62,22 @@ namespace crucible {
|
|||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static MultiLocker s_process_instance;
|
||||||
|
|
||||||
shared_ptr<MultiLocker::LockHandle>
|
shared_ptr<MultiLocker::LockHandle>
|
||||||
MultiLocker::get_lock(const string &type)
|
MultiLocker::get_lock(const string &type)
|
||||||
{
|
{
|
||||||
static MultiLocker s_process_instance;
|
if (s_process_instance.m_do_locking) {
|
||||||
return s_process_instance.get_lock_private(type);
|
return s_process_instance.get_lock_private(type);
|
||||||
|
} else {
|
||||||
|
return shared_ptr<MultiLocker::LockHandle>();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
MultiLocker::enable_locking(const bool enabled)
|
||||||
|
{
|
||||||
|
s_process_instance.m_do_locking = enabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
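With `s_process_instance` hoisted to file scope, the new `enable_locking()` switch can be flipped before any lock is taken, and `get_lock()` degrades to a no-op when locking is disabled. A hedged usage sketch of the API exactly as declared in this hunk (the lock-type string is illustrative):

```cpp
// Sketch only: assumes the MultiLocker interface shown in the hunk above.
#include "crucible/multilock.h"

using namespace crucible;

void do_exclusive_work()
{
	MultiLocker::enable_locking(true);
	// get_lock() returns a LockHandle when locking is enabled,
	// or an empty shared_ptr when it is disabled.
	const auto lock = MultiLocker::get_lock("balance");
	// ... work serialized against other holders of the same type ...
}	// handle (if any) released here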
254	lib/table.cc	Normal file

@@ -0,0 +1,254 @@
+#include "crucible/table.h"
+
+#include "crucible/string.h"
+
+namespace crucible {
+	namespace Table {
+		using namespace std;
+
+		Content
+		Fill(const char c)
+		{
+			return [=](size_t width, size_t height) -> string {
+				string rv;
+				while (height--) {
+					rv += string(width, c);
+					if (height) {
+						rv += "\n";
+					}
+				}
+				return rv;
+			};
+		}
+
+		Content
+		Text(const string &s)
+		{
+			return [=](size_t width, size_t height) -> string {
+				const auto lines = split("\n", s);
+				string rv;
+				size_t line_count = 0;
+				for (const auto &i : lines) {
+					if (line_count++) {
+						rv += "\n";
+					}
+					if (i.length() < width) {
+						rv += string(width - i.length(), ' ');
+					}
+					rv += i;
+				}
+				while (line_count < height) {
+					if (line_count++) {
+						rv += "\n";
+					}
+					rv += string(width, ' ');
+				}
+				return rv;
+			};
+		}
+
+		Content
+		Number(const string &s)
+		{
+			return [=](size_t width, size_t height) -> string {
+				const auto lines = split("\n", s);
+				string rv;
+				size_t line_count = 0;
+				for (const auto &i : lines) {
+					if (line_count++) {
+						rv += "\n";
+					}
+					if (i.length() < width) {
+						rv += string(width - i.length(), ' ');
+					}
+					rv += i;
+				}
+				while (line_count < height) {
+					if (line_count++) {
+						rv += "\n";
+					}
+					rv += string(width, ' ');
+				}
+				return rv;
+			};
+		}
+
+		Cell::Cell(const Content &fn) :
+			m_content(fn)
+		{
+		}
+
+		Cell&
+		Cell::operator=(const Content &fn)
+		{
+			m_content = fn;
+			return *this;
+		}
+
+		string
+		Cell::text(size_t width, size_t height) const
+		{
+			return m_content(width, height);
+		}
+
+		size_t
+		Dimension::size() const
+		{
+			return m_elements.size();
+		}
+
+		size_t
+		Dimension::insert(size_t pos)
+		{
+			++m_next_pos;
+			const auto insert_pos = min(m_elements.size(), pos);
+			const auto it = m_elements.begin() + insert_pos;
+			m_elements.insert(it, m_next_pos);
+			return insert_pos;
+		}
+
+		void
+		Dimension::erase(size_t pos)
+		{
+			const auto it = m_elements.begin() + min(m_elements.size(), pos);
+			m_elements.erase(it);
+		}
+
+		size_t
+		Dimension::at(size_t pos) const
+		{
+			return m_elements.at(pos);
+		}
+
+		Dimension&
+		Table::rows()
+		{
+			return m_rows;
+		};
+
+		const Dimension&
+		Table::rows() const
+		{
+			return m_rows;
+		};
+
+		Dimension&
+		Table::cols()
+		{
+			return m_cols;
+		};
+
+		const Dimension&
+		Table::cols() const
+		{
+			return m_cols;
+		};
+
+		const Cell&
+		Table::at(size_t row, size_t col) const
+		{
+			const auto row_idx = m_rows.at(row);
+			const auto col_idx = m_cols.at(col);
+			const auto found = m_cells.find(make_pair(row_idx, col_idx));
+			if (found == m_cells.end()) {
+				static const Cell s_empty(Fill('.'));
+				return s_empty;
+			}
+			return found->second;
+		};
+
+		Cell&
+		Table::at(size_t row, size_t col)
+		{
+			const auto row_idx = m_rows.at(row);
+			const auto col_idx = m_cols.at(col);
+			return m_cells[make_pair(row_idx, col_idx)];
+		};
+
+		static
+		pair<size_t, size_t>
+		text_size(const string &s)
+		{
+			const auto s_split = split("\n", s);
+			size_t width = 0;
+			for (const auto &i : s_split) {
+				width = max(width, i.length());
+			}
+			return make_pair(width, s_split.size());
+		}
+
+		ostream& operator<<(ostream &os, const Table &table)
+		{
+			const auto rows = table.rows().size();
+			const auto cols = table.cols().size();
+			vector<size_t> row_heights(rows, 1);
+			vector<size_t> col_widths(cols, 1);
+			// Get the size of all fixed- and minimum-sized content cells
+			for (size_t row = 0; row < table.rows().size(); ++row) {
+				vector<string> col_text;
+				for (size_t col = 0; col < table.cols().size(); ++col) {
+					col_text.push_back(table.at(row, col).text(0, 0));
+					const auto tsize = text_size(*col_text.rbegin());
+					row_heights[row] = max(row_heights[row], tsize.second);
+					col_widths[col] = max(col_widths[col], tsize.first);
+				}
+			}
+			// Render the table
+			for (size_t row = 0; row < table.rows().size(); ++row) {
+				vector<string> lines(row_heights[row], "");
+				for (size_t col = 0; col < table.cols().size(); ++col) {
+					const auto& table_cell = table.at(row, col);
+					const auto table_text = table_cell.text(col_widths[col], row_heights[row]);
+					auto col_lines = split("\n", table_text);
+					col_lines.resize(row_heights[row], "");
+					for (size_t line = 0; line < row_heights[row]; ++line) {
+						if (col > 0) {
+							lines[line] += table.mid();
+						}
+						lines[line] += col_lines[line];
+					}
+				}
+				for (const auto &line : lines) {
+					os << table.left() << line << table.right() << "\n";
+				}
+			}
+			return os;
+		}
+
+		void
+		Table::left(const string &s)
+		{
+			m_left = s;
+		}
+
+		void
+		Table::mid(const string &s)
+		{
+			m_mid = s;
+		}
+
+		void
+		Table::right(const string &s)
+		{
+			m_right = s;
+		}
+
+		const string&
+		Table::left() const
+		{
+			return m_left;
+		}
+
+		const string&
+		Table::mid() const
+		{
+			return m_mid;
+		}
+
+		const string&
+		Table::right() const
+		{
+			return m_right;
+		}
+	}
+}
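lib/table.cc is new in this range: a tiny text-table renderer where each cell is a `Content` callback from `(width, height)` to a padded string, `Dimension` tracks row/column identity across inserts and erases, and `operator<<` measures every cell before rendering. Note that `Text` and `Number` share the same body in this version of the file, presumably so their alignment can diverge later. A hedged usage sketch (assumes a default-constructible `Table` and the `crucible/table.h` declarations implied by this file):

```cpp
#include "crucible/table.h"
#include <iostream>

int main()
{
	using namespace crucible;
	Table::Table t;			// class Table lives inside namespace crucible::Table
	t.rows().insert(0);		// row 0
	t.rows().insert(1);		// row 1
	t.cols().insert(0);		// col 0
	t.cols().insert(1);		// col 1
	t.at(0, 0) = Table::Text("hash");	// Cell::operator=(const Content&)
	t.at(0, 1) = Table::Text("addr");
	t.at(1, 0) = Table::Number("12345");	// right-padded to column width
	t.at(1, 1) = Table::Fill('-');
	t.mid(" | ");			// column separator
	std::cout << t;			// unset cells render as Fill('.')
	return 0;
}
```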
61	lib/task.cc

@@ -124,6 +124,9 @@ namespace crucible {
 		/// instance at the end of TaskMaster's global queue.
 		void run();
 
+		/// Run the task when there are no more Tasks on the main queue.
+		void idle();
+
 		/// Execute task immediately in current thread if it is not already
 		/// executing in another thread; otherwise, append the current task
 		/// to itself to be executed immediately in the other thread.
@@ -150,6 +153,7 @@ namespace crucible {
 		mutex m_mutex;
 		condition_variable m_condvar;
 		TaskQueue m_queue;
+		TaskQueue m_idle_queue;
 		size_t m_thread_max;
 		size_t m_thread_min = 0;
 		set<TaskConsumerPtr> m_threads;
@@ -184,6 +188,7 @@ namespace crucible {
 		TaskMasterState(size_t thread_max = thread::hardware_concurrency());
 
 		static void push_back(const TaskStatePtr &task);
+		static void push_back_idle(const TaskStatePtr &task);
 		static void push_front(TaskQueue &queue);
 		size_t get_queue_count();
 		size_t get_thread_count();
@@ -367,6 +372,17 @@ namespace crucible {
 		TaskMasterState::push_back(shared_from_this());
 	}
 
+	void
+	TaskState::idle()
+	{
+		unique_lock<mutex> lock(m_mutex);
+		if (m_run_now) {
+			return;
+		}
+		m_run_now = true;
+		TaskMasterState::push_back_idle(shared_from_this());
+	}
+
 	TaskMasterState::TaskMasterState(size_t thread_max) :
 		m_thread_max(thread_max),
 		m_configured_thread_max(thread_max),
@@ -410,6 +426,20 @@ namespace crucible {
 		s_tms->start_threads_nolock();
 	}
 
+	void
+	TaskMasterState::push_back_idle(const TaskStatePtr &task)
+	{
+		THROW_CHECK0(runtime_error, task);
+		unique_lock<mutex> lock(s_tms->m_mutex);
+		if (s_tms->m_cancelled) {
+			task->clear();
+			return;
+		}
+		s_tms->m_idle_queue.push_back(task);
+		s_tms->m_condvar.notify_all();
+		s_tms->start_threads_nolock();
+	}
+
 	void
 	TaskMasterState::push_front(TaskQueue &queue)
 	{
@@ -456,12 +486,26 @@ namespace crucible {
 	TaskMaster::print_queue(ostream &os)
 	{
 		unique_lock<mutex> lock(s_tms->m_mutex);
-		os << "Queue (size " << s_tms->m_queue.size() << "):" << endl;
+		auto queue_copy = s_tms->m_queue;
+		lock.unlock();
+		os << "Queue (size " << queue_copy.size() << "):" << endl;
 		size_t counter = 0;
-		for (auto i : s_tms->m_queue) {
+		for (auto i : queue_copy) {
 			os << "Queue #" << ++counter << " Task ID " << i->id() << " " << i->title() << endl;
 		}
-		return os << "Queue End" << endl;
+		os << "Queue End" << endl;
+
+		lock.lock();
+		queue_copy = s_tms->m_idle_queue;
+		lock.unlock();
+		os << "Idle (size " << queue_copy.size() << "):" << endl;
+		counter = 0;
+		for (const auto &i : queue_copy) {
+			os << "Idle #" << ++counter << " Task ID " << i->id() << " " << i->title() << endl;
+		}
+		os << "Idle End" << endl;
+
+		return os;
 	}
 
 	ostream &
@@ -583,6 +627,7 @@ namespace crucible {
 		m_cancelled = true;
 		decltype(m_queue) empty_queue;
 		m_queue.swap(empty_queue);
+		empty_queue.splice(empty_queue.end(), m_idle_queue);
 		m_condvar.notify_all();
 		lock.unlock();
 		TaskState::clear_queue(empty_queue);
@@ -682,6 +727,13 @@ namespace crucible {
 		m_task_state->run();
 	}
 
+	void
+	Task::idle() const
+	{
+		THROW_CHECK0(runtime_error, m_task_state);
+		m_task_state->idle();
+	}
+
 	void
 	Task::append(const Task &that) const
 	{
@@ -772,6 +824,9 @@ namespace crucible {
 			} else if (!master_copy->m_queue.empty()) {
 				m_current_task = *master_copy->m_queue.begin();
 				master_copy->m_queue.pop_front();
+			} else if (!master_copy->m_idle_queue.empty()) {
+				m_current_task = *master_copy->m_idle_queue.begin();
+				master_copy->m_idle_queue.pop_front();
 			} else {
 				master_copy->m_condvar.wait(lock);
 				continue;
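The task.cc change threads a second queue through TaskMasterState: `Task::idle()` marks a task runnable only when the main queue is drained, and the consumer's pop loop (last hunk above) checks `m_queue` first, then `m_idle_queue`, so idle tasks never displace regular ones. A hedged usage sketch (constructor signature assumed from the existing Task API):

```cpp
// Sketch only: assumes crucible::Task as used elsewhere in bees.
#include "crucible/task.h"

using namespace crucible;

void schedule_housekeeping()
{
	Task flush("flush hash table", []() {
		// ... background maintenance ...
	});
	// run() would append to the main queue; idle() parks the task on
	// the idle queue, so it executes only when no main-queue task is ready.
	flush.idle();
}
```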
@@ -98,6 +98,8 @@ BeesContext::dump_status()
 			TaskMaster::print_queue(ofs);
 #endif
+
+			ofs << get_progress();
 
 			ofs.close();
 
 			BEESNOTE("renaming status file '" << status_file << "'");
@@ -112,6 +114,20 @@ BeesContext::dump_status()
 	}
 }
 
+void
+BeesContext::set_progress(const string &str)
+{
+	unique_lock<mutex> lock(m_progress_mtx);
+	m_progress_str = str;
+}
+
+string
+BeesContext::get_progress()
+{
+	unique_lock<mutex> lock(m_progress_mtx);
+	return m_progress_str;
+}
+
 void
 BeesContext::show_progress()
 {
@@ -159,6 +175,8 @@ BeesContext::show_progress()
 			BEESLOGINFO("\ttid " << t.first << ": " << t.second);
 		}
 
+		// No need to log progress here, it is logged when set
+
 		lastStats = thisStats;
 	}
 }
@@ -182,7 +200,7 @@ BeesContext::home_fd()
 }
 
 bool
-BeesContext::is_root_ro(uint64_t root)
+BeesContext::is_root_ro(uint64_t const root)
 {
 	return roots()->is_root_ro(root);
 }
@@ -264,6 +282,7 @@ BeesContext::rewrite_file_range(const BeesFileRange &bfr)
 	// BEESLOG("BeesResolver br(..., " << bfr << ")");
 	BEESTRACE("BeesContext::rewrite_file_range calling BeesResolver " << bfr);
 	BeesResolver br(m_ctx, BeesAddress(bfr.fd(), bfr.begin()));
+	BEESTRACE("BeesContext::rewrite_file_range calling replace_src " << dup_bbd);
 	// BEESLOG("\treplace_src " << dup_bbd);
 	br.replace_src(dup_bbd);
 	BEESCOUNT(scan_rewrite);
@@ -291,24 +310,37 @@ BeesContext::rewrite_file_range(const BeesFileRange &bfr)
 	}
 }
 
-BeesFileRange
+struct BeesSeenRange {
+	uint64_t bytenr;
+	off_t offset;
+	off_t length;
+};
+
+static
+bool
+operator<(const BeesSeenRange &bsr1, const BeesSeenRange &bsr2)
+{
+	return tie(bsr1.bytenr, bsr1.offset, bsr1.length) < tie(bsr2.bytenr, bsr2.offset, bsr2.length);
+}
+
+static
+__attribute__((unused))
+ostream&
+operator<<(ostream &os, const BeesSeenRange &tup)
+{
+	return os << "BeesSeenRange { " << to_hex(tup.bytenr) << ", " << to_hex(tup.offset) << "+" << pretty(tup.length) << " }";
+}
+
+void
 BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 {
 	BEESNOTE("Scanning " << pretty(e.size()) << " "
 		<< to_hex(e.begin()) << ".." << to_hex(e.end())
 		<< " " << name_fd(bfr.fd()) );
 	BEESTRACE("scan extent " << e);
+	BEESTRACE("scan bfr " << bfr);
 	BEESCOUNT(scan_extent);
 
-	// EXPERIMENT: Don't bother with tiny extents unless they are the entire file.
-	// We'll take a tiny extent at BOF or EOF but not in between.
-	if (e.begin() && e.size() < 128 * 1024 && e.end() != Stat(bfr.fd()).st_size) {
-		BEESCOUNT(scan_extent_tiny);
-		// This doesn't work properly with the current architecture,
-		// so we don't do an early return here.
-		// return bfr;
-	}
-
 	// We keep moving this method around
 	auto m_ctx = shared_from_this();
 
@@ -328,7 +360,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 	if (e.flags() & Extent::HOLE) {
 		// Nothing here, dispose of this early
 		BEESCOUNT(scan_hole);
-		return bfr;
+		return;
 	}
 
 	if (e.flags() & Extent::PREALLOC) {
@@ -347,38 +379,57 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 			if (m_ctx->dedup(brp)) {
 				BEESCOUNT(dedup_prealloc_hit);
 				BEESCOUNTADD(dedup_prealloc_bytes, e.size());
-				return bfr;
+				return;
 			} else {
 				BEESCOUNT(dedup_prealloc_miss);
 			}
 	}
 
+	// If we already read this extent and inserted it into the hash table, no need to read it again
+	static mutex s_seen_mutex;
+	unique_lock<mutex> lock_seen(s_seen_mutex);
+	const BeesSeenRange tup = {
+		.bytenr = e.bytenr(),
+		.offset = e.offset(),
+		.length = e.size(),
+	};
+	static set<BeesSeenRange> s_seen;
+	if (s_seen.size() > BEES_MAX_EXTENT_REF_COUNT) {
+		s_seen.clear();
+		BEESCOUNT(scan_seen_clear);
+	}
+	const auto seen_rv = s_seen.find(tup) != s_seen.end();
+	if (!seen_rv) {
+		BEESCOUNT(scan_seen_miss);
+	} else {
+		// BEESLOGDEBUG("Skip " << tup << " " << e);
+		BEESCOUNT(scan_seen_hit);
+		return;
+	}
+	lock_seen.unlock();
+
 	// OK we need to read extent now
 	bees_readahead(bfr.fd(), bfr.begin(), bfr.size());
 
 	map<off_t, pair<BeesHash, BeesAddress>> insert_map;
-	set<off_t> noinsert_set;
+	set<off_t> dedupe_set;
+	set<off_t> zero_set;
 
-	// Hole handling
-	bool extent_compressed = e.flags() & FIEMAP_EXTENT_ENCODED;
-	bool extent_contains_zero = false;
-	bool extent_contains_nonzero = false;
-
-	// Need to replace extent
-	bool rewrite_extent = false;
-
 	// Pretty graphs
 	off_t block_count = ((e.size() + BLOCK_MASK_SUMS) & ~BLOCK_MASK_SUMS) / BLOCK_SIZE_SUMS;
 	BEESTRACE(e << " block_count " << block_count);
 	string bar(block_count, '#');
 
-	for (off_t next_p = e.begin(); next_p < e.end(); ) {
-
-		// Guarantee forward progress
-		off_t p = next_p;
-		next_p += BLOCK_SIZE_SUMS;
+	// List of dedupes found
+	list<BeesRangePair> dedupe_list;
+	list<BeesFileRange> copy_list;
+	list<pair<BeesHash, BeesAddress>> front_hash_list;
+	list<uint64_t> invalidate_addr_list;
+
+	off_t next_p = e.begin();
+
+	for (off_t p = e.begin(); p < e.end(); p += BLOCK_SIZE_SUMS) {
 
-		off_t bar_p = (p - e.begin()) / BLOCK_SIZE_SUMS;
+		const off_t bar_p = (p - e.begin()) / BLOCK_SIZE_SUMS;
 		BeesAddress addr(e, p);
 
 		// This extent should consist entirely of non-magic blocks
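The seen-set added in the hunk above is a bounded memo keyed by `(bytenr, offset, length)`: a static mutex-guarded `set` that is simply cleared when it outgrows `BEES_MAX_EXTENT_REF_COUNT`, trading occasional repeat scans for a hard memory cap and O(1) cleanup. A generic restatement of the pattern (hypothetical names, not the bees types):

```cpp
#include <cstddef>
#include <cstdint>
#include <mutex>
#include <set>
#include <tuple>

// True the first time a key is offered, false on repeats. The whole
// set is dropped past max_size: cheaper than per-entry eviction, and
// the only cost of forgetting is re-scanning an extent once.
bool first_sighting(std::uint64_t bytenr, std::int64_t offset, std::int64_t length)
{
	using Key = std::tuple<std::uint64_t, std::int64_t, std::int64_t>;
	constexpr std::size_t max_size = 65536;	// illustrative cap
	static std::set<Key> seen;
	static std::mutex seen_mutex;
	const std::lock_guard<std::mutex> lock(seen_mutex);
	if (seen.size() > max_size) {
		seen.clear();
	}
	return seen.insert(Key{bytenr, offset, length}).second;
}
```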
@@ -393,69 +444,68 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 
 		// Calculate the hash first because it lets us shortcut on is_data_zero
 		BEESNOTE("scan hash " << bbd);
-		BeesHash hash = bbd.hash();
+		const BeesHash hash = bbd.hash();
+
+		// Weed out zero blocks
+		BEESNOTE("is_data_zero " << bbd);
+		const bool data_is_zero = bbd.is_data_zero();
+		if (data_is_zero) {
+			bar.at(bar_p) = '0';
+			zero_set.insert(p);
+			BEESCOUNT(scan_zero);
+			continue;
+		}
 
 		// Schedule this block for insertion if we decide to keep this extent.
 		BEESCOUNT(scan_hash_preinsert);
 		BEESTRACE("Pushing hash " << hash << " addr " << addr << " bbd " << bbd);
 		insert_map.insert(make_pair(p, make_pair(hash, addr)));
-		bar.at(bar_p) = 'R';
+		bar.at(bar_p) = 'i';
 
-		// Weed out zero blocks
-		BEESNOTE("is_data_zero " << bbd);
-		bool extent_is_zero = bbd.is_data_zero();
-		if (extent_is_zero) {
-			bar.at(bar_p) = '0';
-			if (extent_compressed) {
-				if (!extent_contains_zero) {
-					// BEESLOG("compressed zero bbd " << bbd << "\n\tin extent " << e);
-				}
-				extent_contains_zero = true;
-				// Do not attempt to lookup hash of zero block
-				continue;
-			} else {
-				BEESLOGINFO("zero bbd " << bbd << "\n\tin extent " << e);
-				BEESCOUNT(scan_zero_uncompressed);
-				rewrite_extent = true;
-				break;
-			}
-		} else {
-			if (extent_contains_zero && !extent_contains_nonzero) {
-				// BEESLOG("compressed nonzero bbd " << bbd << "\n\tin extent " << e);
-			}
-			extent_contains_nonzero = true;
-		}
+		// Ensure we fill in the entire insert_map without skipping any non-zero blocks
+		if (p < next_p) continue;
 
 		BEESNOTE("lookup hash " << bbd);
-		auto found = hash_table->find_cell(hash);
+		const auto found = hash_table->find_cell(hash);
 		BEESCOUNT(scan_lookup);
 
-		set<BeesResolver> resolved_addrs;
 		set<BeesAddress> found_addrs;
+		list<BeesAddress> ordered_addrs;
 
-		// We know that there is at least one copy of the data and where it is,
-		// but we don't want to do expensive LOGICAL_INO operations unless there
-		// are at least two distinct addresses to look at.
-		found_addrs.insert(addr);
-
-		for (auto i : found) {
+		for (const auto &i : found) {
 			BEESTRACE("found (hash, address): " << i);
 			BEESCOUNT(scan_found);
 
 			// Hash has to match
 			THROW_CHECK2(runtime_error, i.e_hash, hash, i.e_hash == hash);
 
+			// We know that there is at least one copy of the data and where it is.
+			// Filter out anything that can't possibly match before we pull out the
+			// LOGICAL_INO hammer.
 			BeesAddress found_addr(i.e_addr);
 
-#if 0
 			// If address already in hash table, move on to next extent.
-			// We've already seen this block and may have made additional references to it.
-			// The current extent is effectively "pinned" and can't be modified any more.
+			// Only extents that are scanned but not modified are inserted, so if there's
+			// a matching hash:address pair in the hash table:
+			// 1. We have already scanned this extent.
+			// 2. We may have already created references to this extent.
+			// 3. We won't scan this extent again.
+			// The current extent is effectively "pinned" and can't be modified
+			// without rescanning all the existing references.
 			if (found_addr.get_physical_or_zero() == addr.get_physical_or_zero()) {
+				// No log message because this happens to many thousands of blocks
+				// when bees is interrupted.
+				// BEESLOGDEBUG("Found matching hash " << hash << " at same address " << addr << ", skipping " << bfr);
 				BEESCOUNT(scan_already);
-				return bfr;
+				return;
+			}
+
+			// Address is a duplicate.
+			// Check this early so we don't have duplicate counts.
+			if (!found_addrs.insert(found_addr).second) {
+				BEESCOUNT(scan_twice);
+				continue;
 			}
-#endif
 
 			// Block must have matching EOF alignment
 			if (found_addr.is_unaligned_eof() != addr.is_unaligned_eof()) {
@@ -463,12 +513,6 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 				continue;
 			}
 
-			// Address is a duplicate
-			if (!found_addrs.insert(found_addr).second) {
-				BEESCOUNT(scan_twice);
-				continue;
-			}
-
 			// Hash is toxic
 			if (found_addr.is_toxic()) {
 				BEESLOGWARN("WORKAROUND: abandoned toxic match for hash " << hash << " addr " << found_addr << " matching bbd " << bbd);
@@ -476,201 +520,345 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 				// Extents may become non-toxic so give them a chance to expire.
 				// hash_table->push_front_hash_addr(hash, found_addr);
 				BEESCOUNT(scan_toxic_hash);
-				return bfr;
+				return;
 			}
 
-			// Distinct address, go resolve it
-			bool abandon_extent = false;
-			catch_all([&]() {
-				BEESNOTE("resolving " << found_addr << " matched " << bbd);
-				BEESTRACE("resolving " << found_addr << " matched " << bbd);
-				BEESTRACE("BeesContext::scan_one_extent calling BeesResolver " << found_addr);
-				BeesResolver resolved(m_ctx, found_addr);
-				// Toxic extents are really toxic
-				if (resolved.is_toxic()) {
-					BEESLOGWARN("WORKAROUND: discovered toxic match at found_addr " << found_addr << " matching bbd " << bbd);
-					BEESCOUNT(scan_toxic_match);
-					// Make sure we never see this hash again.
-					// It has become toxic since it was inserted into the hash table.
-					found_addr.set_toxic();
-					hash_table->push_front_hash_addr(hash, found_addr);
-					abandon_extent = true;
-				} else if (!resolved.count()) {
-					BEESCOUNT(scan_resolve_zero);
-					// Didn't find anything, address is dead
-					BEESTRACE("matched hash " << hash << " addr " << addr << " count zero");
-					hash_table->erase_hash_addr(hash, found_addr);
-				} else {
-					resolved_addrs.insert(resolved);
-					BEESCOUNT(scan_resolve_hit);
-				}
-			});
+			// Put this address in the list without changing hash table order
+			ordered_addrs.push_back(found_addr);
+		}
 
-			if (abandon_extent) {
-				return bfr;
+		// Cheap filtering is now out of the way, now for some heavy lifting
+		for (auto found_addr : ordered_addrs) {
+			// Hash table says there's a matching block on the filesystem.
+			// Go find refs to it.
+			BEESNOTE("resolving " << found_addr << " matched " << bbd);
+			BEESTRACE("resolving " << found_addr << " matched " << bbd);
+			BEESTRACE("BeesContext::scan_one_extent calling BeesResolver " << found_addr);
+			BeesResolver resolved(m_ctx, found_addr);
+			// Toxic extents are really toxic
+			if (resolved.is_toxic()) {
+				BEESLOGWARN("WORKAROUND: discovered toxic match at found_addr " << found_addr << " matching bbd " << bbd);
+				BEESCOUNT(scan_toxic_match);
+				// Make sure we never see this hash again.
+				// It has become toxic since it was inserted into the hash table.
+				found_addr.set_toxic();
+				hash_table->push_front_hash_addr(hash, found_addr);
+				return;
+			} else if (!resolved.count()) {
+				BEESCOUNT(scan_resolve_zero);
+				// Didn't find a block at the table address, address is dead
+				BEESLOGDEBUG("Erasing stale addr " << addr << " hash " << hash);
+				hash_table->erase_hash_addr(hash, found_addr);
+				continue;
+			} else {
+				BEESCOUNT(scan_resolve_hit);
 			}
-		}
 
-		// This shouldn't happen (often), so let's count it separately
-		if (resolved_addrs.size() > 2) {
-			BEESCOUNT(matched_3_or_more);
-		}
-		if (resolved_addrs.size() > 1) {
-			BEESCOUNT(matched_2_or_more);
-		}
-
-		// No need to do all this unless there are two or more distinct matches
-		if (!resolved_addrs.empty()) {
+			// `resolved` contains references to a block on the filesystem that still exists.
 			bar.at(bar_p) = 'M';
-			BEESCOUNT(matched_1_or_more);
-			BEESTRACE("resolved_addrs.size() = " << resolved_addrs.size());
-			BEESNOTE("resolving " << resolved_addrs.size() << " matches for hash " << hash);
 
-			BeesFileRange replaced_bfr;
+			BEESNOTE("finding one match (out of " << resolved.count() << ") at " << resolved.addr() << " for " << bbd);
+			BEESTRACE("finding one match (out of " << resolved.count() << ") at " << resolved.addr() << " for " << bbd);
+			auto replaced_brp = resolved.replace_dst(bbd);
+			BeesFileRange &replaced_bfr = replaced_brp.second;
+			BEESTRACE("next_p " << to_hex(next_p) << " -> replaced_bfr " << replaced_bfr);
 
-			BeesAddress last_replaced_addr;
-			for (auto it = resolved_addrs.begin(); it != resolved_addrs.end(); ++it) {
-				// FIXME: Need to terminate this loop on replace_dst exception condition
-				// catch_all([&]() {
-					auto it_copy = *it;
-					BEESNOTE("finding one match (out of " << it_copy.count() << ") at " << it_copy.addr() << " for " << bbd);
-					BEESTRACE("finding one match (out of " << it_copy.count() << ") at " << it_copy.addr() << " for " << bbd);
-					replaced_bfr = it_copy.replace_dst(bbd);
-					BEESTRACE("next_p " << to_hex(next_p) << " -> replaced_bfr " << replaced_bfr);
-
-					// If we didn't find this hash where the hash table said it would be,
-					// correct the hash table.
-					if (it_copy.found_hash()) {
-						BEESCOUNT(scan_hash_hit);
-					} else {
-						// BEESLOGDEBUG("erase src hash " << hash << " addr " << it_copy.addr());
-						BEESCOUNT(scan_hash_miss);
-						hash_table->erase_hash_addr(hash, it_copy.addr());
-					}
-
-					if (it_copy.found_dup()) {
-						BEESCOUNT(scan_dup_hit);
-
-						// FIXME: we will thrash if we let multiple references to identical blocks
-						// exist in the hash table. Erase all but the last one.
-						if (last_replaced_addr) {
-							BEESLOGINFO("Erasing redundant hash " << hash << " addr " << last_replaced_addr);
-							hash_table->erase_hash_addr(hash, last_replaced_addr);
-							BEESCOUNT(scan_erase_redundant);
-						}
-						last_replaced_addr = it_copy.addr();
-
-						// Invalidate resolve cache so we can count refs correctly
-						m_ctx->invalidate_addr(it_copy.addr());
-						m_ctx->invalidate_addr(bbd.addr());
-
-						// Remove deduped blocks from insert map
-						THROW_CHECK0(runtime_error, replaced_bfr);
-						for (off_t ip = replaced_bfr.begin(); ip < replaced_bfr.end(); ip += BLOCK_SIZE_SUMS) {
-							BEESCOUNT(scan_dup_block);
-							noinsert_set.insert(ip);
-							if (ip >= e.begin() && ip < e.end()) {
-								off_t bar_p = (ip - e.begin()) / BLOCK_SIZE_SUMS;
-								bar.at(bar_p) = 'd';
-							}
-						}
-
-						// next_p may be past EOF so check p only
-						THROW_CHECK2(runtime_error, p, replaced_bfr, p < replaced_bfr.end());
-
-						BEESCOUNT(scan_bump);
-						next_p = replaced_bfr.end();
-					} else {
-						BEESCOUNT(scan_dup_miss);
-					}
-				// });
+			// If we did find a block, but not this hash, correct the hash table and move on
+			if (resolved.found_hash()) {
+				BEESCOUNT(scan_hash_hit);
+			} else {
+				BEESLOGDEBUG("Erasing stale hash " << hash << " addr " << resolved.addr());
+				hash_table->erase_hash_addr(hash, resolved.addr());
+				BEESCOUNT(scan_hash_miss);
+				continue;
 			}
-			if (last_replaced_addr) {
-				// If we replaced extents containing the incoming addr,
-				// push the addr we kept to the front of the hash LRU.
-				hash_table->push_front_hash_addr(hash, last_replaced_addr);
-				BEESCOUNT(scan_push_front);
+
+			// We found a block and it was a duplicate
+			if (resolved.found_dup()) {
+				THROW_CHECK0(runtime_error, replaced_bfr);
+				BEESCOUNT(scan_dup_hit);
+
+				// Save this match. If a better match is found later,
+				// it will be replaced.
+				dedupe_list.push_back(replaced_brp);
+
+				// Push matching block to front of LRU
+				front_hash_list.push_back(make_pair(hash, resolved.addr()));
+
+				// This is the block that matched in the replaced bfr
+				bar.at(bar_p) = '=';
+
+				// Invalidate resolve cache so we can count refs correctly
+				invalidate_addr_list.push_back(resolved.addr());
+				invalidate_addr_list.push_back(bbd.addr());
+
+				// next_p may be past EOF so check p only
+				THROW_CHECK2(runtime_error, p, replaced_bfr, p < replaced_bfr.end());
+
+				// We may find duplicate ranges of various lengths, so make sure
+				// we don't pick a smaller one
+				next_p = max(next_p, replaced_bfr.end());
+
+				// Stop after one dedupe is found. If there's a longer matching range
+				// out there, we'll find a matching block after the end of this range,
+				// since the longer range is longer than this one.
+				break;
+			} else {
+				BEESCOUNT(scan_dup_miss);
 			}
-		} else {
-			BEESCOUNT(matched_0);
 		}
 	}
 
-	// If the extent was compressed and all zeros, nuke entire thing
-	if (!rewrite_extent && (extent_contains_zero && !extent_contains_nonzero)) {
-		rewrite_extent = true;
-		BEESCOUNT(scan_zero_compressed);
+	bool force_insert = false;
+
+	// We don't want to punch holes into compressed extents, unless:
+	// 1. There was dedupe of non-zero blocks, so we always have to copy the rest of the extent
+	// 2. The entire extent is zero and the whole thing can be replaced with a single hole
+	const bool extent_compressed = e.flags() & FIEMAP_EXTENT_ENCODED;
+	if (extent_compressed && dedupe_list.empty() && !insert_map.empty()) {
+		// BEESLOGDEBUG("Compressed extent with non-zero data and no dedupe, skipping");
+		BEESCOUNT(scan_compressed_no_dedup);
+		force_insert = true;
 	}
 
-	// If we deduped any blocks then we must rewrite the remainder of the extent
-	if (!noinsert_set.empty()) {
-		rewrite_extent = true;
+	// FIXME: dedupe_list contains a lot of overlapping matches. Get rid of all but one.
+	list<BeesRangePair> dedupe_list_out;
+	dedupe_list.sort([](const BeesRangePair &a, const BeesRangePair &b) {
+		return b.second.size() < a.second.size();
+	});
+	// Shorten each dedupe brp by removing any overlap with earlier (longer) extents in list
+	for (auto i : dedupe_list) {
+		bool insert_i = true;
+		BEESTRACE("i = " << i << " insert_i " << insert_i);
+		for (const auto &j : dedupe_list_out) {
+			BEESTRACE("j = " << j);
+			// No overlap, try next one
+			if (j.second.end() <= i.second.begin() || j.second.begin() >= i.second.end()) {
+				continue;
+			}
+			// j fully overlaps or is the same as i, drop i
+			if (j.second.begin() <= i.second.begin() && j.second.end() >= i.second.end()) {
+				insert_i = false;
+				break;
+			}
+			// i begins outside j, i ends inside j, remove the end of i
+			if (i.second.end() > j.second.begin() && i.second.begin() <= j.second.begin()) {
+				const auto delta = i.second.end() - j.second.begin();
+				if (delta == i.second.size()) {
+					insert_i = false;
+					break;
+				}
+				i.shrink_end(delta);
+				continue;
+			}
+			// i begins inside j, ends outside j, remove the begin of i
+			if (i.second.begin() < j.second.end() && i.second.end() >= j.second.end()) {
+				const auto delta = j.second.end() - i.second.begin();
+				if (delta == i.second.size()) {
+					insert_i = false;
+					break;
+				}
+				i.shrink_begin(delta);
+				continue;
+			}
+			// i fully overlaps j, split i into two parts, push the other part onto dedupe_list
+			if (j.second.begin() > i.second.begin() && j.second.end() < i.second.end()) {
+				auto other_i = i;
+				const auto end_left_delta = i.second.end() - j.second.begin();
+				const auto begin_right_delta = i.second.begin() - j.second.end();
+				i.shrink_end(end_left_delta);
+				other_i.shrink_begin(begin_right_delta);
+				dedupe_list.push_back(other_i);
+				continue;
+			}
+			// None of the above. Oops!
+			THROW_CHECK0(runtime_error, false);
+		}
+		if (insert_i) {
+			dedupe_list_out.push_back(i);
+		}
+	}
+	dedupe_list = dedupe_list_out;
+	dedupe_list_out.clear();
+
+	// Count total dedupes
+	uint64_t bytes_deduped = 0;
+	for (const auto &i : dedupe_list) {
+		// Remove deduped blocks from insert map and zero map
+		for (off_t ip = i.second.begin(); ip < i.second.end(); ip += BLOCK_SIZE_SUMS) {
+			BEESCOUNT(scan_dup_block);
+			dedupe_set.insert(ip);
+			zero_set.erase(ip);
+		}
+		bytes_deduped += i.second.size();
 	}
 
-	// If we need to replace part of the extent, rewrite all instances of it
-	if (rewrite_extent) {
-		bool blocks_rewritten = false;
+	// Copy all blocks of the extent that were not deduped or zero, but don't copy an entire extent
+	uint64_t bytes_zeroed = 0;
+	if (!force_insert) {
 		BEESTRACE("Rewriting extent " << e);
 		off_t last_p = e.begin();
 		off_t p = last_p;
-		off_t next_p;
+		off_t next_p = last_p;
 		BEESTRACE("next_p " << to_hex(next_p) << " p " << to_hex(p) << " last_p " << to_hex(last_p));
 		for (next_p = e.begin(); next_p < e.end(); ) {
 			p = next_p;
-			next_p += BLOCK_SIZE_SUMS;
+			next_p = min(next_p + BLOCK_SIZE_SUMS, e.end());
 
-			// BEESLOG("noinsert_set.count(" << to_hex(p) << ") " << noinsert_set.count(p));
-			if (noinsert_set.count(p)) {
+			// Can't be both dedupe and zero
+			THROW_CHECK2(runtime_error, zero_set.count(p), dedupe_set.count(p), zero_set.count(p) + dedupe_set.count(p) < 2);
+			if (zero_set.count(p)) {
+				bytes_zeroed += next_p - p;
+			}
+			// BEESLOG("dedupe_set.count(" << to_hex(p) << ") " << dedupe_set.count(p));
+			if (dedupe_set.count(p)) {
 				if (p - last_p > 0) {
-					rewrite_file_range(BeesFileRange(bfr.fd(), last_p, p));
-					blocks_rewritten = true;
+					THROW_CHECK2(runtime_error, p, e.end(), p <= e.end());
+					copy_list.push_back(BeesFileRange(bfr.fd(), last_p, p));
 				}
 				last_p = next_p;
-			} else {
-				off_t bar_p = (p - e.begin()) / BLOCK_SIZE_SUMS;
-				bar.at(bar_p) = '+';
 			}
 		}
 		BEESTRACE("last");
-		if (next_p - last_p > 0) {
-			rewrite_file_range(BeesFileRange(bfr.fd(), last_p, next_p));
-			blocks_rewritten = true;
+		if (next_p > last_p) {
+			THROW_CHECK2(runtime_error, next_p, e.end(), next_p <= e.end());
+			copy_list.push_back(BeesFileRange(bfr.fd(), last_p, next_p));
 		}
-		if (blocks_rewritten) {
-			// Nothing left to insert, all blocks clobbered
-			insert_map.clear();
-		} else {
-			// BEESLOG("No blocks rewritten");
-			BEESCOUNT(scan_no_rewrite);
-		}
 	}
 
-	// We did not rewrite the extent and it contained data, so insert it.
-	for (auto i : insert_map) {
-		off_t bar_p = (i.first - e.begin()) / BLOCK_SIZE_SUMS;
-		BEESTRACE("e " << e << "bar_p = " << bar_p << " i.first-e.begin() " << i.first - e.begin() << " i.second " << i.second.first << ", " << i.second.second);
-		if (noinsert_set.count(i.first)) {
-			// FIXME: we removed one reference to this copy. Avoid thrashing?
-			hash_table->erase_hash_addr(i.second.first, i.second.second);
-			// Block was clobbered, do not insert
-			// Will look like 'Ddddd' because we skip deduped blocks
-			bar.at(bar_p) = 'D';
-			BEESCOUNT(inserted_clobbered);
+	// Don't copy an entire extent
+	if (!bytes_zeroed && copy_list.size() == 1 && copy_list.begin()->size() == e.size()) {
+		copy_list.clear();
+	}
+
+	// Count total copies
+	uint64_t bytes_copied = 0;
+	for (const auto &i : copy_list) {
+		bytes_copied += i.size();
+	}
+
+	BEESTRACE("bar: " << bar);
+
+	// Don't do nuisance dedupes part 1: free more blocks than we create
+	THROW_CHECK3(runtime_error, bytes_copied, bytes_zeroed, bytes_deduped, bytes_copied >= bytes_zeroed);
+	const auto cost_copy = bytes_copied - bytes_zeroed;
+	const auto gain_dedupe = bytes_deduped + bytes_zeroed;
+	if (cost_copy > gain_dedupe) {
+		BEESLOGDEBUG("Too many bytes copied (" << pretty(bytes_copied) << ") for bytes deduped (" << pretty(bytes_deduped) << ") and holes punched (" << pretty(bytes_zeroed) << "), skipping extent");
+		BEESCOUNT(scan_skip_bytes);
+		force_insert = true;
+	}
+
+	// Don't do nuisance dedupes part 2: nobody needs more than 100 dedupe/copy ops in one extent
+	if (dedupe_list.size() + copy_list.size() > 100) {
+		BEESLOGDEBUG("Too many dedupe (" << dedupe_list.size() << ") and copy (" << copy_list.size() << ") operations, skipping extent");
+		BEESCOUNT(scan_skip_ops);
+		force_insert = true;
+	}
+
+	// Track whether we rewrote anything
+	bool extent_modified = false;
+
+	// If we didn't delete the dedupe list, do the dedupes now
+	for (const auto &i : dedupe_list) {
+		BEESNOTE("dedup " << i);
+		if (force_insert || m_ctx->dedup(i)) {
+			BEESCOUNT(replacedst_dedup_hit);
+			THROW_CHECK0(runtime_error, i.second);
+			for (off_t ip = i.second.begin(); ip < i.second.end(); ip += BLOCK_SIZE_SUMS) {
+				if (ip >= e.begin() && ip < e.end()) {
+					off_t bar_p = (ip - e.begin()) / BLOCK_SIZE_SUMS;
+					if (bar.at(bar_p) != '=') {
+						if (ip == i.second.begin()) {
+							bar.at(bar_p) = '<';
+						} else if (ip + BLOCK_SIZE_SUMS >= i.second.end()) {
+							bar.at(bar_p) = '>';
+						} else {
+							bar.at(bar_p) = 'd';
+						}
+					}
+				}
+			}
+			extent_modified = !force_insert;
 		} else {
+			BEESLOGINFO("dedup failed: " << i);
+			BEESCOUNT(replacedst_dedup_miss);
+			// User data changed while we were looking up the extent, or we have a bug.
+			// We can't fix this, but we can immediately stop wasting effort.
+			return;
+		}
+	}
+
+	// Then the copy/rewrites
+	for (const auto &i : copy_list) {
+		if (!force_insert) {
+			rewrite_file_range(i);
+			extent_modified = true;
+		}
+		for (auto p = i.begin(); p < i.end(); p += BLOCK_SIZE_SUMS) {
+			off_t bar_p = (p - e.begin()) / BLOCK_SIZE_SUMS;
+			// Leave zeros as-is because they aren't really copies
+			if (bar.at(bar_p) != '0') {
+				bar.at(bar_p) = '+';
+			}
+		}
+	}
+
+	if (!force_insert) {
+		// Push matched hashes to front
+		for (const auto &i : front_hash_list) {
+			hash_table->push_front_hash_addr(i.first, i.second);
+			BEESCOUNT(scan_push_front);
+		}
+		// Invalidate cached resolves
+		for (const auto &i : invalidate_addr_list) {
+			m_ctx->invalidate_addr(i);
+		}
+	}
+
+	// Don't insert hashes pointing to an extent we just deleted
+	if (!extent_modified) {
+		// We did not rewrite the extent and it contained data, so insert it.
+		// BEESLOGDEBUG("Inserting " << insert_map.size() << " hashes from " << bfr);
+		for (const auto &i : insert_map) {
 			hash_table->push_random_hash_addr(i.second.first, i.second.second);
-			bar.at(bar_p) = '.';
-			BEESCOUNT(inserted_block);
+			off_t bar_p = (i.first - e.begin()) / BLOCK_SIZE_SUMS;
+			if (bar.at(bar_p) == 'i') {
+				bar.at(bar_p) = '.';
+			}
+			BEESCOUNT(scan_hash_insert);
 		}
 	}
 
 	// Visualize
 	if (bar != string(block_count, '.')) {
-		BEESLOGINFO("scan: " << pretty(e.size()) << " " << to_hex(e.begin()) << " [" << bar << "] " << to_hex(e.end()) << ' ' << name_fd(bfr.fd()));
+		BEESLOGINFO(
+			(force_insert ? "skip" : "scan") << ": "
+			<< pretty(e.size()) << " "
+			<< dedupe_list.size() << "d" << copy_list.size() << "c"
+			<< ((bytes_zeroed + BLOCK_SIZE_SUMS - 1) / BLOCK_SIZE_SUMS) << "p"
+			<< (extent_compressed ? "z {" : " {")
+			<< to_hex(e.bytenr()) << "+" << to_hex(e.offset()) << "} "
+			<< to_hex(e.begin()) << " [" << bar << "] " << to_hex(e.end())
+			<< ' ' << name_fd(bfr.fd())
+		);
 	}
 
-	// Costs 10% on benchmarks
+	// Put this extent into the recently seen list if we didn't rewrite it,
+	// and remove it if we did.
+	lock_seen.lock();
+	if (extent_modified) {
+		s_seen.erase(tup);
+		BEESCOUNT(scan_seen_erase);
+	} else {
+		// BEESLOGDEBUG("Seen " << tup << " " << e);
+		s_seen.insert(tup);
+		BEESCOUNT(scan_seen_insert);
+	}
+	lock_seen.unlock();
+
+	// Now causes 75% loss of performance in benchmarks
 	// bees_unreadahead(bfr.fd(), bfr.begin(), bfr.size());
-	return bfr;
 }
 
 shared_ptr<Exclusion>
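Two pieces of the big hunk above reward a closer look. First, the nuisance-dedupe guard: an extent is skipped when `bytes_copied - bytes_zeroed` exceeds `bytes_deduped + bytes_zeroed`, so deduping 128 KiB that would force 512 KiB of copying fails the test, while the same dedupe with no copying passes. Second, the overlap trimming: `dedupe_list` is sorted longest-first and each later range is clipped against every kept range — dropped when fully covered, shrunk at the colliding end, or split when a kept range lands in its middle (with kept [100,200), a candidate [150,250) shrinks to [200,250)). A self-contained sketch of those clipping rules on plain offset pairs (hypothetical helper, not the bees types):

```cpp
#include <cstdint>
#include <list>
#include <utility>

using Range = std::pair<std::int64_t, std::int64_t>;	// [begin, end)

// Keep longest ranges first; clip every later range against all kept
// ranges, splitting when a kept range lands in the middle of it.
std::list<Range> trim_overlaps(std::list<Range> in)
{
	in.sort([](const Range &a, const Range &b) {
		return (b.second - b.first) < (a.second - a.first);
	});
	std::list<Range> out;
	for (auto it = in.begin(); it != in.end(); ++it) {
		Range i = *it;
		bool keep = true;
		for (const Range &j : out) {
			if (j.second <= i.first || j.first >= i.second) continue;	// disjoint
			if (j.first <= i.first && j.second >= i.second) { keep = false; break; }	// covered
			if (j.first > i.first && j.second < i.second) {
				in.push_back(Range(j.second, i.second));	// re-queue right part
				i.second = j.first;				// keep left part
				continue;
			}
			if (i.first < j.first) {
				i.second = j.first;	// clip tail
			} else {
				i.first = j.second;	// clip head
			}
		}
		if (keep && i.first < i.second) {
			out.push_back(i);
		}
	}
	return out;
}
// trim_overlaps({{100,200},{150,250}}) keeps {100,200} and clips the
// second range to {200,250}, mirroring shrink_begin() in the hunk above.
```

`std::list::sort` is stable and `push_back` never invalidates list iterators, which is what lets the split re-queue its right half onto the input list mid-iteration, just as the bees loop pushes onto `dedupe_list` inside the ranged-for.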
@@ -703,14 +891,14 @@ BeesContext::scan_forward(const BeesFileRange &bfr_in)
 	// No FD? Well, that was quick.
 	if (!bfr.fd()) {
 		// BEESLOGINFO("No FD in " << root_path() << " for " << bfr);
-		BEESCOUNT(scan_no_fd);
+		BEESCOUNT(scanf_no_fd);
 		return false;
 	}
 
 	// Sanity check
 	if (bfr.begin() >= bfr.file_size()) {
 		BEESLOGWARN("past EOF: " << bfr);
-		BEESCOUNT(scan_eof);
+		BEESCOUNT(scanf_eof);
 		return false;
 	}
 
@@ -730,9 +918,11 @@ BeesContext::scan_forward(const BeesFileRange &bfr_in)
 			// BEESLOGDEBUG("Deferring extent bytenr " << to_hex(extent_bytenr) << " from " << bfr);
 			BEESCOUNT(scanf_deferred_extent);
 			start_over = true;
+			return; // from closure
 		}
 		Timer one_extent_timer;
 		scan_one_extent(bfr, e);
+		// BEESLOGDEBUG("Scanned " << e << " " << bfr);
 		BEESCOUNTADD(scanf_extent_ms, one_extent_timer.age() * 1000);
 		BEESCOUNT(scanf_extent);
 	});
@@ -925,7 +1115,8 @@ BeesContext::start()
 		return make_shared<BeesTempFile>(shared_from_this());
 	});
 	m_logical_ino_pool.generator([]() {
-		return make_shared<BtrfsIoctlLogicalInoArgs>(0);
+		const auto extent_ref_size = sizeof(uint64_t) * 3;
+		return make_shared<BtrfsIoctlLogicalInoArgs>(0, BEES_MAX_EXTENT_REF_COUNT * extent_ref_size + sizeof(btrfs_data_container));
 	});
 	m_tmpfile_pool.checkin([](const shared_ptr<BeesTempFile> &btf) {
 		catch_all([&](){
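The last hunk above sizes the LOGICAL_INO buffer explicitly instead of taking the library default: each extent reference the kernel reports costs three u64 values (the root/inode/offset triplet), so the buffer needs room for `BEES_MAX_EXTENT_REF_COUNT` triplets plus the fixed `btrfs_data_container` header. A worked sketch of that arithmetic (the 9999 cap is illustrative, not the bees constant):

```cpp
#include <cstddef>
#include <cstdint>
#include <linux/btrfs.h>	// struct btrfs_data_container

constexpr std::size_t max_refs = 9999;	// stand-in for BEES_MAX_EXTENT_REF_COUNT
constexpr std::size_t extent_ref_size = sizeof(std::uint64_t) * 3;	// root, inode, offset
constexpr std::size_t buf_size = max_refs * extent_ref_size + sizeof(btrfs_data_container);
// Assuming the 16-byte header in current kernel headers:
// 9999 * 24 + 16 = 239992 bytes per pooled argument buffer.
```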
@@ -356,6 +356,8 @@ BeesHashTable::prefetch_loop()
|
|||||||
auto avg_rates = thisStats / m_ctx->total_timer().age();
|
auto avg_rates = thisStats / m_ctx->total_timer().age();
|
||||||
graph_blob << "\t" << avg_rates << "\n";
|
graph_blob << "\t" << avg_rates << "\n";
|
||||||
|
|
||||||
|
graph_blob << m_ctx->get_progress();
|
||||||
|
|
||||||
BEESLOGINFO(graph_blob.str());
|
BEESLOGINFO(graph_blob.str());
|
||||||
catch_all([&]() {
|
catch_all([&]() {
|
||||||
m_stats_file.write(graph_blob.str());
|
m_stats_file.write(graph_blob.str());
|
||||||
@@ -446,10 +448,38 @@ BeesHashTable::fetch_missing_extent_by_index(uint64_t extent_index)
|
|||||||
|
|
||||||
// If we are in prefetch, give the kernel a hint about the next extent
|
// If we are in prefetch, give the kernel a hint about the next extent
|
||||||
if (m_prefetch_running) {
|
if (m_prefetch_running) {
|
||||||
// XXX: don't call this if bees_readahead is implemented by pread()
|
// Use the kernel readahead here, because it might work for this use case
|
||||||
bees_readahead(m_fd, dirty_extent_offset + dirty_extent_size, dirty_extent_size);
|
readahead(m_fd, dirty_extent_offset + dirty_extent_size, dirty_extent_size);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
Cell *cell = m_extent_ptr[extent_index ].p_buckets[0].p_cells;
|
||||||
|
Cell *cell_end = m_extent_ptr[extent_index + 1].p_buckets[0].p_cells;
|
||||||
|
size_t toxic_cleared_count = 0;
|
||||||
|
set<BeesHashTable::Cell> seen_it(cell, cell_end);
|
||||||
|
while (cell < cell_end) {
|
||||||
|
if (cell->e_addr & BeesAddress::c_toxic_mask) {
|
||||||
|
++toxic_cleared_count;
|
||||||
|
cell->e_addr &= ~BeesAddress::c_toxic_mask;
|
||||||
|
// Clearing the toxic bit might mean we now have a duplicate.
|
||||||
|
// This could be due to a race between two
|
||||||
|
// inserts, one finds the extent toxic while the
|
||||||
|
// other does not. That's arguably a bug elsewhere,
|
||||||
|
// but we should rewrite the whole extent lookup/insert
|
||||||
|
// loop, not spend time fixing code that will be
|
||||||
|
// thrown out later anyway.
|
||||||
|
// If there is a cell that is identical to this one
|
||||||
|
// except for the toxic bit, then we don't need this one.
|
||||||
|
if (seen_it.count(*cell)) {
|
||||||
|
cell->e_addr = 0;
|
||||||
|
cell->e_hash = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
++cell;
|
||||||
|
}
|
||||||
|
if (toxic_cleared_count) {
|
||||||
|
BEESLOGDEBUG("Cleared " << toxic_cleared_count << " hashes while fetching hash table extent " << extent_index);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@@ -384,7 +384,7 @@ BeesResolver::for_each_extent_ref(BeesBlockData bbd, function<bool(const BeesFil
     return stop_now;
 }
 
-BeesFileRange
+BeesRangePair
 BeesResolver::replace_dst(const BeesFileRange &dst_bfr_in)
 {
     BEESTRACE("replace_dst dst_bfr " << dst_bfr_in);
@@ -400,6 +400,7 @@ BeesResolver::replace_dst(const BeesFileRange &dst_bfr_in)
     BEESTRACE("overlap_bfr " << overlap_bfr);
 
     BeesBlockData bbd(dst_bfr);
+    BeesRangePair rv = { BeesFileRange(), BeesFileRange() };
 
     for_each_extent_ref(bbd, [&](const BeesFileRange &src_bfr_in) -> bool {
         // Open src
@@ -436,21 +437,12 @@ BeesResolver::replace_dst(const BeesFileRange &dst_bfr_in)
             BEESCOUNT(replacedst_grown);
         }
 
-        // Dedup
-        BEESNOTE("dedup " << brp);
-        if (m_ctx->dedup(brp)) {
-            BEESCOUNT(replacedst_dedup_hit);
-            m_found_dup = true;
-            overlap_bfr = brp.second;
-            // FIXME: find best range first, then dedupe that
-            return true; // i.e. break
-        } else {
-            BEESCOUNT(replacedst_dedup_miss);
-            return false; // i.e. continue
-        }
+        rv = brp;
+        m_found_dup = true;
+        return true;
     });
     // BEESLOG("overlap_bfr after " << overlap_bfr);
-    return overlap_bfr.copy_closed();
+    return rv;
 }
 
 BeesFileRange
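replace_dst() now hands the winning BeesRangePair back to the caller instead of deduping inside the visitor, and `return true` is the loop's break convention: for_each_extent_ref stops as soon as the visitor returns true. A generic, self-contained sketch of that convention (for_each_until and the sample data are invented for illustration):

// Generic sketch of the visitor-returns-bool convention:
// returning true stops the iteration early.
#include <functional>
#include <vector>

template <typename T>
void for_each_until(const std::vector<T> &items,
                    const std::function<bool(const T&)> &visitor) {
    for (const auto &item : items) {
        if (visitor(item)) break;   // true means "stop now"
    }
}

int main() {
    int found = -1;
    for_each_until<int>({1, 3, 4, 5}, [&](const int &i) -> bool {
        if (i % 2) return false;    // i.e. continue
        found = i;
        return true;                // i.e. break
    });
    return found == 4 ? 0 : 1;
}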
File diff suppressed because it is too large
@@ -183,6 +183,24 @@ BeesFileRange::grow_begin(off_t delta)
     return m_begin;
 }
 
+off_t
+BeesFileRange::shrink_begin(off_t delta)
+{
+    THROW_CHECK1(invalid_argument, delta, delta > 0);
+    THROW_CHECK3(invalid_argument, delta, m_begin, m_end, delta + m_begin < m_end);
+    m_begin += delta;
+    return m_begin;
+}
+
+off_t
+BeesFileRange::shrink_end(off_t delta)
+{
+    THROW_CHECK1(invalid_argument, delta, delta > 0);
+    THROW_CHECK2(invalid_argument, delta, m_end, m_end >= delta);
+    m_end -= delta;
+    return m_end;
+}
+
 BeesFileRange::BeesFileRange(const BeesBlockData &bbd) :
     m_fd(bbd.fd()),
     m_begin(bbd.begin()),
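shrink_begin() and shrink_end() mirror the existing grow methods; the THROW_CHECKs keep the half-open range [m_begin, m_end) valid. A worked example of the invariants with concrete numbers (plain locals here, not the real class):

// Worked example of the shrink invariants on [m_begin, m_end).
#include <cassert>
#include <sys/types.h>

int main() {
    off_t m_begin = 4096, m_end = 16384;

    // shrink_begin(delta): requires delta > 0 and m_begin + delta < m_end,
    // so the range stays non-empty.
    off_t delta = 8192;
    assert(delta > 0 && m_begin + delta < m_end);
    m_begin += delta;                     // range is now [12288, 16384)

    // shrink_end(delta): requires delta > 0 and m_end >= delta.
    delta = 2048;
    assert(delta > 0 && m_end >= delta);
    m_end -= delta;                       // range is now [12288, 14336)
    assert(m_begin < m_end);              // still a valid, non-empty range
    return 0;
}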
@@ -349,8 +367,8 @@ BeesRangePair::grow(shared_ptr<BeesContext> ctx, bool constrained)
     BEESTRACE("e_second " << e_second);
 
     // Preread entire extent
-    bees_readahead(second.fd(), e_second.begin(), e_second.size());
-    bees_readahead(first.fd(), e_second.begin() + first.begin() - second.begin(), e_second.size());
+    bees_readahead_pair(second.fd(), e_second.begin(), e_second.size(),
+        first.fd(), e_second.begin() + first.begin() - second.begin(), e_second.size());
 
     auto hash_table = ctx->hash_table();
 
@@ -388,17 +406,6 @@ BeesRangePair::grow(shared_ptr<BeesContext> ctx, bool constrained)
             break;
         }
 
-        // Source extent cannot be toxic
-        BeesAddress first_addr(first.fd(), new_first.begin());
-        if (!first_addr.is_magic()) {
-            auto first_resolved = ctx->resolve_addr(first_addr);
-            if (first_resolved.is_toxic()) {
-                BEESLOGWARN("WORKAROUND: not growing matching pair backward because src addr is toxic:\n" << *this);
-                BEESCOUNT(pairbackward_toxic_addr);
-                break;
-            }
-        }
-
         // Extend second range.  If we hit BOF we can go no further.
         BeesFileRange new_second = second;
         BEESTRACE("new_second = " << new_second);
@@ -434,6 +441,7 @@ BeesRangePair::grow(shared_ptr<BeesContext> ctx, bool constrained)
         }
 
         // Source block cannot be zero in a non-compressed non-magic extent
+        BeesAddress first_addr(first.fd(), new_first.begin());
         if (first_bbd.is_data_zero() && !first_addr.is_magic() && !first_addr.is_compressed()) {
             BEESCOUNT(pairbackward_zero);
             break;
@@ -491,17 +499,6 @@ BeesRangePair::grow(shared_ptr<BeesContext> ctx, bool constrained)
             break;
         }
 
-        // Source extent cannot be toxic
-        BeesAddress first_addr(first.fd(), new_first.begin());
-        if (!first_addr.is_magic()) {
-            auto first_resolved = ctx->resolve_addr(first_addr);
-            if (first_resolved.is_toxic()) {
-                BEESLOGWARN("WORKAROUND: not growing matching pair forward because src is toxic:\n" << *this);
-                BEESCOUNT(pairforward_toxic);
-                break;
-            }
-        }
-
         // Extend second range.  If we hit EOF we can go no further.
         BeesFileRange new_second = second;
         BEESTRACE("new_second = " << new_second);
@@ -545,6 +542,7 @@ BeesRangePair::grow(shared_ptr<BeesContext> ctx, bool constrained)
         }
 
         // Source block cannot be zero in a non-compressed non-magic extent
+        BeesAddress first_addr(first.fd(), new_first.begin());
         if (first_bbd.is_data_zero() && !first_addr.is_magic() && !first_addr.is_compressed()) {
             BEESCOUNT(pairforward_zero);
             break;
@@ -589,6 +587,22 @@ BeesRangePair::copy_closed() const
     return BeesRangePair(first.copy_closed(), second.copy_closed());
 }
 
+void
+BeesRangePair::shrink_begin(off_t const delta)
+{
+    first.shrink_begin(delta);
+    second.shrink_begin(delta);
+    THROW_CHECK2(runtime_error, first.size(), second.size(), first.size() == second.size());
+}
+
+void
+BeesRangePair::shrink_end(off_t const delta)
+{
+    first.shrink_end(delta);
+    second.shrink_end(delta);
+    THROW_CHECK2(runtime_error, first.size(), second.size(), first.size() == second.size());
+}
+
 ostream &
 operator<<(ostream &os, const BeesAddress &ba)
 {
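BeesRangePair::shrink_begin/shrink_end trim both halves of a matched pair in lockstep and assert that the two sizes stay equal, which is what keeps a later dedup request well-formed. A self-contained analog using an invented Range struct in place of BeesFileRange:

// Self-contained analog (not bees types): trim two equal-sized ranges
// in lockstep, as BeesRangePair::shrink_begin does above.
#include <cassert>
#include <sys/types.h>
#include <utility>

struct Range {
    off_t begin, end;
    off_t size() const { return end - begin; }
};

static void pair_shrink_begin(std::pair<Range, Range> &p, off_t delta) {
    p.first.begin  += delta;
    p.second.begin += delta;
    assert(p.first.size() == p.second.size());  // the THROW_CHECK2 invariant
}

int main() {
    std::pair<Range, Range> p{Range{0, 8192}, Range{4096, 12288}};
    pair_shrink_begin(p, 4096);
    assert(p.first.begin == 4096 && p.second.begin == 8192);
    return 0;
}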
@@ -14,7 +14,7 @@ Load management options:
     -g, --loadavg-target    Target load average for worker threads (default none)
 
 Filesystem tree traversal options:
-    -m, --scan-mode         Scanning mode (0..2, default 0)
+    -m, --scan-mode         Scanning mode (0..4, default 4)
 
 Workarounds:
     -a, --workaround-btrfs-send    Workaround for btrfs send
src/bees.cc (67 changed lines)
@@ -214,9 +214,35 @@ BeesTooLong::operator=(const func_type &f)
     return *this;
 }
 
-void
-bees_readahead(int const fd, const off_t offset, const size_t size)
+static
+bool
+bees_readahead_check(int const fd, off_t const offset, size_t const size)
 {
+    // FIXME: the rest of the code calls this function more often than necessary,
+    // usually back-to-back calls on the same range in a loop.
+    // Simply discard requests that are identical to recent requests from the same thread.
+    const Stat stat_rv(fd);
+    auto tup = make_tuple(offset, size, stat_rv.st_dev, stat_rv.st_ino);
+    static mutex s_recent_mutex;
+    static set<decltype(tup)> s_recent;
+    unique_lock<mutex> lock(s_recent_mutex);
+    if (s_recent.size() > BEES_MAX_EXTENT_REF_COUNT) {
+        s_recent.clear();
+        BEESCOUNT(readahead_clear);
+    }
+    const auto rv = s_recent.insert(tup);
+    // If we recently did this readahead, we're done here
+    if (!rv.second) {
+        BEESCOUNT(readahead_skip);
+    }
+    return rv.second;
+}
+
+static
+void
+bees_readahead_nolock(int const fd, const off_t offset, const size_t size)
+{
+    if (!bees_readahead_check(fd, size, offset)) return;
     Timer readahead_timer;
     BEESNOTE("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
     BEESTOOLONG("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
@@ -225,10 +251,8 @@ bees_readahead(int const fd, const off_t offset, const size_t size)
     DIE_IF_NON_ZERO(readahead(fd, offset, size));
 #else
     // Make sure this data is in page cache by brute force
-    // This isn't necessary and it might even be slower,
-    // but the btrfs kernel code does readahead with lower ioprio
-    // and might discard the readahead request entirely,
-    // so it's maybe, *maybe*, worth doing both.
+    // The btrfs kernel code does readahead with lower ioprio
+    // and might discard the readahead request entirely.
     BEESNOTE("emulating readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
     auto working_size = size;
     auto working_offset = offset;
@@ -249,6 +273,28 @@ bees_readahead(int const fd, const off_t offset, const size_t size)
     BEESCOUNTADD(readahead_ms, readahead_timer.age() * 1000);
 }
 
+static mutex s_only_one;
+
+void
+bees_readahead_pair(int fd, off_t offset, size_t size, int fd2, off_t offset2, size_t size2)
+{
+    if (!bees_readahead_check(fd, size, offset) && !bees_readahead_check(fd2, offset2, size2)) return;
+    BEESNOTE("waiting to readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size) << ","
+        << "\n\t" << name_fd(fd2) << " offset " << to_hex(offset2) << " len " << pretty(size2));
+    unique_lock<mutex> m_lock(s_only_one);
+    bees_readahead_nolock(fd, offset, size);
+    bees_readahead_nolock(fd2, offset2, size2);
+}
+
+void
+bees_readahead(int const fd, const off_t offset, const size_t size)
+{
+    if (!bees_readahead_check(fd, size, offset)) return;
+    BEESNOTE("waiting to readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
+    unique_lock<mutex> m_lock(s_only_one);
+    bees_readahead_nolock(fd, offset, size);
+}
+
 void
 bees_unreadahead(int const fd, off_t offset, size_t size)
 {
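All readahead now funnels through bees_readahead_nolock() under the single s_only_one mutex, so the two halves of a pair are read together instead of interleaving with other requests, and bees_readahead_check() drops a request when the same (offset, size, device, inode) tuple was seen recently. A self-contained sketch of that duplicate-suppression idiom; should_do_work and the bound of 9999 are illustrative, not the bees API:

// Sketch of the duplicate-suppression idiom in bees_readahead_check:
// remember recent requests in a bounded set, skip repeats.
#include <mutex>
#include <set>
#include <tuple>

static bool should_do_work(int dev, int ino, long offset, long size) {
    using Key = std::tuple<long, long, int, int>;
    static std::mutex s_mutex;
    static std::set<Key> s_recent;
    const Key key{offset, size, dev, ino};
    std::unique_lock<std::mutex> lock(s_mutex);
    if (s_recent.size() > 9999) s_recent.clear();  // crude bound, like the hunk
    return s_recent.insert(key).second;            // false if seen recently
}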
@@ -603,7 +649,7 @@ bees_main(int argc, char *argv[])
     unsigned thread_min = 0;
     double load_target = 0;
     bool workaround_btrfs_send = false;
-    BeesRoots::ScanMode root_scan_mode = BeesRoots::SCAN_MODE_INDEPENDENT;
+    BeesRoots::ScanMode root_scan_mode = BeesRoots::SCAN_MODE_EXTENT;
 
     // Configure getopt_long
     static const struct option long_options[] = {
@@ -751,6 +797,13 @@ bees_main(int argc, char *argv[])
     // Set root scan mode
     bc->roots()->set_scan_mode(root_scan_mode);
 
+    if (root_scan_mode == BeesRoots::SCAN_MODE_EXTENT) {
+        MultiLocker::enable_locking(false);
+    } else {
+        // Workaround for a kernel bug that the subvol-based crawlers keep triggering
+        MultiLocker::enable_locking(true);
+    }
+
     // Start crawlers
     bc->start();
 
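With SCAN_MODE_EXTENT as the new default, MultiLocker serialization stays on only for the legacy subvol crawlers. Translating the -m option value into the enum is a bounds-checked integer cast; a sketch under the assumption that the enum matches the src/bees.h hunk below (parse_scan_mode is invented, not the actual bees option parser):

// Sketch of -m option parsing, assuming the ScanMode enum from the
// src/bees.h hunk below (values 0..4, SCAN_MODE_COUNT as sentinel).
#include <cstdlib>
#include <stdexcept>

enum ScanMode { SCAN_MODE_LOCKSTEP, SCAN_MODE_INDEPENDENT, SCAN_MODE_SEQUENTIAL,
                SCAN_MODE_RECENT, SCAN_MODE_EXTENT, SCAN_MODE_COUNT };

static ScanMode parse_scan_mode(const char *arg) {
    const long v = std::strtol(arg, nullptr, 10);
    if (v < 0 || v >= SCAN_MODE_COUNT) {
        throw std::invalid_argument("scan mode out of range 0..4");
    }
    return static_cast<ScanMode>(v);
}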
src/bees.h (48 changed lines)
@@ -78,13 +78,13 @@ const int BEES_PROGRESS_INTERVAL = BEES_STATS_INTERVAL;
 const int BEES_STATUS_INTERVAL = 1;
 
 // Number of file FDs to cache when not in active use
-const size_t BEES_FILE_FD_CACHE_SIZE = 4096;
+const size_t BEES_FILE_FD_CACHE_SIZE = 32768;
 
 // Number of root FDs to cache when not in active use
-const size_t BEES_ROOT_FD_CACHE_SIZE = 1024;
+const size_t BEES_ROOT_FD_CACHE_SIZE = 4096;
 
 // Number of FDs to open (rlimit)
-const size_t BEES_OPEN_FILE_LIMIT = (BEES_FILE_FD_CACHE_SIZE + BEES_ROOT_FD_CACHE_SIZE) * 2 + 100;
+const size_t BEES_OPEN_FILE_LIMIT = BEES_FILE_FD_CACHE_SIZE + BEES_ROOT_FD_CACHE_SIZE + 100;
 
 // Worker thread factor (multiplied by detected number of CPU cores)
 const double BEES_DEFAULT_THREAD_FACTOR = 1.0;
@@ -93,10 +93,11 @@ const double BEES_DEFAULT_THREAD_FACTOR = 1.0;
 const double BEES_TOO_LONG = 5.0;
 
 // Avoid any extent where LOGICAL_INO takes this much kernel CPU time
-const double BEES_TOXIC_SYS_DURATION = 0.1;
+const double BEES_TOXIC_SYS_DURATION = 5.0;
 
-// Maximum number of refs to a single extent
-const size_t BEES_MAX_EXTENT_REF_COUNT = (16 * 1024 * 1024 / 24) - 1;
+// Maximum number of refs to a single extent before we have other problems
+// If we have more than 10K refs to an extent, adding another will save 0.01% space
+const size_t BEES_MAX_EXTENT_REF_COUNT = 9999; // (16 * 1024 * 1024 / 24);
 
 // How long between hash table histograms
 const double BEES_HASH_TABLE_ANALYZE_INTERVAL = BEES_STATS_INTERVAL;
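The rlimit arithmetic changes along with the cache sizes: the old formula allowed (4096 + 1024) * 2 + 100 = 10340 descriptors, the new one 32768 + 4096 + 100 = 36964. Checked at compile time:

// Worked arithmetic for the rlimit change, verified at compile time.
#include <cstddef>

constexpr size_t OLD_LIMIT = (4096 + 1024) * 2 + 100;   // previous formula
constexpr size_t NEW_LIMIT = 32768 + 4096 + 100;        // new formula
static_assert(OLD_LIMIT == 10340, "old rlimit");
static_assert(NEW_LIMIT == 36964, "new rlimit");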
@@ -123,7 +124,7 @@ const int FLAGS_OPEN_FANOTIFY = O_RDWR | O_NOATIME | O_CLOEXEC | O_LARGEFILE;
 #define BEESLOG(lv,x) do { if (lv < bees_log_level) { Chatter __chatter(lv, BeesNote::get_name()); __chatter << x; } } while (0)
 #define BEESLOGTRACE(x) do { BEESLOG(LOG_DEBUG, x); BeesTracer::trace_now(); } while (0)
 
-#define BEESTRACE(x) BeesTracer SRSLY_WTF_C(beesTracer_, __LINE__) ([&]() { BEESLOG(LOG_ERR, x); })
+#define BEESTRACE(x) BeesTracer SRSLY_WTF_C(beesTracer_, __LINE__) ([&]() { BEESLOG(LOG_ERR, x << " at " << __FILE__ << ":" << __LINE__); })
 #define BEESTOOLONG(x) BeesTooLong SRSLY_WTF_C(beesTooLong_, __LINE__) ([&](ostream &_btl_os) { _btl_os << x; })
 #define BEESNOTE(x) BeesNote SRSLY_WTF_C(beesNote_, __LINE__) ([&](ostream &_btl_os) { _btl_os << x; })
 
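BEESTRACE plants an RAII tracer whose lambda fires only if the stack unwinds through it, and the change appends __FILE__:__LINE__ to the message. A minimal sketch of the unwind-tracer pattern; TracerSketch is not the real BeesTracer, and the real macro pastes __LINE__ into the variable name (SRSLY_WTF_C) so several can share a scope:

// Minimal sketch of the RAII unwind-tracer pattern behind BEESTRACE.
#include <exception>
#include <functional>
#include <iostream>

struct TracerSketch {
    std::function<void()> m_fn;
    ~TracerSketch() {
        if (std::uncaught_exceptions() > 0) m_fn();  // log only during unwind
    }
};

#define TRACE_SKETCH(x) TracerSketch _tracer{[&]() { \
    std::cerr << x << " at " << __FILE__ << ":" << __LINE__ << "\n"; }}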
@@ -299,6 +300,11 @@ public:
     off_t grow_begin(off_t delta);
     /// @}
 
+    /// @{ Make range smaller
+    off_t shrink_end(off_t delta);
+    off_t shrink_begin(off_t delta);
+    /// @}
+
     friend ostream & operator<<(ostream &os, const BeesFileRange &bfr);
 };
 
@@ -515,7 +521,7 @@ class BeesCrawl {
 
     bool fetch_extents();
     void fetch_extents_harder();
-    bool next_transid();
+    bool restart_crawl();
     BeesFileRange bti_to_bfr(const BtrfsTreeItem &bti) const;
 
 public:
@@ -527,6 +533,8 @@ public:
     BeesCrawlState get_state_end() const;
     void set_state(const BeesCrawlState &bcs);
     void deferred(bool def_setting);
+    bool deferred() const;
+    bool finished() const;
 };
 
 class BeesScanMode;
@@ -555,16 +563,12 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> {
     bool m_stop_requested = false;
 
     void insert_new_crawl();
-    void insert_root(const BeesCrawlState &bcs);
     Fd open_root_nocache(uint64_t root);
     Fd open_root_ino_nocache(uint64_t root, uint64_t ino);
-    uint64_t transid_min();
-    uint64_t transid_max();
     uint64_t transid_max_nocache();
     void state_load();
     ostream &state_to_stream(ostream &os);
     void state_save();
-    bool crawl_roots();
     string crawl_state_filename() const;
     void crawl_state_set_dirty();
     void crawl_state_erase(const BeesCrawlState &bcs);
@@ -576,6 +580,9 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> {
     bool crawl_batch(shared_ptr<BeesCrawl> crawl);
     void clear_caches();
 
+    friend class BeesScanModeExtent;
+    shared_ptr<BeesCrawl> insert_root(const BeesCrawlState &bcs);
+
     friend class BeesCrawl;
     friend class BeesFdCache;
     friend class BeesScanMode;
@@ -594,17 +601,20 @@ public:
     Fd open_root_ino(const BeesFileId &bfi) { return open_root_ino(bfi.root(), bfi.ino()); }
     bool is_root_ro(uint64_t root);
 
-    // TODO: do extent-tree scans instead
     enum ScanMode {
         SCAN_MODE_LOCKSTEP,
         SCAN_MODE_INDEPENDENT,
         SCAN_MODE_SEQUENTIAL,
         SCAN_MODE_RECENT,
+        SCAN_MODE_EXTENT,
         SCAN_MODE_COUNT, // must be last
     };
 
     void set_scan_mode(ScanMode new_mode);
     void set_workaround_btrfs_send(bool do_avoid);
+
+    uint64_t transid_min();
+    uint64_t transid_max();
 };
 
 struct BeesHash {
@@ -664,6 +674,8 @@ class BeesRangePair : public pair<BeesFileRange, BeesFileRange> {
 public:
     BeesRangePair(const BeesFileRange &src, const BeesFileRange &dst);
     bool grow(shared_ptr<BeesContext> ctx, bool constrained);
+    void shrink_begin(const off_t delta);
+    void shrink_end(const off_t delta);
     BeesRangePair copy_closed() const;
     bool operator<(const BeesRangePair &that) const;
     friend ostream & operator<<(ostream &os, const BeesRangePair &brp);
@@ -737,11 +749,14 @@ class BeesContext : public enable_shared_from_this<BeesContext> {
     shared_ptr<BeesThread> m_progress_thread;
     shared_ptr<BeesThread> m_status_thread;
 
+    mutex m_progress_mtx;
+    string m_progress_str;
+
     void set_root_fd(Fd fd);
 
     BeesResolveAddrResult resolve_addr_uncached(BeesAddress addr);
 
-    BeesFileRange scan_one_extent(const BeesFileRange &bfr, const Extent &e);
+    void scan_one_extent(const BeesFileRange &bfr, const Extent &e);
     void rewrite_file_range(const BeesFileRange &bfr);
 
 public:
@@ -772,6 +787,8 @@ public:
 
     void dump_status();
     void show_progress();
+    void set_progress(const string &str);
+    string get_progress();
 
     void start();
     void stop();
@@ -834,7 +851,7 @@ public:
     BeesFileRange find_one_match(BeesHash hash);
 
     void replace_src(const BeesFileRange &src_bfr);
-    BeesFileRange replace_dst(const BeesFileRange &dst_bfr);
+    BeesRangePair replace_dst(const BeesFileRange &dst_bfr);
 
     bool found_addr() const { return m_found_addr; }
     bool found_data() const { return m_found_data; }
@@ -868,6 +885,7 @@ extern const char *BEES_VERSION;
 extern thread_local default_random_engine bees_generator;
 string pretty(double d);
 void bees_readahead(int fd, off_t offset, size_t size);
+void bees_readahead_pair(int fd, off_t offset, size_t size, int fd2, off_t offset2, size_t size2);
 void bees_unreadahead(int fd, off_t offset, size_t size);
 string format_time(time_t t);
 
@@ -8,6 +8,7 @@ PROGRAMS = \
     process \
     progress \
     seeker \
+    table \
     task \
 
 all: test
test/table.cc (new file, 63 lines)
@@ -0,0 +1,63 @@
+#include "tests.h"
+
+#include "crucible/table.h"
+
+using namespace crucible;
+using namespace std;
+
+void
+print_table(const Table::Table& t)
+{
+    cerr << "BEGIN TABLE\n";
+    cerr << t;
+    cerr << "END TABLE\n";
+    cerr << endl;
+}
+
+void
+test_table()
+{
+    Table::Table t;
+    t.insert_row(Table::endpos, vector<Table::Content> {
+        Table::Text("Hello, World!"),
+        Table::Text("2"),
+        Table::Text("3"),
+        Table::Text("4"),
+    });
+    print_table(t);
+    t.insert_row(Table::endpos, vector<Table::Content> {
+        Table::Text("Greeting"),
+        Table::Text("two"),
+        Table::Text("three"),
+        Table::Text("four"),
+    });
+    print_table(t);
+    t.insert_row(Table::endpos, vector<Table::Content> {
+        Table::Fill('-'),
+        Table::Text("ii"),
+        Table::Text("iii"),
+        Table::Text("iv"),
+    });
+    print_table(t);
+    t.mid(" | ");
+    t.left("| ");
+    t.right(" |");
+    print_table(t);
+    t.insert_col(1, vector<Table::Content> {
+        Table::Text("1"),
+        Table::Text("one"),
+        Table::Text("i"),
+        Table::Text("I"),
+    });
+    print_table(t);
+    t.at(2, 1) = Table::Text("Two\nLines");
+    print_table(t);
+}
+
+int
+main(int, char**)
+{
+    RUN_A_TEST(test_table());
+
+    exit(EXIT_SUCCESS);
+}