mirror of
https://github.com/Zygo/bees.git
synced 2025-08-02 05:43:29 +02:00
Compare commits
152 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
849c071146 | ||
|
85ff543695 | ||
|
8147f80a5a | ||
|
cbde237f79 | ||
|
3b85fc8bc7 | ||
|
4df1b2c834 | ||
|
495218104a | ||
|
e82ce3c06e | ||
|
bd336e81a6 | ||
|
ea17c89165 | ||
|
ccd8dcd43f | ||
|
facf4121a6 | ||
|
cbc76a7457 | ||
|
28ee2ae1a8 | ||
|
d27621b779 | ||
|
cb2c20ccc9 | ||
|
ded5bf0148 | ||
|
d5de012a17 | ||
|
66d1e8a89b | ||
|
c327e0bb10 | ||
|
9587c40677 | ||
|
a115587fad | ||
|
af6ecbc69b | ||
|
563e584da4 | ||
|
c5889049f0 | ||
|
ecaed09128 | ||
|
64dab81e42 | ||
|
cfcdac110b | ||
|
c3b664fea5 | ||
|
66b00f8a97 | ||
|
bbcfd9daa6 | ||
|
d6d3e1045e | ||
|
d5d17cbe62 | ||
|
48dd2a45fe | ||
|
7267707687 | ||
|
984ceeb2a5 | ||
|
03f809bf22 | ||
|
0dca6f74b0 | ||
|
f5c4714a28 | ||
|
de96a38460 | ||
|
82c2b5bafe | ||
|
d725f3c66c | ||
|
84f91af503 | ||
|
31d26bcfc6 | ||
|
e13c62084b | ||
|
7cef1133be | ||
|
f98599407f | ||
|
4d59939b07 | ||
|
24b904f002 | ||
|
23c16aa978 | ||
|
152e69a6d1 | ||
|
148cc03060 | ||
|
b699325a77 | ||
|
a59d89ea81 | ||
|
d1015b683f | ||
|
9cdeb608f5 | ||
|
83a2b010e6 | ||
|
31b2aa3c0d | ||
|
594ad1786d | ||
|
b143664747 | ||
|
a85ada3a49 | ||
|
46a38fe016 | ||
|
2aafa802a9 | ||
|
cdef59e2f3 | ||
|
dc2dc8d08a | ||
|
7873988dac | ||
|
3f740d6b2d | ||
|
c0a7533dd4 | ||
|
090fa39995 | ||
|
2f25f89067 | ||
|
7fdb87143c | ||
|
d345ea2b78 | ||
|
a2e1887c52 | ||
|
4a4a2de89f | ||
|
cc87125e41 | ||
|
be9321cdb3 | ||
|
a9c81e5531 | ||
|
942800ad00 | ||
|
21c08008e6 | ||
|
30ece57116 | ||
|
6556566f54 | ||
|
ece58cc910 | ||
|
331cb142e3 | ||
|
5953ea6d3c | ||
|
07a4c9e8c0 | ||
|
8f6f8e4ac2 | ||
|
972721016b | ||
|
5040303f50 | ||
|
3654738f56 | ||
|
be3c54e14c | ||
|
2751905f1d | ||
|
587588d53f | ||
|
14ce81c081 | ||
|
dffd6e0b13 | ||
|
a32cd5247f | ||
|
9c68f15474 | ||
|
5f3cb9b374 | ||
|
a52062822a | ||
|
fbf6b395c8 | ||
|
26acc6adfd | ||
|
01734e6d4b | ||
|
84094c7cb9 | ||
|
a3d2bc26d5 | ||
|
d0c35b4734 | ||
|
a83c68eb18 | ||
|
6d6686eb5b | ||
|
007067b83f | ||
|
bb5160987e | ||
|
feed04c944 | ||
|
670fce5be5 | ||
|
ff3b5a7a1b | ||
|
13ec4b5165 | ||
|
5d7e815eb4 | ||
|
7f67f55746 | ||
|
0103a04ca0 | ||
|
5e346beb2d | ||
|
85c93c10e6 | ||
|
ba694b4881 | ||
|
73f94750ec | ||
|
5e379b4c48 | ||
|
6325f9ed72 | ||
|
c698fd7211 | ||
|
95347a08bb | ||
|
eb2630dee6 | ||
|
b828f14dd1 | ||
|
ecf110f377 | ||
|
7f7f919d08 | ||
|
11fabd66a8 | ||
|
a60c53a9e1 | ||
|
01cb75ac0e | ||
|
7a8d98f94d | ||
|
fcd847bbf9 | ||
|
e861957632 | ||
|
fb0e676ee8 | ||
|
b2db140666 | ||
|
55dc98e21a | ||
|
14cd6ed033 | ||
|
99709d889f | ||
|
bba6f4f183 | ||
|
daf8a2cde1 | ||
|
ba1f3b93e4 | ||
|
f0eb9b202f | ||
|
2e36dd2d58 | ||
|
2f14a5a9c7 | ||
|
cf4091b352 | ||
|
587870911f | ||
|
d384f3eec0 | ||
|
596f2c7dbf | ||
|
84adbaecf9 | ||
|
12e80658a8 | ||
|
2427dd370e | ||
|
8aa343cecb |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,7 +1,8 @@
|
||||
*.[ao]
|
||||
*.bak
|
||||
*.new
|
||||
*.dep
|
||||
*.new
|
||||
*.tmp
|
||||
*.so*
|
||||
Doxyfile
|
||||
README.html
|
||||
|
@@ -2,6 +2,7 @@ MAKE += PREFIX=$(PREFIX) LIBEXEC_PREFIX=$(LIBEXEC_PREFIX) ETC_PREFIX=$(ETC_PREFI
|
||||
|
||||
define TEMPLATE_COMPILER =
|
||||
sed $< >$@ \
|
||||
-e's#@DESTDIR@#$(DESTDIR)#' \
|
||||
-e's#@PREFIX@#$(PREFIX)#' \
|
||||
-e's#@ETC_PREFIX@#$(ETC_PREFIX)#' \
|
||||
-e's#@LIBEXEC_PREFIX@#$(LIBEXEC_PREFIX)#'
|
||||
|
2
Makefile
2
Makefile
@@ -61,7 +61,7 @@ install_bees: src $(RUN_INSTALL_TESTS)
|
||||
install_scripts: ## Install scripts
|
||||
install_scripts: scripts
|
||||
install -Dm755 scripts/beesd $(DESTDIR)$(PREFIX)/sbin/beesd
|
||||
install -Dm644 scripts/beesd.conf.sample $(DESTDIR)/$(ETC_PREFIX)/bees/beesd.conf.sample
|
||||
install -Dm644 scripts/beesd.conf.sample $(DESTDIR)$(ETC_PREFIX)/bees/beesd.conf.sample
|
||||
ifneq ($(SYSTEMD_SYSTEM_UNIT_DIR),)
|
||||
install -Dm644 scripts/beesd@.service $(DESTDIR)$(SYSTEMD_SYSTEM_UNIT_DIR)/beesd@.service
|
||||
endif
|
||||
|
@@ -17,7 +17,7 @@ Strengths
|
||||
* Space-efficient hash table and matching algorithms - can use as little as 1 GB hash table per 10 TB unique data (0.1GB/TB)
|
||||
* Daemon incrementally dedupes new data using btrfs tree search
|
||||
* Works with btrfs compression - dedupe any combination of compressed and uncompressed files
|
||||
* **NEW** [Works around `btrfs send` problems with dedupe and incremental parent shapshots](docs/options.md)
|
||||
* **NEW** [Works around `btrfs send` problems with dedupe and incremental parent snapshots](docs/options.md)
|
||||
* Works around btrfs filesystem structure to free more disk space
|
||||
* Persistent hash table for rapid restart after shutdown
|
||||
* Whole-filesystem dedupe - including snapshots
|
||||
@@ -70,6 +70,6 @@ You can also use Github:
|
||||
Copyright & License
|
||||
-------------------
|
||||
|
||||
Copyright 2015-2018 Zygo Blaxell <bees@furryterror.org>.
|
||||
Copyright 2015-2022 Zygo Blaxell <bees@furryterror.org>.
|
||||
|
||||
GPL (version 3 or later).
|
||||
|
@@ -9,7 +9,7 @@ This issue is fixed in kernel 5.4.14 and later.
|
||||
|
||||
**Recommended kernel versions for bees are 4.19, 5.4, 5.10, 5.11, or 5.12,
|
||||
with recent LTS and -stable updates.** The latest released kernel as
|
||||
of this writing is 5.12.3.
|
||||
of this writing is 5.18.18.
|
||||
|
||||
4.14, 4.9, and 4.4 LTS kernels with recent updates are OK with
|
||||
some issues. Older kernels will be slower (a little slower or a lot
|
||||
@@ -31,7 +31,7 @@ In some future bees release, this API version may become mandatory.
|
||||
Kernel Bug Tracking Table
|
||||
-------------------------
|
||||
|
||||
These bugs are particularly popular among bees users:
|
||||
These bugs are particularly popular among bees users, though not all are specifically relevant to bees:
|
||||
|
||||
| First bad kernel | Last bad kernel | Issue Description | Fixed Kernel Versions | Fix Commit
|
||||
| :---: | :---: | --- | :---: | ---
|
||||
@@ -61,7 +61,11 @@ These bugs are particularly popular among bees users:
|
||||
| 5.4 | 5.11 | spurious tree checker failures on extent ref hash | 5.11.5, 5.12 and later | 1119a72e223f btrfs: tree-checker: do not error out if extent ref hash doesn't match
|
||||
| - | 5.11 | tree mod log issue #5 | 4.4.263, 4.9.263, 4.14.227, 4.19.183, 5.4.108, 5.10.26, 5.11.9, 5.12 and later | dbcc7d57bffc btrfs: fix race when cloning extent buffer during rewind of an old root
|
||||
| - | 5.12 | tree mod log issue #6 | 4.14.233, 4.19.191, 5.4.118, 5.10.36, 5.11.20, 5.12.3, 5.13 and later | f9690f426b21 btrfs: fix race when picking most recent mod log operation for an old root
|
||||
| 4.15 | - | spurious warnings from `fs/fs-writeback.c` when `flushoncommit` is enabled | - | workaround: comment out the `WARN_ON`
|
||||
| 4.15 | 5.16 | spurious warnings from `fs/fs-writeback.c` when `flushoncommit` is enabled | 5.15.27, 5.16.13, 5.17 and later | a0f0cf8341e3 btrfs: get rid of warning on transaction commit when using flushoncommit
|
||||
| - | 5.17 | crash during device removal can make filesystem unmountable | 5.15.54, 5.16.20, 5.17.3, 5.18 and later | bbac58698a55 btrfs: remove device item and update super block in the same transaction
|
||||
| - | 5.18 | wrong superblock num_devices makes filesystem unmountable | 4.14.283, 4.19.247, 5.4.198, 5.10.121, 5.15.46, 5.17.14, 5.18.3, 5.19 and later | d201238ccd2f btrfs: repair super block num_devices automatically
|
||||
| 5.18 | 5.19 | parent transid verify failed during log tree replay after a crash during a rename operation | 5.18.18, 5.19.2, 6.0 and later | 723df2bcc9e1 btrfs: join running log transaction when logging new name
|
||||
| 5.4 | - | kernel hang when multiple threads are running `LOGICAL_INO` and dedupe ioctl | - | workaround: reduce bees thread count to 1 with `-c1`
|
||||
|
||||
"Last bad kernel" refers to that version's last stable update from
|
||||
kernel.org. Distro kernels may backport additional fixes. Consult
|
||||
@@ -77,7 +81,7 @@ A "-" for "first bad kernel" indicates the bug has been present since
|
||||
the relevant feature first appeared in btrfs.
|
||||
|
||||
A "-" for "last bad kernel" indicates the bug has not yet been fixed as
|
||||
of 5.8.14.
|
||||
of 5.18.18.
|
||||
|
||||
In cases where issues are fixed by commits spread out over multiple
|
||||
kernel versions, "fixed kernel version" refers to the version that
|
||||
@@ -87,15 +91,10 @@ contains all components of the fix.
|
||||
Workarounds for known kernel bugs
|
||||
---------------------------------
|
||||
|
||||
* **Tree mod log issues**: bees will detect that a btrfs balance is
|
||||
running, and pause bees activity until the balance is done. This avoids
|
||||
running both the `LOGICAL_INO` ioctl and btrfs balance at the same time,
|
||||
which avoids kernel crashes on old kernel versions.
|
||||
|
||||
The numbers for "tree mod log issue #" in the above table are arbitrary.
|
||||
There are a lot of them, and they all behave fairly similarly.
|
||||
|
||||
This workaround is less necessary for kernels 5.4.19 and later.
|
||||
* **Hangs with high worker thread counts**: On kernels newer than
|
||||
5.4, multiple threads running `LOGICAL_INO` and dedupe ioctls
|
||||
at the same time can lead to a kernel hang. The workaround is
|
||||
to reduce the thread count to 1 with `-c1`.
|
||||
|
||||
* **Slow backrefs** (aka toxic extents): Under certain conditions,
|
||||
if the number of references to a single shared extent grows too
|
||||
@@ -128,7 +127,7 @@ Workarounds for known kernel bugs
|
||||
Unfixed kernel bugs
|
||||
-------------------
|
||||
|
||||
As of 5.12.3:
|
||||
As of 5.18.18:
|
||||
|
||||
* **The kernel does not permit `btrfs send` and dedupe to run at the
|
||||
same time**. Recent kernels no longer crash, but now refuse one
|
||||
@@ -151,22 +150,3 @@ As of 5.12.3:
|
||||
still saves some IO.
|
||||
|
||||
`btrfs receive` is not affected by this issue.
|
||||
|
||||
* **Spurious warnings in `fs/fs-writeback.c`** on kernel 4.15 and later
|
||||
when filesystem is mounted with `flushoncommit`. These
|
||||
seem to be harmless (there are other locks which prevent
|
||||
concurrent umount of the filesystem), but the underlying
|
||||
problems that trigger the `WARN_ON` are [not trivial to
|
||||
fix](https://www.spinics.net/lists/linux-btrfs/msg87752.html).
|
||||
|
||||
The warnings can be especially voluminous when bees is running.
|
||||
|
||||
Workarounds:
|
||||
|
||||
1. mount with `-o noflushoncommit`
|
||||
2. patch kernel to remove warning in `fs/fs-writeback.c`.
|
||||
|
||||
Note that using kernels 4.14 and earlier is *not* a viable workaround
|
||||
for this issue, because kernels 4.14 and earlier will eventually
|
||||
deadlock when a filesystem is mounted with `-o flushoncommit` (a single
|
||||
commit fixes one bug and introduces the other).
|
||||
|
@@ -94,38 +94,75 @@ every time a new client machine's data is added to the server.
|
||||
Scanning modes for multiple subvols
|
||||
-----------------------------------
|
||||
|
||||
The `--scan-mode` option affects how bees divides resources between
|
||||
subvolumes. This is particularly relevant when there are snapshots,
|
||||
as there are tradeoffs to be made depending on how snapshots are used
|
||||
on the filesystem.
|
||||
The `--scan-mode` option affects how bees schedules worker threads
|
||||
between subvolumes. Scan modes are an experimental feature and will
|
||||
likely be deprecated in favor of a better solution.
|
||||
|
||||
Note that if a filesystem has only one subvolume (i.e. the root,
|
||||
subvol ID 5) then the `--scan-mode` option has no effect, as there is
|
||||
only one subvolume to scan.
|
||||
Scan mode can be changed at any time by restarting bees with a different
|
||||
mode option. Scan state tracking is the same for all of the currently
|
||||
implemented modes. The difference between the modes is the order in
|
||||
which subvols are selected.
|
||||
|
||||
The default mode is mode 0, "lockstep". In this mode, each inode of each
|
||||
subvol is scanned at the same time, before moving to the next inode in
|
||||
each subvol. This maximizes the likelihood that all of the references to
|
||||
a snapshot of a file are scanned at the same time, which takes advantage
|
||||
of VFS caching in the Linux kernel. If snapshots are created very often,
|
||||
bees will not make very good progress as it constantly restarts the
|
||||
filesystem scan from the beginning each time a new snapshot is created.
|
||||
If a filesystem has only one subvolume with data in it, then the
|
||||
`--scan-mode` option has no effect. In this case, there is only one
|
||||
subvolume to scan, so worker threads will all scan that one.
|
||||
|
||||
Scan mode 1, "independent", simply scans every subvol independently
|
||||
in parallel. Each subvol's scanner shares time equally with all other
|
||||
subvol scanners. Whenever a new subvol appears, a new scanner is
|
||||
created and the new subvol scanner doesn't affect the behavior of any
|
||||
existing subvol scanner.
|
||||
Within a subvol, there is a single optimal scan order: files are scanned
|
||||
in ascending numerical inode order. Each worker will scan a different
|
||||
inode to avoid having the threads contend with each other for locks.
|
||||
File data is read sequentially and in order, but old blocks from earlier
|
||||
scans are skipped.
|
||||
|
||||
Scan mode 2, "sequential", processes each subvol completely before
|
||||
proceeding to the next subvol. This is a good mode when using bees for
|
||||
the first time on a filesystem that already has many existing snapshots
|
||||
and a high rate of new snapshot creation. Short-lived snapshots
|
||||
(e.g. those used for `btrfs send`) are effectively ignored, and bees
|
||||
directs its efforts toward older subvols that are more likely to be
|
||||
origin subvols for snapshots. By deduping origin subvols first, bees
|
||||
ensures that future snapshots will already be deduplicated and do not
|
||||
need to be deduplicated again.
|
||||
Between subvols, there are several scheduling algorithms with different
|
||||
trade-offs:
|
||||
|
||||
Scan mode 0, "lockstep", scans the same inode number in each subvol at
|
||||
close to the same time. This is useful if the subvols are snapshots
|
||||
with a common ancestor, since the same inode number in each subvol will
|
||||
have similar or identical contents. This maximizes the likelihood
|
||||
that all of the references to a snapshot of a file are scanned at
|
||||
close to the same time, improving dedupe hit rate and possibly taking
|
||||
advantage of VFS caching in the Linux kernel. If the subvols are
|
||||
unrelated (i.e. not snapshots of a single subvol) then this mode does
|
||||
not provide significant benefit over random selection. This mode uses
|
||||
smaller amounts of temporary space for shorter periods of time when most
|
||||
subvols are snapshots. When a new snapshot is created, this mode will
|
||||
stop scanning other subvols and scan the new snapshot until the same
|
||||
inode number is reached in each subvol, which will effectively stop
|
||||
dedupe temporarily as this data has already been scanned and deduped
|
||||
in the other snapshots.
|
||||
|
||||
Scan mode 1, "independent", scans the next inode with new data in each
|
||||
subvol. Each subvol's scanner shares inodes uniformly with all other
|
||||
subvol scanners until the subvol has no new inodes left. This mode makes
|
||||
continuous forward progress across the filesystem and provides average
|
||||
performance across a variety of workloads, but is slow to respond to new
|
||||
data, and may spend a lot of time deduping short-lived subvols that will
|
||||
soon be deleted when it is preferable to dedupe long-lived subvols that
|
||||
will be the origin of future snapshots. When a new snapshot is created,
|
||||
previous subvol scans continue as before, but the time is now divided
|
||||
among one more subvol.
|
||||
|
||||
Scan mode 2, "sequential", scans one subvol at a time, in numerical subvol
|
||||
ID order, processing each subvol completely before proceeding to the
|
||||
next subvol. This avoids spending time scanning short-lived snapshots
|
||||
that will be deleted before they can be fully deduped (e.g. those used
|
||||
for `btrfs send`). Scanning is concentrated on older subvols that are
|
||||
more likely to be origin subvols for future snapshots, eliminating the
|
||||
need to dedupe future snapshots separately. This mode uses the largest
|
||||
amount of temporary space for the longest time, and typically requires
|
||||
a larger hash table to maintain dedupe hit rate.
|
||||
|
||||
Scan mode 3, "recent", scans the subvols with the highest `min_transid`
|
||||
value first (i.e. the ones that were most recently completely scanned),
|
||||
then falls back to "independent" mode to break ties. This interrupts
|
||||
long scans of old subvols to give a rapid dedupe response to new data,
|
||||
then returns to the old subvols after the new data is scanned. It is
|
||||
useful for large filesystems with multiple active subvols and rotating
|
||||
snapshots, where the first-pass scan can take months, but new duplicate
|
||||
data appears every day.
|
||||
|
||||
The default scan mode is 1, "independent".
|
||||
|
||||
If you are using bees for the first time on a filesystem with many
|
||||
existing snapshots, you should read about [snapshot gotchas](gotchas.md).
|
||||
|
@@ -67,11 +67,12 @@ The `adjust` event group consists of operations related to translating stored vi
|
||||
* `adjust_exact`: A block address from the hash table corresponding to an uncompressed data block was processed to find its `(root, inode, offset)` references.
|
||||
* `adjust_exact_correct`: A block address corresponding to an uncompressed block was retrieved from the hash table and resolved to a physical block containing data that matches another block bees has already read.
|
||||
* `adjust_exact_wrong`: A block address corresponding to an uncompressed block was retrieved from the hash table and resolved to a physical block containing data that matches the hash but not the data from another block bees has already read (i.e. there was a hash collision).
|
||||
* `adjust_hit`: A block address was retrieved from the hash table and resolved to a physical block containing data that matches the data from another block bees has already read (i.e. a duplicate match was found).
|
||||
* `adjust_hit`: A block address was retrieved from the hash table and resolved to a physical block in an uncompressed extent containing data that matches the data from another block bees has already read (i.e. a duplicate match was found).
|
||||
* `adjust_miss`: A block address was retrieved from the hash table and resolved to a physical block containing a hash that does not match the hash from another block bees has already read (i.e. the hash table contained a stale entry and the data it referred to has since been overwritten in the filesystem).
|
||||
* `adjust_needle_too_long`: A block address was retrieved from the hash table, but when the corresponding extent item was retrieved, its offset or length were out of range to be a match (i.e. the hash table contained a stale entry and the data it referred to has since been overwritten in the filesystem).
|
||||
* `adjust_no_match`: A hash collision occurred (i.e. a block on disk was located with the same hash as the hash table entry but different data). Effectively an alias for `hash_collision` as it is not possible to have one event without the other.
|
||||
* `adjust_offset_high`: The `LOGICAL_INO` ioctl gave an extent item that does not overlap with the desired block because the extent item ends before the desired block in the extent data.
|
||||
* `adjust_offset_hit`: A block address was retrieved from the hash table and resolved to a physical block in a compressed extent containing data that matches the data from another block bees has already read (i.e. a duplicate match was found).
|
||||
* `adjust_offset_low`: The `LOGICAL_INO` ioctl gave an extent item that does not overlap with the desired block because the extent item begins after the desired block in the extent data.
|
||||
* `adjust_try`: A block address and extent item candidate were passed to `BeesResolver::adjust_offset` for processing.
|
||||
|
||||
@@ -117,6 +118,7 @@ crawl
|
||||
|
||||
The `crawl` event group consists of operations related to scanning btrfs trees to find new extent refs to scan for dedupe.
|
||||
|
||||
* `crawl_again`: An inode crawl was restarted because the extent was already locked by another running crawl.
|
||||
* `crawl_blacklisted`: An extent was not scanned because it belongs to a blacklisted file.
|
||||
* `crawl_create`: A new subvol crawler was created.
|
||||
* `crawl_done`: One pass over all subvols on the filesystem was completed.
|
||||
@@ -132,7 +134,6 @@ The `crawl` event group consists of operations related to scanning btrfs trees t
|
||||
* `crawl_nondata`: An item in the search results is not data.
|
||||
* `crawl_prealloc`: An extent item in the search results refers to a `PREALLOC` extent.
|
||||
* `crawl_push`: An extent item in the search results is suitable for scanning and deduplication.
|
||||
* `crawl_restart`: A subvol crawl was restarted with a new `min_transid..max_transid` range.
|
||||
* `crawl_scan`: An extent item in the search results is submitted to `BeesContext::scan_forward` for scanning and deduplication.
|
||||
* `crawl_search`: A `TREE_SEARCH_V2` ioctl call was successful.
|
||||
* `crawl_unknown`: An extent item in the search results has an unrecognized type.
|
||||
@@ -299,6 +300,7 @@ The `resolve` event group consists of operations related to translating a btrfs
|
||||
* `resolve_large`: The `LOGICAL_INO` ioctl returned more than 2730 results (the limit of the v1 ioctl).
|
||||
* `resolve_ms`: Total time spent in the `LOGICAL_INO` ioctl (i.e. wallclock time, not kernel CPU time).
|
||||
* `resolve_ok`: The `LOGICAL_INO` ioctl returned success.
|
||||
* `resolve_overflow`: The `LOGICAL_INO` ioctl returned more than 655050 extents (the limit of the v2 ioctl).
|
||||
* `resolve_toxic`: The `LOGICAL_INO` ioctl took more than 0.1 seconds of kernel CPU time.
|
||||
|
||||
root
|
||||
@@ -333,6 +335,7 @@ The `scan` event group consists of operations related to scanning incoming data.
|
||||
* `scan_eof`: Scan past EOF was attempted.
|
||||
* `scan_erase_redundant`: Blocks in the hash table were removed because they were removed from the filesystem by dedupe.
|
||||
* `scan_extent`: An extent was scanned (`scan_one_extent`).
|
||||
* `scan_extent_tiny`: An extent below 128K that was not the beginning or end of a file was scanned. No action is currently taken for these--they are merely counted.
|
||||
* `scan_forward`: A logical byte range was scanned (`scan_forward`).
|
||||
* `scan_found`: An entry was found in the hash table matching a scanned block from the filesystem.
|
||||
* `scan_hash_hit`: A block was found on the filesystem corresponding to a block found in the hash table.
|
||||
@@ -360,6 +363,8 @@ scanf
|
||||
|
||||
The `scanf` event group consists of operations related to `BeesContext::scan_forward`. This is the entry point where `crawl` schedules new data for scanning.
|
||||
|
||||
* `scanf_deferred_extent`: Two tasks attempted to scan the same extent at the same time, so one was deferred.
|
||||
* `scanf_deferred_inode`: Two tasks attempted to scan the same inode at the same time, so one was deferred.
|
||||
* `scanf_extent`: A btrfs extent item was scanned.
|
||||
* `scanf_extent_ms`: Total thread-seconds spent scanning btrfs extent items.
|
||||
* `scanf_total`: A logical byte range of a file was scanned.
|
||||
|
@@ -45,7 +45,7 @@ bees will loop billions of times considering all possibilities. This is
|
||||
a waste of time, so an exception is currently used to break out of such
|
||||
loops early. The exception text in this case is:
|
||||
|
||||
`FIXME: bailing out here, need to fix this further up the call stack`
|
||||
`FIXME: too many duplicate candidates, bailing out here`
|
||||
|
||||
|
||||
Terminating bees with SIGTERM
|
||||
|
@@ -17,7 +17,7 @@ Strengths
|
||||
* Space-efficient hash table and matching algorithms - can use as little as 1 GB hash table per 10 TB unique data (0.1GB/TB)
|
||||
* Daemon incrementally dedupes new data using btrfs tree search
|
||||
* Works with btrfs compression - dedupe any combination of compressed and uncompressed files
|
||||
* **NEW** [Works around `btrfs send` problems with dedupe and incremental parent shapshots](options.md)
|
||||
* **NEW** [Works around `btrfs send` problems with dedupe and incremental parent snapshots](options.md)
|
||||
* Works around btrfs filesystem structure to free more disk space
|
||||
* Persistent hash table for rapid restart after shutdown
|
||||
* Whole-filesystem dedupe - including snapshots
|
||||
@@ -70,6 +70,6 @@ You can also use Github:
|
||||
Copyright & License
|
||||
-------------------
|
||||
|
||||
Copyright 2015-2018 Zygo Blaxell <bees@furryterror.org>.
|
||||
Copyright 2015-2022 Zygo Blaxell <bees@furryterror.org>.
|
||||
|
||||
GPL (version 3 or later).
|
||||
|
@@ -80,7 +80,7 @@ within a temporary runtime directory.
|
||||
Packaging
|
||||
---------
|
||||
|
||||
See 'Dependencies' below. Package maintainers can pick ideas for building and
|
||||
See 'Dependencies' above. Package maintainers can pick ideas for building and
|
||||
configuring the source package from the Gentoo ebuild:
|
||||
|
||||
<https://github.com/gentoo/gentoo/tree/master/sys-fs/bees>
|
||||
|
@@ -40,16 +40,16 @@
|
||||
|
||||
* `--scan-mode MODE` or `-m`
|
||||
|
||||
Specify extent scanning algorithm. Default `MODE` is 0.
|
||||
Specify extent scanning algorithm.
|
||||
**EXPERIMENTAL** feature that may go away.
|
||||
|
||||
* Mode 0: scan extents in ascending order of (inode, subvol, offset).
|
||||
Keeps shared extents between snapshots together. Reads files sequentially.
|
||||
Minimizes temporary space usage.
|
||||
* Mode 1: scan extents from all subvols in parallel. Good performance
|
||||
on non-spinning media when subvols are unrelated.
|
||||
* Mode 2: scan all extents from one subvol at a time. Good sequential
|
||||
read performance for spinning media. Maximizes temporary space usage.
|
||||
* Mode 0: lockstep
|
||||
* Mode 1: independent
|
||||
* Mode 2: sequential
|
||||
* Mode 3: recent
|
||||
|
||||
For details of the different scanning modes and the default value of
|
||||
this option, see [bees configuration](config.md).
|
||||
|
||||
## Workarounds
|
||||
|
||||
|
@@ -134,7 +134,7 @@ ulimit -c 0
|
||||
|
||||
# If there were core files, generate reports for them
|
||||
for x in core*; do
|
||||
if [ -e "$x" ]; then
|
||||
if [ -e "$x" ]; then
|
||||
gdb --core="$x" \
|
||||
--eval-command='set pagination off' \
|
||||
--eval-command='info shared' \
|
||||
|
204
include/crucible/btrfs-tree.h
Normal file
204
include/crucible/btrfs-tree.h
Normal file
@@ -0,0 +1,204 @@
|
||||
#ifndef CRUCIBLE_BTRFS_TREE_H
|
||||
#define CRUCIBLE_BTRFS_TREE_H
|
||||
|
||||
#include "crucible/fd.h"
|
||||
#include "crucible/fs.h"
|
||||
#include "crucible/bytevector.h"
|
||||
|
||||
namespace crucible {
|
||||
using namespace std;
|
||||
|
||||
/// One btrfs tree item: an objectid, offset, transid and type, plus a
/// ByteVector holding the item's data.  Items can be default-constructed,
/// or constructed/assigned from a BtrfsIoctlSearchHeader.  Only the five
/// simple accessors are defined inline; everything else is defined
/// outside this header.
class BtrfsTreeItem {
|
||||
uint64_t m_objectid = 0;
|
||||
uint64_t m_offset = 0;
|
||||
uint64_t m_transid = 0;
|
||||
ByteVector m_data;
|
||||
uint8_t m_type = 0;
|
||||
public:
|
||||
/// Simple accessors for the fields above.
uint64_t objectid() const { return m_objectid; }
|
||||
uint64_t offset() const { return m_offset; }
|
||||
uint64_t transid() const { return m_transid; }
|
||||
uint8_t type() const { return m_type; }
|
||||
/// Returns the ByteVector by value, not by reference.
const ByteVector data() const { return m_data; }
|
||||
BtrfsTreeItem() = default;
|
||||
BtrfsTreeItem(const BtrfsIoctlSearchHeader &bish);
|
||||
BtrfsTreeItem& operator=(const BtrfsIoctlSearchHeader &bish);
|
||||
/// NOTE(review): presumably true for an empty ("null") item, matching
/// the "null if not found" wording on the fetcher lookup methods --
/// confirm against the implementation.
bool operator!() const;
|
||||
|
||||
/// Member access methods.  Invoking a method on the
|
||||
/// wrong type of item will throw an exception.
|
||||
|
||||
/// @{ Block group items
|
||||
uint64_t block_group_flags() const;
|
||||
uint64_t block_group_used() const;
|
||||
/// @}
|
||||
|
||||
/// @{ Chunk items
|
||||
uint64_t chunk_length() const;
|
||||
uint64_t chunk_type() const;
|
||||
/// @}
|
||||
|
||||
/// @{ Dev extent items (physical byte ranges)
|
||||
uint64_t dev_extent_chunk_offset() const;
|
||||
uint64_t dev_extent_length() const;
|
||||
/// @}
|
||||
|
||||
/// @{ Dev items (devices)
|
||||
uint64_t dev_item_total_bytes() const;
|
||||
uint64_t dev_item_bytes_used() const;
|
||||
/// @}
|
||||
|
||||
/// @{ Inode items
|
||||
uint64_t inode_size() const;
|
||||
/// @}
|
||||
|
||||
/// @{ Extent refs (EXTENT_DATA)
|
||||
uint64_t file_extent_logical_bytes() const;
|
||||
uint64_t file_extent_generation() const;
|
||||
uint64_t file_extent_offset() const;
|
||||
uint64_t file_extent_bytenr() const;
|
||||
uint8_t file_extent_type() const;
|
||||
btrfs_compression_type file_extent_compression() const;
|
||||
/// @}
|
||||
|
||||
/// @{ Extent items (EXTENT_ITEM)
|
||||
uint64_t extent_begin() const;
|
||||
uint64_t extent_end() const;
|
||||
uint64_t extent_generation() const;
|
||||
/// @}
|
||||
|
||||
/// @{ Root items
|
||||
uint64_t root_flags() const;
|
||||
/// @}
|
||||
|
||||
/// @{ Root backref items.
|
||||
uint64_t root_ref_dirid() const;
|
||||
string root_ref_name() const;
|
||||
uint64_t root_ref_parent_rootid() const;
|
||||
/// @}
|
||||
};
|
||||
|
||||
/// Stream insertion for a BtrfsTreeItem (declared here, defined elsewhere).
ostream &operator<<(ostream &os, const BtrfsTreeItem &bti);
|
||||
|
||||
/// Abstract base class for looking up BtrfsTreeItem objects by a 64-bit
/// position (called "logical" below) using prev/next/at/lower_bound/
/// rlower_bound.  It is abstract because hdr_logical, hdr_match and
/// hdr_stop are pure virtual; subclasses supply them.
class BtrfsTreeFetcher {
|
||||
protected:
|
||||
Fd m_fd;
|
||||
BtrfsIoctlSearchKey m_sk;
|
||||
uint64_t m_tree = 0;
|
||||
uint64_t m_min_transid = 0;
|
||||
uint64_t m_max_transid = numeric_limits<uint64_t>::max();
|
||||
uint64_t m_block_size = 0;
|
||||
uint64_t m_lookbehind_size = 0;
|
||||
uint64_t m_scale_size = 0;
|
||||
uint8_t m_type = 0;
|
||||
|
||||
/// NOTE(review): presumably these convert a position to and from
/// units of m_scale_size -- confirm against the implementation.
uint64_t scale_logical(uint64_t logical) const;
|
||||
uint64_t unscale_logical(uint64_t logical) const;
|
||||
/// Largest representable position (maximum uint64_t value).
const static uint64_t s_max_logical = numeric_limits<uint64_t>::max();
|
||||
uint64_t scaled_max_logical() const;
|
||||
|
||||
/// Overridable hooks.  fill_sk and next_sk receive the search key by
/// non-const reference.  NOTE(review): presumably fill_sk sets up the
/// key for a position and next_sk advances it past hdr -- confirm.
virtual void fill_sk(BtrfsIoctlSearchKey &key, uint64_t object);
|
||||
virtual void next_sk(BtrfsIoctlSearchKey &key, const BtrfsIoctlSearchHeader &hdr);
|
||||
/// Pure virtual: each subclass decides how a search header maps to a
/// position, whether it matches, and whether it ends the search.
virtual uint64_t hdr_logical(const BtrfsIoctlSearchHeader &hdr) = 0;
|
||||
virtual bool hdr_match(const BtrfsIoctlSearchHeader &hdr) = 0;
|
||||
virtual bool hdr_stop(const BtrfsIoctlSearchHeader &hdr) = 0;
|
||||
Fd fd() const;
|
||||
void fd(Fd fd);
|
||||
public:
|
||||
virtual ~BtrfsTreeFetcher() = default;
|
||||
BtrfsTreeFetcher(Fd new_fd);
|
||||
void type(uint8_t type);
|
||||
void tree(uint64_t tree);
|
||||
/// max_transid defaults to the maximum uint64_t value.
void transid(uint64_t min_transid, uint64_t max_transid = numeric_limits<uint64_t>::max());
|
||||
/// Block size (sectorsize) of filesystem
|
||||
uint64_t block_size() const;
|
||||
/// Fetch last object < logical, null if not found
|
||||
BtrfsTreeItem prev(uint64_t logical);
|
||||
/// Fetch first object > logical, null if not found
|
||||
BtrfsTreeItem next(uint64_t logical);
|
||||
/// Fetch object at exactly logical, null if not found
|
||||
BtrfsTreeItem at(uint64_t);
|
||||
/// Fetch first object >= logical
|
||||
BtrfsTreeItem lower_bound(uint64_t logical);
|
||||
/// Fetch last object <= logical
|
||||
BtrfsTreeItem rlower_bound(uint64_t logical);
|
||||
|
||||
/// Estimated distance between objects
|
||||
virtual uint64_t lookbehind_size() const;
|
||||
virtual void lookbehind_size(uint64_t);
|
||||
|
||||
/// Scale size (normally block size but must be set to 1 for fs trees)
|
||||
uint64_t scale_size() const;
|
||||
void scale_size(uint64_t);
|
||||
};
|
||||
|
||||
/// Tree fetcher variant that supplies its own fill_sk and implements the
/// three header hooks that BtrfsTreeFetcher declares pure virtual
/// (hdr_logical, hdr_match, hdr_stop).
/// NOTE(review): the class name suggests items are located by key
/// objectid -- confirm against the out-of-line definitions.
class BtrfsTreeObjectFetcher : public BtrfsTreeFetcher {
|
||||
protected:
|
||||
/// Overrides the base class hook that prepares a search key for `logical`.
virtual void fill_sk(BtrfsIoctlSearchKey &key, uint64_t logical) override;
|
||||
/// Implements the base class hook mapping a search header to a logical position.
virtual uint64_t hdr_logical(const BtrfsIoctlSearchHeader &hdr) override;
|
||||
/// Implements the base class predicate applied to each search header.
virtual bool hdr_match(const BtrfsIoctlSearchHeader &hdr) override;
|
||||
/// Implements the base class predicate that ends a search.
virtual bool hdr_stop(const BtrfsIoctlSearchHeader &hdr) override;
|
||||
public:
|
||||
/// Inherit the constructors of BtrfsTreeFetcher.
using BtrfsTreeFetcher::BtrfsTreeFetcher;
|
||||
};
|
||||
|
||||
/// Tree fetcher variant that carries an objectid (m_objectid) in addition to
/// the base class state, supplies its own fill_sk, and implements the three
/// header hooks that BtrfsTreeFetcher declares pure virtual.
/// NOTE(review): presumably searches by key offset within the configured
/// objectid -- confirm against the out-of-line definitions.
class BtrfsTreeOffsetFetcher : public BtrfsTreeFetcher {
|
||||
protected:
|
||||
/// Object id accessed through objectid(); zero until set.
uint64_t m_objectid = 0;
|
||||
/// Overrides the base class hook that prepares a search key for `offset`.
virtual void fill_sk(BtrfsIoctlSearchKey &key, uint64_t offset) override;
|
||||
/// Implements the base class hook mapping a search header to a logical position.
virtual uint64_t hdr_logical(const BtrfsIoctlSearchHeader &hdr) override;
|
||||
/// Implements the base class predicate applied to each search header.
virtual bool hdr_match(const BtrfsIoctlSearchHeader &hdr) override;
|
||||
/// Implements the base class predicate that ends a search.
virtual bool hdr_stop(const BtrfsIoctlSearchHeader &hdr) override;
|
||||
public:
|
||||
/// Inherit the constructors of BtrfsTreeFetcher.
using BtrfsTreeFetcher::BtrfsTreeFetcher;
|
||||
/// Setter overload of objectid().
void objectid(uint64_t objectid);
|
||||
/// Getter overload of objectid().
uint64_t objectid() const;
|
||||
};
|
||||
|
||||
/// Offset fetcher that additionally records a checksum type and a checksum
/// size, and exposes get_sums().
/// NOTE(review): presumably reads the btrfs csum tree -- confirm against the
/// constructor definition.
class BtrfsCsumTreeFetcher : public BtrfsTreeOffsetFetcher {
|
||||
public:
|
||||
/// Sentinel (only bit 16 set) used as the initial value of m_sum_type.
/// NOTE(review): this is a non-static const data member, so every instance
/// stores its own copy and the implicitly declared copy assignment is
/// deleted; consider making it static if that is unintended.
const uint32_t BTRFS_CSUM_TYPE_UNKNOWN = uint32_t(1) << 16;
|
||||
private:
|
||||
/// Value returned by sum_size(); zero until set by code outside this header.
size_t m_sum_size = 0;
|
||||
/// Value returned by sum_type(); starts as BTRFS_CSUM_TYPE_UNKNOWN.
uint32_t m_sum_type = BTRFS_CSUM_TYPE_UNKNOWN;
|
||||
public:
|
||||
/// Constructor taking the Fd to use; defined outside this header.
BtrfsCsumTreeFetcher(const Fd &fd);
|
||||
|
||||
/// Checksum type accessor.
uint32_t sum_type() const;
|
||||
/// Checksum size accessor.
size_t sum_size() const;
|
||||
/// Invoke `output(logical, buf, count)` with checksum data for the request.
/// NOTE(review): how many times `output` is called and the units of `count`
/// are decided by the definition -- confirm there.
void get_sums(uint64_t logical, size_t count, function<void(uint64_t logical, const uint8_t *buf, size_t count)> output);
|
||||
};
|
||||
|
||||
/// Fetch extent items from extent tree
|
||||
class BtrfsExtentItemFetcher : public BtrfsTreeObjectFetcher {
|
||||
public:
|
||||
/// Constructor taking the Fd to use; defined outside this header.
/// Declaring it means the base class constructors are not inherited here.
BtrfsExtentItemFetcher(const Fd &fd);
|
||||
};
|
||||
|
||||
/// Fetch extent refs from an inode
|
||||
class BtrfsExtentDataFetcher : public BtrfsTreeOffsetFetcher {
|
||||
public:
|
||||
/// Constructor taking the Fd to use; defined outside this header.
/// The inode to read is not a constructor argument.
/// NOTE(review): presumably selected through the inherited objectid()
/// setter -- confirm with callers.
BtrfsExtentDataFetcher(const Fd &fd);
|
||||
};
|
||||
|
||||
/// Fetch inodes from a subvol
|
||||
class BtrfsFsTreeFetcher : public BtrfsTreeObjectFetcher {
|
||||
public:
|
||||
/// Constructor taking the Fd to use and a subvolume number; defined
/// outside this header.
/// NOTE(review): `subvol` is presumably the tree id to search -- confirm
/// against the definition.
BtrfsFsTreeFetcher(const Fd &fd, uint64_t subvol);
|
||||
};
|
||||
|
||||
/// Object fetcher with a stat() lookup taking a subvolume and an inode number.
class BtrfsInodeFetcher : public BtrfsTreeObjectFetcher {
|
||||
public:
|
||||
/// Constructor taking the Fd to use; defined outside this header.
BtrfsInodeFetcher(const Fd &fd);
|
||||
/// Look up an item for (`subvol`, `inode`) and return it as a BtrfsTreeItem.
/// NOTE(review): presumably the inode item; behaviour when nothing is
/// found is decided by the definition -- confirm there.
BtrfsTreeItem stat(uint64_t subvol, uint64_t inode);
|
||||
};
|
||||
|
||||
/// Object fetcher with a root() lookup taking a subvolume number.
class BtrfsRootFetcher : public BtrfsTreeObjectFetcher {
|
||||
public:
|
||||
/// Constructor taking the Fd to use; defined outside this header.
BtrfsRootFetcher(const Fd &fd);
|
||||
/// Look up an item for `subvol` and return it as a BtrfsTreeItem.
/// NOTE(review): presumably the root item of that subvolume; behaviour
/// when nothing is found is decided by the definition -- confirm there.
BtrfsTreeItem root(uint64_t subvol);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@@ -216,7 +216,28 @@ enum btrfs_compression_type {
|
||||
#define BTRFS_FS_INFO_FLAG_CSUM_INFO (1 << 0)
|
||||
#endif
|
||||
|
||||
struct btrfs_ioctl_fs_info_args_v2 {
|
||||
#ifndef BTRFS_FS_INFO_FLAG_GENERATION
|
||||
/* Request information about filesystem generation */
|
||||
#define BTRFS_FS_INFO_FLAG_GENERATION (1 << 1)
|
||||
#endif
|
||||
|
||||
#ifndef BTRFS_FS_INFO_FLAG_METADATA_UUID
|
||||
/* Request information about filesystem metadata UUID */
|
||||
#define BTRFS_FS_INFO_FLAG_METADATA_UUID (1 << 2)
|
||||
#endif
|
||||
|
||||
// BTRFS_CSUM_TYPE_CRC32 was a #define from 2008 to 2019.
|
||||
// After that, it's an enum with the other 3 types.
|
||||
// So if we do _not_ have CRC32 defined, it means we have the other 3;
|
||||
// if we _do_ have CRC32 defined, it means we need the other 3.
|
||||
// This seems likely to break some day.
|
||||
#ifdef BTRFS_CSUM_TYPE_CRC32
|
||||
#define BTRFS_CSUM_TYPE_XXHASH 1
|
||||
#define BTRFS_CSUM_TYPE_SHA256 2
|
||||
#define BTRFS_CSUM_TYPE_BLAKE2 3
|
||||
#endif
|
||||
|
||||
struct btrfs_ioctl_fs_info_args_v3 {
|
||||
__u64 max_id; /* out */
|
||||
__u64 num_devices; /* out */
|
||||
__u8 fsid[BTRFS_FSID_SIZE]; /* out */
|
||||
@@ -227,7 +248,9 @@ struct btrfs_ioctl_fs_info_args_v2 {
|
||||
__u16 csum_type; /* out */
|
||||
__u16 csum_size; /* out */
|
||||
__u64 flags; /* in/out */
|
||||
__u8 reserved[968]; /* pad to 1k */
|
||||
__u64 generation; /* out */
|
||||
__u8 metadata_uuid[BTRFS_FSID_SIZE]; /* out */
|
||||
__u8 reserved[944]; /* pad to 1k */
|
||||
};
|
||||
|
||||
#endif // CRUCIBLE_BTRFS_H
|
||||
|
79
include/crucible/bytevector.h
Normal file
79
include/crucible/bytevector.h
Normal file
@@ -0,0 +1,79 @@
|
||||
#ifndef _CRUCIBLE_BYTEVECTOR_H_
|
||||
#define _CRUCIBLE_BYTEVECTOR_H_
|
||||
|
||||
#include <crucible/error.h>
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <ostream>
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
|
||||
namespace crucible {
|
||||
using namespace std;
|
||||
// new[] is a little slower than malloc
|
||||
// shared_ptr is about 2x slower than unique_ptr
|
||||
// vector<uint8_t> is ~160x slower
|
||||
// so we won't bother with unique_ptr because we can't do shared copies with it
|
||||
|
||||
/// Byte buffer that holds its storage through a shared_ptr<uint8_t> (m_ptr)
/// together with an explicit byte count (m_size).
/// Every accessor is const yet returns non-const references, pointers or
/// iterators, so a const ByteVector still gives write access to its bytes.
class ByteVector {
|
||||
public:
|
||||
/// Owner type for the storage.
using Pointer = shared_ptr<uint8_t>;
|
||||
/// Element type, taken from Pointer (uint8_t).
using value_type = Pointer::element_type;
|
||||
/// Iterators are plain pointers into the storage.
using iterator = value_type*;
|
||||
|
||||
/// Default state: null m_ptr and m_size of zero (from the member initializers).
ByteVector() = default;
|
||||
/// User-provided copy operations; defined outside this header.
ByteVector(const ByteVector &that);
|
||||
ByteVector& operator=(const ByteVector &that);
|
||||
/// NOTE(review): not marked explicit, so a size_t converts implicitly to
/// a ByteVector.
ByteVector(size_t size);
|
||||
/// Construct from a (start, length) range of another ByteVector.
/// NOTE(review): whether this shares or copies storage is decided by the
/// definition -- confirm there.
ByteVector(const ByteVector &that, size_t start, size_t length);
|
||||
/// Construct from an iterator range, with an optional minimum size.
ByteVector(iterator begin, iterator end, size_t min_size = 0);
|
||||
|
||||
/// Range overload of at(): returns a ByteVector for (start, length).
ByteVector at(size_t start, size_t length) const;
|
||||
|
||||
/// Element overload of at(): returns a reference to one byte.
value_type& at(size_t) const;
|
||||
iterator begin() const;
|
||||
void clear();
|
||||
value_type* data() const;
|
||||
bool empty() const;
|
||||
iterator end() const;
|
||||
value_type& operator[](size_t) const;
|
||||
size_t size() const;
|
||||
bool operator==(const ByteVector &that) const;
|
||||
|
||||
// this version of erase only works at the beginning or end of the buffer, else throws exception
|
||||
void erase(iterator first);
|
||||
void erase(iterator first, iterator last);
|
||||
|
||||
// An important use case is ioctls that have a fixed-size header struct
|
||||
// followed by a buffer for further arguments. These templates avoid
|
||||
// doing reinterpret_casts every time.
|
||||
template <class T> ByteVector(const T& object, size_t min_size);
|
||||
template <class T> T* get() const;
|
||||
private:
|
||||
/// Storage owner; null for a default-constructed object.
Pointer m_ptr;
|
||||
/// Number of bytes in the buffer.
size_t m_size = 0;
|
||||
/// Mutable so that const member functions can lock it.
/// NOTE(review): which members take this lock is decided in the
/// out-of-line definitions -- confirm there.
mutable mutex m_mutex;
|
||||
friend ostream & operator<<(ostream &os, const ByteVector &bv);
|
||||
};
|
||||
|
||||
template <class T>
|
||||
ByteVector::ByteVector(const T& object, size_t min_size)
|
||||
{
|
||||
const auto size = max(min_size, sizeof(T));
|
||||
m_ptr = Pointer(static_cast<value_type*>(malloc(size)), free);
|
||||
memcpy(m_ptr.get(), &object, sizeof(T));
|
||||
m_size = size;
|
||||
}
|
||||
|
||||
/// View the start of the buffer as a T.
///
/// The returned pointer is data() reinterpreted as T*, so it aliases the
/// buffer; nothing is copied.
/// NOTE(review): only the size is checked; nothing here verifies that
/// data() is suitably aligned for T.
///
/// @throws out_of_range (through THROW_CHECK2) when size() < sizeof(T)
template <class T>
|
||||
T*
|
||||
ByteVector::get() const
|
||||
{
|
||||
// The buffer must be large enough to contain a whole T.
THROW_CHECK2(out_of_range, size(), sizeof(T), size() >= sizeof(T));
|
||||
return reinterpret_cast<T*>(data());
|
||||
}
|
||||
}
|
||||
|
||||
#endif // _CRUCIBLE_BYTEVECTOR_H_
|
@@ -30,7 +30,7 @@ namespace crucible {
|
||||
map<Key, ListIter> m_map;
|
||||
LockSet<Key> m_lockset;
|
||||
size_t m_max_size;
|
||||
mutex m_mutex;
|
||||
mutable mutex m_mutex;
|
||||
|
||||
void check_overflow();
|
||||
void recent_use(ListIter vp);
|
||||
@@ -48,6 +48,7 @@ namespace crucible {
|
||||
void expire(Arguments... args);
|
||||
void insert(const Return &r, Arguments... args);
|
||||
void clear();
|
||||
size_t size() const;
|
||||
};
|
||||
|
||||
template <class Return, class... Arguments>
|
||||
@@ -190,6 +191,14 @@ namespace crucible {
|
||||
lock.unlock();
|
||||
}
|
||||
|
||||
/// Return the number of entries currently stored in m_map.
/// m_mutex is held while the size is read and released on return.
template <class Return, class... Arguments>
|
||||
size_t
|
||||
LRUCache<Return, Arguments...>::size() const
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
return m_map.size();
|
||||
}
|
||||
|
||||
template<class Return, class... Arguments>
|
||||
Return
|
||||
LRUCache<Return, Arguments...>::operator()(Arguments... args)
|
||||
|
@@ -28,7 +28,7 @@ namespace crucible {
|
||||
};
|
||||
|
||||
template<> struct le_to_cpu_helper<uint16_t> {
|
||||
uint16_t operator()(const uint16_t v) { return le64toh(v); }
|
||||
uint16_t operator()(const uint16_t v) { return le16toh(v); }
|
||||
};
|
||||
|
||||
template<> struct le_to_cpu_helper<uint8_t> {
|
||||
|
@@ -126,6 +126,13 @@ namespace crucible {
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
/// Constraint check that reports four values on failure.
/// When `expr` is false, invokes THROW_ERROR with `type` and a message
/// listing each valueN as "name = value", followed by
/// " failed constraint check (expr)".
/// The valueN arguments are only evaluated when the check fails.
/// The do/while(0) wrapper makes the macro usable as a single statement.
#define THROW_CHECK4(type, value1, value2, value3, value4, expr) do { \
|
||||
if (!(expr)) { \
|
||||
THROW_ERROR(type, #value1 << " = " << (value1) << ", " #value2 << " = " << (value2) << ", " #value3 << " = " << (value3) << ", " #value4 << " = " << (value4) \
|
||||
<< " failed constraint check (" << #expr << ")"); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define THROW_CHECK_BIN_OP(type, value1, op, value2) do { \
|
||||
if (!((value1) op (value2))) { \
|
||||
THROW_ERROR(type, "failed constraint check " << #value1 << " (" << (value1) << ") " << #op << " " << #value2 << " (" << (value2) << ")"); \
|
||||
|
@@ -42,9 +42,6 @@ namespace crucible {
|
||||
uint64_t bytenr() const;
|
||||
bool operator==(const Extent &that) const;
|
||||
bool operator!=(const Extent &that) const { return !(*this == that); }
|
||||
|
||||
Extent() = default;
|
||||
Extent(const Extent &e) = default;
|
||||
};
|
||||
|
||||
class ExtentWalker {
|
||||
|
@@ -1,6 +1,7 @@
|
||||
#ifndef CRUCIBLE_FD_H
|
||||
#define CRUCIBLE_FD_H
|
||||
|
||||
#include "crucible/bytevector.h"
|
||||
#include "crucible/namedptr.h"
|
||||
|
||||
#include <cstring>
|
||||
@@ -26,9 +27,9 @@
|
||||
namespace crucible {
|
||||
using namespace std;
|
||||
|
||||
// IOHandle is a file descriptor owner object. It closes them when destroyed.
|
||||
// Most of the functions here don't use it because these functions don't own FDs.
|
||||
// All good names for such objects are taken.
|
||||
/// File descriptor owner object. It closes them when destroyed.
|
||||
/// Most of the functions here don't use it because these functions don't own FDs.
|
||||
/// All good names for such objects are taken.
|
||||
class IOHandle {
|
||||
IOHandle(const IOHandle &) = delete;
|
||||
IOHandle(IOHandle &&) = delete;
|
||||
@@ -42,6 +43,7 @@ namespace crucible {
|
||||
int get_fd() const;
|
||||
};
|
||||
|
||||
/// Copyable file descriptor.
|
||||
class Fd {
|
||||
static NamedPtr<IOHandle, int> s_named_ptr;
|
||||
shared_ptr<IOHandle> m_handle;
|
||||
@@ -61,24 +63,29 @@ namespace crucible {
|
||||
|
||||
// Functions named "foo_or_die" throw exceptions on failure.
|
||||
|
||||
// Attempt to open the file with the given mode
|
||||
/// Attempt to open the file with the given mode, throw exception on failure.
|
||||
int open_or_die(const string &file, int flags = O_RDONLY, mode_t mode = 0777);
|
||||
/// Attempt to open the file with the given mode, throw exception on failure.
|
||||
int openat_or_die(int dir_fd, const string &file, int flags = O_RDONLY, mode_t mode = 0777);
|
||||
|
||||
// Decode open parameters
|
||||
/// Decode open flags
|
||||
string o_flags_ntoa(int flags);
|
||||
/// Decode open mode
|
||||
string o_mode_ntoa(mode_t mode);
|
||||
|
||||
// mmap with its one weird error case
|
||||
/// mmap with its one weird error case
|
||||
void *mmap_or_die(void *addr, size_t length, int prot, int flags, int fd, off_t offset);
|
||||
// Decode mmap parameters
|
||||
/// Decode mmap prot
|
||||
string mmap_prot_ntoa(int prot);
|
||||
/// Decode mmap flags
|
||||
string mmap_flags_ntoa(int flags);
|
||||
|
||||
// Unlink, rename
|
||||
/// Rename, throw exception on failure.
|
||||
void rename_or_die(const string &from, const string &to);
|
||||
/// Rename, throw exception on failure.
|
||||
void renameat_or_die(int fromfd, const string &frompath, int tofd, const string &topath);
|
||||
|
||||
/// Truncate, throw exception on failure.
|
||||
void ftruncate_or_die(int fd, off_t size);
|
||||
|
||||
// Read or write structs:
|
||||
@@ -86,19 +93,25 @@ namespace crucible {
|
||||
// Three-arg version of read_or_die/write_or_die throws an error on incomplete read/writes
|
||||
// Four-arg version returns number of bytes read/written through reference arg
|
||||
|
||||
/// Attempt read by pointer and length, throw exception on IO error or short read.
|
||||
void read_or_die(int fd, void *buf, size_t size);
|
||||
/// Attempt read of a POD struct, throw exception on IO error or short read.
|
||||
template <class T> void read_or_die(int fd, T& buf)
|
||||
{
|
||||
return read_or_die(fd, static_cast<void *>(&buf), sizeof(buf));
|
||||
}
|
||||
|
||||
/// Attempt read by pointer and length, throw exception on IO error but not short read.
|
||||
void read_partial_or_die(int fd, void *buf, size_t size_wanted, size_t &size_read);
|
||||
/// Attempt read of a POD struct, throw exception on IO error but not short read.
|
||||
template <class T> void read_partial_or_die(int fd, T& buf, size_t &size_read)
|
||||
{
|
||||
return read_partial_or_die(fd, static_cast<void *>(&buf), sizeof(buf), size_read);
|
||||
}
|
||||
|
||||
/// Attempt read at position by pointer and length, throw exception on IO error but not short read.
|
||||
void pread_or_die(int fd, void *buf, size_t size, off_t offset);
|
||||
/// Attempt read at position of a POD struct, throw exception on IO error but not short read.
|
||||
template <class T> void pread_or_die(int fd, T& buf, off_t offset)
|
||||
{
|
||||
return pread_or_die(fd, static_cast<void *>(&buf), sizeof(buf), offset);
|
||||
@@ -125,20 +138,23 @@ namespace crucible {
|
||||
// Specialization for strings which reads/writes the string content, not the struct string
|
||||
template<> void write_or_die<string>(int fd, const string& str);
|
||||
template<> void pread_or_die<string>(int fd, string& str, off_t offset);
|
||||
template<> void pread_or_die<vector<char>>(int fd, vector<char>& str, off_t offset);
|
||||
template<> void pread_or_die<vector<uint8_t>>(int fd, vector<uint8_t>& str, off_t offset);
|
||||
template<> void pwrite_or_die<string>(int fd, const string& str, off_t offset);
|
||||
template<> void pwrite_or_die<vector<char>>(int fd, const vector<char>& str, off_t offset);
|
||||
template<> void pwrite_or_die<vector<uint8_t>>(int fd, const vector<uint8_t>& str, off_t offset);
|
||||
template<> void pread_or_die<ByteVector>(int fd, ByteVector& str, off_t offset);
|
||||
template<> void pwrite_or_die<ByteVector>(int fd, const ByteVector& str, off_t offset);
|
||||
// Deprecated
|
||||
template<> void pread_or_die<vector<uint8_t>>(int fd, vector<uint8_t>& str, off_t offset) = delete;
|
||||
template<> void pwrite_or_die<vector<uint8_t>>(int fd, const vector<uint8_t>& str, off_t offset) = delete;
|
||||
template<> void pread_or_die<vector<char>>(int fd, vector<char>& str, off_t offset) = delete;
|
||||
template<> void pwrite_or_die<vector<char>>(int fd, const vector<char>& str, off_t offset) = delete;
|
||||
|
||||
// A different approach to reading a simple string
|
||||
/// Read a simple string.
|
||||
string read_string(int fd, size_t size);
|
||||
|
||||
// A lot of Unix API wants you to initialize a struct and call
|
||||
// one function to fill it, another function to throw it away,
|
||||
// and has some unknown third thing you have to do when there's
|
||||
// an error. That's also a C++ object with an exception-throwing
|
||||
// constructor.
|
||||
/// A lot of Unix API wants you to initialize a struct and call
|
||||
/// one function to fill it, another function to throw it away,
|
||||
/// and has some unknown third thing you have to do when there's
|
||||
/// an error. That's also a C++ object with an exception-throwing
|
||||
/// constructor.
|
||||
struct Stat : public stat {
|
||||
Stat();
|
||||
Stat(int f);
|
||||
@@ -152,17 +168,17 @@ namespace crucible {
|
||||
|
||||
string st_mode_ntoa(mode_t mode);
|
||||
|
||||
// Because it's not trivial to do correctly
|
||||
/// Because it's not trivial to do correctly
|
||||
string readlink_or_die(const string &path);
|
||||
|
||||
// Determine the name of a FD by readlink through /proc/self/fd/
|
||||
/// Determine the name of a FD by readlink through /proc/self/fd/
|
||||
string name_fd(int fd);
|
||||
|
||||
// Returns Fd objects because it does own them.
|
||||
/// Returns Fd objects because it does own them.
|
||||
pair<Fd, Fd> socketpair_or_die(int domain = AF_UNIX, int type = SOCK_STREAM, int protocol = 0);
|
||||
|
||||
// like unique_lock but for flock instead of mutexes...and not trying
|
||||
// to hide the many and subtle differences between those two things *at all*.
|
||||
/// like unique_lock but for flock instead of mutexes...and not trying
|
||||
/// to hide the many and subtle differences between those two things *at all*.
|
||||
class Flock {
|
||||
int m_fd;
|
||||
bool m_locked;
|
||||
@@ -183,7 +199,7 @@ namespace crucible {
|
||||
int fd();
|
||||
};
|
||||
|
||||
// Doesn't use Fd objects because it's usually just used to replace stdin/stdout/stderr.
|
||||
/// Doesn't use Fd objects because it's usually just used to replace stdin/stdout/stderr.
|
||||
void dup2_or_die(int fd_in, int fd_out);
|
||||
|
||||
}
|
||||
|
@@ -1,9 +1,9 @@
|
||||
#ifndef CRUCIBLE_FS_H
|
||||
#define CRUCIBLE_FS_H
|
||||
|
||||
#include "crucible/bytevector.h"
|
||||
#include "crucible/endian.h"
|
||||
#include "crucible/error.h"
|
||||
#include "crucible/spanner.h"
|
||||
|
||||
// Terribly Linux-specific FS-wrangling functions
|
||||
|
||||
@@ -27,18 +27,16 @@ namespace crucible {
|
||||
// wrapper around fallocate(...FALLOC_FL_PUNCH_HOLE...)
|
||||
void punch_hole(int fd, off_t offset, off_t len);
|
||||
|
||||
struct BtrfsExtentInfo : public btrfs_ioctl_same_extent_info {
|
||||
BtrfsExtentInfo(int dst_fd, off_t dst_offset);
|
||||
};
|
||||
|
||||
struct BtrfsExtentSame : public btrfs_ioctl_same_args {
|
||||
struct BtrfsExtentSame {
|
||||
virtual ~BtrfsExtentSame();
|
||||
BtrfsExtentSame(int src_fd, off_t src_offset, off_t src_length);
|
||||
void add(int fd, off_t offset);
|
||||
void add(int fd, uint64_t offset);
|
||||
virtual void do_ioctl();
|
||||
|
||||
uint64_t m_logical_offset = 0;
|
||||
uint64_t m_length = 0;
|
||||
int m_fd;
|
||||
vector<BtrfsExtentInfo> m_info;
|
||||
vector<btrfs_ioctl_same_extent_info> m_info;
|
||||
};
|
||||
|
||||
ostream & operator<<(ostream &os, const btrfs_ioctl_same_extent_info *info);
|
||||
@@ -53,20 +51,20 @@ namespace crucible {
|
||||
|
||||
ostream & operator<<(ostream &os, const BtrfsInodeOffsetRoot &p);
|
||||
|
||||
struct BtrfsDataContainer : public btrfs_data_container {
|
||||
struct BtrfsDataContainer {
|
||||
BtrfsDataContainer(size_t size = 64 * 1024);
|
||||
void *prepare(size_t size);
|
||||
|
||||
size_t get_size() const;
|
||||
decltype(bytes_left) get_bytes_left() const;
|
||||
decltype(bytes_missing) get_bytes_missing() const;
|
||||
decltype(elem_cnt) get_elem_cnt() const;
|
||||
decltype(elem_missed) get_elem_missed() const;
|
||||
decltype(btrfs_data_container::bytes_left) get_bytes_left() const;
|
||||
decltype(btrfs_data_container::bytes_missing) get_bytes_missing() const;
|
||||
decltype(btrfs_data_container::elem_cnt) get_elem_cnt() const;
|
||||
decltype(btrfs_data_container::elem_missed) get_elem_missed() const;
|
||||
|
||||
vector<uint8_t> m_data;
|
||||
ByteVector m_data;
|
||||
};
|
||||
|
||||
struct BtrfsIoctlLogicalInoArgs : public btrfs_ioctl_logical_ino_args {
|
||||
struct BtrfsIoctlLogicalInoArgs {
|
||||
BtrfsIoctlLogicalInoArgs(uint64_t logical, size_t buf_size = 16 * 1024 * 1024);
|
||||
|
||||
uint64_t get_flags() const;
|
||||
@@ -75,7 +73,6 @@ namespace crucible {
|
||||
virtual void do_ioctl(int fd);
|
||||
virtual bool do_ioctl_nothrow(int fd);
|
||||
|
||||
size_t m_container_size;
|
||||
struct BtrfsInodeOffsetRootSpan {
|
||||
using iterator = BtrfsInodeOffsetRoot*;
|
||||
using const_iterator = const BtrfsInodeOffsetRoot*;
|
||||
@@ -86,13 +83,17 @@ namespace crucible {
|
||||
const_iterator cend() const;
|
||||
iterator data() const;
|
||||
void clear();
|
||||
operator vector<BtrfsInodeOffsetRoot>() const;
|
||||
private:
|
||||
iterator m_begin = nullptr;
|
||||
iterator m_end = nullptr;
|
||||
friend struct BtrfsIoctlLogicalInoArgs;
|
||||
} m_iors;
|
||||
private:
|
||||
size_t m_container_size;
|
||||
BtrfsDataContainer m_container;
|
||||
uint64_t m_logical;
|
||||
uint64_t m_flags = 0;
|
||||
friend ostream & operator<<(ostream &os, const BtrfsIoctlLogicalInoArgs *p);
|
||||
};
|
||||
|
||||
ostream & operator<<(ostream &os, const BtrfsIoctlLogicalInoArgs &p);
|
||||
@@ -124,15 +125,6 @@ namespace crucible {
|
||||
|
||||
ostream & operator<<(ostream &os, const BtrfsIoctlDefragRangeArgs *p);
|
||||
|
||||
// in btrfs/ctree.h, but that's a nightmare to #include here
|
||||
typedef enum {
|
||||
BTRFS_COMPRESS_NONE = 0,
|
||||
BTRFS_COMPRESS_ZLIB = 1,
|
||||
BTRFS_COMPRESS_LZO = 2,
|
||||
BTRFS_COMPRESS_ZSTD = 3,
|
||||
BTRFS_COMPRESS_TYPES = 3
|
||||
} btrfs_compression_type;
|
||||
|
||||
struct FiemapExtent : public fiemap_extent {
|
||||
FiemapExtent();
|
||||
FiemapExtent(const fiemap_extent &that);
|
||||
@@ -141,16 +133,26 @@ namespace crucible {
|
||||
off_t end() const;
|
||||
};
|
||||
|
||||
struct Fiemap : public fiemap {
|
||||
struct Fiemap {
|
||||
|
||||
// because fiemap.h insists on giving FIEMAP_MAX_OFFSET
|
||||
// a different type from the struct fiemap members
|
||||
static const uint64_t s_fiemap_max_offset = FIEMAP_MAX_OFFSET;
|
||||
|
||||
// Get entire file
|
||||
Fiemap(uint64_t start = 0, uint64_t length = FIEMAP_MAX_OFFSET);
|
||||
Fiemap(uint64_t start = 0, uint64_t length = s_fiemap_max_offset);
|
||||
|
||||
void do_ioctl(int fd);
|
||||
|
||||
vector<FiemapExtent> m_extents;
|
||||
uint64_t m_min_count = (4096 - sizeof(fiemap)) / sizeof(fiemap_extent);
|
||||
uint64_t m_max_count = 16 * 1024 * 1024 / sizeof(fiemap_extent);
|
||||
decltype(fiemap::fm_extent_count) m_min_count = (4096 - sizeof(fiemap)) / sizeof(fiemap_extent);
|
||||
decltype(fiemap::fm_extent_count) m_max_count = 16 * 1024 * 1024 / sizeof(fiemap_extent);
|
||||
uint64_t m_start;
|
||||
uint64_t m_length;
|
||||
// FIEMAP is slow and full of lies.
|
||||
// This makes FIEMAP even slower, but reduces the lies a little.
|
||||
decltype(fiemap::fm_flags) m_flags = FIEMAP_FLAG_SYNC;
|
||||
friend ostream &operator<<(ostream &, const Fiemap &);
|
||||
};
|
||||
|
||||
ostream & operator<<(ostream &os, const fiemap_extent *info);
|
||||
@@ -166,8 +168,8 @@ namespace crucible {
|
||||
|
||||
struct BtrfsIoctlSearchHeader : public btrfs_ioctl_search_header {
|
||||
BtrfsIoctlSearchHeader();
|
||||
Spanner<const uint8_t> m_data;
|
||||
size_t set_data(const vector<uint8_t> &v, size_t offset);
|
||||
ByteVector m_data;
|
||||
size_t set_data(const ByteVector &v, size_t offset);
|
||||
bool operator<(const BtrfsIoctlSearchHeader &that) const;
|
||||
};
|
||||
|
||||
@@ -181,17 +183,18 @@ namespace crucible {
|
||||
ostream & operator<<(ostream &os, const BtrfsIoctlSearchHeader &hdr);
|
||||
|
||||
struct BtrfsIoctlSearchKey : public btrfs_ioctl_search_key {
|
||||
BtrfsIoctlSearchKey(size_t buf_size = 4096);
|
||||
virtual bool do_ioctl_nothrow(int fd);
|
||||
virtual void do_ioctl(int fd);
|
||||
BtrfsIoctlSearchKey(size_t buf_size = 1024);
|
||||
bool do_ioctl_nothrow(int fd);
|
||||
void do_ioctl(int fd);
|
||||
|
||||
// Copy objectid/type/offset so we move forward
|
||||
void next_min(const BtrfsIoctlSearchHeader& ref);
|
||||
|
||||
size_t m_buf_size;
|
||||
vector<uint8_t> m_ioctl_arg;
|
||||
set<BtrfsIoctlSearchHeader> m_result;
|
||||
// move forward to next object of a single type
|
||||
void next_min(const BtrfsIoctlSearchHeader& ref, const uint8_t type);
|
||||
|
||||
size_t m_buf_size;
|
||||
set<BtrfsIoctlSearchHeader> m_result;
|
||||
};
|
||||
|
||||
ostream & operator<<(ostream &os, const btrfs_ioctl_search_key &key);
|
||||
@@ -199,6 +202,7 @@ namespace crucible {
|
||||
|
||||
string btrfs_search_type_ntoa(unsigned type);
|
||||
string btrfs_search_objectid_ntoa(uint64_t objectid);
|
||||
string btrfs_compress_type_ntoa(uint8_t type);
|
||||
|
||||
uint64_t btrfs_get_root_id(int fd);
|
||||
uint64_t btrfs_get_root_transid(int fd);
|
||||
@@ -235,11 +239,12 @@ namespace crucible {
|
||||
|
||||
template<class V> ostream &hexdump(ostream &os, const V &v);
|
||||
|
||||
struct BtrfsIoctlFsInfoArgs : public btrfs_ioctl_fs_info_args_v2 {
|
||||
struct BtrfsIoctlFsInfoArgs : public btrfs_ioctl_fs_info_args_v3 {
|
||||
BtrfsIoctlFsInfoArgs();
|
||||
void do_ioctl(int fd);
|
||||
uint16_t csum_type() const;
|
||||
uint16_t csum_size() const;
|
||||
uint64_t generation() const;
|
||||
};
|
||||
|
||||
ostream & operator<<(ostream &os, const BtrfsIoctlFsInfoArgs &a);
|
||||
|
36
include/crucible/hexdump.h
Normal file
36
include/crucible/hexdump.h
Normal file
@@ -0,0 +1,36 @@
|
||||
#ifndef CRUCIBLE_HEXDUMP_H
|
||||
#define CRUCIBLE_HEXDUMP_H
|
||||
|
||||
#include "crucible/string.h"
|
||||
|
||||
#include <ostream>
|
||||
|
||||
namespace crucible {
|
||||
using namespace std;
|
||||
|
||||
/// Write a hex and ASCII dump of a byte container to a stream.
///
/// The output starts with "V { size = N, data:\n", continues with one line
/// per 8 bytes of the form "\t<offset> <hex bytes> <characters>\n", and ends
/// with "}".  Bytes outside the printable range 32..126 are shown as '.'.
/// A short final row is padded so the character column stays aligned.
///
/// @tparam V  container with size() and operator[] whose elements convert
///            to uint8_t
/// @param os  destination stream
/// @param v   bytes to dump
/// @return    os
template <class V>
std::ostream &
hexdump(std::ostream &os, const V &v)
{
	const size_t total = v.size();
	os << "V { size = " << total << ", data:\n";
	for (size_t row = 0; row < total; row += 8) {
		std::string hex, ascii;
		for (size_t col = row; col < row + 8; ++col) {
			if (col < total) {
				const uint8_t c = v[col];
				char buf[8];
				// Bounded formatting: "xx " plus the terminator fits in buf.
				snprintf(buf, sizeof(buf), "%02x ", static_cast<unsigned>(c));
				hex += buf;
				ascii += (c < 32 || c > 126) ? '.' : static_cast<char>(c);
			} else {
				// A missing byte takes the same three columns as a real one.
				hex += "   ";
				ascii += ' ';
			}
		}
		// The offset is a size_t, so it needs the 'z' length modifier.
		// Two hex digits per byte of size_t plus the terminator always fit.
		char offset[2 * sizeof(size_t) + 1];
		snprintf(offset, sizeof(offset), "%08zx", row);
		os << '\t' << offset << ' ' << hex << ' ' << ascii << '\n';
	}
	return os << "}";
}
|
||||
};
|
||||
|
||||
#endif // CRUCIBLE_HEXDUMP_H
|
40
include/crucible/multilock.h
Normal file
40
include/crucible/multilock.h
Normal file
@@ -0,0 +1,40 @@
|
||||
#ifndef CRUCIBLE_MULTILOCK_H
|
||||
#define CRUCIBLE_MULTILOCK_H
|
||||
|
||||
#include <condition_variable>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
|
||||
namespace crucible {
|
||||
using namespace std;
|
||||
|
||||
/// Hands out lock handles identified by a string type name.
/// The only public member is the static get_lock().  LockHandle is a private
/// nested class, so callers can hold the returned shared_ptr but cannot name
/// the handle type.
/// NOTE(review): the locking policy (what makes a lock "available") lives in
/// the out-of-line definitions -- confirm there.
class MultiLocker {
|
||||
/// Mutex and condition variable owned by this locker.
/// NOTE(review): presumably they guard and signal changes to m_counters.
mutex m_mutex;
|
||||
condition_variable m_cv;
|
||||
/// One counter per lock type name.
map<string, size_t> m_counters;
|
||||
|
||||
/// Handle object returned to callers through a shared_ptr.
class LockHandle {
|
||||
/// Lock type name this handle was created for.
const string m_type;
|
||||
/// Locker that created this handle; stored as a reference.
MultiLocker &m_parent;
|
||||
/// Starts out false; changed through set_locked().
bool m_locked = false;
|
||||
void set_locked(bool state);
|
||||
public:
|
||||
~LockHandle();
|
||||
LockHandle(const string &type, MultiLocker &parent);
|
||||
friend class MultiLocker;
|
||||
};
|
||||
|
||||
friend class LockHandle;
|
||||
|
||||
/// Private helpers operating on a lock type name; defined outside this header.
bool is_lock_available(const string &type);
|
||||
void put_lock(const string &type);
|
||||
shared_ptr<LockHandle> get_lock_private(const string &type);
|
||||
public:
|
||||
/// Obtain a handle for lock type `type`.
/// NOTE(review): being static, this presumably forwards to a shared
/// MultiLocker instance, and may block until the lock is available --
/// confirm against the definition.
static shared_ptr<LockHandle> get_lock(const string &type);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // CRUCIBLE_MULTILOCK_H
|
@@ -12,13 +12,18 @@
|
||||
namespace crucible {
|
||||
using namespace std;
|
||||
|
||||
/// Storage for objects with unique names
|
||||
/// A thread-safe container for RAII of shared resources with unique names.
|
||||
|
||||
template <class Return, class... Arguments>
|
||||
class NamedPtr {
|
||||
public:
|
||||
/// The name in "NamedPtr"
|
||||
using Key = tuple<Arguments...>;
|
||||
/// A shared pointer to the named object with ownership
|
||||
/// tracking that erases the object's stored name when
|
||||
/// the last shared pointer is destroyed.
|
||||
using Ptr = shared_ptr<Return>;
|
||||
/// A function that translates a name into a shared pointer to an object.
|
||||
using Func = function<Ptr(Arguments...)>;
|
||||
private:
|
||||
struct Value;
|
||||
@@ -29,6 +34,7 @@ namespace crucible {
|
||||
mutex m_mutex;
|
||||
};
|
||||
using MapPtr = shared_ptr<MapRep>;
|
||||
/// Container for Return pointers. Destructor removes entry from map.
|
||||
struct Value {
|
||||
Ptr m_ret_ptr;
|
||||
MapPtr m_map_rep;
|
||||
@@ -50,15 +56,21 @@ namespace crucible {
|
||||
void func(Func f);
|
||||
|
||||
Ptr operator()(Arguments... args);
|
||||
|
||||
Ptr insert(const Ptr &r, Arguments... args);
|
||||
};
|
||||
|
||||
/// Construct NamedPtr map and define a function to turn a name into a pointer.
|
||||
template <class Return, class... Arguments>
|
||||
NamedPtr<Return, Arguments...>::NamedPtr(Func f) :
|
||||
m_fn(f)
|
||||
{
|
||||
}
|
||||
|
||||
/// Construct a Value wrapper: the value to store, the argument key to store the value under,
|
||||
/// and a pointer to the map. Everything needed to remove the key from the map when the
|
||||
/// last NamedPtr is deleted. NamedPtr then releases its own pointer to the value, which
|
||||
/// may or may not trigger deletion there.
|
||||
template <class Return, class... Arguments>
|
||||
NamedPtr<Return, Arguments...>::Value::Value(Ptr&& ret_ptr, const Key &key, const MapPtr &map_rep) :
|
||||
m_ret_ptr(ret_ptr),
|
||||
@@ -67,6 +79,8 @@ namespace crucible {
|
||||
{
|
||||
}
|
||||
|
||||
/// Destroy a Value wrapper: remove a dead Key from the map, then let the member destructors
|
||||
/// do the rest. The Key might be in the map and not dead, so leave it alone in that case.
|
||||
template <class Return, class... Arguments>
|
||||
NamedPtr<Return, Arguments...>::Value::~Value()
|
||||
{
|
||||
@@ -82,21 +96,23 @@ namespace crucible {
|
||||
// "our" map entry if it exists and is expired. The other
|
||||
// thread would have done the same for us if the race had
|
||||
// a different winner.
|
||||
auto found = m_map_rep->m_map.find(m_ret_key);
|
||||
const auto found = m_map_rep->m_map.find(m_ret_key);
|
||||
if (found != m_map_rep->m_map.end() && found->second.expired()) {
|
||||
m_map_rep->m_map.erase(found);
|
||||
}
|
||||
}
|
||||
|
||||
/// Find a Return by key and fetch a strong Return pointer.
|
||||
/// Ignore Keys that have expired weak pointers.
|
||||
template <class Return, class... Arguments>
|
||||
typename NamedPtr<Return, Arguments...>::Ptr
|
||||
NamedPtr<Return, Arguments...>::lookup_item(const Key &k)
|
||||
{
|
||||
// Must be called with lock held
|
||||
auto found = m_map_rep->m_map.find(k);
|
||||
const auto found = m_map_rep->m_map.find(k);
|
||||
if (found != m_map_rep->m_map.end()) {
|
||||
// Get the strong pointer back
|
||||
auto rv = found->second.lock();
|
||||
const auto rv = found->second.lock();
|
||||
if (rv) {
|
||||
// Have strong pointer. Return value that shares map entry.
|
||||
return shared_ptr<Return>(rv, rv->m_ret_ptr.get());
|
||||
@@ -109,6 +125,11 @@ namespace crucible {
|
||||
return Ptr();
|
||||
}
|
||||
|
||||
/// Insert the Return value of calling Func(Arguments...).
|
||||
/// If the value already exists in the map, return the existing value.
|
||||
/// If another thread is already running Func(Arguments...) then this thread
|
||||
/// will block until the other thread finishes inserting the Return in the
|
||||
/// map, and both threads will return the same Return value.
|
||||
template <class Return, class... Arguments>
|
||||
typename NamedPtr<Return, Arguments...>::Ptr
|
||||
NamedPtr<Return, Arguments...>::insert_item(Func fn, Arguments... args)
|
||||
@@ -116,34 +137,36 @@ namespace crucible {
|
||||
Key k(args...);
|
||||
|
||||
// Is it already in the map?
|
||||
unique_lock<mutex> lock(m_map_rep->m_mutex);
|
||||
unique_lock<mutex> lock_lookup(m_map_rep->m_mutex);
|
||||
auto rv = lookup_item(k);
|
||||
if (rv) {
|
||||
return rv;
|
||||
}
|
||||
|
||||
// Release map lock and acquire key lock
|
||||
lock.unlock();
|
||||
auto key_lock = m_lockset.make_lock(k);
|
||||
lock_lookup.unlock();
|
||||
const auto key_lock = m_lockset.make_lock(k);
|
||||
|
||||
// Did item appear in map while we were waiting for key?
|
||||
lock.lock();
|
||||
lock_lookup.lock();
|
||||
rv = lookup_item(k);
|
||||
if (rv) {
|
||||
return rv;
|
||||
}
|
||||
|
||||
// We now hold key and index locks, but item not in map (or expired).
|
||||
// Release map lock
|
||||
lock.unlock();
|
||||
// Release map lock so other threads can use the map
|
||||
lock_lookup.unlock();
|
||||
|
||||
// Call the function and create a new Value outside of the map
|
||||
const auto new_value_ptr = make_shared<Value>(fn(args...), k, m_map_rep);
|
||||
|
||||
// Call the function and create a new Value
|
||||
auto new_value_ptr = make_shared<Value>(fn(args...), k, m_map_rep);
|
||||
// Function must return a non-null pointer
|
||||
THROW_CHECK0(runtime_error, new_value_ptr->m_ret_ptr);
|
||||
|
||||
// Reacquire index lock for map insertion
|
||||
lock.lock();
|
||||
// Reacquire index lock for map insertion. We still hold the key lock.
|
||||
// Use a different lock object to make exceptions unlock in the right order
|
||||
unique_lock<mutex> lock_insert(m_map_rep->m_mutex);
|
||||
|
||||
// Insert return value in map or overwrite existing
|
||||
// empty or expired weak_ptr value.
|
||||
@@ -158,16 +181,16 @@ namespace crucible {
|
||||
// to find and fix.
|
||||
assert(new_item_ref.expired());
|
||||
|
||||
// Update the empty map slot
|
||||
// Update the map slot we are sure is empty
|
||||
new_item_ref = new_value_ptr;
|
||||
|
||||
// Drop lock so we don't deadlock in constructor exceptions
|
||||
lock.unlock();
|
||||
|
||||
// Return shared_ptr to Return using strong pointer's reference counter
|
||||
return shared_ptr<Return>(new_value_ptr, new_value_ptr->m_ret_ptr.get());
|
||||
|
||||
// Release map lock, then key lock
|
||||
}
|
||||
|
||||
/// (Re)define a function to turn a name into a pointer.
|
||||
template <class Return, class... Arguments>
|
||||
void
|
||||
NamedPtr<Return, Arguments...>::func(Func func)
|
||||
@@ -176,6 +199,7 @@ namespace crucible {
|
||||
m_fn = func;
|
||||
}
|
||||
|
||||
/// Convert a name into a pointer using the configured function.
|
||||
template<class Return, class... Arguments>
|
||||
typename NamedPtr<Return, Arguments...>::Ptr
|
||||
NamedPtr<Return, Arguments...>::operator()(Arguments... args)
|
||||
@@ -183,14 +207,19 @@ namespace crucible {
|
||||
return insert_item(m_fn, args...);
|
||||
}
|
||||
|
||||
/// Insert a pointer that has already been created under the
|
||||
/// given name. Useful for inserting a pointer to a derived
|
||||
/// class when the name doesn't contain all of the information
|
||||
/// required for the object, or when the Return is already known by
|
||||
/// some cheaper method than calling the function.
|
||||
template<class Return, class... Arguments>
|
||||
typename NamedPtr<Return, Arguments...>::Ptr
|
||||
NamedPtr<Return, Arguments...>::insert(const Ptr &r, Arguments... args)
|
||||
{
|
||||
THROW_CHECK0(invalid_argument, r);
|
||||
return insert_item([&](Arguments...) -> Ptr { return r; }, args...);
|
||||
return insert_item([&](Arguments...) { return r; }, args...);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif // NAMEDPTR_H
|
||||
#endif // CRUCIBLE_NAMEDPTR_H
|
||||
|
@@ -20,7 +20,7 @@ namespace crucible {
|
||||
#define NTOA_TABLE_ENTRY_BITS(x) { .n = (x), .mask = (x), .a = (#x) }
|
||||
|
||||
// Enumerations (entire value matches all bits)
|
||||
#define NTOA_TABLE_ENTRY_ENUM(x) { .n = (x), .mask = ~0UL, .a = (#x) }
|
||||
#define NTOA_TABLE_ENTRY_ENUM(x) { .n = (x), .mask = ~0ULL, .a = (#x) }
|
||||
|
||||
// End of table (sorry, C++ didn't get C99's compound literals, so we have to write out all the member names)
|
||||
#define NTOA_TABLE_ENTRY_END() { .n = 0, .mask = 0, .a = nullptr }
|
||||
|
@@ -20,8 +20,8 @@ namespace crucible {
|
||||
using ProgressHolder = shared_ptr<ProgressHolderState>;
|
||||
|
||||
ProgressTracker(const value_type &v);
|
||||
value_type begin();
|
||||
value_type end();
|
||||
value_type begin() const;
|
||||
value_type end() const;
|
||||
|
||||
ProgressHolder hold(const value_type &v);
|
||||
|
||||
@@ -51,7 +51,7 @@ namespace crucible {
|
||||
|
||||
template <class T>
|
||||
typename ProgressTracker<T>::value_type
|
||||
ProgressTracker<T>::begin()
|
||||
ProgressTracker<T>::begin() const
|
||||
{
|
||||
unique_lock<mutex> lock(m_state->m_mutex);
|
||||
return m_state->m_begin;
|
||||
@@ -59,7 +59,7 @@ namespace crucible {
|
||||
|
||||
template <class T>
|
||||
typename ProgressTracker<T>::value_type
|
||||
ProgressTracker<T>::end()
|
||||
ProgressTracker<T>::end() const
|
||||
{
|
||||
unique_lock<mutex> lock(m_state->m_mutex);
|
||||
return m_state->m_end;
|
||||
|
163
include/crucible/seeker.h
Normal file
163
include/crucible/seeker.h
Normal file
@@ -0,0 +1,163 @@
|
||||
#ifndef _CRUCIBLE_SEEKER_H_
|
||||
#define _CRUCIBLE_SEEKER_H_
|
||||
|
||||
#include "crucible/error.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
// Debug trace support for seek_backward.  When enabled, a call collects its
// trace in a local ostringstream named "logs"; seek_backward copies the trace
// to cerr only from its catch block.
#if 1
#include <iostream>
#include <sstream>
// Emit the declaration of the trace buffer verbatim
#define DINIT(decl) decl
// Append one line to the trace buffer.  A plain newline is enough here
// because there is nothing to flush in a string stream.
#define DLOG(msg) do { logs << msg << '\n'; } while (false)
// Copy the accumulated trace to the given output stream
#define DOUT(stream) do { (stream) << logs.str(); } while (false)
#else
// Tracing disabled: every macro expands to an empty statement
#define DINIT(decl) do {} while (false)
#define DLOG(msg) do {} while (false)
#define DOUT(stream) do {} while (false)
#endif
|
||||
|
||||
namespace crucible {
|
||||
using namespace std;
|
||||
|
||||
// Requirements for Container<Pos> Fetch(Pos lower, Pos upper):
|
||||
// - fetches objects in Pos order, starting from lower (must be >= lower)
|
||||
// - must return upper if present, may or may not return objects after that
|
||||
// - returns a container of Pos objects with begin(), end(), rbegin(), rend()
|
||||
// - container must iterate over objects in Pos order
|
||||
// - uniqueness of Pos objects not required
|
||||
// - should store the underlying data as a side effect
|
||||
//
|
||||
// Requirements for Pos:
|
||||
// - should behave like an unsigned integer type
|
||||
// - must have specializations in numeric_limits<T> for digits, max(), min()
|
||||
// - must support +, -, -=, and related operators
|
||||
// - must support <, <=, ==, and related operators
|
||||
// - must support Pos / 2 (only)
|
||||
//
|
||||
// Requirements for seek_backward:
|
||||
// - calls Fetch to search Pos space near target_pos
|
||||
// - if no key exists with value <= target_pos, returns the minimum Pos value
|
||||
// - returns the highest key value <= target_pos
|
||||
// - returned key value may not be part of most recent Fetch result
|
||||
// - 1 loop iteration when target_pos exists
|
||||
|
||||
template <class Fetch, class Pos = uint64_t>
|
||||
Pos
|
||||
seek_backward(Pos const target_pos, Fetch fetch, Pos min_step = 1, size_t max_loops = numeric_limits<size_t>::max())
|
||||
{
|
||||
DINIT(ostringstream logs);
|
||||
try {
|
||||
static const Pos end_pos = numeric_limits<Pos>::max();
|
||||
// TBH this probably won't work if begin_pos != 0, i.e. any signed type
|
||||
static const Pos begin_pos = numeric_limits<Pos>::min();
|
||||
// Run a binary search looking for the highest key below target_pos.
|
||||
// Initial upper bound of the search is target_pos.
|
||||
// Find initial lower bound by doubling the size of the range until a key below target_pos
|
||||
// is found, or the lower bound reaches the beginning of the search space.
|
||||
// If the lower bound search reaches the beginning of the search space without finding a key,
|
||||
// return the beginning of the search space; otherwise, perform a binary search between
|
||||
// the bounds now established.
|
||||
Pos lower_bound = 0;
|
||||
Pos upper_bound = target_pos;
|
||||
bool found_low = false;
|
||||
Pos probe_pos = target_pos;
|
||||
// We need one loop for each bit of the search space to find the lower bound,
|
||||
// one loop for each bit of the search space to find the upper bound,
|
||||
// and one extra loop to confirm the boundary is correct.
|
||||
for (size_t loop_count = min(numeric_limits<Pos>::digits * size_t(2) + 1, max_loops); loop_count; --loop_count) {
|
||||
DLOG("fetch(probe_pos = " << probe_pos << ", target_pos = " << target_pos << ")");
|
||||
auto result = fetch(probe_pos, target_pos);
|
||||
const Pos low_pos = result.empty() ? end_pos : *result.begin();
|
||||
const Pos high_pos = result.empty() ? end_pos : *result.rbegin();
|
||||
DLOG(" = " << low_pos << ".." << high_pos);
|
||||
// check for correct behavior of the fetch function
|
||||
THROW_CHECK2(out_of_range, high_pos, probe_pos, probe_pos <= high_pos);
|
||||
THROW_CHECK2(out_of_range, low_pos, probe_pos, probe_pos <= low_pos);
|
||||
THROW_CHECK2(out_of_range, low_pos, high_pos, low_pos <= high_pos);
|
||||
if (!found_low) {
|
||||
// if target_pos == end_pos then we will find it in every empty result set,
|
||||
// so in that case we force the lower bound to be lower than end_pos
|
||||
if ((target_pos == end_pos) ? (low_pos < target_pos) : (low_pos <= target_pos)) {
|
||||
// found a lower bound, set the low bound there and switch to binary search
|
||||
found_low = true;
|
||||
lower_bound = low_pos;
|
||||
DLOG("found_low = true, lower_bound = " << lower_bound);
|
||||
} else {
|
||||
// still looking for lower bound
|
||||
// if probe_pos was begin_pos then we can stop with no result
|
||||
if (probe_pos == begin_pos) {
|
||||
DLOG("return: probe_pos == begin_pos " << begin_pos);
|
||||
return begin_pos;
|
||||
}
|
||||
// double the range size, or use the distance between objects found so far
|
||||
THROW_CHECK2(out_of_range, upper_bound, probe_pos, probe_pos <= upper_bound);
|
||||
// already checked low_pos <= high_pos above
|
||||
const Pos want_delta = max(upper_bound - probe_pos, min_step);
|
||||
// avoid underflowing the beginning of the search space
|
||||
const Pos have_delta = min(want_delta, probe_pos - begin_pos);
|
||||
THROW_CHECK2(out_of_range, want_delta, have_delta, have_delta <= want_delta);
|
||||
// move probe and try again
|
||||
probe_pos = probe_pos - have_delta;
|
||||
DLOG("probe_pos " << probe_pos << " = probe_pos - have_delta " << have_delta << " (want_delta " << want_delta << ")");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (low_pos <= target_pos && target_pos <= high_pos) {
|
||||
// have keys on either side of target_pos in result
|
||||
// search from the high end until we find the highest key below target
|
||||
for (auto i = result.rbegin(); i != result.rend(); ++i) {
|
||||
// more correctness checking for fetch
|
||||
THROW_CHECK2(out_of_range, *i, probe_pos, probe_pos <= *i);
|
||||
if (*i <= target_pos) {
|
||||
DLOG("return: *i " << *i << " <= target_pos " << target_pos);
|
||||
return *i;
|
||||
}
|
||||
}
|
||||
// if the list is empty then low_pos = high_pos = end_pos
|
||||
// if target_pos = end_pos also, then we will execute the loop
|
||||
// above but not find any matching entries.
|
||||
THROW_CHECK0(runtime_error, result.empty());
|
||||
}
|
||||
if (target_pos <= low_pos) {
|
||||
// results are all too high, so probe_pos..low_pos is too high
|
||||
// lower the high bound to the probe pos
|
||||
upper_bound = probe_pos;
|
||||
DLOG("upper_bound = probe_pos " << probe_pos);
|
||||
}
|
||||
if (high_pos < target_pos) {
|
||||
// results are all too low, so probe_pos..high_pos is too low
|
||||
// raise the low bound to the high_pos
|
||||
DLOG("lower_bound = high_pos " << high_pos);
|
||||
lower_bound = high_pos;
|
||||
}
|
||||
// compute a new probe pos at the middle of the range and try again
|
||||
// we can't have a zero-size range here because we would not have set found_low yet
|
||||
THROW_CHECK2(out_of_range, lower_bound, upper_bound, lower_bound <= upper_bound);
|
||||
const Pos delta = (upper_bound - lower_bound) / 2;
|
||||
probe_pos = lower_bound + delta;
|
||||
if (delta < 1) {
|
||||
// nothing can exist in the range (lower_bound, upper_bound)
|
||||
// and an object is known to exist at lower_bound
|
||||
DLOG("return: probe_pos == lower_bound " << lower_bound);
|
||||
return lower_bound;
|
||||
}
|
||||
THROW_CHECK2(out_of_range, lower_bound, probe_pos, lower_bound <= probe_pos);
|
||||
THROW_CHECK2(out_of_range, upper_bound, probe_pos, probe_pos <= upper_bound);
|
||||
DLOG("loop: lower_bound " << lower_bound << ", probe_pos " << probe_pos << ", upper_bound " << upper_bound);
|
||||
}
|
||||
THROW_ERROR(runtime_error, "FIXME: should not reach this line: "
|
||||
"lower_bound..upper_bound " << lower_bound << ".." << upper_bound << ", "
|
||||
"found_low " << found_low);
|
||||
} catch (...) {
|
||||
DOUT(cerr);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif // _CRUCIBLE_SEEKER_H_
|
||||
|
@@ -1,167 +0,0 @@
|
||||
#ifndef CRUCIBLE_SPANNER_H
|
||||
#define CRUCIBLE_SPANNER_H
|
||||
|
||||
#include "crucible/error.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace crucible {
|
||||
|
||||
using namespace std;
|
||||
|
||||
// C++20 is already using the name "span" for something similar.
/// View of a run of T objects described by a head pointer and an element
/// count.  Head is the type used to hold the first element (T* by default)
/// and Iter is the iterator type handed out by begin() and end().
template <class T, class Head = T*, class Iter = Head>
class Spanner {
public:
	using iterator = Iter;
	using head_pointer = Head;
	using value_type = T;

	/// View all elements of a container that provides data() and size()
	template <class Container>
	Spanner(Container& container);

	/// View the elements in [begin, end)
	Spanner(head_pointer begin, iterator end);
	/// View size elements starting at begin
	Spanner(size_t size, head_pointer begin);
	/// Empty view
	Spanner() = default;
	Spanner &operator=(const Spanner &that) = default;
	iterator begin() const;
	iterator end() const;
	value_type *data() const;
	/// Bounds-checked element access
	value_type &at(size_t n) const;
	size_t size() const;
	bool empty() const;
	void clear();
	value_type &operator[](size_t n) const;
	/// Remove elements from the front or the back of the view
	iterator erase(iterator first, iterator last);
	iterator erase(iterator first);
private:
	// Both members are value-initialized so that a default-constructed
	// Spanner reports size() == 0 instead of reading indeterminate values
	head_pointer m_begin {};
	size_t m_size = 0;
};
|
||||
|
||||
template <class Container, class Head = typename Container::value_type *, class Iter = Head>
|
||||
Spanner<typename Container::value_type, Head, Iter> make_spanner(Container &container)
|
||||
{
|
||||
return Spanner<typename Container::value_type, Head, Iter>(container);
|
||||
}
|
||||
|
||||
// This template is an attempt to turn a shared_ptr to a container
|
||||
// into a range view that can be cheaply passed around.
|
||||
// It probably doesn't quite work in the general case.
|
||||
template <class Container, class Head = shared_ptr<typename Container::value_type>, class Iter = typename Container::value_type *>
|
||||
Spanner<typename Container::value_type, Head, Iter> make_spanner(shared_ptr<Container> &cont_ptr)
|
||||
{
|
||||
shared_ptr<typename Container::value_type> head(cont_ptr, cont_ptr->data());
|
||||
size_t const size = cont_ptr->size();
|
||||
return Spanner<typename Container::value_type, Head, Iter>(size, head);
|
||||
}
|
||||
|
||||
template <class T, class Head, class Iter>
|
||||
template <class Container>
|
||||
Spanner<T, Head, Iter>::Spanner(Container &container) :
|
||||
m_begin(container.data()),
|
||||
m_size(container.size())
|
||||
{
|
||||
}
|
||||
|
||||
template <class T, class Head, class Iter>
|
||||
Spanner<T, Head, Iter>::Spanner(head_pointer begin, iterator end) :
|
||||
m_begin(begin),
|
||||
m_size(end - begin)
|
||||
{
|
||||
}
|
||||
|
||||
template <class T, class Head, class Iter>
|
||||
Spanner<T, Head, Iter>::Spanner(size_t size, head_pointer begin) :
|
||||
m_begin(begin),
|
||||
m_size(size)
|
||||
{
|
||||
}
|
||||
|
||||
template <class T, class Head, class Iter>
|
||||
typename Spanner<T, Head, Iter>::iterator
|
||||
Spanner<T, Head, Iter>::erase(iterator first, iterator last)
|
||||
{
|
||||
auto end = m_begin + m_size;
|
||||
if (first == m_begin) {
|
||||
THROW_CHECK0(invalid_argument, last <= end);
|
||||
m_begin = last;
|
||||
return last;
|
||||
}
|
||||
if (last == end) {
|
||||
THROW_CHECK0(invalid_argument, m_begin <= first);
|
||||
m_size = first - m_begin;
|
||||
return first;
|
||||
}
|
||||
THROW_ERROR(invalid_argument, "first != begin() and last != end()");
|
||||
}
|
||||
|
||||
template <class T, class Head, class Iter>
|
||||
typename Spanner<T, Head, Iter>::iterator
|
||||
Spanner<T, Head, Iter>::erase(iterator first)
|
||||
{
|
||||
return erase(first, first + 1);
|
||||
}
|
||||
|
||||
template <class T, class Head, class Iter>
|
||||
typename Spanner<T, Head, Iter>::value_type &
|
||||
Spanner<T, Head, Iter>::operator[](size_t n) const
|
||||
{
|
||||
return at(n);
|
||||
}
|
||||
|
||||
template <class T, class Head, class Iter>
|
||||
void
|
||||
Spanner<T, Head, Iter>::clear()
|
||||
{
|
||||
m_begin = head_pointer();
|
||||
m_size = 0;
|
||||
}
|
||||
|
||||
template <class T, class Head, class Iter>
|
||||
bool
|
||||
Spanner<T, Head, Iter>::empty() const
|
||||
{
|
||||
return m_size == 0;
|
||||
}
|
||||
|
||||
template <class T, class Head, class Iter>
|
||||
size_t
|
||||
Spanner<T, Head, Iter>::size() const
|
||||
{
|
||||
return m_size;
|
||||
}
|
||||
|
||||
template <class T, class Head, class Iter>
|
||||
typename Spanner<T, Head, Iter>::value_type *
|
||||
Spanner<T, Head, Iter>::data() const
|
||||
{
|
||||
return &(*m_begin);
|
||||
}
|
||||
|
||||
template <class T, class Head, class Iter>
|
||||
typename Spanner<T, Head, Iter>::iterator
|
||||
Spanner<T, Head, Iter>::begin() const
|
||||
{
|
||||
return data();
|
||||
}
|
||||
|
||||
template <class T, class Head, class Iter>
|
||||
typename Spanner<T, Head, Iter>::iterator
|
||||
Spanner<T, Head, Iter>::end() const
|
||||
{
|
||||
return data() + m_size;
|
||||
}
|
||||
|
||||
template <class T, class Head, class Iter>
|
||||
typename Spanner<T, Head, Iter>::value_type &
|
||||
Spanner<T, Head, Iter>::at(size_t n) const
|
||||
{
|
||||
THROW_CHECK2(out_of_range, n, size(), n < size());
|
||||
return *(data() + n);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif // CRUCIBLE_SPANNER_H
|
@@ -11,23 +11,6 @@
|
||||
namespace crucible {
|
||||
using namespace std;
|
||||
|
||||
/// Overwrite the object that `that` points to with sizeof(Base) zero bytes.
/// Meant for base class objects, usually plain C structs.
template <class Base>
void
memset_zero(Base *that)
{
	std::memset(that, 0, sizeof(*that));
}
|
||||
|
||||
/// Return the sizeof(Base) bytes of the object at `that` as a byte vector.
/// Meant for base class objects, usually plain C structs.
template <class Base>
std::vector<std::uint8_t>
vector_copy_struct(Base *that)
{
	const Base *const object = that;
	const auto *const first_byte = reinterpret_cast<const std::uint8_t *>(object);
	const auto *const past_last_byte = first_byte + sizeof(Base);
	return std::vector<std::uint8_t>(first_byte, past_last_byte);
}
|
||||
|
||||
// int->hex conversion with sprintf
|
||||
string to_hex(uint64_t i);
|
||||
|
||||
|
@@ -3,6 +3,7 @@
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
|
||||
@@ -92,92 +93,92 @@ namespace crucible {
|
||||
/// Gets the current number of active workers
|
||||
static size_t get_thread_count();
|
||||
|
||||
/// Gets the current load tracking statistics
|
||||
struct LoadStats {
|
||||
/// Current load extracted from last two 5-second load average samples
|
||||
double current_load;
|
||||
/// Target thread count computed from previous thread count and current load
|
||||
double thread_target;
|
||||
/// Load average for last 60 seconds
|
||||
double loadavg;
|
||||
};
|
||||
static LoadStats get_current_load();
|
||||
|
||||
/// Drop the current queue and discard new Tasks without
|
||||
/// running them. Currently executing tasks are not
|
||||
/// affected (use set_thread_count(0) to wait for those
|
||||
/// to complete).
|
||||
static void cancel();
|
||||
};
|
||||
|
||||
// Barrier executes waiting Tasks once the last BarrierLock
|
||||
// is released. Multiple unique Tasks may be scheduled while
|
||||
// BarrierLocks exist and all will be run() at once upon
|
||||
// release. If no BarrierLocks exist, Tasks are executed
|
||||
// immediately upon insertion.
|
||||
/// Stop running any new Tasks. All existing
|
||||
/// Consumer threads will exit. Does not affect queue.
|
||||
/// Does not wait for threads to exit. Reversible.
|
||||
static void pause(bool paused = true);
|
||||
};
|
||||
|
||||
class BarrierState;
|
||||
|
||||
class BarrierLock {
|
||||
shared_ptr<BarrierState> m_barrier_state;
|
||||
BarrierLock(shared_ptr<BarrierState> pbs);
|
||||
friend class Barrier;
|
||||
public:
|
||||
// Release this Lock immediately and permanently
|
||||
void release();
|
||||
};
|
||||
|
||||
/// Barrier delays the execution of one or more Tasks.
|
||||
/// The Tasks are executed when the last shared reference to the
|
||||
/// BarrierState is released. Copies of Barrier objects refer
|
||||
/// to the same Barrier state.
|
||||
class Barrier {
|
||||
shared_ptr<BarrierState> m_barrier_state;
|
||||
|
||||
Barrier(shared_ptr<BarrierState> pbs);
|
||||
public:
|
||||
Barrier();
|
||||
|
||||
// Prevent execution of tasks behind barrier until
|
||||
// BarrierLock destructor or release() method is called.
|
||||
BarrierLock lock();
|
||||
|
||||
// Schedule a task for execution when no Locks exist
|
||||
/// Schedule a task for execution when last Barrier is released.
|
||||
void insert_task(Task t);
|
||||
|
||||
/// Release this reference to the barrier state.
|
||||
/// Last released reference executes the task.
|
||||
/// Barrier can only be released once, after which the
|
||||
/// object can no longer be used.
|
||||
void release();
|
||||
};
|
||||
|
||||
// Exclusion provides exclusive access to an ExclusionLock.
|
||||
// One Task will be able to obtain the ExclusionLock; other Tasks
|
||||
// may schedule themselves for re-execution after the ExclusionLock
|
||||
// is released.
|
||||
|
||||
class ExclusionState;
|
||||
class Exclusion;
|
||||
|
||||
class ExclusionLock {
|
||||
shared_ptr<ExclusionState> m_exclusion_state;
|
||||
ExclusionLock(shared_ptr<ExclusionState> pes);
|
||||
ExclusionLock() = default;
|
||||
shared_ptr<Task> m_owner;
|
||||
ExclusionLock(shared_ptr<Task> owner);
|
||||
friend class Exclusion;
|
||||
public:
|
||||
// Calls release()
|
||||
~ExclusionLock();
|
||||
/// Explicit default constructor because other constructors are declared
|
||||
ExclusionLock() = default;
|
||||
|
||||
// Release this Lock immediately and permanently
|
||||
/// Release this Lock immediately and permanently
|
||||
void release();
|
||||
|
||||
// Test for locked state
|
||||
/// Test for locked state
|
||||
operator bool() const;
|
||||
};
|
||||
|
||||
class Exclusion {
|
||||
shared_ptr<ExclusionState> m_exclusion_state;
|
||||
mutex m_mutex;
|
||||
weak_ptr<Task> m_owner;
|
||||
|
||||
Exclusion(shared_ptr<ExclusionState> pes);
|
||||
public:
|
||||
Exclusion(const string &title);
|
||||
/// Attempt to obtain a Lock. If successful, current Task
|
||||
/// owns the Lock until the ExclusionLock is released
|
||||
/// (it is the ExclusionLock that owns the lock, so it can
|
||||
/// be passed to other Tasks or threads, but this is not
|
||||
/// recommended practice).
|
||||
/// If not successful, current Task is appended to the
|
||||
/// task that currently holds the lock. Current task is
|
||||
/// expected to release any other ExclusionLock
|
||||
/// objects it holds, and exit its Task function.
|
||||
ExclusionLock try_lock(const Task &task);
|
||||
|
||||
// Attempt to obtain a Lock. If successful, current Task
|
||||
// owns the Lock until the ExclusionLock is released
|
||||
// (it is the ExclusionLock that owns the lock, so it can
|
||||
// be passed to other Tasks or threads, but this is not
|
||||
// recommended practice).
|
||||
// If not successful, current Task is expected to call
|
||||
// insert_task(current_task()), release any ExclusionLock
|
||||
// objects it holds, and exit its Task function.
|
||||
ExclusionLock try_lock();
|
||||
|
||||
// Execute Task when Exclusion is unlocked (possibly
|
||||
// immediately).
|
||||
void insert_task(Task t = Task::current_task());
|
||||
/// Execute Task when Exclusion is unlocked (possibly
|
||||
/// immediately).
|
||||
void insert_task(const Task &t);
|
||||
};
|
||||
|
||||
/// Wrapper around pthread_setname_np which handles length limits
|
||||
void pthread_setname(const string &name);
|
||||
|
||||
/// Wrapper around pthread_getname_np for symmetry
|
||||
string pthread_getname();
|
||||
}
|
||||
|
||||
#endif // CRUCIBLE_TASK_H
|
||||
|
14
include/crucible/uname.h
Normal file
14
include/crucible/uname.h
Normal file
@@ -0,0 +1,14 @@
|
||||
#ifndef CRUCIBLE_UNAME_H
|
||||
#define CRUCIBLE_UNAME_H
|
||||
|
||||
#include <sys/utsname.h>
|
||||
|
||||
namespace crucible {
|
||||
using namespace std;
|
||||
|
||||
/// A utsname with a default constructor that is defined out of line.
struct Uname : utsname {
	Uname();
};
|
||||
}
|
||||
|
||||
#endif
|
23
lib/Makefile
23
lib/Makefile
@@ -1,9 +1,9 @@
|
||||
TAG ?= $(shell git describe --always --dirty || echo UNKNOWN)
|
||||
|
||||
default: libcrucible.a
|
||||
%.a: Makefile
|
||||
|
||||
CRUCIBLE_OBJS = \
|
||||
bytevector.o \
|
||||
btrfs-tree.o \
|
||||
chatter.o \
|
||||
city.o \
|
||||
cleanup.o \
|
||||
@@ -12,12 +12,14 @@ CRUCIBLE_OBJS = \
|
||||
extentwalker.o \
|
||||
fd.o \
|
||||
fs.o \
|
||||
multilock.o \
|
||||
ntoa.o \
|
||||
path.o \
|
||||
process.o \
|
||||
string.o \
|
||||
task.o \
|
||||
time.o \
|
||||
uname.o \
|
||||
|
||||
include ../makeflags
|
||||
-include ../localconf
|
||||
@@ -28,24 +30,13 @@ BEES_LDFLAGS = $(LDFLAGS)
|
||||
configure.h: configure.h.in
|
||||
$(TEMPLATE_COMPILER)
|
||||
|
||||
.depends:
|
||||
mkdir -p $@
|
||||
|
||||
.depends/%.dep: %.cc configure.h Makefile | .depends
|
||||
%.dep: %.cc configure.h Makefile
|
||||
$(CXX) $(BEES_CXXFLAGS) -M -MF $@ -MT $(<:.cc=.o) $<
|
||||
|
||||
depends.mk: $(CRUCIBLE_OBJS:%.o=.depends/%.dep)
|
||||
cat $^ > $@.new
|
||||
mv -f $@.new $@
|
||||
|
||||
.version.cc: configure.h Makefile ../makeflags $(CRUCIBLE_OBJS:.o=.cc) ../include/crucible/*.h
|
||||
echo "namespace crucible { const char *VERSION = \"$(TAG)\"; }" > $@.new
|
||||
if ! cmp "$@.new" "$@"; then mv -fv $@.new $@; fi
|
||||
|
||||
include depends.mk
|
||||
include $(CRUCIBLE_OBJS:%.o=%.dep)
|
||||
|
||||
%.o: %.cc ../makeflags
|
||||
$(CXX) $(BEES_CXXFLAGS) -o $@ -c $<
|
||||
|
||||
libcrucible.a: $(CRUCIBLE_OBJS) .version.o
|
||||
libcrucible.a: $(CRUCIBLE_OBJS)
|
||||
$(AR) rcs $@ $^
|
||||
|
684
lib/btrfs-tree.cc
Normal file
684
lib/btrfs-tree.cc
Normal file
@@ -0,0 +1,684 @@
|
||||
#include "crucible/btrfs-tree.h"
|
||||
#include "crucible/btrfs.h"
|
||||
#include "crucible/error.h"
|
||||
#include "crucible/fs.h"
|
||||
#include "crucible/hexdump.h"
|
||||
#include "crucible/seeker.h"
|
||||
|
||||
namespace crucible {
|
||||
using namespace std;
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeItem::extent_begin() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_ITEM_KEY);
|
||||
return m_objectid;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeItem::extent_end() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_ITEM_KEY);
|
||||
return m_objectid + m_offset;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeItem::extent_generation() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_ITEM_KEY);
|
||||
return btrfs_get_member(&btrfs_extent_item::generation, m_data);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeItem::root_ref_dirid() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_ROOT_BACKREF_KEY);
|
||||
return btrfs_get_member(&btrfs_root_ref::dirid, m_data);
|
||||
}
|
||||
|
||||
/// Name stored in a ROOT_BACKREF item: the name_len bytes that follow the
/// fixed-size btrfs_root_ref header in the item data.
string
BtrfsTreeItem::root_ref_name() const
{
	THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_ROOT_BACKREF_KEY);
	const auto header_size = sizeof(struct btrfs_root_ref);
	const auto name_end = btrfs_get_member(&btrfs_root_ref::name_len, m_data) + header_size;
	// The item must be large enough to hold the header plus the whole name
	THROW_CHECK2(runtime_error, m_data.size(), name_end, m_data.size() >= name_end);
	const auto bytes = m_data.data();
	return string(bytes + header_size, bytes + name_end);
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeItem::root_ref_parent_rootid() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_ROOT_BACKREF_KEY);
|
||||
return offset();
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeItem::root_flags() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_ROOT_ITEM_KEY);
|
||||
return btrfs_get_member(&btrfs_root_item::flags, m_data);
|
||||
}
|
||||
|
||||
/// Write the key fields and transid of a BtrfsTreeItem, followed by a
/// hexdump of its data, to os.
ostream &
operator<<(ostream &os, const BtrfsTreeItem &bti)
{
	os << "BtrfsTreeItem {";
	os << " objectid = " << to_hex(bti.objectid());
	os << ", type = " << btrfs_search_type_ntoa(bti.type());
	os << ", offset = " << to_hex(bti.offset());
	os << ", transid = " << bti.transid();
	os << ", data = ";
	hexdump(os, bti.data());
	return os;
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeItem::block_group_flags() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_BLOCK_GROUP_ITEM_KEY);
|
||||
return btrfs_get_member(&btrfs_block_group_item::flags, m_data);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeItem::block_group_used() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_BLOCK_GROUP_ITEM_KEY);
|
||||
return btrfs_get_member(&btrfs_block_group_item::used, m_data);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeItem::chunk_length() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_CHUNK_ITEM_KEY);
|
||||
return btrfs_get_member(&btrfs_chunk::length, m_data);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeItem::chunk_type() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_CHUNK_ITEM_KEY);
|
||||
return btrfs_get_member(&btrfs_chunk::type, m_data);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeItem::dev_extent_chunk_offset() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_DEV_EXTENT_KEY);
|
||||
return btrfs_get_member(&btrfs_dev_extent::chunk_offset, m_data);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeItem::dev_extent_length() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_DEV_EXTENT_KEY);
|
||||
return btrfs_get_member(&btrfs_dev_extent::length, m_data);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeItem::dev_item_total_bytes() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_DEV_ITEM_KEY);
|
||||
return btrfs_get_member(&btrfs_dev_item::total_bytes, m_data);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeItem::dev_item_bytes_used() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_DEV_ITEM_KEY);
|
||||
return btrfs_get_member(&btrfs_dev_item::bytes_used, m_data);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeItem::inode_size() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_INODE_ITEM_KEY);
|
||||
return btrfs_get_member(&btrfs_inode_item::size, m_data);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeItem::file_extent_logical_bytes() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_DATA_KEY);
|
||||
const auto file_extent_item_type = btrfs_get_member(&btrfs_file_extent_item::type, m_data);
|
||||
switch (file_extent_item_type) {
|
||||
case BTRFS_FILE_EXTENT_INLINE:
|
||||
return btrfs_get_member(&btrfs_file_extent_item::ram_bytes, m_data);
|
||||
case BTRFS_FILE_EXTENT_PREALLOC:
|
||||
case BTRFS_FILE_EXTENT_REG:
|
||||
return btrfs_get_member(&btrfs_file_extent_item::num_bytes, m_data);
|
||||
default:
|
||||
THROW_ERROR(runtime_error, "unknown btrfs_file_extent_item type " << file_extent_item_type);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeItem::file_extent_offset() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_DATA_KEY);
|
||||
const auto file_extent_item_type = btrfs_get_member(&btrfs_file_extent_item::type, m_data);
|
||||
switch (file_extent_item_type) {
|
||||
case BTRFS_FILE_EXTENT_INLINE:
|
||||
THROW_ERROR(invalid_argument, "extent is inline " << *this);
|
||||
case BTRFS_FILE_EXTENT_PREALLOC:
|
||||
case BTRFS_FILE_EXTENT_REG:
|
||||
return btrfs_get_member(&btrfs_file_extent_item::offset, m_data);
|
||||
default:
|
||||
THROW_ERROR(runtime_error, "unknown btrfs_file_extent_item type " << file_extent_item_type << " in " << *this);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeItem::file_extent_generation() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_DATA_KEY);
|
||||
return btrfs_get_member(&btrfs_file_extent_item::generation, m_data);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeItem::file_extent_bytenr() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_DATA_KEY);
|
||||
auto file_extent_item_type = btrfs_get_member(&btrfs_file_extent_item::type, m_data);
|
||||
switch (file_extent_item_type) {
|
||||
case BTRFS_FILE_EXTENT_INLINE:
|
||||
THROW_ERROR(invalid_argument, "extent is inline " << *this);
|
||||
case BTRFS_FILE_EXTENT_PREALLOC:
|
||||
case BTRFS_FILE_EXTENT_REG:
|
||||
return btrfs_get_member(&btrfs_file_extent_item::disk_bytenr, m_data);
|
||||
default:
|
||||
THROW_ERROR(runtime_error, "unknown btrfs_file_extent_item type " << file_extent_item_type << " in " << *this);
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t
|
||||
BtrfsTreeItem::file_extent_type() const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_DATA_KEY);
|
||||
return btrfs_get_member(&btrfs_file_extent_item::type, m_data);
|
||||
}
|
||||
|
||||
// Read the `compression` member out of a btrfs_file_extent_item payload and
// convert it to btrfs_compression_type.
// The THROW_CHECK1 guard (invalid_argument) requires a BTRFS_EXTENT_DATA_KEY item.
btrfs_compression_type
BtrfsTreeItem::file_extent_compression() const
{
	THROW_CHECK1(invalid_argument, btrfs_search_type_ntoa(m_type), m_type == BTRFS_EXTENT_DATA_KEY);
	const auto raw_compression = btrfs_get_member(&btrfs_file_extent_item::compression, m_data);
	return static_cast<btrfs_compression_type>(raw_compression);
}
|
||||
|
||||
// Build a tree item from a search result header: copies the key fields
// (objectid, offset, type), the transid, and the item data handle.
BtrfsTreeItem::BtrfsTreeItem(const BtrfsIoctlSearchHeader &bish) :
	m_objectid(bish.objectid),
	m_offset(bish.offset),
	m_transid(bish.transid),
	m_data(bish.m_data),
	m_type(bish.type)
{
	// Nothing else to do: every member is set by the initializer list.
}
|
||||
|
||||
// Overwrite this item with the fields of a search result header.
// Mirrors the converting constructor, field for field.
BtrfsTreeItem &
BtrfsTreeItem::operator=(const BtrfsIoctlSearchHeader &bish)
{
	// Key fields and transaction id
	m_objectid = bish.objectid;
	m_offset = bish.offset;
	m_transid = bish.transid;

	// Payload and key type
	m_data = bish.m_data;
	m_type = bish.type;

	return *this;
}
|
||||
|
||||
bool
|
||||
BtrfsTreeItem::operator!() const
|
||||
{
|
||||
return m_transid == 0 && m_objectid == 0 && m_offset == 0 && m_type == 0;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeFetcher::block_size() const
|
||||
{
|
||||
return m_block_size;
|
||||
}
|
||||
|
||||
// Bind the fetcher to a filesystem fd and query the filesystem info ioctl
// for the sector size, which becomes both the block size and the default
// scale size.  The lookbehind window defaults to 128 KiB.
// Throws runtime_error (via THROW_CHECK1) if the reported sector size is
// zero or not a multiple of 4096.
BtrfsTreeFetcher::BtrfsTreeFetcher(Fd new_fd) :
	m_fd(new_fd)
{
	BtrfsIoctlFsInfoArgs fs_info;
	fs_info.do_ioctl(fd());
	m_block_size = fs_info.sectorsize;

	// Sanity checks on what the kernel told us
	THROW_CHECK1(runtime_error, m_block_size, m_block_size > 0);
	// We don't believe sector sizes that aren't multiples of 4K
	THROW_CHECK1(runtime_error, m_block_size, (m_block_size % 4096) == 0);

	// Defaults; callers may change both through the setters
	m_lookbehind_size = 128 * 1024;
	m_scale_size = m_block_size;
}
|
||||
|
||||
// File descriptor handle the fetcher issues its ioctls on.
Fd
BtrfsTreeFetcher::fd() const
{
	return m_fd;
}
|
||||
|
||||
void
|
||||
BtrfsTreeFetcher::fd(Fd fd)
|
||||
{
|
||||
m_fd = fd;
|
||||
}
|
||||
|
||||
void
|
||||
BtrfsTreeFetcher::type(uint8_t type)
|
||||
{
|
||||
m_type = type;
|
||||
}
|
||||
|
||||
void
|
||||
BtrfsTreeFetcher::tree(uint64_t tree)
|
||||
{
|
||||
m_tree = tree;
|
||||
}
|
||||
|
||||
void
|
||||
BtrfsTreeFetcher::transid(uint64_t min_transid, uint64_t max_transid)
|
||||
{
|
||||
m_min_transid = min_transid;
|
||||
m_max_transid = max_transid;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeFetcher::lookbehind_size() const
|
||||
{
|
||||
return m_lookbehind_size;
|
||||
}
|
||||
|
||||
void
|
||||
BtrfsTreeFetcher::lookbehind_size(uint64_t lookbehind_size)
|
||||
{
|
||||
m_lookbehind_size = lookbehind_size;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeFetcher::scale_size() const
|
||||
{
|
||||
return m_scale_size;
|
||||
}
|
||||
|
||||
void
|
||||
BtrfsTreeFetcher::scale_size(uint64_t scale_size)
|
||||
{
|
||||
m_scale_size = scale_size;
|
||||
}
|
||||
|
||||
void
|
||||
BtrfsTreeFetcher::fill_sk(BtrfsIoctlSearchKey &sk, uint64_t object)
|
||||
{
|
||||
(void)object;
|
||||
// btrfs allows tree ID 0 meaning the current tree, but we do not.
|
||||
THROW_CHECK0(invalid_argument, m_tree != 0);
|
||||
sk.tree_id = m_tree;
|
||||
sk.min_type = m_type;
|
||||
sk.max_type = m_type;
|
||||
sk.min_transid = m_min_transid;
|
||||
sk.max_transid = m_max_transid;
|
||||
sk.nr_items = 1;
|
||||
}
|
||||
|
||||
void
|
||||
BtrfsTreeFetcher::next_sk(BtrfsIoctlSearchKey &key, const BtrfsIoctlSearchHeader &hdr)
|
||||
{
|
||||
key.next_min(hdr, m_type);
|
||||
}
|
||||
|
||||
// Exact lookup: return the item whose logical position equals `logical` and
// which satisfies hdr_match(), or an empty BtrfsTreeItem if there is none.
// Throws runtime_error (via THROW_CHECK1) if the search returns 2 or more items.
BtrfsTreeItem
BtrfsTreeFetcher::at(uint64_t logical)
{
	BtrfsIoctlSearchKey &sk = m_sk;
	fill_sk(sk, logical);
	// Exact match, should return 0 or 1 items
	sk.max_type = sk.min_type;
	sk.nr_items = 1;
	sk.do_ioctl(fd());
	THROW_CHECK1(runtime_error, sk.m_result.size(), sk.m_result.size() < 2);
	for (const auto &hdr : sk.m_result) {
		const bool exact_hit = hdr_logical(hdr) == logical && hdr_match(hdr);
		if (!exact_hit) {
			continue;
		}
		return hdr;
	}
	return BtrfsTreeItem();
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeFetcher::scale_logical(const uint64_t logical) const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, logical, (logical % m_scale_size) == 0 || logical == s_max_logical);
|
||||
return logical / m_scale_size;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeFetcher::scaled_max_logical() const
|
||||
{
|
||||
return scale_logical(s_max_logical);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeFetcher::unscale_logical(const uint64_t logical) const
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, logical, logical <= scaled_max_logical());
|
||||
if (logical == scaled_max_logical()) {
|
||||
return s_max_logical;
|
||||
}
|
||||
return logical * scale_size();
|
||||
}
|
||||
|
||||
// Reverse lower bound: return the matching item with the greatest logical
// position that is <= `logical`, or an empty BtrfsTreeItem if none is found.
//
// The search is driven by seek_backward(), which repeatedly invokes the
// lambda with a [lower_bound, upper_bound] window in scaled units.  For each
// window the lambda runs forward tree searches starting at lower_bound and
// returns the set of scaled positions of the matching items it saw (plus
// uint64_t max if hdr_stop() fired).  As a side effect the lambda tracks the
// best candidate seen so far in closest_item.
//
// Fix relative to the previous version: the candidate test used
// `this_logical > closest_logical` with closest_logical starting at 0, so a
// matching item located at logical position 0 could never be selected.  The
// first candidate is now accepted unconditionally (closest_item still empty).
BtrfsTreeItem
BtrfsTreeFetcher::rlower_bound(uint64_t logical)
{
#if 0
#define BTFRLB_DEBUG(x) do { cerr << x; } while (false)
#else
#define BTFRLB_DEBUG(x) do { } while (false)
#endif
	BtrfsTreeItem closest_item;
	uint64_t closest_logical = 0;
	BtrfsIoctlSearchKey &sk = m_sk;
	size_t loops = 0;
	BTFRLB_DEBUG("rlower_bound: " << to_hex(logical) << endl);
	seek_backward(scale_logical(logical), [&](uint64_t lower_bound, uint64_t upper_bound) {
		++loops;
		fill_sk(sk, unscale_logical(min(scaled_max_logical(), lower_bound)));
		set<uint64_t> rv;
		do {
			sk.nr_items = 4;
			sk.do_ioctl(fd());
			BTFRLB_DEBUG("fetch: loop " << loops << " lower_bound..upper_bound " << to_hex(lower_bound) << ".." << to_hex(upper_bound));
			for (auto &i : sk.m_result) {
				// Move the key forward first so the next ioctl resumes after this item
				next_sk(sk, i);
				const auto this_logical = hdr_logical(i);
				const auto scaled_hdr_logical = scale_logical(this_logical);
				BTFRLB_DEBUG(" " << to_hex(scaled_hdr_logical));
				if (hdr_match(i)) {
					// Accept the first candidate unconditionally so that an item
					// at logical 0 is not lost; afterwards only strictly closer
					// items replace it (the first of equal positions wins).
					if (this_logical <= logical && (!closest_item || this_logical > closest_logical)) {
						closest_logical = this_logical;
						closest_item = i;
					}
					BTFRLB_DEBUG("(match)");
					rv.insert(scaled_hdr_logical);
				}
				if (scaled_hdr_logical > upper_bound || hdr_stop(i)) {
					if (scaled_hdr_logical >= upper_bound) {
						BTFRLB_DEBUG("(" << to_hex(scaled_hdr_logical) << " >= " << to_hex(upper_bound) << ")");
					}
					if (hdr_stop(i)) {
						// Tell seek_backward there is nothing further ahead
						rv.insert(numeric_limits<uint64_t>::max());
						BTFRLB_DEBUG("(stop)");
					}
					break;
				} else {
					BTFRLB_DEBUG("(cont'd)");
				}
			}
			BTFRLB_DEBUG(endl);
			// We might get a search result that contains only non-matching items.
			// Keep looping until we find any matching item or we run out of tree.
		} while (rv.empty() && !sk.m_result.empty());
		return rv;
	}, scale_logical(lookbehind_size()));
	return closest_item;
#undef BTFRLB_DEBUG
}
|
||||
|
||||
// Forward lower bound: starting from the key built by fill_sk(logical),
// return the first search result that satisfies hdr_match().  Returns an
// empty BtrfsTreeItem if hdr_stop() fires first or the search runs dry.
BtrfsTreeItem
BtrfsTreeFetcher::lower_bound(uint64_t logical)
{
	BtrfsIoctlSearchKey &sk = m_sk;
	fill_sk(sk, logical);
	while (true) {
		assert(sk.max_offset == s_max_logical);
		sk.do_ioctl(fd());
		for (const auto &hdr : sk.m_result) {
			if (hdr_match(hdr)) {
				return hdr;
			}
			if (hdr_stop(hdr)) {
				return BtrfsTreeItem();
			}
			// Not a match, not a stop: resume after this item next time
			next_sk(sk, hdr);
		}
		if (sk.m_result.empty()) {
			break;
		}
	}
	return BtrfsTreeItem();
}
|
||||
|
||||
// Return the first matching item strictly after `logical` (in scaled units:
// lower_bound of scaled position + 1), or an empty BtrfsTreeItem when
// `logical` is already at the scaled maximum.
//
// The guard is written as `>=` rather than `scaled_logical + 1 > max`: when
// the scaled position is the largest uint64_t value, adding 1 wraps to 0,
// which made the old guard pass and restarted the search from the beginning
// of the tree.
BtrfsTreeItem
BtrfsTreeFetcher::next(uint64_t logical)
{
	const auto scaled_logical = scale_logical(logical);
	if (scaled_logical >= scaled_max_logical()) {
		return BtrfsTreeItem();
	}
	return lower_bound(unscale_logical(scaled_logical + 1));
}
|
||||
|
||||
// Return the closest matching item strictly before `logical` (in scaled
// units: rlower_bound of scaled position - 1), or an empty BtrfsTreeItem
// when `logical` scales to 0 and nothing can precede it.
BtrfsTreeItem
BtrfsTreeFetcher::prev(uint64_t logical)
{
	const auto scaled_logical = scale_logical(logical);
	if (scaled_logical == 0) {
		return BtrfsTreeItem();
	}
	const auto previous_position = unscale_logical(scaled_logical - 1);
	return rlower_bound(previous_position);
}
|
||||
|
||||
void
|
||||
BtrfsTreeObjectFetcher::fill_sk(BtrfsIoctlSearchKey &sk, uint64_t object)
|
||||
{
|
||||
BtrfsTreeFetcher::fill_sk(sk, object);
|
||||
sk.min_offset = 0;
|
||||
sk.max_offset = numeric_limits<decltype(sk.max_offset)>::max();
|
||||
sk.min_objectid = object;
|
||||
sk.max_objectid = numeric_limits<decltype(sk.max_objectid)>::max();
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeObjectFetcher::hdr_logical(const BtrfsIoctlSearchHeader &hdr)
|
||||
{
|
||||
return hdr.objectid;
|
||||
}
|
||||
|
||||
bool
|
||||
BtrfsTreeObjectFetcher::hdr_match(const BtrfsIoctlSearchHeader &hdr)
|
||||
{
|
||||
// If you're calling this method without overriding it, you should have set type first
|
||||
assert(m_type);
|
||||
return hdr.type == m_type;
|
||||
}
|
||||
|
||||
bool
|
||||
BtrfsTreeObjectFetcher::hdr_stop(const BtrfsIoctlSearchHeader &hdr)
|
||||
{
|
||||
return false;
|
||||
(void)hdr;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeOffsetFetcher::hdr_logical(const BtrfsIoctlSearchHeader &hdr)
|
||||
{
|
||||
return hdr.offset;
|
||||
}
|
||||
|
||||
bool
|
||||
BtrfsTreeOffsetFetcher::hdr_match(const BtrfsIoctlSearchHeader &hdr)
|
||||
{
|
||||
assert(m_type);
|
||||
return hdr.type == m_type && hdr.objectid == m_objectid;
|
||||
}
|
||||
|
||||
bool
|
||||
BtrfsTreeOffsetFetcher::hdr_stop(const BtrfsIoctlSearchHeader &hdr)
|
||||
{
|
||||
assert(m_type);
|
||||
return hdr.objectid > m_objectid || hdr.type > m_type;
|
||||
}
|
||||
|
||||
void
|
||||
BtrfsTreeOffsetFetcher::objectid(uint64_t objectid)
|
||||
{
|
||||
m_objectid = objectid;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsTreeOffsetFetcher::objectid() const
|
||||
{
|
||||
return m_objectid;
|
||||
}
|
||||
|
||||
void
|
||||
BtrfsTreeOffsetFetcher::fill_sk(BtrfsIoctlSearchKey &sk, uint64_t offset)
|
||||
{
|
||||
BtrfsTreeFetcher::fill_sk(sk, offset);
|
||||
sk.min_offset = offset;
|
||||
sk.max_offset = numeric_limits<decltype(sk.max_offset)>::max();
|
||||
sk.min_objectid = m_objectid;
|
||||
sk.max_objectid = m_objectid;
|
||||
}
|
||||
|
||||
void
|
||||
BtrfsCsumTreeFetcher::get_sums(uint64_t const logical, size_t count, function<void(uint64_t logical, const uint8_t *buf, size_t bytes)> output)
|
||||
{
|
||||
#if 0
|
||||
#define BCTFGS_DEBUG(x) do { cerr << x; } while (false)
|
||||
#else
|
||||
#define BCTFGS_DEBUG(x) do { } while (false)
|
||||
#endif
|
||||
const uint64_t logical_end = logical + count * block_size();
|
||||
BtrfsTreeItem bti = rlower_bound(logical);
|
||||
size_t loops = 0;
|
||||
BCTFGS_DEBUG("get_sums " << to_hex(logical) << ".." << to_hex(logical_end) << endl);
|
||||
while (!!bti) {
|
||||
BCTFGS_DEBUG("get_sums[" << loops << "]: " << bti << endl);
|
||||
++loops;
|
||||
// Reject wrong type or objectid
|
||||
THROW_CHECK1(runtime_error, bti.type(), bti.type() == BTRFS_EXTENT_CSUM_KEY);
|
||||
THROW_CHECK1(runtime_error, bti.objectid(), bti.objectid() == BTRFS_EXTENT_CSUM_OBJECTID);
|
||||
// Is this object in range?
|
||||
const uint64_t data_logical = bti.offset();
|
||||
if (data_logical >= logical_end) {
|
||||
// csum object is past end of range, we are done
|
||||
return;
|
||||
}
|
||||
// Figure out how long this csum item is in various units
|
||||
const size_t csum_byte_count = bti.data().size();
|
||||
THROW_CHECK1(runtime_error, csum_byte_count, (csum_byte_count % m_sum_size) == 0);
|
||||
THROW_CHECK1(runtime_error, csum_byte_count, csum_byte_count > 0);
|
||||
const size_t csum_count = csum_byte_count / m_sum_size;
|
||||
const uint64_t data_byte_count = csum_count * block_size();
|
||||
const uint64_t data_logical_end = data_logical + data_byte_count;
|
||||
if (data_logical_end <= logical) {
|
||||
// too low, look at next item
|
||||
bti = lower_bound(logical);
|
||||
continue;
|
||||
}
|
||||
// There is some overlap?
|
||||
const uint64_t overlap_begin = max(logical, data_logical);
|
||||
const uint64_t overlap_end = min(logical_end, data_logical_end);
|
||||
THROW_CHECK2(runtime_error, overlap_begin, overlap_end, overlap_begin < overlap_end);
|
||||
const uint64_t overlap_offset = overlap_begin - data_logical;
|
||||
THROW_CHECK1(runtime_error, overlap_offset, (overlap_offset % block_size()) == 0);
|
||||
const uint64_t overlap_index = overlap_offset * m_sum_size / block_size();
|
||||
const uint64_t overlap_byte_count = overlap_end - overlap_begin;
|
||||
const uint64_t overlap_csum_byte_count = overlap_byte_count * m_sum_size / block_size();
|
||||
// Can't be bigger than a btrfs item
|
||||
THROW_CHECK1(runtime_error, overlap_index, overlap_index < 65536);
|
||||
THROW_CHECK1(runtime_error, overlap_csum_byte_count, overlap_csum_byte_count < 65536);
|
||||
// Yes, process the overlap
|
||||
output(overlap_begin, bti.data().data() + overlap_index, overlap_csum_byte_count);
|
||||
// Advance
|
||||
bti = lower_bound(overlap_end);
|
||||
}
|
||||
#undef BCTFGS_DEBUG
|
||||
}
|
||||
|
||||
uint32_t
|
||||
BtrfsCsumTreeFetcher::sum_type() const
|
||||
{
|
||||
return m_sum_type;
|
||||
}
|
||||
|
||||
size_t
|
||||
BtrfsCsumTreeFetcher::sum_size() const
|
||||
{
|
||||
return m_sum_size;
|
||||
}
|
||||
|
||||
// Configure an offset-indexed fetcher for the csum tree
// (BTRFS_CSUM_TREE_OBJECTID / BTRFS_EXTENT_CSUM_OBJECTID /
// BTRFS_EXTENT_CSUM_KEY) and query the filesystem for its checksum type and
// per-block checksum size.
// Throws runtime_error (via THROW_CHECK1) if the checksum size is still 0
// after the CRC32 fallback.
BtrfsCsumTreeFetcher::BtrfsCsumTreeFetcher(const Fd &new_fd) :
	BtrfsTreeOffsetFetcher(new_fd)
{
	type(BTRFS_EXTENT_CSUM_KEY);
	tree(BTRFS_CSUM_TREE_OBJECTID);
	objectid(BTRFS_EXTENT_CSUM_OBJECTID);
	BtrfsIoctlFsInfoArgs bifia;
	bifia.do_ioctl(fd());
	// This is a checksum type, not a compression type: convert to whatever
	// type the member is declared with instead of going through the
	// unrelated btrfs_compression_type enum.
	m_sum_type = static_cast<decltype(m_sum_type)>(bifia.csum_type());
	m_sum_size = bifia.csum_size();
	if (m_sum_type == BTRFS_CSUM_TYPE_CRC32 && m_sum_size == 0) {
		// Older kernel versions don't fill in this field
		m_sum_size = 4;
	}
	THROW_CHECK1(runtime_error, m_sum_size, m_sum_size > 0);
}
|
||||
|
||||
// Object-indexed fetcher preset for BTRFS_EXTENT_ITEM_KEY items in the
// extent tree (BTRFS_EXTENT_TREE_OBJECTID).
BtrfsExtentItemFetcher::BtrfsExtentItemFetcher(const Fd &new_fd) :
	BtrfsTreeObjectFetcher(new_fd)
{
	type(BTRFS_EXTENT_ITEM_KEY);
	tree(BTRFS_EXTENT_TREE_OBJECTID);
}
|
||||
|
||||
// Offset-indexed fetcher preset for BTRFS_EXTENT_DATA_KEY items.
// No tree or objectid is selected here; the caller sets those.
BtrfsExtentDataFetcher::BtrfsExtentDataFetcher(const Fd &new_fd) :
	BtrfsTreeOffsetFetcher(new_fd)
{
	type(BTRFS_EXTENT_DATA_KEY);
}
|
||||
|
||||
// Object-indexed fetcher preset for BTRFS_EXTENT_DATA_KEY items in the given
// subvol tree, with a scale size of 1 (positions are used unscaled).
BtrfsFsTreeFetcher::BtrfsFsTreeFetcher(const Fd &new_fd, uint64_t subvol) :
	BtrfsTreeObjectFetcher(new_fd)
{
	scale_size(1);
	type(BTRFS_EXTENT_DATA_KEY);
	tree(subvol);
}
|
||||
|
||||
// Object-indexed fetcher preset for BTRFS_INODE_ITEM_KEY items, with a scale
// size of 1.  The tree is chosen per call by stat().
BtrfsInodeFetcher::BtrfsInodeFetcher(const Fd &fd) :
	BtrfsTreeObjectFetcher(fd)
{
	scale_size(1);
	type(BTRFS_INODE_ITEM_KEY);
}
|
||||
|
||||
// Look up the inode item for `inode` in the tree of `subvol`.
// Returns an empty BtrfsTreeItem if there is no such item.  A non-empty
// result is double-checked for objectid and type; a mismatch raises
// runtime_error (via THROW_CHECK2).
// Note: this changes the fetcher's selected tree to `subvol`.
BtrfsTreeItem
BtrfsInodeFetcher::stat(uint64_t subvol, uint64_t inode)
{
	tree(subvol);
	const auto item = at(inode);
	if (!item) {
		return item;
	}
	THROW_CHECK2(runtime_error, item.objectid(), inode, inode == item.objectid());
	THROW_CHECK2(runtime_error, item.type(), BTRFS_INODE_ITEM_KEY, item.type() == BTRFS_INODE_ITEM_KEY);
	return item;
}
|
||||
|
||||
// Object-indexed fetcher preset for BTRFS_ROOT_ITEM_KEY items in the root
// tree (BTRFS_ROOT_TREE_OBJECTID), with a scale size of 1.
BtrfsRootFetcher::BtrfsRootFetcher(const Fd &fd) :
	BtrfsTreeObjectFetcher(fd)
{
	scale_size(1);
	type(BTRFS_ROOT_ITEM_KEY);
	tree(BTRFS_ROOT_TREE_OBJECTID);
}
|
||||
|
||||
// Look up the root item for `subvol`.
// Returns an empty BtrfsTreeItem if there is no such item.  A non-empty
// result is double-checked for objectid and type; a mismatch raises
// runtime_error (via THROW_CHECK2).
BtrfsTreeItem
BtrfsRootFetcher::root(uint64_t subvol)
{
	const auto item = at(subvol);
	if (!item) {
		return item;
	}
	THROW_CHECK2(runtime_error, item.objectid(), subvol, subvol == item.objectid());
	THROW_CHECK2(runtime_error, item.type(), BTRFS_ROOT_ITEM_KEY, item.type() == BTRFS_ROOT_ITEM_KEY);
	return item;
}
|
||||
}
|
190
lib/bytevector.cc
Normal file
190
lib/bytevector.cc
Normal file
@@ -0,0 +1,190 @@
|
||||
#include "crucible/bytevector.h"
|
||||
|
||||
#include "crucible/error.h"
|
||||
#include "crucible/hexdump.h"
|
||||
#include "crucible/string.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
namespace crucible {
|
||||
using namespace std;
|
||||
|
||||
// Pointer to the first byte (null when no buffer is attached).
// The mutex is held only while the pointer is read.
ByteVector::iterator
ByteVector::begin() const
{
	unique_lock<mutex> lock(m_mutex);
	const iterator first = m_ptr.get();
	return first;
}
|
||||
|
||||
// Pointer one past the last byte (begin plus the current size).
// The mutex is held only while pointer and size are read.
ByteVector::iterator
ByteVector::end() const
{
	unique_lock<mutex> lock(m_mutex);
	const iterator past_last = m_ptr.get() + m_size;
	return past_last;
}
|
||||
|
||||
size_t
|
||||
ByteVector::size() const
|
||||
{
|
||||
return m_size;
|
||||
}
|
||||
|
||||
bool
|
||||
ByteVector::empty() const
|
||||
{
|
||||
return !m_ptr || !m_size;
|
||||
}
|
||||
|
||||
void
|
||||
ByteVector::clear()
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
m_ptr.reset();
|
||||
m_size = 0;
|
||||
}
|
||||
|
||||
// Unchecked element access: no bounds or null check is performed
// (see at(size_t) for the checked variant).
ByteVector::value_type&
ByteVector::operator[](size_t size) const
{
	unique_lock<mutex> lock(m_mutex);
	value_type *const base = m_ptr.get();
	return base[size];
}
|
||||
|
||||
// Copy constructor: shares the source's buffer pointer and copies its size
// while holding the source's mutex.  The bytes themselves are not duplicated.
ByteVector::ByteVector(const ByteVector &that)
{
	unique_lock<mutex> lock(that.m_mutex);
	m_size = that.m_size;
	m_ptr = that.m_ptr;
}
|
||||
|
||||
// Copy assignment: shares the source's buffer pointer and copies its size
// with both mutexes held (acquired together via std::lock).
ByteVector&
ByteVector::operator=(const ByteVector &that)
{
	// If &that == this, there's no need to do anything, but
	// especially don't try to lock the same mutex twice.
	if (&m_mutex == &that.m_mutex) {
		return *this;
	}
	unique_lock<mutex> lock_this(m_mutex, defer_lock);
	unique_lock<mutex> lock_that(that.m_mutex, defer_lock);
	lock(lock_this, lock_that);
	m_ptr = that.m_ptr;
	m_size = that.m_size;
	return *this;
}
|
||||
|
||||
// Slice constructor: create a view of `length` bytes starting `start` bytes
// into `that`.  The new vector shares ownership of the parent's buffer (via
// the aliasing Pointer constructor) and does not copy any bytes.
//
// Throws out_of_range if the parent has no buffer, if `start` is beyond the
// parent's size, or if the slice would extend past the end of the parent.
//
// The previous end-of-slice check compared `start + length` against
// `that.m_size + length`, which reduces to `start <= that.m_size` and never
// constrained `length`.  The check below is also written so that it cannot
// overflow.
ByteVector::ByteVector(const ByteVector &that, size_t start, size_t length)
{
	THROW_CHECK0(out_of_range, that.m_ptr);
	THROW_CHECK2(out_of_range, start, that.m_size, start <= that.m_size);
	// start <= that.m_size was established above, so the subtraction is safe
	THROW_CHECK2(out_of_range, length, that.m_size - start, length <= that.m_size - start);
	m_ptr = Pointer(that.m_ptr, that.m_ptr.get() + start);
	m_size = length;
}
|
||||
|
||||
// Return a slice of this vector: `length` bytes starting at `start`.
// Delegates to the slice constructor, which does the range checking.
ByteVector
ByteVector::at(size_t start, size_t length) const
{
	return ByteVector(*this, start, length);
}
|
||||
|
||||
// Checked element access.  Throws out_of_range (via THROW_CHECK*) when no
// buffer is attached or the index is not less than the size.
ByteVector::value_type&
ByteVector::at(size_t size) const
{
	unique_lock<mutex> lock(m_mutex);
	THROW_CHECK0(out_of_range, m_ptr);
	THROW_CHECK2(out_of_range, size, m_size, size < m_size);
	value_type *const base = m_ptr.get();
	return base[size];
}
|
||||
|
||||
// Allocate `size` bytes for a ByteVector buffer; the result is released with
// free().  Returns whatever the underlying allocator returns (possibly null).
// When BEES_VALGRIND is defined the memory is zero-filled via calloc.
static
void *
bv_allocate(size_t size)
{
#ifdef BEES_VALGRIND
	// XXX: only do this to shut up valgrind
	void *const mem = calloc(1, size);
#else
	void *const mem = malloc(size);
#endif
	return mem;
}
|
||||
|
||||
// Allocate a new buffer of `size` bytes, released with free() when the last
// reference goes away.  Throws runtime_error if the allocation yields null.
ByteVector::ByteVector(size_t size)
{
	value_type *const raw = static_cast<value_type*>(bv_allocate(size));
	m_ptr = Pointer(raw, free);
	// bad_alloc doesn't fit THROW_CHECK's template
	THROW_CHECK0(runtime_error, m_ptr);
	m_size = size;
}
|
||||
|
||||
// Allocate a new buffer holding a copy of [begin, end).  The buffer (and the
// reported size) is at least `min_size` bytes; bytes beyond the copied range
// are whatever bv_allocate() returned.
// Throws runtime_error if the allocation yields null.
ByteVector::ByteVector(iterator begin, iterator end, size_t min_size)
{
	const size_t size = end - begin;
	const size_t alloc_size = max(size, min_size);
	value_type *const raw = static_cast<value_type*>(bv_allocate(alloc_size));
	m_ptr = Pointer(raw, free);
	THROW_CHECK0(runtime_error, m_ptr);
	m_size = alloc_size;
	memcpy(m_ptr.get(), begin, size);
}
|
||||
|
||||
bool
|
||||
ByteVector::operator==(const ByteVector &that) const
|
||||
{
|
||||
unique_lock<mutex> lock_this(m_mutex, defer_lock);
|
||||
unique_lock<mutex> lock_that(that.m_mutex, defer_lock);
|
||||
lock(lock_this, lock_that);
|
||||
if (!m_ptr) {
|
||||
return !that.m_ptr;
|
||||
}
|
||||
if (!that.m_ptr) {
|
||||
return false;
|
||||
}
|
||||
if (m_size != that.m_size) {
|
||||
return false;
|
||||
}
|
||||
if (m_ptr.get() == that.m_ptr.get()) {
|
||||
return true;
|
||||
}
|
||||
return !memcmp(m_ptr.get(), that.m_ptr.get(), m_size);
|
||||
}
|
||||
|
||||
void
|
||||
ByteVector::erase(iterator begin, iterator end)
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
const size_t size = end - begin;
|
||||
if (!size) return;
|
||||
THROW_CHECK0(out_of_range, m_ptr);
|
||||
const iterator my_begin = m_ptr.get();
|
||||
const iterator my_end = my_begin + m_size;
|
||||
THROW_CHECK4(out_of_range, my_begin, begin, my_end, end, my_begin == begin || my_end == end);
|
||||
if (begin == my_begin) {
|
||||
if (end == my_end) {
|
||||
m_size = 0;
|
||||
m_ptr.reset();
|
||||
return;
|
||||
}
|
||||
m_ptr = Pointer(m_ptr, end);
|
||||
}
|
||||
m_size -= size;
|
||||
}
|
||||
|
||||
void
|
||||
ByteVector::erase(iterator begin)
|
||||
{
|
||||
erase(begin, begin + 1);
|
||||
}
|
||||
|
||||
// Raw pointer to the first byte (null when no buffer is attached).
// The mutex is held only while the pointer is read.
ByteVector::value_type*
ByteVector::data() const
{
	unique_lock<mutex> lock(m_mutex);
	value_type *const raw = m_ptr.get();
	return raw;
}
|
||||
|
||||
// Stream a hex dump of the vector's contents.
//
// Deliberately does NOT take bv.m_mutex: every public byte accessor of
// ByteVector visible in this file (data(), begin(), end(), operator[], at())
// acquires that same non-recursive mutex, so holding it across hexdump()
// would deadlock as soon as hexdump touches the bytes through the public
// interface.
// NOTE(review): assumes hexdump() reads `bv` only through its public
// accessors - confirm against crucible/hexdump.h.
ostream&
operator<<(ostream &os, const ByteVector &bv) {
	hexdump(os, bv);
	return os;
}
|
||||
}
|
@@ -496,7 +496,7 @@ namespace crucible {
|
||||
BtrfsExtentWalker::Vec
|
||||
BtrfsExtentWalker::get_extent_map(off_t pos)
|
||||
{
|
||||
BtrfsIoctlSearchKey sk(65536);
|
||||
BtrfsIoctlSearchKey sk;
|
||||
if (!m_root_fd) {
|
||||
m_root_fd = m_fd;
|
||||
}
|
||||
@@ -640,9 +640,7 @@ namespace crucible {
|
||||
ExtentWalker::get_extent_map(off_t pos)
|
||||
{
|
||||
EWLOG("get_extent_map(" << to_hex(pos) << ")");
|
||||
Fiemap fm;
|
||||
fm.fm_start = ranged_cast<uint64_t>(pos);
|
||||
fm.fm_length = ranged_cast<uint64_t>(numeric_limits<off_t>::max() - pos);
|
||||
Fiemap fm(ranged_cast<uint64_t>(pos), ranged_cast<uint64_t>(numeric_limits<off_t>::max() - pos));
|
||||
fm.m_max_count = fm.m_min_count = sc_extent_fetch_max;
|
||||
fm.do_ioctl(m_fd);
|
||||
Vec rv;
|
||||
|
52
lib/fd.cc
52
lib/fd.cc
@@ -361,8 +361,11 @@ namespace crucible {
|
||||
THROW_ERROR(invalid_argument, "pwrite: trying to write on a closed file descriptor");
|
||||
}
|
||||
int rv = ::pwrite(fd, buf, size, offset);
|
||||
if (rv != static_cast<int>(size)) {
|
||||
THROW_ERROR(runtime_error, "pwrite: only " << rv << " of " << size << " bytes written at offset " << offset);
|
||||
if (rv < 0) {
|
||||
THROW_ERRNO("pwrite: could not write " << size << " bytes at fd " << name_fd(fd) << " offset " << offset);
|
||||
}
|
||||
if (rv != static_cast<ssize_t>(size)) {
|
||||
THROW_ERROR(runtime_error, "pwrite: only " << rv << " of " << size << " bytes written at fd " << name_fd(fd) << " offset " << offset);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -392,7 +395,7 @@ namespace crucible {
|
||||
}
|
||||
THROW_ERRNO("read: " << size << " bytes");
|
||||
}
|
||||
if (rv > static_cast<int>(size)) {
|
||||
if (rv > static_cast<ssize_t>(size)) {
|
||||
THROW_ERROR(runtime_error, "read: somehow read more bytes (" << rv << ") than requested (" << size << ")");
|
||||
}
|
||||
if (rv == 0) break;
|
||||
@@ -441,8 +444,8 @@ namespace crucible {
|
||||
}
|
||||
THROW_ERRNO("pread: " << size << " bytes");
|
||||
}
|
||||
if (rv != static_cast<int>(size)) {
|
||||
THROW_ERROR(runtime_error, "pread: " << size << " bytes at offset " << offset << " returned " << rv);
|
||||
if (rv != static_cast<ssize_t>(size)) {
|
||||
THROW_ERROR(runtime_error, "pread: " << size << " bytes at fd " << name_fd(fd) << " offset " << offset << " returned " << rv);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -458,28 +461,14 @@ namespace crucible {
|
||||
|
||||
template<>
|
||||
void
|
||||
pread_or_die<vector<char>>(int fd, vector<char> &text, off_t offset)
|
||||
pread_or_die<ByteVector>(int fd, ByteVector &text, off_t offset)
|
||||
{
|
||||
return pread_or_die(fd, text.data(), text.size(), offset);
|
||||
}
|
||||
|
||||
template<>
|
||||
void
|
||||
pread_or_die<vector<uint8_t>>(int fd, vector<uint8_t> &text, off_t offset)
|
||||
{
|
||||
return pread_or_die(fd, text.data(), text.size(), offset);
|
||||
}
|
||||
|
||||
template<>
|
||||
void
|
||||
pwrite_or_die<vector<uint8_t>>(int fd, const vector<uint8_t> &text, off_t offset)
|
||||
{
|
||||
return pwrite_or_die(fd, text.data(), text.size(), offset);
|
||||
}
|
||||
|
||||
template<>
|
||||
void
|
||||
pwrite_or_die<vector<char>>(int fd, const vector<char> &text, off_t offset)
|
||||
pwrite_or_die<ByteVector>(int fd, const ByteVector &text, off_t offset)
|
||||
{
|
||||
return pwrite_or_die(fd, text.data(), text.size(), offset);
|
||||
}
|
||||
@@ -491,9 +480,9 @@ namespace crucible {
|
||||
return pwrite_or_die(fd, text.data(), text.size(), offset);
|
||||
}
|
||||
|
||||
Stat::Stat()
|
||||
Stat::Stat() :
|
||||
stat( (stat) { } )
|
||||
{
|
||||
memset_zero<stat>(this);
|
||||
}
|
||||
|
||||
Stat &
|
||||
@@ -512,15 +501,15 @@ namespace crucible {
|
||||
return *this;
|
||||
}
|
||||
|
||||
Stat::Stat(int fd)
|
||||
Stat::Stat(int fd) :
|
||||
stat( (stat) { } )
|
||||
{
|
||||
memset_zero<stat>(this);
|
||||
fstat(fd);
|
||||
}
|
||||
|
||||
Stat::Stat(const string &filename)
|
||||
Stat::Stat(const string &filename) :
|
||||
stat( (stat) { } )
|
||||
{
|
||||
memset_zero<stat>(this);
|
||||
lstat(filename);
|
||||
}
|
||||
|
||||
@@ -535,7 +524,14 @@ namespace crucible {
|
||||
void
|
||||
ioctl_iflags_set(int fd, int attr)
|
||||
{
|
||||
DIE_IF_MINUS_ONE(ioctl(fd, FS_IOC_SETFLAGS, &attr));
|
||||
// This bit of nonsense brought to you by Valgrind.
|
||||
union {
|
||||
int attr;
|
||||
long zero;
|
||||
} u;
|
||||
u.zero = 0;
|
||||
u.attr = attr;
|
||||
DIE_IF_MINUS_ONE(ioctl(fd, FS_IOC_SETFLAGS, &u.attr));
|
||||
}
|
||||
|
||||
string
|
||||
|
355
lib/fs.cc
355
lib/fs.cc
@@ -2,6 +2,7 @@
|
||||
|
||||
#include "crucible/error.h"
|
||||
#include "crucible/fd.h"
|
||||
#include "crucible/hexdump.h"
|
||||
#include "crucible/limits.h"
|
||||
#include "crucible/ntoa.h"
|
||||
#include "crucible/string.h"
|
||||
@@ -32,19 +33,11 @@ namespace crucible {
|
||||
#endif
|
||||
}
|
||||
|
||||
BtrfsExtentInfo::BtrfsExtentInfo(int dst_fd, off_t dst_offset)
|
||||
{
|
||||
memset_zero<btrfs_ioctl_same_extent_info>(this);
|
||||
fd = dst_fd;
|
||||
logical_offset = dst_offset;
|
||||
}
|
||||
|
||||
BtrfsExtentSame::BtrfsExtentSame(int src_fd, off_t src_offset, off_t src_length) :
|
||||
m_logical_offset(src_offset),
|
||||
m_length(src_length),
|
||||
m_fd(src_fd)
|
||||
{
|
||||
memset_zero<btrfs_ioctl_same_args>(this);
|
||||
logical_offset = src_offset;
|
||||
length = src_length;
|
||||
}
|
||||
|
||||
BtrfsExtentSame::~BtrfsExtentSame()
|
||||
@@ -52,9 +45,12 @@ namespace crucible {
|
||||
}
|
||||
|
||||
void
|
||||
BtrfsExtentSame::add(int fd, off_t offset)
|
||||
BtrfsExtentSame::add(int const fd, uint64_t const offset)
|
||||
{
|
||||
m_info.push_back(BtrfsExtentInfo(fd, offset));
|
||||
m_info.push_back( (btrfs_ioctl_same_extent_info) {
|
||||
.fd = fd,
|
||||
.logical_offset = offset,
|
||||
});
|
||||
}
|
||||
|
||||
ostream &
|
||||
@@ -111,11 +107,8 @@ namespace crucible {
|
||||
os << " '" << fd_name << "'";
|
||||
});
|
||||
}
|
||||
os << ", .logical_offset = " << to_hex(bes.logical_offset);
|
||||
os << ", .length = " << to_hex(bes.length);
|
||||
os << ", .dest_count = " << bes.dest_count;
|
||||
os << ", .reserved1 = " << bes.reserved1;
|
||||
os << ", .reserved2 = " << bes.reserved2;
|
||||
os << ", .logical_offset = " << to_hex(bes.m_logical_offset);
|
||||
os << ", .length = " << to_hex(bes.m_length);
|
||||
os << ", .info[] = {";
|
||||
for (size_t i = 0; i < bes.m_info.size(); ++i) {
|
||||
os << " [" << i << "] = " << &(bes.m_info[i]) << ",";
|
||||
@@ -126,22 +119,25 @@ namespace crucible {
|
||||
void
|
||||
btrfs_clone_range(int src_fd, off_t src_offset, off_t src_length, int dst_fd, off_t dst_offset)
|
||||
{
|
||||
struct btrfs_ioctl_clone_range_args args;
|
||||
memset_zero(&args);
|
||||
args.src_fd = src_fd;
|
||||
args.src_offset = src_offset;
|
||||
args.src_length = src_length;
|
||||
args.dest_offset = dst_offset;
|
||||
btrfs_ioctl_clone_range_args args ( (btrfs_ioctl_clone_range_args) {
|
||||
.src_fd = src_fd,
|
||||
.src_offset = ranged_cast<uint64_t, off_t>(src_offset),
|
||||
.src_length = ranged_cast<uint64_t, off_t>(src_length),
|
||||
.dest_offset = ranged_cast<uint64_t, off_t>(dst_offset),
|
||||
} );
|
||||
DIE_IF_MINUS_ONE(ioctl(dst_fd, BTRFS_IOC_CLONE_RANGE, &args));
|
||||
}
|
||||
|
||||
void
|
||||
BtrfsExtentSame::do_ioctl()
|
||||
{
|
||||
dest_count = m_info.size();
|
||||
vector<uint8_t> ioctl_arg = vector_copy_struct<btrfs_ioctl_same_args>(this);
|
||||
ioctl_arg.resize(sizeof(btrfs_ioctl_same_args) + dest_count * sizeof(btrfs_ioctl_same_extent_info), 0);
|
||||
btrfs_ioctl_same_args *ioctl_ptr = reinterpret_cast<btrfs_ioctl_same_args *>(ioctl_arg.data());
|
||||
const size_t buf_size = sizeof(btrfs_ioctl_same_args) + m_info.size() * sizeof(btrfs_ioctl_same_extent_info);
|
||||
ByteVector ioctl_arg( (btrfs_ioctl_same_args) {
|
||||
.logical_offset = m_logical_offset,
|
||||
.length = m_length,
|
||||
.dest_count = ranged_cast<decltype(btrfs_ioctl_same_args::dest_count)>(m_info.size()),
|
||||
}, buf_size);
|
||||
btrfs_ioctl_same_args *const ioctl_ptr = ioctl_arg.get<btrfs_ioctl_same_args>();
|
||||
size_t count = 0;
|
||||
for (auto i = m_info.cbegin(); i != m_info.cend(); ++i) {
|
||||
ioctl_ptr->info[count] = static_cast<const btrfs_ioctl_same_extent_info &>(m_info[count]);
|
||||
@@ -194,18 +190,15 @@ namespace crucible {
|
||||
void *
|
||||
BtrfsDataContainer::prepare(size_t container_size)
|
||||
{
|
||||
if (m_data.size() < container_size) {
|
||||
m_data.resize(container_size);
|
||||
}
|
||||
btrfs_data_container *p = reinterpret_cast<btrfs_data_container *>(m_data.data());
|
||||
const size_t min_size = offsetof(btrfs_data_container, val);
|
||||
if (container_size < min_size) {
|
||||
THROW_ERROR(out_of_range, "container size " << container_size << " smaller than minimum " << min_size);
|
||||
}
|
||||
p->bytes_left = 0;
|
||||
p->bytes_missing = 0;
|
||||
p->elem_cnt = 0;
|
||||
p->elem_missed = 0;
|
||||
if (m_data.size() < container_size) {
|
||||
m_data = ByteVector(container_size);
|
||||
}
|
||||
const auto p = m_data.get<btrfs_data_container>();
|
||||
*p = (btrfs_data_container) { };
|
||||
return p;
|
||||
}
|
||||
|
||||
@@ -218,25 +211,29 @@ namespace crucible {
|
||||
decltype(btrfs_data_container::bytes_left)
|
||||
BtrfsDataContainer::get_bytes_left() const
|
||||
{
|
||||
return bytes_left;
|
||||
const auto p = m_data.get<btrfs_data_container>();
|
||||
return p->bytes_left;
|
||||
}
|
||||
|
||||
decltype(btrfs_data_container::bytes_missing)
|
||||
BtrfsDataContainer::get_bytes_missing() const
|
||||
{
|
||||
return bytes_missing;
|
||||
const auto p = m_data.get<btrfs_data_container>();
|
||||
return p->bytes_missing;
|
||||
}
|
||||
|
||||
decltype(btrfs_data_container::elem_cnt)
|
||||
BtrfsDataContainer::get_elem_cnt() const
|
||||
{
|
||||
return elem_cnt;
|
||||
const auto p = m_data.get<btrfs_data_container>();
|
||||
return p->elem_cnt;
|
||||
}
|
||||
|
||||
decltype(btrfs_data_container::elem_missed)
|
||||
BtrfsDataContainer::get_elem_missed() const
|
||||
{
|
||||
return elem_missed;
|
||||
const auto p = m_data.get<btrfs_data_container>();
|
||||
return p->elem_missed;
|
||||
}
|
||||
|
||||
ostream &
|
||||
@@ -246,7 +243,7 @@ namespace crucible {
|
||||
return os << "BtrfsIoctlLogicalInoArgs NULL";
|
||||
}
|
||||
os << "BtrfsIoctlLogicalInoArgs {";
|
||||
os << " .logical = " << to_hex(p->logical);
|
||||
os << " .m_logical = " << to_hex(p->m_logical);
|
||||
os << " .inodes[] = {\n";
|
||||
unsigned count = 0;
|
||||
for (auto i = p->m_iors.cbegin(); i != p->m_iors.cend(); ++i) {
|
||||
@@ -258,10 +255,9 @@ namespace crucible {
|
||||
|
||||
BtrfsIoctlLogicalInoArgs::BtrfsIoctlLogicalInoArgs(uint64_t new_logical, size_t new_size) :
|
||||
m_container_size(new_size),
|
||||
m_container(new_size)
|
||||
m_container(new_size),
|
||||
m_logical(new_logical)
|
||||
{
|
||||
memset_zero<btrfs_ioctl_logical_ino_args>(this);
|
||||
logical = new_logical;
|
||||
}
|
||||
|
||||
size_t
|
||||
@@ -300,11 +296,6 @@ namespace crucible {
|
||||
return m_begin;
|
||||
}
|
||||
|
||||
BtrfsIoctlLogicalInoArgs::BtrfsInodeOffsetRootSpan::operator vector<BtrfsInodeOffsetRoot>() const
|
||||
{
|
||||
return vector<BtrfsInodeOffsetRoot>(m_begin, m_end);
|
||||
}
|
||||
|
||||
void
|
||||
BtrfsIoctlLogicalInoArgs::BtrfsInodeOffsetRootSpan::clear()
|
||||
{
|
||||
@@ -314,23 +305,28 @@ namespace crucible {
|
||||
void
|
||||
BtrfsIoctlLogicalInoArgs::set_flags(uint64_t new_flags)
|
||||
{
|
||||
// We are still supporting building with old headers that don't have .flags yet
|
||||
*(&reserved[0] + 3) = new_flags;
|
||||
m_flags = new_flags;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsIoctlLogicalInoArgs::get_flags() const
|
||||
{
|
||||
// We are still supporting building with old headers that don't have .flags yet
|
||||
return *(&reserved[0] + 3);
|
||||
return m_flags;
|
||||
}
|
||||
|
||||
bool
|
||||
BtrfsIoctlLogicalInoArgs::do_ioctl_nothrow(int fd)
|
||||
{
|
||||
btrfs_ioctl_logical_ino_args *p = static_cast<btrfs_ioctl_logical_ino_args *>(this);
|
||||
inodes = reinterpret_cast<uint64_t>(m_container.prepare(m_container_size));
|
||||
size = m_container.get_size();
|
||||
btrfs_ioctl_logical_ino_args args = (btrfs_ioctl_logical_ino_args) {
|
||||
.logical = m_logical,
|
||||
.size = m_container_size,
|
||||
.inodes = reinterpret_cast<uint64_t>(m_container.prepare(m_container_size)),
|
||||
};
|
||||
// We are still supporting building with old headers that don't have .flags yet
|
||||
*(&args.reserved[0] + 3) = m_flags;
|
||||
|
||||
btrfs_ioctl_logical_ino_args *const p = &args;
|
||||
|
||||
m_iors.clear();
|
||||
|
||||
@@ -367,13 +363,13 @@ namespace crucible {
|
||||
bili_version = BTRFS_IOC_LOGICAL_INO_V2;
|
||||
}
|
||||
|
||||
btrfs_data_container *bdc = reinterpret_cast<btrfs_data_container *>(p->inodes);
|
||||
BtrfsInodeOffsetRoot *input_iter = reinterpret_cast<BtrfsInodeOffsetRoot *>(bdc->val);
|
||||
btrfs_data_container *const bdc = reinterpret_cast<btrfs_data_container *>(p->inodes);
|
||||
BtrfsInodeOffsetRoot *const ior_iter = reinterpret_cast<BtrfsInodeOffsetRoot *>(bdc->val);
|
||||
|
||||
// elem_cnt counts uint64_t, but BtrfsInodeOffsetRoot is 3x uint64_t
|
||||
THROW_CHECK1(runtime_error, bdc->elem_cnt, bdc->elem_cnt % 3 == 0);
|
||||
m_iors.m_begin = input_iter;
|
||||
m_iors.m_end = input_iter + bdc->elem_cnt / 3;
|
||||
m_iors.m_begin = ior_iter;
|
||||
m_iors.m_end = ior_iter + bdc->elem_cnt / 3;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -396,9 +392,10 @@ namespace crucible {
|
||||
}
|
||||
|
||||
BtrfsIoctlInoPathArgs::BtrfsIoctlInoPathArgs(uint64_t inode, size_t new_size) :
|
||||
btrfs_ioctl_ino_path_args( (btrfs_ioctl_ino_path_args) { } ),
|
||||
m_container_size(new_size)
|
||||
{
|
||||
memset_zero<btrfs_ioctl_ino_path_args>(this);
|
||||
assert(inum == 0);
|
||||
inum = inode;
|
||||
}
|
||||
|
||||
@@ -416,14 +413,14 @@ namespace crucible {
|
||||
return false;
|
||||
}
|
||||
|
||||
btrfs_data_container *bdc = reinterpret_cast<btrfs_data_container *>(p->fspath);
|
||||
btrfs_data_container *const bdc = reinterpret_cast<btrfs_data_container *>(p->fspath);
|
||||
m_paths.reserve(bdc->elem_cnt);
|
||||
|
||||
const uint64_t *up = reinterpret_cast<const uint64_t *>(bdc->val);
|
||||
const char *cp = reinterpret_cast<const char *>(bdc->val);
|
||||
const char *const cp = reinterpret_cast<const char *>(bdc->val);
|
||||
|
||||
for (auto count = bdc->elem_cnt; count > 0; --count) {
|
||||
const char *path = cp + *up++;
|
||||
const char *const path = cp + *up++;
|
||||
if (static_cast<size_t>(path - cp) > container.get_size()) {
|
||||
THROW_ERROR(out_of_range, "offset " << (path - cp) << " > size " << container.get_size() << " in " << __PRETTY_FUNCTION__);
|
||||
}
|
||||
@@ -458,9 +455,10 @@ namespace crucible {
|
||||
return os;
|
||||
}
|
||||
|
||||
BtrfsIoctlInoLookupArgs::BtrfsIoctlInoLookupArgs(uint64_t new_objectid)
|
||||
BtrfsIoctlInoLookupArgs::BtrfsIoctlInoLookupArgs(uint64_t new_objectid) :
|
||||
btrfs_ioctl_ino_lookup_args( (btrfs_ioctl_ino_lookup_args) { } )
|
||||
{
|
||||
memset_zero<btrfs_ioctl_ino_lookup_args>(this);
|
||||
assert(objectid == 0);
|
||||
objectid = new_objectid;
|
||||
}
|
||||
|
||||
@@ -478,9 +476,9 @@ namespace crucible {
|
||||
}
|
||||
}
|
||||
|
||||
BtrfsIoctlDefragRangeArgs::BtrfsIoctlDefragRangeArgs()
|
||||
BtrfsIoctlDefragRangeArgs::BtrfsIoctlDefragRangeArgs() :
|
||||
btrfs_ioctl_defrag_range_args( (btrfs_ioctl_defrag_range_args) { } )
|
||||
{
|
||||
memset_zero<btrfs_ioctl_defrag_range_args>(this);
|
||||
}
|
||||
|
||||
bool
|
||||
@@ -510,9 +508,10 @@ namespace crucible {
|
||||
}
|
||||
|
||||
string
|
||||
btrfs_ioctl_defrag_range_compress_type_ntoa(uint32_t compress_type)
|
||||
btrfs_compress_type_ntoa(uint8_t compress_type)
|
||||
{
|
||||
static const bits_ntoa_table table[] = {
|
||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_COMPRESS_NONE),
|
||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_COMPRESS_ZLIB),
|
||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_COMPRESS_LZO),
|
||||
NTOA_TABLE_ENTRY_ENUM(BTRFS_COMPRESS_ZSTD),
|
||||
@@ -532,14 +531,14 @@ namespace crucible {
|
||||
os << " .len = " << p->len;
|
||||
os << " .flags = " << btrfs_ioctl_defrag_range_flags_ntoa(p->flags);
|
||||
os << " .extent_thresh = " << p->extent_thresh;
|
||||
os << " .compress_type = " << btrfs_ioctl_defrag_range_compress_type_ntoa(p->compress_type);
|
||||
os << " .compress_type = " << btrfs_compress_type_ntoa(p->compress_type);
|
||||
os << " .unused[4] = { " << p->unused[0] << ", " << p->unused[1] << ", " << p->unused[2] << ", " << p->unused[3] << "} }";
|
||||
return os;
|
||||
}
|
||||
|
||||
FiemapExtent::FiemapExtent()
|
||||
FiemapExtent::FiemapExtent() :
|
||||
fiemap_extent( (fiemap_extent) { } )
|
||||
{
|
||||
memset_zero<fiemap_extent>(this);
|
||||
}
|
||||
|
||||
FiemapExtent::FiemapExtent(const fiemap_extent &that)
|
||||
@@ -646,13 +645,10 @@ namespace crucible {
|
||||
operator<<(ostream &os, const Fiemap &args)
|
||||
{
|
||||
os << "Fiemap {";
|
||||
os << " .fm_start = " << to_hex(args.fm_start) << ".." << to_hex(args.fm_start + args.fm_length);
|
||||
os << ", .fm_length = " << to_hex(args.fm_length);
|
||||
if (args.fm_flags) os << ", .fm_flags = " << fiemap_flags_ntoa(args.fm_flags);
|
||||
os << ", .fm_mapped_extents = " << args.fm_mapped_extents;
|
||||
os << ", .fm_extent_count = " << args.fm_extent_count;
|
||||
if (args.fm_reserved) os << ", .fm_reserved = " << args.fm_reserved;
|
||||
os << ", .fm_extents[] = {";
|
||||
os << " .m_start = " << to_hex(args.m_start) << ".." << to_hex(args.m_start + args.m_length);
|
||||
os << ", .m_length = " << to_hex(args.m_length);
|
||||
os << ", .m_flags = " << fiemap_flags_ntoa(args.m_flags);
|
||||
os << ", .fm_extents[" << args.m_extents.size() << "] = {";
|
||||
size_t count = 0;
|
||||
for (auto i = args.m_extents.cbegin(); i != args.m_extents.cend(); ++i) {
|
||||
os << "\n\t[" << count++ << "] = " << &(*i) << ",";
|
||||
@@ -660,41 +656,35 @@ namespace crucible {
|
||||
return os << "\n}";
|
||||
}
|
||||
|
||||
Fiemap::Fiemap(uint64_t start, uint64_t length)
|
||||
Fiemap::Fiemap(uint64_t start, uint64_t length) :
|
||||
m_start(start),
|
||||
m_length(length)
|
||||
{
|
||||
memset_zero<fiemap>(this);
|
||||
fm_start = start;
|
||||
fm_length = length;
|
||||
// FIEMAP is slow and full of lies.
|
||||
// This makes FIEMAP even slower, but reduces the lies a little.
|
||||
fm_flags = FIEMAP_FLAG_SYNC;
|
||||
}
|
||||
|
||||
void
|
||||
Fiemap::do_ioctl(int fd)
|
||||
{
|
||||
THROW_CHECK1(out_of_range, m_min_count, m_min_count <= m_max_count);
|
||||
THROW_CHECK1(out_of_range, m_min_count, m_min_count > 0);
|
||||
|
||||
auto extent_count = m_min_count;
|
||||
vector<uint8_t> ioctl_arg = vector_copy_struct<fiemap>(this);
|
||||
const auto extent_count = m_min_count;
|
||||
ByteVector ioctl_arg(sizeof(fiemap) + extent_count * sizeof(fiemap_extent));
|
||||
|
||||
ioctl_arg.resize(sizeof(fiemap) + extent_count * sizeof(fiemap_extent), 0);
|
||||
fiemap *const ioctl_ptr = ioctl_arg.get<fiemap>();
|
||||
|
||||
fiemap *ioctl_ptr = reinterpret_cast<fiemap *>(ioctl_arg.data());
|
||||
|
||||
auto start = fm_start;
|
||||
auto end = fm_start + fm_length;
|
||||
|
||||
auto orig_start = fm_start;
|
||||
auto orig_length = fm_length;
|
||||
auto start = m_start;
|
||||
const auto end = m_start + m_length;
|
||||
|
||||
vector<FiemapExtent> extents;
|
||||
|
||||
while (start < end && extents.size() < m_max_count) {
|
||||
ioctl_ptr->fm_start = start;
|
||||
ioctl_ptr->fm_length = end - start;
|
||||
ioctl_ptr->fm_extent_count = extent_count;
|
||||
ioctl_ptr->fm_mapped_extents = 0;
|
||||
*ioctl_ptr = (fiemap) {
|
||||
.fm_start = start,
|
||||
.fm_length = end - start,
|
||||
.fm_flags = m_flags,
|
||||
.fm_extent_count = extent_count,
|
||||
};
|
||||
|
||||
// cerr << "Before (fd = " << fd << ") : " << ioctl_ptr << endl;
|
||||
DIE_IF_MINUS_ONE(ioctl(fd, FS_IOC_FIEMAP, ioctl_ptr));
|
||||
@@ -720,74 +710,89 @@ namespace crucible {
|
||||
}
|
||||
}
|
||||
|
||||
fiemap *this_ptr = static_cast<fiemap *>(this);
|
||||
*this_ptr = *ioctl_ptr;
|
||||
fm_start = orig_start;
|
||||
fm_length = orig_length;
|
||||
fm_extent_count = extents.size();
|
||||
m_extents = extents;
|
||||
}
|
||||
|
||||
BtrfsIoctlSearchKey::BtrfsIoctlSearchKey(size_t buf_size) :
|
||||
btrfs_ioctl_search_key( (btrfs_ioctl_search_key) {
|
||||
.max_objectid = numeric_limits<decltype(max_objectid)>::max(),
|
||||
.max_offset = numeric_limits<decltype(max_offset)>::max(),
|
||||
.max_transid = numeric_limits<decltype(max_transid)>::max(),
|
||||
.max_type = numeric_limits<decltype(max_type)>::max(),
|
||||
.nr_items = 1,
|
||||
}),
|
||||
m_buf_size(buf_size)
|
||||
{
|
||||
memset_zero<btrfs_ioctl_search_key>(this);
|
||||
max_objectid = numeric_limits<decltype(max_objectid)>::max();
|
||||
max_offset = numeric_limits<decltype(max_offset)>::max();
|
||||
max_transid = numeric_limits<decltype(max_transid)>::max();
|
||||
max_type = numeric_limits<decltype(max_type)>::max();
|
||||
nr_items = numeric_limits<decltype(nr_items)>::max();
|
||||
}
|
||||
|
||||
BtrfsIoctlSearchHeader::BtrfsIoctlSearchHeader()
|
||||
BtrfsIoctlSearchHeader::BtrfsIoctlSearchHeader() :
|
||||
btrfs_ioctl_search_header( (btrfs_ioctl_search_header) { } )
|
||||
{
|
||||
memset_zero<btrfs_ioctl_search_header>(this);
|
||||
}
|
||||
|
||||
size_t
|
||||
BtrfsIoctlSearchHeader::set_data(const vector<uint8_t> &v, size_t offset)
|
||||
BtrfsIoctlSearchHeader::set_data(const ByteVector &v, size_t offset)
|
||||
{
|
||||
THROW_CHECK2(invalid_argument, offset, v.size(), offset + sizeof(btrfs_ioctl_search_header) <= v.size());
|
||||
memcpy(static_cast<btrfs_ioctl_search_header *>(this), &v[offset], sizeof(btrfs_ioctl_search_header));
|
||||
offset += sizeof(btrfs_ioctl_search_header);
|
||||
THROW_CHECK2(invalid_argument, offset + len, v.size(), offset + len <= v.size());
|
||||
m_data = Spanner<const uint8_t>(&v[offset], &v[offset + len]);
|
||||
m_data = ByteVector(v, offset, len);
|
||||
return offset + len;
|
||||
}
|
||||
|
||||
bool
|
||||
BtrfsIoctlSearchKey::do_ioctl_nothrow(int fd)
|
||||
{
|
||||
// Normally we like to be paranoid and fill empty bytes with zero,
|
||||
// but these buffers can be huge. 80% of a 4GHz CPU huge.
|
||||
|
||||
// Keep the ioctl buffer from one run to the next to save on malloc costs
|
||||
size_t target_buf_size = sizeof(btrfs_ioctl_search_args_v2) + m_buf_size;
|
||||
|
||||
m_ioctl_arg = vector_copy_struct<btrfs_ioctl_search_key>(this);
|
||||
m_ioctl_arg.resize(target_buf_size);
|
||||
// It would be really nice if the kernel tells us whether our
|
||||
// buffer overflowed or how big the overflowing object
|
||||
// was; instead, we have to guess.
|
||||
|
||||
m_result.clear();
|
||||
// Make sure there is space for at least the search key and one (empty) header
|
||||
size_t buf_size = max(m_buf_size, sizeof(btrfs_ioctl_search_args_v2) + sizeof(btrfs_ioctl_search_header));
|
||||
ByteVector ioctl_arg;
|
||||
btrfs_ioctl_search_args_v2 *ioctl_ptr;
|
||||
do {
|
||||
// ioctl buffer size does not include search key header or buffer size
|
||||
ioctl_arg = ByteVector(buf_size + sizeof(btrfs_ioctl_search_args_v2));
|
||||
ioctl_ptr = ioctl_arg.get<btrfs_ioctl_search_args_v2>();
|
||||
ioctl_ptr->key = static_cast<const btrfs_ioctl_search_key&>(*this);
|
||||
ioctl_ptr->buf_size = buf_size;
|
||||
// Don't bother supporting V1. Kernels that old have other problems.
|
||||
int rv = ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, ioctl_arg.data());
|
||||
if (rv != 0 && errno != EOVERFLOW) {
|
||||
return false;
|
||||
}
|
||||
if (rv == 0 && nr_items <= ioctl_ptr->key.nr_items) {
|
||||
// got all the items we wanted, thanks
|
||||
m_buf_size = max(m_buf_size, buf_size);
|
||||
break;
|
||||
}
|
||||
// Didn't get all the items we wanted. Increase the buf size and try again.
|
||||
// These sizes are very common on default-formatted btrfs, so use these
|
||||
// instead of naive doubling.
|
||||
if (buf_size < 4096) {
|
||||
buf_size = 4096;
|
||||
} else if (buf_size < 16384) {
|
||||
buf_size = 16384;
|
||||
} else if (buf_size < 65536) {
|
||||
buf_size = 65536;
|
||||
} else {
|
||||
buf_size *= 2;
|
||||
}
|
||||
// don't automatically raise the buf size higher than 64K, the largest possible btrfs item
|
||||
} while (buf_size < 65536);
|
||||
|
||||
btrfs_ioctl_search_args_v2 *ioctl_ptr = reinterpret_cast<btrfs_ioctl_search_args_v2 *>(m_ioctl_arg.data());
|
||||
|
||||
ioctl_ptr->buf_size = m_buf_size;
|
||||
|
||||
// Don't bother supporting V1. Kernels that old have other problems.
|
||||
int rv = ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, ioctl_ptr);
|
||||
if (rv != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// ioctl changes nr_items, this has to be copied back
|
||||
static_cast<btrfs_ioctl_search_key&>(*this) = ioctl_ptr->key;
|
||||
|
||||
size_t offset = pointer_distance(ioctl_ptr->buf, ioctl_ptr);
|
||||
for (decltype(nr_items) i = 0; i < nr_items; ++i) {
|
||||
BtrfsIoctlSearchHeader item;
|
||||
offset = item.set_data(m_ioctl_arg, offset);
|
||||
offset = item.set_data(ioctl_arg, offset);
|
||||
m_result.insert(item);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -795,7 +800,7 @@ namespace crucible {
|
||||
BtrfsIoctlSearchKey::do_ioctl(int fd)
|
||||
{
|
||||
if (!do_ioctl_nothrow(fd)) {
|
||||
THROW_ERRNO("BTRFS_IOC_TREE_SEARCH_V2: " << name_fd(fd));
|
||||
THROW_ERRNO("BTRFS_IOC_TREE_SEARCH_V2: " << name_fd(fd) << ": " << *this);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -806,33 +811,47 @@ namespace crucible {
|
||||
min_type = ref.type;
|
||||
min_offset = ref.offset + 1;
|
||||
if (min_offset < ref.offset) {
|
||||
// We wrapped, try the next objectid
|
||||
++min_objectid;
|
||||
// We wrapped, try the next type
|
||||
++min_type;
|
||||
assert(min_offset == 0);
|
||||
if (min_type < ref.type) {
|
||||
assert(min_type == 0);
|
||||
// We wrapped, try the next objectid
|
||||
++min_objectid;
|
||||
// no advancement possible at end
|
||||
THROW_CHECK1(runtime_error, min_type, min_type == 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class V>
|
||||
ostream &
|
||||
hexdump(ostream &os, const V &v)
|
||||
void
|
||||
BtrfsIoctlSearchKey::next_min(const BtrfsIoctlSearchHeader &ref, const uint8_t type)
|
||||
{
|
||||
os << "vector<uint8_t> { size = " << v.size() << ", data:\n";
|
||||
for (size_t i = 0; i < v.size(); i += 8) {
|
||||
string hex, ascii;
|
||||
for (size_t j = i; j < i + 8; ++j) {
|
||||
if (j < v.size()) {
|
||||
uint8_t c = v[j];
|
||||
char buf[8];
|
||||
sprintf(buf, "%02x ", c);
|
||||
hex += buf;
|
||||
ascii += (c < 32 || c > 126) ? '.' : c;
|
||||
} else {
|
||||
hex += " ";
|
||||
ascii += ' ';
|
||||
}
|
||||
if (ref.type < type) {
|
||||
// forward to type in same object with zero offset
|
||||
min_objectid = ref.objectid;
|
||||
min_type = type;
|
||||
min_offset = 0;
|
||||
} else if (ref.type > type) {
|
||||
// skip directly to start of next objectid with target type
|
||||
min_objectid = ref.objectid + 1;
|
||||
// no advancement possible at end
|
||||
THROW_CHECK2(out_of_range, min_objectid, ref.objectid, min_objectid > ref.objectid);
|
||||
min_type = type;
|
||||
min_offset = 0;
|
||||
} else {
|
||||
// advance within this type
|
||||
min_objectid = ref.objectid;
|
||||
min_type = ref.type;
|
||||
min_offset = ref.offset + 1;
|
||||
if (min_offset < ref.offset) {
|
||||
// We wrapped, try the next objectid, same type
|
||||
++min_objectid;
|
||||
THROW_CHECK2(out_of_range, min_objectid, ref.objectid, min_objectid > ref.objectid);
|
||||
min_type = type;
|
||||
assert(min_offset == 0);
|
||||
}
|
||||
os << astringprintf("\t%08x %s %s\n", i, hex.c_str(), ascii.c_str());
|
||||
}
|
||||
return os << "}";
|
||||
}
|
||||
|
||||
string
|
||||
@@ -1029,9 +1048,9 @@ namespace crucible {
|
||||
return rv;
|
||||
}
|
||||
|
||||
Statvfs::Statvfs()
|
||||
Statvfs::Statvfs() :
|
||||
statvfs( (statvfs) { } )
|
||||
{
|
||||
memset_zero<statvfs>(this);
|
||||
}
|
||||
|
||||
Statvfs::Statvfs(int fd) :
|
||||
@@ -1082,16 +1101,20 @@ namespace crucible {
|
||||
return os << " }";
|
||||
};
|
||||
|
||||
BtrfsIoctlFsInfoArgs::BtrfsIoctlFsInfoArgs()
|
||||
BtrfsIoctlFsInfoArgs::BtrfsIoctlFsInfoArgs() :
|
||||
btrfs_ioctl_fs_info_args_v3( (btrfs_ioctl_fs_info_args_v3) {
|
||||
.flags = 0
|
||||
| BTRFS_FS_INFO_FLAG_CSUM_INFO
|
||||
| BTRFS_FS_INFO_FLAG_GENERATION
|
||||
,
|
||||
})
|
||||
{
|
||||
memset_zero<btrfs_ioctl_fs_info_args_v2>(this);
|
||||
flags = BTRFS_FS_INFO_FLAG_CSUM_INFO;
|
||||
}
|
||||
|
||||
void
|
||||
BtrfsIoctlFsInfoArgs::do_ioctl(int fd)
|
||||
{
|
||||
btrfs_ioctl_fs_info_args_v2 *p = static_cast<btrfs_ioctl_fs_info_args_v2 *>(this);
|
||||
btrfs_ioctl_fs_info_args_v3 *p = static_cast<btrfs_ioctl_fs_info_args_v3 *>(this);
|
||||
if (ioctl(fd, BTRFS_IOC_FS_INFO, p)) {
|
||||
THROW_ERRNO("BTRFS_IOC_FS_INFO: fd " << fd);
|
||||
}
|
||||
@@ -1100,13 +1123,19 @@ namespace crucible {
|
||||
uint16_t
|
||||
BtrfsIoctlFsInfoArgs::csum_type() const
|
||||
{
|
||||
return this->btrfs_ioctl_fs_info_args_v2::csum_type;
|
||||
return this->btrfs_ioctl_fs_info_args_v3::csum_type;
|
||||
}
|
||||
|
||||
uint16_t
|
||||
BtrfsIoctlFsInfoArgs::csum_size() const
|
||||
{
|
||||
return this->btrfs_ioctl_fs_info_args_v2::csum_size;
|
||||
return this->btrfs_ioctl_fs_info_args_v3::csum_size;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BtrfsIoctlFsInfoArgs::generation() const
|
||||
{
|
||||
return this->btrfs_ioctl_fs_info_args_v3::generation;
|
||||
}
|
||||
|
||||
};
|
||||
|
72
lib/multilock.cc
Normal file
72
lib/multilock.cc
Normal file
@@ -0,0 +1,72 @@
|
||||
#include "crucible/multilock.h"
|
||||
|
||||
#include "crucible/error.h"
|
||||
|
||||
namespace crucible {
|
||||
using namespace std;
|
||||
|
||||
/// Remember which lock type this handle stands for and which MultiLocker
/// issued it.  Construction alone does not acquire or count anything.
MultiLocker::LockHandle::LockHandle(const string &type, MultiLocker &parent) :
	m_type(type), m_parent(parent)
{
}
|
||||
|
||||
void
|
||||
MultiLocker::LockHandle::set_locked(const bool state)
|
||||
{
|
||||
m_locked = state;
|
||||
}
|
||||
|
||||
/// Give the lock back to the parent MultiLocker if this handle holds one.
MultiLocker::LockHandle::~LockHandle()
{
	// Nothing to release unless set_locked(true) was called on this handle
	if (!m_locked) {
		return;
	}
	m_parent.put_lock(m_type);
	m_locked = false;
}
|
||||
|
||||
bool
|
||||
MultiLocker::is_lock_available(const string &type)
|
||||
{
|
||||
for (const auto &i : m_counters) {
|
||||
if (i.second != 0 && i.first != type) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
MultiLocker::put_lock(const string &type)
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
auto &counter = m_counters[type];
|
||||
THROW_CHECK2(runtime_error, type, counter, counter > 0);
|
||||
--counter;
|
||||
if (counter == 0) {
|
||||
m_cv.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
/// Block until a lock of type `type` can be granted, then return a handle
/// that holds it.  The holder count for `type` is incremented here and is
/// decremented again by put_lock() when the handle is destroyed.
shared_ptr<MultiLocker::LockHandle>
MultiLocker::get_lock_private(const string &type)
{
	unique_lock<mutex> lock(m_mutex);
	// Make sure a counter exists for this type; insert() leaves an existing one alone
	m_counters.insert(make_pair(type, size_t(0)));
	// Sleep until no other lock type has holders
	m_cv.wait(lock, [&]() { return is_lock_available(type); });
	// Create the handle first so the count is only bumped once that has succeeded
	const auto handle = make_shared<LockHandle>(type, *this);
	++m_counters[type];
	handle->set_locked(true);
	return handle;
}
|
||||
|
||||
/// Acquire a lock of type `type` from a single MultiLocker instance that is
/// created on first use and shared by every caller of this function.
shared_ptr<MultiLocker::LockHandle>
MultiLocker::get_lock(const string &type)
{
	// Function-local static: constructed on the first call
	static MultiLocker s_process_instance;
	return s_process_instance.get_lock_private(type);
}
|
||||
|
||||
}
|
293
lib/task.cc
293
lib/task.cc
@@ -18,6 +18,27 @@
|
||||
namespace crucible {
|
||||
using namespace std;
|
||||
|
||||
static const size_t thread_name_length = 15; // TASK_COMM_LEN on Linux
|
||||
|
||||
void
|
||||
pthread_setname(const string &name)
|
||||
{
|
||||
auto name_copy = name.substr(0, thread_name_length);
|
||||
// Don't care if a debugging facility fails
|
||||
pthread_setname_np(pthread_self(), name_copy.c_str());
|
||||
}
|
||||
|
||||
/// Return the calling thread's name as reported by pthread_getname_np.
/// The buffer starts zero-filled, so the result is empty if the call
/// writes nothing.
string
pthread_getname()
{
	// Room for thread_name_length characters plus the terminating NUL
	char name_buf[thread_name_length + 1] = { 0 };
	// We'll get an empty name if this fails...
	pthread_getname_np(pthread_self(), name_buf, sizeof(name_buf));
	// ...or at least null-terminated garbage
	name_buf[thread_name_length] = '\0';
	return string(name_buf);
}
|
||||
|
||||
class TaskState;
|
||||
using TaskStatePtr = shared_ptr<TaskState>;
|
||||
using TaskStateWeak = weak_ptr<TaskState>;
|
||||
@@ -30,7 +51,8 @@ namespace crucible {
|
||||
|
||||
static thread_local TaskStatePtr tl_current_task;
|
||||
|
||||
/// because we don't want to bump -std=c++-17 just to get scoped_lock
|
||||
/// because we don't want to bump -std=c++-17 just to get scoped_lock.
|
||||
/// Also we don't want to self-deadlock if both mutexes are the same mutex.
|
||||
class PairLock {
|
||||
unique_lock<mutex> m_lock1, m_lock2;
|
||||
public:
|
||||
@@ -54,8 +76,8 @@ namespace crucible {
|
||||
/// Tasks to be executed after the current task is executed
|
||||
list<TaskStatePtr> m_post_exec_queue;
|
||||
|
||||
/// Incremented by run() and append(). Decremented by exec().
|
||||
size_t m_run_count = 0;
|
||||
/// Set by run() and append(). Cleared by exec().
|
||||
bool m_run_now = false;
|
||||
|
||||
/// Set when task starts execution by exec().
|
||||
/// Cleared when exec() ends.
|
||||
@@ -89,6 +111,7 @@ namespace crucible {
|
||||
|
||||
TaskState &operator=(const TaskState &) = delete;
|
||||
TaskState(const TaskState &) = delete;
|
||||
TaskState(TaskState &&) = delete;
|
||||
|
||||
public:
|
||||
~TaskState();
|
||||
@@ -136,6 +159,8 @@ namespace crucible {
|
||||
size_t m_configured_thread_max;
|
||||
double m_thread_target;
|
||||
bool m_cancelled = false;
|
||||
bool m_paused = false;
|
||||
TaskMaster::LoadStats m_load_stats;
|
||||
|
||||
friend class TaskConsumer;
|
||||
friend class TaskMaster;
|
||||
@@ -149,6 +174,7 @@ namespace crucible {
|
||||
void set_loadavg_target(double target);
|
||||
void loadavg_thread_fn();
|
||||
void cancel();
|
||||
void pause(bool paused = true);
|
||||
|
||||
TaskMasterState &operator=(const TaskMasterState &) = delete;
|
||||
TaskMasterState(const TaskMasterState &) = delete;
|
||||
@@ -161,6 +187,7 @@ namespace crucible {
|
||||
static void push_front(TaskQueue &queue);
|
||||
size_t get_queue_count();
|
||||
size_t get_thread_count();
|
||||
static TaskMaster::LoadStats get_current_load();
|
||||
};
|
||||
|
||||
class TaskConsumer : public enable_shared_from_this<TaskConsumer> {
|
||||
@@ -192,25 +219,34 @@ namespace crucible {
|
||||
if (queue.empty()) {
|
||||
return;
|
||||
}
|
||||
auto tlcc = tl_current_consumer;
|
||||
const auto tlcc = tl_current_consumer;
|
||||
if (tlcc) {
|
||||
// We are executing under a TaskConsumer, splice our post-exec queue at front.
|
||||
// No locks needed because we are using only thread-local objects.
|
||||
tlcc->m_local_queue.splice(tlcc->m_local_queue.begin(), queue);
|
||||
} else {
|
||||
// We are not executing under a TaskConsumer.
|
||||
// Create a new task to wrap our post-exec queue,
|
||||
// then push it to the front of the global queue using normal locking methods.
|
||||
TaskStatePtr rescue_task(make_shared<TaskState>("rescue_task", [](){}));
|
||||
swap(rescue_task->m_post_exec_queue, queue);
|
||||
TaskQueue tq_one { rescue_task };
|
||||
TaskMasterState::push_front(tq_one);
|
||||
// If there is only one task, then just insert it at the front of the queue.
|
||||
if (queue.size() == 1) {
|
||||
TaskMasterState::push_front(queue);
|
||||
} else {
|
||||
// If there are multiple tasks, create a new task to wrap our post-exec queue,
|
||||
// then push it to the front of the global queue using normal locking methods.
|
||||
TaskStatePtr rescue_task(make_shared<TaskState>("rescue_task", [](){}));
|
||||
swap(rescue_task->m_post_exec_queue, queue);
|
||||
TaskQueue tq_one { rescue_task };
|
||||
TaskMasterState::push_front(tq_one);
|
||||
}
|
||||
}
|
||||
assert(queue.empty());
|
||||
}
|
||||
|
||||
/// Destroy a task: drop it from the instance count and hand any tasks still
/// waiting in its post-exec queue to rescue_queue().
TaskState::~TaskState()
{
	--s_instance_count;

	// Hold this task's mutex while its post-exec queue is handed off
	unique_lock<mutex> lock(m_mutex);

	// If any dependent Tasks were appended since the last exec, run them now
	TaskState::rescue_queue(m_post_exec_queue);
}
|
||||
|
||||
TaskState::TaskState(string title, function<void()> exec_fn) :
|
||||
@@ -247,11 +283,10 @@ namespace crucible {
|
||||
void
|
||||
TaskState::clear_queue(TaskQueue &tq)
|
||||
{
|
||||
while (!tq.empty()) {
|
||||
auto i = *tq.begin();
|
||||
tq.pop_front();
|
||||
for (auto &i : tq) {
|
||||
i->clear();
|
||||
}
|
||||
tq.clear();
|
||||
}
|
||||
|
||||
void
|
||||
@@ -266,8 +301,8 @@ namespace crucible {
|
||||
{
|
||||
THROW_CHECK0(invalid_argument, task);
|
||||
PairLock lock(m_mutex, task->m_mutex);
|
||||
if (!task->m_run_count) {
|
||||
++task->m_run_count;
|
||||
if (!task->m_run_now) {
|
||||
task->m_run_now = true;
|
||||
append_nolock(task);
|
||||
}
|
||||
}
|
||||
@@ -283,26 +318,25 @@ namespace crucible {
|
||||
append_nolock(shared_from_this());
|
||||
return;
|
||||
} else {
|
||||
--m_run_count;
|
||||
m_run_now = false;
|
||||
m_is_running = true;
|
||||
}
|
||||
lock.unlock();
|
||||
|
||||
char buf[24] = { 0 };
|
||||
DIE_IF_MINUS_ERRNO(pthread_getname_np(pthread_self(), buf, sizeof(buf)));
|
||||
DIE_IF_MINUS_ERRNO(pthread_setname_np(pthread_self(), m_title.c_str()));
|
||||
|
||||
TaskStatePtr this_task = shared_from_this();
|
||||
swap(this_task, tl_current_task);
|
||||
lock.unlock();
|
||||
|
||||
const auto old_thread_name = pthread_getname();
|
||||
pthread_setname(m_title);
|
||||
|
||||
catch_all([&]() {
|
||||
m_exec_fn();
|
||||
});
|
||||
|
||||
swap(this_task, tl_current_task);
|
||||
pthread_setname_np(pthread_self(), buf);
|
||||
pthread_setname(old_thread_name);
|
||||
|
||||
lock.lock();
|
||||
swap(this_task, tl_current_task);
|
||||
m_is_running = false;
|
||||
|
||||
// Splice task post_exec queue at front of local queue
|
||||
@@ -326,24 +360,25 @@ namespace crucible {
|
||||
TaskState::run()
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
if (m_run_count) {
|
||||
if (m_run_now) {
|
||||
return;
|
||||
}
|
||||
++m_run_count;
|
||||
m_run_now = true;
|
||||
TaskMasterState::push_back(shared_from_this());
|
||||
}
|
||||
|
||||
TaskMasterState::TaskMasterState(size_t thread_max) :
|
||||
m_thread_max(thread_max),
|
||||
m_configured_thread_max(thread_max),
|
||||
m_thread_target(thread_max)
|
||||
m_thread_target(thread_max),
|
||||
m_load_stats(TaskMaster::LoadStats { 0 })
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
TaskMasterState::start_threads_nolock()
|
||||
{
|
||||
while (m_threads.size() < m_thread_max) {
|
||||
while (m_threads.size() < m_thread_max && !m_paused) {
|
||||
m_threads.insert(make_shared<TaskConsumer>(shared_from_this()));
|
||||
}
|
||||
}
|
||||
@@ -410,6 +445,13 @@ namespace crucible {
|
||||
return s_tms->m_threads.size();
|
||||
}
|
||||
|
||||
/// Return a copy of the TaskMasterState's stored load statistics.
/// The copy is taken while holding the master mutex.
TaskMaster::LoadStats
TaskMaster::get_current_load()
{
	unique_lock<mutex> lock(s_tms->m_mutex);
	const auto stats_copy = s_tms->m_load_stats;
	return stats_copy;
}
|
||||
|
||||
ostream &
|
||||
TaskMaster::print_queue(ostream &os)
|
||||
{
|
||||
@@ -444,8 +486,8 @@ namespace crucible {
|
||||
size_t
|
||||
TaskMasterState::calculate_thread_count_nolock()
|
||||
{
|
||||
if (m_cancelled) {
|
||||
// No threads running while cancelled
|
||||
if (m_paused) {
|
||||
// No threads running while paused or cancelled
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -477,19 +519,21 @@ namespace crucible {
|
||||
|
||||
m_prev_loadavg = loadavg;
|
||||
|
||||
// Change the thread target based on the
|
||||
// difference between current and desired load
|
||||
// but don't get too close all at once due to rounding and sample error.
|
||||
// If m_load_target < 1.0 then we are just doing PWM with one thread.
|
||||
|
||||
if (m_load_target <= 1.0) {
|
||||
m_thread_target = 1.0;
|
||||
} else if (m_load_target - current_load >= 1.0) {
|
||||
m_thread_target += (m_load_target - current_load - 1.0) / 2.0;
|
||||
} else if (m_load_target < current_load) {
|
||||
m_thread_target += m_load_target - current_load;
|
||||
const double load_deficit = m_load_target - loadavg;
|
||||
if (load_deficit > 0) {
|
||||
// Load is too low, solve by adding another worker
|
||||
m_thread_target += load_deficit / 3;
|
||||
} else if (load_deficit < 0) {
|
||||
// Load is too high, solve by removing all known excess tasks
|
||||
m_thread_target += load_deficit;
|
||||
}
|
||||
|
||||
m_load_stats = TaskMaster::LoadStats {
|
||||
.current_load = current_load,
|
||||
.thread_target = m_thread_target,
|
||||
.loadavg = loadavg,
|
||||
};
|
||||
|
||||
// Cannot exceed configured maximum thread count or less than zero
|
||||
m_thread_target = min(max(0.0, m_thread_target), double(m_configured_thread_max));
|
||||
|
||||
@@ -519,12 +563,6 @@ namespace crucible {
|
||||
TaskMasterState::set_thread_count(size_t thread_max)
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
// XXX: someday we might want to uncancel, and this would be the place to do it;
|
||||
// however, when we cancel we destroy the entire Task queue, and that might be
|
||||
// non-trivial to recover from
|
||||
if (m_cancelled) {
|
||||
return;
|
||||
}
|
||||
m_configured_thread_max = thread_max;
|
||||
lock.unlock();
|
||||
adjust_thread_count();
|
||||
@@ -541,6 +579,7 @@ namespace crucible {
|
||||
TaskMasterState::cancel()
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
m_paused = true;
|
||||
m_cancelled = true;
|
||||
decltype(m_queue) empty_queue;
|
||||
m_queue.swap(empty_queue);
|
||||
@@ -555,14 +594,25 @@ namespace crucible {
|
||||
s_tms->cancel();
|
||||
}
|
||||
|
||||
void
|
||||
TaskMasterState::pause(const bool paused)
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
m_paused = paused;
|
||||
m_condvar.notify_all();
|
||||
lock.unlock();
|
||||
}
|
||||
|
||||
void
|
||||
TaskMaster::pause(const bool paused)
|
||||
{
|
||||
s_tms->pause(paused);
|
||||
}
|
||||
|
||||
void
|
||||
TaskMasterState::set_thread_min_count(size_t thread_min)
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
// XXX: someday we might want to uncancel, and this would be the place to do it
|
||||
if (m_cancelled) {
|
||||
return;
|
||||
}
|
||||
m_thread_min = thread_min;
|
||||
lock.unlock();
|
||||
adjust_thread_count();
|
||||
@@ -578,7 +628,7 @@ namespace crucible {
|
||||
void
|
||||
TaskMasterState::loadavg_thread_fn()
|
||||
{
|
||||
pthread_setname_np(pthread_self(), "load_tracker");
|
||||
pthread_setname("load_tracker");
|
||||
while (!m_cancelled) {
|
||||
adjust_thread_count();
|
||||
nanosleep(5.0);
|
||||
@@ -694,7 +744,7 @@ namespace crucible {
|
||||
TaskConsumer::consumer_thread()
|
||||
{
|
||||
// Keep a copy because we will be destroying *this later
|
||||
auto master_copy = m_master;
|
||||
const auto master_copy = m_master;
|
||||
|
||||
// Constructor is running with master locked.
|
||||
// Wait until that is done before trying to do anything.
|
||||
@@ -704,13 +754,13 @@ namespace crucible {
|
||||
m_thread->detach();
|
||||
|
||||
// Set thread name so it isn't empty or the name of some other thread
|
||||
DIE_IF_MINUS_ERRNO(pthread_setname_np(pthread_self(), "task_consumer"));
|
||||
pthread_setname("task_consumer");
|
||||
|
||||
// It is now safe to access our own shared_ptr
|
||||
TaskConsumerPtr this_consumer = shared_from_this();
|
||||
swap(this_consumer, tl_current_consumer);
|
||||
|
||||
while (!master_copy->m_cancelled) {
|
||||
while (!master_copy->m_paused) {
|
||||
if (master_copy->m_thread_max < master_copy->m_threads.size()) {
|
||||
// We are one of too many threads, exit now
|
||||
break;
|
||||
@@ -749,6 +799,7 @@ namespace crucible {
|
||||
// There is no longer a current consumer, but hold our own shared
|
||||
// state so it's still there in the destructor
|
||||
swap(this_consumer, tl_current_consumer);
|
||||
assert(!tl_current_consumer);
|
||||
|
||||
// Release lock to rescue queue (may attempt to queue a new task at TaskMaster).
|
||||
// rescue_queue normally sends tasks to the local queue of the current TaskConsumer thread,
|
||||
@@ -780,24 +831,16 @@ namespace crucible {
|
||||
void insert_task(Task t);
|
||||
};
|
||||
|
||||
Barrier::Barrier(shared_ptr<BarrierState> pbs) :
|
||||
m_barrier_state(pbs)
|
||||
{
|
||||
}
|
||||
|
||||
Barrier::Barrier() :
|
||||
m_barrier_state(make_shared<BarrierState>())
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
BarrierState::release()
|
||||
{
|
||||
set<Task> tasks_local;
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
for (auto i : m_tasks) {
|
||||
swap(tasks_local, m_tasks);
|
||||
lock.unlock();
|
||||
for (const auto &i : tasks_local) {
|
||||
i.run();
|
||||
}
|
||||
m_tasks.clear();
|
||||
}
|
||||
|
||||
BarrierState::~BarrierState()
|
||||
@@ -805,17 +848,6 @@ namespace crucible {
|
||||
release();
|
||||
}
|
||||
|
||||
BarrierLock::BarrierLock(shared_ptr<BarrierState> pbs) :
|
||||
m_barrier_state(pbs)
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
BarrierLock::release()
|
||||
{
|
||||
m_barrier_state.reset();
|
||||
}
|
||||
|
||||
void
|
||||
BarrierState::insert_task(Task t)
|
||||
{
|
||||
@@ -823,122 +855,69 @@ namespace crucible {
|
||||
m_tasks.insert(t);
|
||||
}
|
||||
|
||||
Barrier::Barrier() :
|
||||
m_barrier_state(make_shared<BarrierState>())
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
Barrier::insert_task(Task t)
|
||||
{
|
||||
m_barrier_state->insert_task(t);
|
||||
}
|
||||
|
||||
BarrierLock
|
||||
Barrier::lock()
|
||||
{
|
||||
return BarrierLock(m_barrier_state);
|
||||
}
|
||||
|
||||
class ExclusionState {
|
||||
mutex m_mutex;
|
||||
bool m_locked = false;
|
||||
Task m_task;
|
||||
|
||||
public:
|
||||
ExclusionState(const string &title);
|
||||
~ExclusionState();
|
||||
void release();
|
||||
bool try_lock();
|
||||
void insert_task(Task t);
|
||||
};
|
||||
|
||||
Exclusion::Exclusion(shared_ptr<ExclusionState> pbs) :
|
||||
m_exclusion_state(pbs)
|
||||
{
|
||||
}
|
||||
|
||||
Exclusion::Exclusion(const string &title) :
|
||||
m_exclusion_state(make_shared<ExclusionState>(title))
|
||||
{
|
||||
}
|
||||
|
||||
ExclusionState::ExclusionState(const string &title) :
|
||||
m_task(title, [](){})
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
ExclusionState::release()
|
||||
Barrier::release()
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
m_locked = false;
|
||||
m_task.run();
|
||||
m_barrier_state.reset();
|
||||
}
|
||||
|
||||
ExclusionState::~ExclusionState()
|
||||
{
|
||||
release();
|
||||
}
|
||||
|
||||
ExclusionLock::ExclusionLock(shared_ptr<ExclusionState> pbs) :
|
||||
m_exclusion_state(pbs)
|
||||
ExclusionLock::ExclusionLock(shared_ptr<Task> owner) :
|
||||
m_owner(owner)
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
ExclusionLock::release()
|
||||
{
|
||||
if (m_exclusion_state) {
|
||||
m_exclusion_state->release();
|
||||
m_exclusion_state.reset();
|
||||
}
|
||||
}
|
||||
|
||||
ExclusionLock::~ExclusionLock()
|
||||
{
|
||||
release();
|
||||
m_owner.reset();
|
||||
}
|
||||
|
||||
void
|
||||
ExclusionState::insert_task(Task task)
|
||||
Exclusion::insert_task(const Task &task)
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
if (m_locked) {
|
||||
const auto sp = m_owner.lock();
|
||||
lock.unlock();
|
||||
if (sp) {
|
||||
// If Exclusion is locked then queue task for release;
|
||||
m_task.append(task);
|
||||
sp->append(task);
|
||||
} else {
|
||||
// otherwise, run the inserted task immediately
|
||||
task.run();
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
ExclusionState::try_lock()
|
||||
ExclusionLock
|
||||
Exclusion::try_lock(const Task &task)
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
if (m_locked) {
|
||||
return false;
|
||||
const auto sp = m_owner.lock();
|
||||
if (sp) {
|
||||
if (task) {
|
||||
sp->append(task);
|
||||
}
|
||||
return ExclusionLock();
|
||||
} else {
|
||||
m_locked = true;
|
||||
return true;
|
||||
const auto rv = make_shared<Task>(task);
|
||||
m_owner = rv;
|
||||
return ExclusionLock(rv);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Exclusion::insert_task(Task t)
|
||||
{
|
||||
m_exclusion_state->insert_task(t);
|
||||
}
|
||||
|
||||
ExclusionLock::operator bool() const
|
||||
{
|
||||
return !!m_exclusion_state;
|
||||
return !!m_owner;
|
||||
}
|
||||
|
||||
ExclusionLock
|
||||
Exclusion::try_lock()
|
||||
{
|
||||
THROW_CHECK0(runtime_error, m_exclusion_state);
|
||||
if (m_exclusion_state->try_lock()) {
|
||||
return ExclusionLock(m_exclusion_state);
|
||||
} else {
|
||||
return ExclusionLock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
11
lib/uname.cc
Normal file
11
lib/uname.cc
Normal file
@@ -0,0 +1,11 @@
|
||||
#include "crucible/error.h"
|
||||
#include "crucible/uname.h"
|
||||
|
||||
namespace crucible {
|
||||
using namespace std;
|
||||
|
||||
Uname::Uname()
|
||||
{
|
||||
DIE_IF_NON_ZERO(uname(static_cast<utsname*>(this)));
|
||||
}
|
||||
}
|
@@ -10,4 +10,4 @@ CCFLAGS = -Wall -Wextra -Werror -O3
|
||||
CCFLAGS += -I../include -D_FILE_OFFSET_BITS=64
|
||||
|
||||
BEES_CFLAGS = $(CCFLAGS) -std=c99 $(CFLAGS)
|
||||
BEES_CXXFLAGS = $(CCFLAGS) -std=c++11 -Wold-style-cast $(CXXFLAGS)
|
||||
BEES_CXXFLAGS = $(CCFLAGS) -std=c++11 -Wold-style-cast -Wno-missing-field-initializers $(CXXFLAGS)
|
||||
|
@@ -15,7 +15,7 @@ readonly AL128K="$((128*1024))"
|
||||
readonly AL16M="$((16*1024*1024))"
|
||||
readonly CONFIG_DIR=@ETC_PREFIX@/bees/
|
||||
|
||||
readonly bees_bin=$(realpath @LIBEXEC_PREFIX@/bees)
|
||||
readonly bees_bin=$(realpath @DESTDIR@/@LIBEXEC_PREFIX@/bees)
|
||||
|
||||
command -v "$bees_bin" &> /dev/null || ERRO "Missing 'bees' agent"
|
||||
|
||||
@@ -31,20 +31,18 @@ help(){
|
||||
exec "$bees_bin" --help
|
||||
}
|
||||
|
||||
get_bees_supp_opts(){
|
||||
"$bees_bin" --help |& awk '/--../ { gsub( ",", "" ); print $1 " " $2}'
|
||||
}
|
||||
|
||||
SUPPORTED_ARGS=(
|
||||
$(get_bees_supp_opts)
|
||||
)
|
||||
for i in $("$bees_bin" --help 2>&1 | grep -E " --" | sed -e "s/^[^-]*-/-/" -e "s/,[^-]*--/ --/" -e "s/ [^-]*$//")
|
||||
do
|
||||
TMP_ARGS="$TMP_ARGS $i"
|
||||
done
|
||||
IFS=" " read -r -a SUPPORTED_ARGS <<< $TMP_ARGS
|
||||
NOT_SUPPORTED_ARGS=()
|
||||
ARGUMENTS=()
|
||||
|
||||
for arg in "${@}"; do
|
||||
supp=false
|
||||
for supp_arg in "${SUPPORTED_ARGS[@]}"; do
|
||||
if [ "$arg" == "$supp_arg" ]; then
|
||||
if [[ "$arg" == ${supp_arg}* ]]; then
|
||||
supp=true
|
||||
break
|
||||
fi
|
||||
@@ -73,7 +71,7 @@ done
|
||||
[ -z "$UUID" ] && help
|
||||
|
||||
|
||||
FILE_CONFIG="$(egrep -l '^[^#]*UUID\s*=\s*"?'"$UUID" "$CONFIG_DIR"/*.conf | head -1)"
|
||||
FILE_CONFIG="$(grep -E -l '^[^#]*UUID\s*=\s*"?'"$UUID" "$CONFIG_DIR"/*.conf | head -1)"
|
||||
[ ! -f "$FILE_CONFIG" ] && ERRO "No config for $UUID"
|
||||
INFO "Find $UUID in $FILE_CONFIG, use as conf"
|
||||
source "$FILE_CONFIG"
|
||||
@@ -130,7 +128,7 @@ fi
|
||||
fi
|
||||
if (( "$OLD_SIZE" != "$NEW_SIZE" )); then
|
||||
INFO "Resize db: $OLD_SIZE -> $NEW_SIZE"
|
||||
[ -f "$BEESHOME/beescrawl.$UUID.dat" ] && rm "$BEESHOME/beescrawl.$UUID.dat"
|
||||
rm -f "$BEESHOME/beescrawl.dat"
|
||||
truncate -s $NEW_SIZE $DB_PATH
|
||||
fi
|
||||
chmod 700 "$DB_PATH"
|
||||
|
@@ -17,6 +17,7 @@ KillSignal=SIGTERM
|
||||
MemoryAccounting=true
|
||||
Nice=19
|
||||
Restart=on-abnormal
|
||||
RuntimeDirectory=bees
|
||||
StartupCPUWeight=25
|
||||
StartupIOWeight=25
|
||||
|
||||
|
29
src/Makefile
29
src/Makefile
@@ -1,11 +1,6 @@
|
||||
BEES = ../bin/bees
|
||||
PROGRAMS = \
|
||||
../bin/fiemap \
|
||||
../bin/fiewalk \
|
||||
|
||||
PROGRAM_OBJS = $(foreach b,$(PROGRAMS),$(patsubst ../bin/%,%.o,$(b)))
|
||||
|
||||
all: $(BEES) $(PROGRAMS)
|
||||
all: $(BEES)
|
||||
|
||||
include ../makeflags
|
||||
-include ../localconf
|
||||
@@ -25,25 +20,18 @@ BEES_OBJS = \
|
||||
|
||||
ALL_OBJS = $(BEES_OBJS) $(PROGRAM_OBJS)
|
||||
|
||||
bees-version.c: bees.h $(BEES_OBJS:.o=.cc) Makefile
|
||||
echo "const char *BEES_VERSION = \"$(BEES_VERSION)\";" > bees-version.new.c
|
||||
mv -f bees-version.new.c bees-version.c
|
||||
bees-version.c: bees.h $(BEES_OBJS:.o=.cc) Makefile ../lib/libcrucible.a
|
||||
echo "const char *BEES_VERSION = \"$(BEES_VERSION)\";" > bees-version.c.new
|
||||
if ! [ -e "$@" ] || ! cmp -s "$@.new" "$@"; then mv -fv $@.new $@; fi
|
||||
|
||||
bees-usage.c: bees-usage.txt Makefile
|
||||
(echo 'const char *BEES_USAGE = '; sed -r 's/^(.*)$$/"\1\\n"/' < bees-usage.txt; echo ';') > bees-usage.new.c
|
||||
mv -f bees-usage.new.c bees-usage.c
|
||||
|
||||
.depends:
|
||||
mkdir -p $@
|
||||
|
||||
.depends/%.dep: %.cc Makefile | .depends
|
||||
%.dep: %.cc Makefile
|
||||
$(CXX) $(BEES_CXXFLAGS) -M -MF $@ -MT $(<:.cc=.o) $<
|
||||
|
||||
depends.mk: $(ALL_OBJS:%.o=.depends/%.dep)
|
||||
cat $^ > $@.new
|
||||
mv -f $@.new $@
|
||||
|
||||
include depends.mk
|
||||
include $(ALL_OBJS:%.o=%.dep)
|
||||
|
||||
%.o: %.c ../makeflags
|
||||
$(CC) $(BEES_CFLAGS) -o $@ -c $<
|
||||
@@ -51,11 +39,6 @@ include depends.mk
|
||||
%.o: %.cc ../makeflags
|
||||
$(CXX) $(BEES_CXXFLAGS) -o $@ -c $<
|
||||
|
||||
$(PROGRAMS): ../bin/%: %.o
|
||||
$(CXX) $(BEES_CXXFLAGS) $(BEES_LDFLAGS) -o $@ $< $(LIBS)
|
||||
|
||||
$(PROGRAMS): ../lib/libcrucible.a
|
||||
|
||||
$(BEES): $(BEES_OBJS) bees-version.o bees-usage.o ../lib/libcrucible.a
|
||||
$(CXX) $(BEES_CXXFLAGS) $(BEES_LDFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
|
@@ -43,12 +43,13 @@ BeesFdCache::BeesFdCache(shared_ptr<BeesContext> ctx) :
|
||||
void
|
||||
BeesFdCache::clear()
|
||||
{
|
||||
BEESNOTE("Clearing root FD cache to enable subvol delete");
|
||||
BEESLOGDEBUG("Clearing root FD cache to enable subvol delete");
|
||||
BEESLOGDEBUG("Clearing root FD cache with size " << m_root_cache.size() << " to enable subvol delete");
|
||||
BEESNOTE("Clearing root FD cache with size " << m_root_cache.size());
|
||||
m_root_cache.clear();
|
||||
BEESCOUNT(root_clear);
|
||||
BEESLOGDEBUG("Clearing open FD cache to enable file delete");
|
||||
BEESNOTE("Clearing open FD cache to enable file delete");
|
||||
|
||||
BEESLOGDEBUG("Clearing open FD cache with size " << m_file_cache.size() << " to enable file delete");
|
||||
BEESNOTE("Clearing open FD cache with size " << m_file_cache.size());
|
||||
m_file_cache.clear();
|
||||
BEESCOUNT(open_clear);
|
||||
}
|
||||
@@ -84,11 +85,11 @@ BeesContext::dump_status()
|
||||
ofs << "RATES:\n";
|
||||
ofs << "\t" << avg_rates << "\n";
|
||||
|
||||
ofs << "THREADS (work queue " << TaskMaster::get_queue_count() << " of " << Task::instance_count() << " tasks, " << TaskMaster::get_thread_count() << " workers):\n";
|
||||
const auto load_stats = TaskMaster::get_current_load();
|
||||
ofs << "THREADS (work queue " << TaskMaster::get_queue_count() << " of " << Task::instance_count() << " tasks, " << TaskMaster::get_thread_count() << " workers, load: current " << load_stats.current_load << " target " << load_stats.thread_target << " average " << load_stats.loadavg << "):\n";
|
||||
for (auto t : BeesNote::get_status()) {
|
||||
ofs << "\ttid " << t.first << ": " << t.second << "\n";
|
||||
}
|
||||
|
||||
#if 0
|
||||
// Huge amount of data, not a lot of information (yet)
|
||||
ofs << "WORKERS:\n";
|
||||
@@ -152,8 +153,8 @@ BeesContext::show_progress()
|
||||
BEESLOGINFO("\t" << deltaRates);
|
||||
|
||||
BEESNOTE("logging current thread status");
|
||||
BEESLOGINFO("THREADS:");
|
||||
|
||||
const auto load_stats = TaskMaster::get_current_load();
|
||||
BEESLOGINFO("THREADS (work queue " << TaskMaster::get_queue_count() << " of " << Task::instance_count() << " tasks, " << TaskMaster::get_thread_count() << " workers, load: current " << load_stats.current_load << " target " << load_stats.thread_target << " average " << load_stats.loadavg << "):");
|
||||
for (auto t : BeesNote::get_status()) {
|
||||
BEESLOGINFO("\ttid " << t.first << ": " << t.second);
|
||||
}
|
||||
@@ -187,29 +188,26 @@ BeesContext::is_root_ro(uint64_t root)
|
||||
}
|
||||
|
||||
bool
|
||||
BeesContext::dedup(const BeesRangePair &brp)
|
||||
BeesContext::dedup(const BeesRangePair &brp_in)
|
||||
{
|
||||
// TOOLONG and NOTE can retroactively fill in the filename details, but LOG can't
|
||||
BEESNOTE("dedup " << brp);
|
||||
BEESNOTE("dedup " << brp_in);
|
||||
|
||||
brp.second.fd(shared_from_this());
|
||||
|
||||
if (is_root_ro(brp.second.fid().root())) {
|
||||
// BEESLOGDEBUG("WORKAROUND: dst root is read-only in " << name_fd(brp.second.fd()));
|
||||
if (is_root_ro(brp_in.second.fid().root())) {
|
||||
// BEESLOGDEBUG("WORKAROUND: dst root " << (brp_in.second.fid().root()) << " is read-only);
|
||||
BEESCOUNT(dedup_workaround_btrfs_send);
|
||||
return false;
|
||||
}
|
||||
|
||||
auto brp = brp_in;
|
||||
brp.first.fd(shared_from_this());
|
||||
brp.second.fd(shared_from_this());
|
||||
|
||||
BEESTOOLONG("dedup " << brp);
|
||||
|
||||
BeesAddress first_addr(brp.first.fd(), brp.first.begin());
|
||||
BeesAddress second_addr(brp.second.fd(), brp.second.begin());
|
||||
|
||||
BEESLOGINFO("dedup: src " << pretty(brp.first.size()) << " [" << to_hex(brp.first.begin()) << ".." << to_hex(brp.first.end()) << "] {" << first_addr << "} " << name_fd(brp.first.fd()) << "\n"
|
||||
<< " dst " << pretty(brp.second.size()) << " [" << to_hex(brp.second.begin()) << ".." << to_hex(brp.second.end()) << "] {" << second_addr << "} " << name_fd(brp.second.fd()));
|
||||
|
||||
if (first_addr.get_physical_or_zero() == second_addr.get_physical_or_zero()) {
|
||||
BEESLOGTRACE("equal physical addresses in dedup");
|
||||
BEESCOUNT(bug_dedup_same_physical);
|
||||
@@ -219,8 +217,18 @@ BeesContext::dedup(const BeesRangePair &brp)
|
||||
THROW_CHECK1(invalid_argument, brp, brp.first.size() == brp.second.size());
|
||||
|
||||
BEESCOUNT(dedup_try);
|
||||
|
||||
BEESNOTE("waiting to dedup " << brp);
|
||||
const auto lock = MultiLocker::get_lock("dedupe");
|
||||
|
||||
Timer dedup_timer;
|
||||
bool rv = btrfs_extent_same(brp.first.fd(), brp.first.begin(), brp.first.size(), brp.second.fd(), brp.second.begin());
|
||||
|
||||
BEESLOGINFO("dedup: src " << pretty(brp.first.size()) << " [" << to_hex(brp.first.begin()) << ".." << to_hex(brp.first.end()) << "] {" << first_addr << "} " << name_fd(brp.first.fd()) << "\n"
|
||||
<< " dst " << pretty(brp.second.size()) << " [" << to_hex(brp.second.begin()) << ".." << to_hex(brp.second.end()) << "] {" << second_addr << "} " << name_fd(brp.second.fd()));
|
||||
BEESNOTE("dedup: src " << pretty(brp.first.size()) << " [" << to_hex(brp.first.begin()) << ".." << to_hex(brp.first.end()) << "] {" << first_addr << "} " << name_fd(brp.first.fd()) << "\n"
|
||||
<< " dst " << pretty(brp.second.size()) << " [" << to_hex(brp.second.begin()) << ".." << to_hex(brp.second.end()) << "] {" << second_addr << "} " << name_fd(brp.second.fd()));
|
||||
|
||||
const bool rv = btrfs_extent_same(brp.first.fd(), brp.first.begin(), brp.first.size(), brp.second.fd(), brp.second.begin());
|
||||
BEESCOUNTADD(dedup_ms, dedup_timer.age() * 1000);
|
||||
|
||||
if (rv) {
|
||||
@@ -292,6 +300,15 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
|
||||
BEESTRACE("scan extent " << e);
|
||||
BEESCOUNT(scan_extent);
|
||||
|
||||
// EXPERIMENT: Don't bother with tiny extents unless they are the entire file.
|
||||
// We'll take a tiny extent at BOF or EOF but not in between.
|
||||
if (e.begin() && e.size() < 128 * 1024 && e.end() != Stat(bfr.fd()).st_size) {
|
||||
BEESCOUNT(scan_extent_tiny);
|
||||
// This doesn't work properly with the current architecture,
|
||||
// so we don't do an early return here.
|
||||
// return bfr;
|
||||
}
|
||||
|
||||
// We keep moving this method around
|
||||
auto m_ctx = shared_from_this();
|
||||
|
||||
@@ -317,29 +334,23 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
|
||||
if (e.flags() & Extent::PREALLOC) {
|
||||
// Prealloc is all zero and we replace it with a hole.
|
||||
// No special handling is required here. Nuke it and move on.
|
||||
Task(
|
||||
"dedup_prealloc",
|
||||
[m_ctx, bfr, e]() {
|
||||
BEESLOGINFO("prealloc extent " << e);
|
||||
// Must not extend past EOF
|
||||
auto extent_size = min(e.end(), bfr.file_size()) - e.begin();
|
||||
// Must hold tmpfile until dedupe is done
|
||||
auto tmpfile = m_ctx->tmpfile();
|
||||
BeesFileRange prealloc_bfr(tmpfile->make_hole(extent_size));
|
||||
// Apparently they can both extend past EOF
|
||||
BeesFileRange copy_bfr(bfr.fd(), e.begin(), e.begin() + extent_size);
|
||||
BeesRangePair brp(prealloc_bfr, copy_bfr);
|
||||
// Raw dedupe here - nothing else to do with this extent, nothing to merge with
|
||||
if (m_ctx->dedup(brp)) {
|
||||
BEESCOUNT(dedup_prealloc_hit);
|
||||
BEESCOUNTADD(dedup_prealloc_bytes, e.size());
|
||||
// return bfr;
|
||||
} else {
|
||||
BEESCOUNT(dedup_prealloc_miss);
|
||||
}
|
||||
}
|
||||
).run();
|
||||
return bfr; // if dedupe success, which we now blindly assume
|
||||
BEESLOGINFO("prealloc extent " << e);
|
||||
// Must not extend past EOF
|
||||
auto extent_size = min(e.end(), bfr.file_size()) - e.begin();
|
||||
// Must hold tmpfile until dedupe is done
|
||||
const auto tmpfile = m_ctx->tmpfile();
|
||||
BeesFileRange prealloc_bfr(tmpfile->make_hole(extent_size));
|
||||
// Apparently they can both extend past EOF
|
||||
BeesFileRange copy_bfr(bfr.fd(), e.begin(), e.begin() + extent_size);
|
||||
BeesRangePair brp(prealloc_bfr, copy_bfr);
|
||||
// Raw dedupe here - nothing else to do with this extent, nothing to merge with
|
||||
if (m_ctx->dedup(brp)) {
|
||||
BEESCOUNT(dedup_prealloc_hit);
|
||||
BEESCOUNTADD(dedup_prealloc_bytes, e.size());
|
||||
return bfr;
|
||||
} else {
|
||||
BEESCOUNT(dedup_prealloc_miss);
|
||||
}
|
||||
}
|
||||
|
||||
// OK we need to read extent now
|
||||
@@ -591,57 +602,6 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
|
||||
BEESCOUNT(scan_zero_compressed);
|
||||
}
|
||||
|
||||
// Turning this off because it's a waste of time on small extents
|
||||
// and it's incorrect for large extents.
|
||||
#if 0
|
||||
// If the extent contains obscured blocks, and we can find no
|
||||
// other refs to the extent that reveal those blocks, nuke the incoming extent.
|
||||
// Don't rewrite extents that are bigger than the maximum FILE_EXTENT_SAME size
|
||||
// because we can't make extents that large with dedupe.
|
||||
// Don't rewrite small extents because it is a waste of time without being
|
||||
// able to combine them into bigger extents.
|
||||
if (!rewrite_extent && (e.flags() & Extent::OBSCURED) && (e.physical_len() > BLOCK_SIZE_MAX_COMPRESSED_EXTENT) && (e.physical_len() < BLOCK_SIZE_MAX_EXTENT_SAME)) {
|
||||
BEESCOUNT(scan_obscured);
|
||||
BEESNOTE("obscured extent " << e);
|
||||
// We have to map all the source blocks to see if any of them
|
||||
// (or all of them aggregated) provide a path through the FS to the blocks
|
||||
BeesResolver br(m_ctx, BeesAddress(e, e.begin()));
|
||||
BeesBlockData ref_bbd(bfr.fd(), bfr.begin(), min(BLOCK_SIZE_SUMS, bfr.size()));
|
||||
// BEESLOG("ref_bbd " << ref_bbd);
|
||||
auto bfr_set = br.find_all_matches(ref_bbd);
|
||||
bool non_obscured_extent_found = false;
|
||||
set<off_t> blocks_to_find;
|
||||
for (off_t j = 0; j < e.physical_len(); j += BLOCK_SIZE_CLONE) {
|
||||
blocks_to_find.insert(j);
|
||||
}
|
||||
// Don't bother if saving less than 1%
|
||||
auto maximum_hidden_count = blocks_to_find.size() / 100;
|
||||
for (auto i : bfr_set) {
|
||||
BtrfsExtentWalker ref_ew(bfr.fd(), bfr.begin(), m_ctx->root_fd());
|
||||
Extent ref_e = ref_ew.current();
|
||||
// BEESLOG("\tref_e " << ref_e);
|
||||
THROW_CHECK2(out_of_range, ref_e, e, ref_e.offset() + ref_e.logical_len() <= e.physical_len());
|
||||
for (off_t j = ref_e.offset(); j < ref_e.offset() + ref_e.logical_len(); j += BLOCK_SIZE_CLONE) {
|
||||
blocks_to_find.erase(j);
|
||||
}
|
||||
if (blocks_to_find.size() <= maximum_hidden_count) {
|
||||
BEESCOUNT(scan_obscured_miss);
|
||||
BEESLOG("Found references to all but " << blocks_to_find.size() << " blocks");
|
||||
non_obscured_extent_found = true;
|
||||
break;
|
||||
} else {
|
||||
BEESCOUNT(scan_obscured_hit);
|
||||
// BEESLOG("blocks_to_find: " << blocks_to_find.size() << " from " << *blocks_to_find.begin() << ".." << *blocks_to_find.rbegin());
|
||||
}
|
||||
}
|
||||
if (!non_obscured_extent_found) {
|
||||
// BEESLOG("No non-obscured extents found");
|
||||
rewrite_extent = true;
|
||||
BEESCOUNT(scan_obscured_rewrite);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// If we deduped any blocks then we must rewrite the remainder of the extent
|
||||
if (!noinsert_set.empty()) {
|
||||
rewrite_extent = true;
|
||||
@@ -708,27 +668,34 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
|
||||
BEESLOGINFO("scan: " << pretty(e.size()) << " " << to_hex(e.begin()) << " [" << bar << "] " << to_hex(e.end()) << ' ' << name_fd(bfr.fd()));
|
||||
}
|
||||
|
||||
// Costs 10% on benchmarks
|
||||
// bees_unreadahead(bfr.fd(), bfr.begin(), bfr.size());
|
||||
return bfr;
|
||||
}
|
||||
|
||||
BeesFileRange
|
||||
BeesContext::scan_forward(const BeesFileRange &bfr)
|
||||
shared_ptr<Exclusion>
|
||||
BeesContext::get_inode_mutex(const uint64_t inode)
|
||||
{
|
||||
// What are we doing here?
|
||||
BEESTRACE("scan_forward " << bfr);
|
||||
return m_inode_locks(inode);
|
||||
}
|
||||
|
||||
bool
|
||||
BeesContext::scan_forward(const BeesFileRange &bfr_in)
|
||||
{
|
||||
BEESTRACE("scan_forward " << bfr_in);
|
||||
BEESCOUNT(scan_forward);
|
||||
|
||||
Timer scan_timer;
|
||||
|
||||
// Silently filter out blacklisted files
|
||||
if (is_blacklisted(bfr.fid())) {
|
||||
if (is_blacklisted(bfr_in.fid())) {
|
||||
BEESCOUNT(scan_blacklisted);
|
||||
return bfr;
|
||||
return false;
|
||||
}
|
||||
|
||||
BEESNOTE("scan open " << bfr);
|
||||
|
||||
// Reconstitute FD
|
||||
BEESNOTE("scan open " << bfr_in);
|
||||
auto bfr = bfr_in;
|
||||
bfr.fd(shared_from_this());
|
||||
|
||||
BEESNOTE("scan extent " << bfr);
|
||||
@@ -737,31 +704,35 @@ BeesContext::scan_forward(const BeesFileRange &bfr)
|
||||
if (!bfr.fd()) {
|
||||
// BEESLOGINFO("No FD in " << root_path() << " for " << bfr);
|
||||
BEESCOUNT(scan_no_fd);
|
||||
return bfr;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Sanity check
|
||||
if (bfr.begin() >= bfr.file_size()) {
|
||||
BEESLOGWARN("past EOF: " << bfr);
|
||||
BEESCOUNT(scan_eof);
|
||||
return bfr;
|
||||
return false;
|
||||
}
|
||||
|
||||
BtrfsExtentWalker ew(bfr.fd(), bfr.begin(), root_fd());
|
||||
|
||||
BeesFileRange return_bfr(bfr);
|
||||
|
||||
Extent e;
|
||||
bool start_over = false;
|
||||
catch_all([&]() {
|
||||
while (!stop_requested()) {
|
||||
while (!stop_requested() && !start_over) {
|
||||
e = ew.current();
|
||||
|
||||
catch_all([&]() {
|
||||
uint64_t extent_bytenr = e.bytenr();
|
||||
BEESNOTE("waiting for extent bytenr " << to_hex(extent_bytenr));
|
||||
auto extent_lock = m_extent_lock_set.make_lock(extent_bytenr);
|
||||
auto extent_mutex = m_extent_locks(extent_bytenr);
|
||||
const auto extent_lock = extent_mutex->try_lock(Task::current_task());
|
||||
if (!extent_lock) {
|
||||
// BEESLOGDEBUG("Deferring extent bytenr " << to_hex(extent_bytenr) << " from " << bfr);
|
||||
BEESCOUNT(scanf_deferred_extent);
|
||||
start_over = true;
|
||||
}
|
||||
Timer one_extent_timer;
|
||||
return_bfr = scan_one_extent(bfr, e);
|
||||
scan_one_extent(bfr, e);
|
||||
BEESCOUNTADD(scanf_extent_ms, one_extent_timer.age() * 1000);
|
||||
BEESCOUNT(scanf_extent);
|
||||
});
|
||||
@@ -779,51 +750,13 @@ BeesContext::scan_forward(const BeesFileRange &bfr)
|
||||
BEESCOUNTADD(scanf_total_ms, scan_timer.age() * 1000);
|
||||
BEESCOUNT(scanf_total);
|
||||
|
||||
return return_bfr;
|
||||
return start_over;
|
||||
}
|
||||
|
||||
BeesResolveAddrResult::BeesResolveAddrResult()
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
BeesContext::wait_for_balance()
|
||||
{
|
||||
if (!BEES_SERIALIZE_BALANCE) {
|
||||
return;
|
||||
}
|
||||
|
||||
Timer balance_timer;
|
||||
BEESNOTE("WORKAROUND: waiting for balance to stop");
|
||||
while (true) {
|
||||
btrfs_ioctl_balance_args args;
|
||||
memset_zero<btrfs_ioctl_balance_args>(&args);
|
||||
const int ret = ioctl(root_fd(), BTRFS_IOC_BALANCE_PROGRESS, &args);
|
||||
if (ret < 0) {
|
||||
// Either can't get balance status or not running, exit either way
|
||||
break;
|
||||
}
|
||||
|
||||
if (!(args.state & BTRFS_BALANCE_STATE_RUNNING)) {
|
||||
// Balance not running, doesn't matter if paused or cancelled
|
||||
break;
|
||||
}
|
||||
|
||||
BEESLOGDEBUG("WORKAROUND: Waiting " << balance_timer << "s for balance to stop");
|
||||
unique_lock<mutex> lock(m_abort_mutex);
|
||||
if (m_abort_requested) {
|
||||
// Force the calling function to stop. We cannot
|
||||
// proceed to LOGICAL_INO while balance is running
|
||||
// until the bugs are fixed, and it's probably
|
||||
// not going to be particularly fast to have
|
||||
// both bees and balance banging the disk anyway.
|
||||
BeesTracer::set_silent();
|
||||
throw std::runtime_error("Stop requested while balance running");
|
||||
}
|
||||
m_abort_condvar.wait_for(lock, chrono::duration<double>(BEES_BALANCE_POLL_INTERVAL));
|
||||
}
|
||||
}
|
||||
|
||||
BeesResolveAddrResult
|
||||
BeesContext::resolve_addr_uncached(BeesAddress addr)
|
||||
{
|
||||
@@ -835,37 +768,21 @@ BeesContext::resolve_addr_uncached(BeesAddress addr)
|
||||
// transaction latency, competing threads, and freeze/SIGSTOP
|
||||
// pausing the bees process.
|
||||
|
||||
// There can be only one of these running at a time, or some lingering
|
||||
// backref bug will kill the whole system. Also it looks like there
|
||||
// are so many locks held while LOGICAL_INO runs that there is no
|
||||
// point in trying to run two of them on the same filesystem.
|
||||
// ...but it works most of the time, and the performance hit from
|
||||
// not running resolve in multiple threads is significant.
|
||||
// But "most of the time" really just means "between forced reboots",
|
||||
// and with recent improvements in kernel uptime, this is now in the
|
||||
// top 3 crash causes.
|
||||
static mutex s_resolve_mutex;
|
||||
unique_lock<mutex> lock(s_resolve_mutex, defer_lock);
|
||||
if (BEES_SERIALIZE_RESOLVE) {
|
||||
BEESNOTE("waiting to resolve addr " << addr);
|
||||
lock.lock();
|
||||
}
|
||||
|
||||
// Is there a bug where resolve and balance cause a crash (BUG_ON at fs/btrfs/ctree.c:1227)?
|
||||
// Apparently yes, and more than one.
|
||||
// Wait for the balance to finish before we run LOGICAL_INO
|
||||
wait_for_balance();
|
||||
BtrfsIoctlLogicalInoArgs log_ino(addr.get_physical_or_zero());
|
||||
|
||||
// Time how long this takes
|
||||
Timer resolve_timer;
|
||||
|
||||
BtrfsIoctlLogicalInoArgs log_ino(addr.get_physical_or_zero());
|
||||
|
||||
// Get this thread's system CPU usage
|
||||
struct rusage usage_before;
|
||||
DIE_IF_MINUS_ONE(getrusage(RUSAGE_THREAD, &usage_before));
|
||||
|
||||
{
|
||||
BEESNOTE("waiting to resolve addr " << addr << " with LOGICAL_INO");
|
||||
const auto lock = MultiLocker::get_lock("logical_ino");
|
||||
|
||||
// Get this thread's system CPU usage
|
||||
DIE_IF_MINUS_ONE(getrusage(RUSAGE_THREAD, &usage_before));
|
||||
|
||||
// Restart timer now that we're no longer waiting for lock
|
||||
resolve_timer.reset();
|
||||
BEESTOOLONG("Resolving addr " << addr << " in " << root_path() << " refs " << log_ino.m_iors.size());
|
||||
BEESNOTE("resolving addr " << addr << " with LOGICAL_INO");
|
||||
if (log_ino.do_ioctl_nothrow(root_fd())) {
|
||||
@@ -880,22 +797,22 @@ BeesContext::resolve_addr_uncached(BeesAddress addr)
|
||||
struct rusage usage_after;
|
||||
DIE_IF_MINUS_ONE(getrusage(RUSAGE_THREAD, &usage_after));
|
||||
|
||||
double sys_usage_delta =
|
||||
const double sys_usage_delta =
|
||||
(usage_after.ru_stime.tv_sec + usage_after.ru_stime.tv_usec / 1000000.0) -
|
||||
(usage_before.ru_stime.tv_sec + usage_before.ru_stime.tv_usec / 1000000.0);
|
||||
|
||||
double user_usage_delta =
|
||||
const double user_usage_delta =
|
||||
(usage_after.ru_utime.tv_sec + usage_after.ru_utime.tv_usec / 1000000.0) -
|
||||
(usage_before.ru_utime.tv_sec + usage_before.ru_utime.tv_usec / 1000000.0);
|
||||
|
||||
auto rt_age = resolve_timer.age();
|
||||
const auto rt_age = resolve_timer.age();
|
||||
|
||||
BeesResolveAddrResult rv;
|
||||
|
||||
// Avoid performance problems - pretend resolve failed if there are too many refs
|
||||
const size_t rv_count = log_ino.m_iors.size();
|
||||
if (rv_count < BEES_MAX_EXTENT_REF_COUNT) {
|
||||
rv.m_biors = log_ino.m_iors;
|
||||
rv.m_biors = vector<BtrfsInodeOffsetRoot>(log_ino.m_iors.begin(), log_ino.m_iors.end());
|
||||
} else {
|
||||
BEESLOGINFO("addr " << addr << " refs " << rv_count << " overflows configured ref limit " << BEES_MAX_EXTENT_REF_COUNT);
|
||||
BEESCOUNT(resolve_overflow);
|
||||
@@ -912,12 +829,13 @@ BeesContext::resolve_addr_uncached(BeesAddress addr)
|
||||
|
||||
// Count how many times this happens so we can figure out how
|
||||
// important this case is
|
||||
static size_t most_refs_ever = 2730;
|
||||
static const size_t max_logical_ino_v1_refs = 2730; // (65536 - header_len) / (sizeof(uint64_t) * 3)
|
||||
static size_t most_refs_ever = max_logical_ino_v1_refs;
|
||||
if (rv_count > most_refs_ever) {
|
||||
BEESLOGINFO("addr " << addr << " refs " << rv_count << " beats previous record " << most_refs_ever);
|
||||
most_refs_ever = rv_count;
|
||||
}
|
||||
if (rv_count > 2730) {
|
||||
if (rv_count > max_logical_ino_v1_refs) {
|
||||
BEESCOUNT(resolve_large);
|
||||
}
|
||||
|
||||
@@ -937,6 +855,14 @@ BeesContext::invalidate_addr(BeesAddress addr)
|
||||
return m_resolve_cache.expire(addr.get_physical_or_zero());
|
||||
}
|
||||
|
||||
// Empty the resolve cache (m_resolve_cache).
// The cache's size at the time of the call is published as a status
// note and written to the debug log before the cache is cleared.
void
|
||||
BeesContext::resolve_cache_clear()
|
||||
{
|
||||
BEESNOTE("clearing resolve cache with size " << m_resolve_cache.size());
|
||||
BEESLOGDEBUG("Clearing resolve cache with size " << m_resolve_cache.size());
|
||||
// Forwards whatever clear() yields; this function itself is declared void.
return m_resolve_cache.clear();
|
||||
}
|
||||
|
||||
void
|
||||
BeesContext::set_root_fd(Fd fd)
|
||||
{
|
||||
@@ -956,18 +882,21 @@ BeesContext::set_root_fd(Fd fd)
|
||||
});
|
||||
}
|
||||
|
||||
// Describe a BeesHalt: always returns the same fixed string literal.
// Declared noexcept, so it never throws.
const char *
|
||||
BeesHalt::what() const noexcept
|
||||
{
|
||||
return "bees stop requested";
|
||||
}
|
||||
|
||||
void
|
||||
BeesContext::start()
|
||||
{
|
||||
BEESLOGNOTICE("Starting bees main loop...");
|
||||
BEESNOTE("starting BeesContext");
|
||||
|
||||
m_extent_locks.func([](uint64_t bytenr) {
|
||||
return make_shared<Exclusion>();
|
||||
(void)bytenr;
|
||||
});
|
||||
m_inode_locks.func([](const uint64_t fid) {
|
||||
return make_shared<Exclusion>();
|
||||
(void)fid;
|
||||
});
|
||||
m_progress_thread = make_shared<BeesThread>("progress_report");
|
||||
m_progress_thread = make_shared<BeesThread>("progress_report");
|
||||
m_status_thread = make_shared<BeesThread>("status_report");
|
||||
m_progress_thread->exec([=]() {
|
||||
@@ -1002,17 +931,37 @@ BeesContext::stop()
|
||||
Timer stop_timer;
|
||||
BEESLOGNOTICE("Stopping bees...");
|
||||
|
||||
BEESNOTE("aborting blocked tasks");
|
||||
BEESLOGDEBUG("Aborting blocked tasks");
|
||||
unique_lock<mutex> abort_lock(m_abort_mutex);
|
||||
m_abort_requested = true;
|
||||
m_abort_condvar.notify_all();
|
||||
abort_lock.unlock();
|
||||
|
||||
// Stop TaskConsumers without hurting the Task objects that carry the Crawl state
|
||||
BEESNOTE("pausing work queue");
|
||||
BEESLOGDEBUG("Pausing work queue");
|
||||
TaskMaster::set_thread_count(0);
|
||||
TaskMaster::pause();
|
||||
|
||||
// Stop crawlers first so we get good progress persisted on disk
|
||||
BEESNOTE("stopping crawlers and flushing crawl state");
|
||||
BEESLOGDEBUG("Stopping crawlers and flushing crawl state");
|
||||
if (m_roots) {
|
||||
m_roots->stop_request();
|
||||
} else {
|
||||
BEESLOGDEBUG("Crawlers not running");
|
||||
}
|
||||
|
||||
BEESNOTE("stopping and flushing hash table");
|
||||
BEESLOGDEBUG("Stopping and flushing hash table");
|
||||
if (m_hash_table) {
|
||||
m_hash_table->stop_request();
|
||||
} else {
|
||||
BEESLOGDEBUG("Hash table not running");
|
||||
}
|
||||
|
||||
// Wait for crawler writeback to finish
|
||||
BEESNOTE("waiting for crawlers to stop");
|
||||
BEESLOGDEBUG("Waiting for crawlers to stop");
|
||||
if (m_roots) {
|
||||
m_roots->stop_wait();
|
||||
}
|
||||
|
||||
// It is now no longer possible to update progress in $BEESHOME,
|
||||
// so we can destroy Tasks with reckless abandon.
|
||||
BEESNOTE("setting stop_request flag");
|
||||
BEESLOGDEBUG("Setting stop_request flag");
|
||||
unique_lock<mutex> lock(m_stop_mutex);
|
||||
@@ -1020,49 +969,20 @@ BeesContext::stop()
|
||||
m_stop_condvar.notify_all();
|
||||
lock.unlock();
|
||||
|
||||
// Stop crawlers first so we get good progress persisted on disk
|
||||
BEESNOTE("stopping crawlers");
|
||||
BEESLOGDEBUG("Stopping crawlers");
|
||||
if (m_roots) {
|
||||
m_roots->stop();
|
||||
m_roots.reset();
|
||||
} else {
|
||||
BEESLOGDEBUG("Crawlers not running");
|
||||
}
|
||||
|
||||
BEESNOTE("cancelling work queue");
|
||||
BEESLOGDEBUG("Cancelling work queue");
|
||||
TaskMaster::cancel();
|
||||
|
||||
BEESNOTE("stopping hash table");
|
||||
BEESLOGDEBUG("Stopping hash table");
|
||||
// Wait for hash table flush to complete
|
||||
BEESNOTE("waiting for hash table flush to stop");
|
||||
BEESLOGDEBUG("waiting for hash table flush to stop");
|
||||
if (m_hash_table) {
|
||||
m_hash_table->stop();
|
||||
m_hash_table.reset();
|
||||
} else {
|
||||
BEESLOGDEBUG("Hash table not running");
|
||||
m_hash_table->stop_wait();
|
||||
}
|
||||
|
||||
BEESNOTE("closing tmpfiles");
|
||||
BEESLOGDEBUG("Closing tmpfiles");
|
||||
m_tmpfile_pool.clear();
|
||||
// Write status once with this message...
|
||||
BEESNOTE("stopping status thread at " << stop_timer << " sec");
|
||||
lock.lock();
|
||||
m_stop_condvar.notify_all();
|
||||
lock.unlock();
|
||||
|
||||
BEESNOTE("closing FD caches");
|
||||
BEESLOGDEBUG("Closing FD caches");
|
||||
if (m_fd_cache) {
|
||||
m_fd_cache->clear();
|
||||
BEESNOTE("destroying FD caches");
|
||||
BEESLOGDEBUG("Destroying FD caches");
|
||||
m_fd_cache.reset();
|
||||
}
|
||||
|
||||
BEESNOTE("waiting for progress thread");
|
||||
BEESLOGDEBUG("Waiting for progress thread");
|
||||
m_progress_thread->join();
|
||||
|
||||
// XXX: nobody can see this BEESNOTE because we are killing the
|
||||
// thread that publishes it
|
||||
BEESNOTE("waiting for status thread");
|
||||
// then wake the thread up one more time to exit the while loop
|
||||
BEESLOGDEBUG("Waiting for status thread");
|
||||
lock.lock();
|
||||
m_stop_status = true;
|
||||
@@ -1071,6 +991,9 @@ BeesContext::stop()
|
||||
m_status_thread->join();
|
||||
|
||||
BEESLOGNOTICE("bees stopped in " << stop_timer << " sec");
|
||||
|
||||
// Skip all destructors, do not pass GO, do not collect atexit() functions
|
||||
_exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
bool
|
||||
@@ -1111,13 +1034,7 @@ shared_ptr<BeesTempFile>
|
||||
BeesContext::tmpfile()
|
||||
{
|
||||
unique_lock<mutex> lock(m_stop_mutex);
|
||||
|
||||
if (m_stop_requested) {
|
||||
throw BeesHalt();
|
||||
}
|
||||
|
||||
lock.unlock();
|
||||
|
||||
return m_tmpfile_pool();
|
||||
}
|
||||
|
||||
@@ -1125,9 +1042,6 @@ shared_ptr<BeesFdCache>
|
||||
BeesContext::fd_cache()
|
||||
{
|
||||
unique_lock<mutex> lock(m_stop_mutex);
|
||||
if (m_stop_requested) {
|
||||
throw BeesHalt();
|
||||
}
|
||||
if (!m_fd_cache) {
|
||||
m_fd_cache = make_shared<BeesFdCache>(shared_from_this());
|
||||
}
|
||||
@@ -1138,9 +1052,6 @@ shared_ptr<BeesRoots>
|
||||
BeesContext::roots()
|
||||
{
|
||||
unique_lock<mutex> lock(m_stop_mutex);
|
||||
if (m_stop_requested) {
|
||||
throw BeesHalt();
|
||||
}
|
||||
if (!m_roots) {
|
||||
m_roots = make_shared<BeesRoots>(shared_from_this());
|
||||
}
|
||||
@@ -1151,9 +1062,6 @@ shared_ptr<BeesHashTable>
|
||||
BeesContext::hash_table()
|
||||
{
|
||||
unique_lock<mutex> lock(m_stop_mutex);
|
||||
if (m_stop_requested) {
|
||||
throw BeesHalt();
|
||||
}
|
||||
if (!m_hash_table) {
|
||||
m_hash_table = make_shared<BeesHashTable>(shared_from_this(), "beeshash.dat");
|
||||
}
|
||||
|
@@ -3,9 +3,9 @@
|
||||
#include "crucible/city.h"
|
||||
#include "crucible/crc64.h"
|
||||
#include "crucible/string.h"
|
||||
#include "crucible/uname.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <random>
|
||||
|
||||
#include <sys/mman.h>
|
||||
|
||||
@@ -106,12 +106,6 @@ BeesHashTable::flush_dirty_extent(uint64_t extent_index)
|
||||
BEESNOTE("flushing extent #" << extent_index << " of " << m_extents << " extents");
|
||||
|
||||
auto lock = lock_extent_by_index(extent_index);
|
||||
|
||||
// Not dirty, nothing to do
|
||||
if (!m_extent_metadata.at(extent_index).m_dirty) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool wrote_extent = false;
|
||||
|
||||
catch_all([&]() {
|
||||
@@ -123,10 +117,7 @@ BeesHashTable::flush_dirty_extent(uint64_t extent_index)
|
||||
THROW_CHECK2(out_of_range, dirty_extent_end, dirty_extent, dirty_extent_end - dirty_extent == BLOCK_SIZE_HASHTAB_EXTENT);
|
||||
BEESTOOLONG("pwrite(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
|
||||
// Copy the extent because we might be stuck writing for a while
|
||||
vector<uint8_t> extent_copy(dirty_extent, dirty_extent_end);
|
||||
|
||||
// Mark extent non-dirty while we still hold the lock
|
||||
m_extent_metadata.at(extent_index).m_dirty = false;
|
||||
ByteVector extent_copy(dirty_extent, dirty_extent_end);
|
||||
|
||||
// Release the lock
|
||||
lock.unlock();
|
||||
@@ -139,6 +130,10 @@ BeesHashTable::flush_dirty_extent(uint64_t extent_index)
|
||||
// const size_t dirty_extent_size = dirty_extent_end - dirty_extent;
|
||||
// bees_unreadahead(m_fd, dirty_extent_offset, dirty_extent_size);
|
||||
|
||||
// Mark extent clean if write was successful
|
||||
lock.lock();
|
||||
m_extent_metadata.at(extent_index).m_dirty = false;
|
||||
|
||||
wrote_extent = true;
|
||||
});
|
||||
|
||||
@@ -152,25 +147,28 @@ BeesHashTable::flush_dirty_extents(bool slowly)
|
||||
|
||||
uint64_t wrote_extents = 0;
|
||||
for (size_t extent_index = 0; extent_index < m_extents; ++extent_index) {
|
||||
// Skip the clean ones
|
||||
auto lock = lock_extent_by_index(extent_index);
|
||||
if (!m_extent_metadata.at(extent_index).m_dirty) {
|
||||
continue;
|
||||
}
|
||||
lock.unlock();
|
||||
|
||||
if (flush_dirty_extent(extent_index)) {
|
||||
++wrote_extents;
|
||||
if (slowly) {
|
||||
if (m_stop_requested) {
|
||||
slowly = false;
|
||||
continue;
|
||||
}
|
||||
BEESNOTE("flush rate limited after extent #" << extent_index << " of " << m_extents << " extents");
|
||||
chrono::duration<double> sleep_time(m_flush_rate_limit.sleep_time(BLOCK_SIZE_HASHTAB_EXTENT));
|
||||
unique_lock<mutex> lock(m_stop_mutex);
|
||||
if (m_stop_requested) {
|
||||
BEESLOGDEBUG("Stop requested in hash table flush_dirty_extents");
|
||||
// This function is called by another thread with !slowly,
|
||||
// so we just get out of the way here.
|
||||
break;
|
||||
}
|
||||
m_stop_condvar.wait_for(lock, sleep_time);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!slowly) {
|
||||
BEESLOGINFO("Flushed " << wrote_extents << " of " << m_extents << " extents");
|
||||
}
|
||||
BEESLOGINFO("Flushed " << wrote_extents << " of " << m_extents << " hash table extents");
|
||||
return wrote_extents;
|
||||
}
|
||||
|
||||
@@ -204,10 +202,28 @@ BeesHashTable::writeback_loop()
|
||||
m_dirty_condvar.wait(lock);
|
||||
}
|
||||
}
|
||||
|
||||
// The normal loop exits at the end of one iteration when stop requested,
|
||||
// but stop request will be in the middle of the loop, and some extents
|
||||
// will still be dirty. Run the flush loop again to get those.
|
||||
BEESNOTE("flushing hash table, round 2");
|
||||
BEESLOGDEBUG("Flushing hash table");
|
||||
flush_dirty_extents(false);
|
||||
|
||||
// If there were any Tasks still running, they may have updated
|
||||
// some hash table pages during the second flush. These updates
|
||||
// will be lost. The Tasks will be repeated on the next run because
|
||||
// they were not completed prior to the stop request, and the
|
||||
// Crawl progress was already flushed out before the Hash table
|
||||
// started writing, so nothing is really lost here.
|
||||
|
||||
catch_all([&]() {
|
||||
// trigger writeback on our way out
|
||||
BEESTOOLONG("unreadahead hash table size " << pretty(m_size));
|
||||
bees_unreadahead(m_fd, 0, m_size);
|
||||
#if 0
|
||||
// seems to trigger huge latency spikes
|
||||
BEESTOOLONG("unreadahead hash table size " <<
|
||||
pretty(m_size)); bees_unreadahead(m_fd, 0, m_size);
|
||||
#endif
|
||||
});
|
||||
BEESLOGDEBUG("Exited hash table writeback_loop");
|
||||
}
|
||||
@@ -226,6 +242,7 @@ percent(size_t num, size_t den)
|
||||
void
|
||||
BeesHashTable::prefetch_loop()
|
||||
{
|
||||
Uname uname;
|
||||
bool not_locked = true;
|
||||
while (!m_stop_requested) {
|
||||
size_t width = 64;
|
||||
@@ -319,6 +336,7 @@ BeesHashTable::prefetch_loop()
|
||||
graph_blob << "Now: " << format_time(time(NULL)) << "\n";
|
||||
graph_blob << "Uptime: " << m_ctx->total_timer().age() << " seconds\n";
|
||||
graph_blob << "Version: " << BEES_VERSION << "\n";
|
||||
graph_blob << "Kernel: " << uname.sysname << " " << uname.release << " " << uname.machine << " " << uname.version << "\n";
|
||||
|
||||
graph_blob
|
||||
<< "\nHash table page occupancy histogram (" << occupied_count << "/" << total_count << " cells occupied, " << (occupied_count * 100 / total_count) << "%)\n"
|
||||
@@ -538,6 +556,8 @@ BeesHashTable::push_front_hash_addr(HashType hash, AddrType addr)
|
||||
return found;
|
||||
}
|
||||
|
||||
thread_local uniform_int_distribution<size_t> BeesHashTable::tl_distribution(0, c_cells_per_bucket - 1);
|
||||
|
||||
/// Insert a hash entry at some unspecified point in the list.
|
||||
/// If entry is already present in list, returns true and does not
|
||||
/// modify list. If entry is not present in list, returns false and
|
||||
@@ -555,9 +575,7 @@ BeesHashTable::push_random_hash_addr(HashType hash, AddrType addr)
|
||||
Cell *ip = find(er.first, er.second, mv);
|
||||
bool found = (ip < er.second);
|
||||
|
||||
thread_local default_random_engine generator;
|
||||
thread_local uniform_int_distribution<int> distribution(0, c_cells_per_bucket - 1);
|
||||
auto pos = distribution(generator);
|
||||
const auto pos = tl_distribution(bees_generator);
|
||||
|
||||
int case_cond = 0;
|
||||
#if 0
|
||||
@@ -789,7 +807,7 @@ BeesHashTable::~BeesHashTable()
|
||||
}
|
||||
|
||||
void
|
||||
BeesHashTable::stop()
|
||||
BeesHashTable::stop_request()
|
||||
{
|
||||
BEESNOTE("stopping BeesHashTable threads");
|
||||
BEESLOGDEBUG("Stopping BeesHashTable threads");
|
||||
@@ -803,7 +821,11 @@ BeesHashTable::stop()
|
||||
unique_lock<mutex> dirty_lock(m_dirty_mutex);
|
||||
m_dirty_condvar.notify_all();
|
||||
dirty_lock.unlock();
|
||||
}
|
||||
|
||||
void
|
||||
BeesHashTable::stop_wait()
|
||||
{
|
||||
BEESNOTE("waiting for hash_prefetch thread");
|
||||
BEESLOGDEBUG("Waiting for hash_prefetch thread");
|
||||
m_prefetch_thread.join();
|
||||
@@ -812,11 +834,5 @@ BeesHashTable::stop()
|
||||
BEESLOGDEBUG("Waiting for hash_writeback thread");
|
||||
m_writeback_thread.join();
|
||||
|
||||
if (m_cell_ptr && m_size) {
|
||||
BEESLOGDEBUG("Flushing hash table");
|
||||
BEESNOTE("flushing hash table");
|
||||
flush_dirty_extents(false);
|
||||
}
|
||||
|
||||
BEESLOGDEBUG("BeesHashTable stopped");
|
||||
}
|
||||
|
@@ -385,14 +385,15 @@ BeesResolver::for_each_extent_ref(BeesBlockData bbd, function<bool(const BeesFil
|
||||
}
|
||||
|
||||
BeesFileRange
|
||||
BeesResolver::replace_dst(const BeesFileRange &dst_bfr)
|
||||
BeesResolver::replace_dst(const BeesFileRange &dst_bfr_in)
|
||||
{
|
||||
BEESTRACE("replace_dst dst_bfr " << dst_bfr);
|
||||
BEESTRACE("replace_dst dst_bfr " << dst_bfr_in);
|
||||
BEESCOUNT(replacedst_try);
|
||||
|
||||
// Open dst, reuse it for all src
|
||||
BEESNOTE("Opening dst bfr " << dst_bfr);
|
||||
BEESTRACE("Opening dst bfr " << dst_bfr);
|
||||
BEESNOTE("Opening dst bfr " << dst_bfr_in);
|
||||
BEESTRACE("Opening dst bfr " << dst_bfr_in);
|
||||
auto dst_bfr = dst_bfr_in;
|
||||
dst_bfr.fd(m_ctx);
|
||||
|
||||
BeesFileRange overlap_bfr;
|
||||
@@ -400,10 +401,11 @@ BeesResolver::replace_dst(const BeesFileRange &dst_bfr)
|
||||
|
||||
BeesBlockData bbd(dst_bfr);
|
||||
|
||||
for_each_extent_ref(bbd, [&](const BeesFileRange &src_bfr) -> bool {
|
||||
for_each_extent_ref(bbd, [&](const BeesFileRange &src_bfr_in) -> bool {
|
||||
// Open src
|
||||
BEESNOTE("Opening src bfr " << src_bfr);
|
||||
BEESTRACE("Opening src bfr " << src_bfr);
|
||||
BEESNOTE("Opening src bfr " << src_bfr_in);
|
||||
BEESTRACE("Opening src bfr " << src_bfr_in);
|
||||
auto src_bfr = src_bfr_in;
|
||||
src_bfr.fd(m_ctx);
|
||||
|
||||
if (dst_bfr.overlaps(src_bfr)) {
|
||||
@@ -418,7 +420,7 @@ BeesResolver::replace_dst(const BeesFileRange &dst_bfr)
|
||||
BEESCOUNT(replacedst_same);
|
||||
// stop looping here, all the other srcs will probably fail this test too
|
||||
BeesTracer::set_silent();
|
||||
throw runtime_error("FIXME: bailing out here, need to fix this further up the call stack");
|
||||
throw runtime_error("FIXME: too many duplicate candidates, bailing out here");
|
||||
}
|
||||
|
||||
// Make pair(src, dst)
|
||||
|
1083
src/bees-roots.cc
1083
src/bees-roots.cc
File diff suppressed because it is too large
Load Diff
@@ -111,9 +111,7 @@ void
|
||||
BeesNote::set_name(const string &name)
|
||||
{
|
||||
tl_name = name;
|
||||
catch_all([&]() {
|
||||
DIE_IF_MINUS_ERRNO(pthread_setname_np(pthread_self(), name.c_str()));
|
||||
});
|
||||
pthread_setname(name);
|
||||
}
|
||||
|
||||
string
|
||||
@@ -134,19 +132,12 @@ BeesNote::get_name()
|
||||
}
|
||||
|
||||
// OK try the pthread name next.
|
||||
char buf[24];
|
||||
memset(buf, '\0', sizeof(buf));
|
||||
int err = pthread_getname_np(pthread_self(), buf, sizeof(buf));
|
||||
if (err) {
|
||||
return string("pthread_getname_np: ") + strerror(err);
|
||||
}
|
||||
buf[sizeof(buf) - 1] = '\0';
|
||||
|
||||
// pthread_getname_np returns process name
|
||||
// ...by default? ...for the main thread?
|
||||
// ...except during exception handling?
|
||||
// ...randomly?
|
||||
return buf;
|
||||
return pthread_getname();
|
||||
}
|
||||
|
||||
BeesNote::ThreadStatusMap
|
||||
|
@@ -238,42 +238,6 @@ BeesFileRange::overlaps(const BeesFileRange &that) const
|
||||
return false;
|
||||
}
|
||||
|
||||
// Extend this range to include `that` when the two ranges overlap.
//
// Returns true when the ranges were merged, or when either side tests
// false (an undefined range): an undefined *this is overwritten with
// `that`, and an undefined `that` leaves *this unchanged.
// Returns false when the ranges belong to different files or do not
// overlap.  Ranges that merely touch (one begins exactly where the
// other ends) are not merged, because the overlap test below uses a
// strict `<` against the end of the lower range.
bool
|
||||
BeesFileRange::coalesce(const BeesFileRange &that)
|
||||
{
|
||||
// Let's define coalesce-with-null as identity,
|
||||
// and coalesce-null-with-null as coalesced
|
||||
if (!*this) {
|
||||
operator=(that);
|
||||
return true;
|
||||
}
|
||||
if (!that) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Can't coalesce different files
|
||||
if (!is_same_file(that)) return false;
|
||||
|
||||
// Work on local copies of both [begin, end) pairs so they can be swapped freely
pair<uint64_t, uint64_t> a(m_begin, m_end);
|
||||
pair<uint64_t, uint64_t> b(that.m_begin, that.m_end);
|
||||
|
||||
// range a starts lower than or equal b
|
||||
if (b.first < a.first) {
|
||||
swap(a, b);
|
||||
}
|
||||
|
||||
// if b starts within a, they overlap
|
||||
// (and the intersecting region is b.first..min(a.second, b.second))
|
||||
// (and the union region is a.first..max(a.second, b.second))
|
||||
if (b.first >= a.first && b.first < a.second) {
|
||||
// Store the union of the two ranges back into this object
m_begin = a.first;
|
||||
m_end = max(a.second, b.second);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Same file, but the ranges are disjoint (or only adjacent)
return false;
|
||||
}
|
||||
|
||||
BeesFileRange::operator BeesBlockData() const
|
||||
{
|
||||
BEESTRACE("operator BeesBlockData " << *this);
|
||||
@@ -287,7 +251,7 @@ BeesFileRange::fd() const
|
||||
}
|
||||
|
||||
Fd
|
||||
BeesFileRange::fd(const shared_ptr<BeesContext> &ctx) const
|
||||
BeesFileRange::fd(const shared_ptr<BeesContext> &ctx)
|
||||
{
|
||||
// If we don't have a fid we can't do much here
|
||||
if (m_fid) {
|
||||
|
64
src/bees.cc
64
src/bees.cc
@@ -215,38 +215,35 @@ BeesTooLong::operator=(const func_type &f)
|
||||
}
|
||||
|
||||
// fsync() the file behind `fd` and record statistics about the call.
// Each call bumps the sync_count counter and adds the timer's age,
// scaled by 1000, to sync_ms.
// NOTE(review): Timer::age() presumably returns seconds, making sync_ms
// a millisecond total -- confirm against the Timer class.
void
|
||||
bees_sync(int fd)
|
||||
{
|
||||
Timer sync_timer;
|
||||
BEESNOTE("syncing " << name_fd(fd));
|
||||
BEESTOOLONG("syncing " << name_fd(fd));
|
||||
// NOTE(review): DIE_IF_NON_ZERO presumably raises an error when fsync()
// returns non-zero -- confirm against the macro definition.
DIE_IF_NON_ZERO(fsync(fd));
|
||||
BEESCOUNT(sync_count);
|
||||
BEESCOUNTADD(sync_ms, sync_timer.age() * 1000);
|
||||
}
|
||||
|
||||
void
|
||||
bees_readahead(int const fd, off_t offset, size_t size)
|
||||
bees_readahead(int const fd, const off_t offset, const size_t size)
|
||||
{
|
||||
Timer readahead_timer;
|
||||
BEESNOTE("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
||||
BEESTOOLONG("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
||||
#if 0
|
||||
// In the kernel, readahead() is identical to posix_fadvise(..., POSIX_FADV_WILLNEED)
|
||||
DIE_IF_NON_ZERO(readahead(fd, offset, size));
|
||||
#if 0
|
||||
#else
|
||||
// Make sure this data is in page cache by brute force
|
||||
// This isn't necessary and it might even be slower
|
||||
// This isn't necessary and it might even be slower,
|
||||
// but the btrfs kernel code does readahead with lower ioprio
|
||||
// and might discard the readahead request entirely,
|
||||
// so it's maybe, *maybe*, worth doing both.
|
||||
BEESNOTE("emulating readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
||||
while (size) {
|
||||
auto working_size = size;
|
||||
auto working_offset = offset;
|
||||
while (working_size) {
|
||||
// don't care about multithreaded writes to this buffer--it is garbage anyway
|
||||
static uint8_t dummy[BEES_READAHEAD_SIZE];
|
||||
size_t this_read_size = min(size, sizeof(dummy));
|
||||
// Ignore errors and short reads.
|
||||
// It turns out our size parameter isn't all that accurate.
|
||||
(void)!pread(fd, dummy, this_read_size, offset);
|
||||
const size_t this_read_size = min(working_size, sizeof(dummy));
|
||||
// Ignore errors and short reads. It turns out our size
|
||||
// parameter isn't all that accurate, so we can't use
|
||||
// the pread_or_die template.
|
||||
(void)!pread(fd, dummy, this_read_size, working_offset);
|
||||
BEESCOUNT(readahead_count);
|
||||
BEESCOUNTADD(readahead_bytes, this_read_size);
|
||||
offset += this_read_size;
|
||||
size -= this_read_size;
|
||||
working_offset += this_read_size;
|
||||
working_size -= this_read_size;
|
||||
}
|
||||
#endif
|
||||
BEESCOUNTADD(readahead_ms, readahead_timer.age() * 1000);
|
||||
@@ -262,6 +259,13 @@ bees_unreadahead(int const fd, off_t offset, size_t size)
|
||||
BEESCOUNTADD(readahead_unread_ms, unreadahead_timer.age() * 1000);
|
||||
}
|
||||
|
||||
thread_local random_device bees_random_device;
|
||||
thread_local uniform_int_distribution<default_random_engine::result_type> bees_random_seed_dist(
|
||||
numeric_limits<default_random_engine::result_type>::min(),
|
||||
numeric_limits<default_random_engine::result_type>::max()
|
||||
);
|
||||
thread_local default_random_engine bees_generator(bees_random_seed_dist(bees_random_device));
|
||||
|
||||
BeesStringFile::BeesStringFile(Fd dir_fd, string name, size_t limit) :
|
||||
m_dir_fd(dir_fd),
|
||||
m_name(name),
|
||||
@@ -468,7 +472,6 @@ BeesTempFile::make_copy(const BeesFileRange &src)
|
||||
auto src_p = src.begin();
|
||||
auto dst_p = begin;
|
||||
|
||||
bool did_block_write = false;
|
||||
while (dst_p < end) {
|
||||
auto len = min(BLOCK_SIZE_CLONE, end - dst_p);
|
||||
BeesBlockData bbd(src.fd(), src_p, len);
|
||||
@@ -479,7 +482,6 @@ BeesTempFile::make_copy(const BeesFileRange &src)
|
||||
BEESNOTE("copying " << src << " to " << rv << "\n"
|
||||
"\tpwrite " << bbd << " to " << name_fd(m_fd) << " offset " << to_hex(dst_p) << " len " << len);
|
||||
pwrite_or_die(m_fd, bbd.data().data(), len, dst_p);
|
||||
did_block_write = true;
|
||||
BEESCOUNT(tmp_block);
|
||||
BEESCOUNTADD(tmp_bytes, len);
|
||||
}
|
||||
@@ -488,16 +490,6 @@ BeesTempFile::make_copy(const BeesFileRange &src)
|
||||
}
|
||||
BEESCOUNTADD(tmp_copy_ms, copy_timer.age() * 1000);
|
||||
|
||||
if (did_block_write) {
|
||||
#if 0
|
||||
// There were a lot of kernel bugs leading to lockups.
|
||||
// Most of them are fixed now.
|
||||
// Unnecessary sync makes us slow, but maybe it has some robustness utility.
|
||||
// TODO: make this configurable.
|
||||
bees_sync(m_fd);
|
||||
#endif
|
||||
}
|
||||
|
||||
BEESCOUNT(tmp_copy);
|
||||
return rv;
|
||||
}
|
||||
@@ -611,7 +603,7 @@ bees_main(int argc, char *argv[])
|
||||
unsigned thread_min = 0;
|
||||
double load_target = 0;
|
||||
bool workaround_btrfs_send = false;
|
||||
BeesRoots::ScanMode root_scan_mode = BeesRoots::SCAN_MODE_ZERO;
|
||||
BeesRoots::ScanMode root_scan_mode = BeesRoots::SCAN_MODE_INDEPENDENT;
|
||||
|
||||
// Configure getopt_long
|
||||
static const struct option long_options[] = {
|
||||
@@ -782,8 +774,8 @@ main(int argc, char *argv[])
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
int rv = 1;
|
||||
catch_and_explain([&]() {
|
||||
int rv = EXIT_FAILURE;
|
||||
catch_all([&]() {
|
||||
rv = bees_main(argc, argv);
|
||||
});
|
||||
BEESLOGNOTICE("Exiting with status " << rv << " " << (rv ? "(failure)" : "(success)"));
|
||||
|
148
src/bees.h
148
src/bees.h
@@ -1,6 +1,7 @@
|
||||
#ifndef BEES_H
|
||||
#define BEES_H
|
||||
|
||||
#include "crucible/btrfs-tree.h"
|
||||
#include "crucible/cache.h"
|
||||
#include "crucible/chatter.h"
|
||||
#include "crucible/error.h"
|
||||
@@ -8,20 +9,21 @@
|
||||
#include "crucible/fd.h"
|
||||
#include "crucible/fs.h"
|
||||
#include "crucible/lockset.h"
|
||||
#include "crucible/multilock.h"
|
||||
#include "crucible/pool.h"
|
||||
#include "crucible/progress.h"
|
||||
#include "crucible/time.h"
|
||||
#include "crucible/task.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <functional>
|
||||
#include <list>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <random>
|
||||
#include <thread>
|
||||
|
||||
#include <syslog.h>
|
||||
#include <endian.h>
|
||||
#include <syslog.h>
|
||||
|
||||
using namespace crucible;
|
||||
using namespace std;
|
||||
@@ -59,8 +61,9 @@ const off_t BLOCK_SIZE_HASHTAB_BUCKET = BLOCK_SIZE_MMAP;
|
||||
// Extent size for hash table (since the nocow file attribute does not seem to be working today)
|
||||
const off_t BLOCK_SIZE_HASHTAB_EXTENT = BLOCK_SIZE_MAX_COMPRESSED_EXTENT;
|
||||
|
||||
// Bytes per second we want to flush (8GB every two hours)
|
||||
const double BEES_FLUSH_RATE = 8.0 * 1024 * 1024 * 1024 / 7200.0;
|
||||
// Bytes per second we want to flush from hash table
|
||||
// Optimistic sustained write rate for SD cards
|
||||
const double BEES_FLUSH_RATE = 128 * 1024;
|
||||
|
||||
// Interval between writing crawl state to disk
|
||||
const int BEES_WRITEBACK_INTERVAL = 900;
|
||||
@@ -98,29 +101,8 @@ const size_t BEES_MAX_EXTENT_REF_COUNT = (16 * 1024 * 1024 / 24) - 1;
|
||||
// How long between hash table histograms
|
||||
const double BEES_HASH_TABLE_ANALYZE_INTERVAL = BEES_STATS_INTERVAL;
|
||||
|
||||
// Stop growing the work queue after we have this many tasks queued
|
||||
const size_t BEES_MAX_QUEUE_SIZE = 128;
|
||||
|
||||
// Read this many items at a time in SEARCHv2
|
||||
const size_t BEES_MAX_CRAWL_ITEMS = 8;
|
||||
|
||||
// Read this many bytes at a time in SEARCHv2 (one maximum-sized metadata page)
|
||||
const size_t BEES_MAX_CRAWL_BYTES = 64 * 1024;
|
||||
|
||||
// Insert this many items before switching to a new subvol
|
||||
const size_t BEES_MAX_CRAWL_BATCH = 128;
|
||||
|
||||
// Wait this many transids between crawls
|
||||
const size_t BEES_TRANSID_FACTOR = 10;
|
||||
|
||||
// Wait this long for a balance to stop
|
||||
const double BEES_BALANCE_POLL_INTERVAL = 60.0;
|
||||
|
||||
// Workaround for backref bugs
|
||||
const bool BEES_SERIALIZE_RESOLVE = false;
|
||||
|
||||
// Workaround for tree mod log bugs
|
||||
const bool BEES_SERIALIZE_BALANCE = false;
|
||||
// Wait at least this long for a new transid
|
||||
const double BEES_TRANSID_POLL_INTERVAL = 30.0;
|
||||
|
||||
// Workaround for silly dedupe / ineffective readahead behavior
|
||||
const size_t BEES_READAHEAD_SIZE = 1024 * 1024;
|
||||
@@ -269,7 +251,7 @@ ostream& operator<<(ostream &os, const BeesFileId &bfi);
|
||||
|
||||
class BeesFileRange {
|
||||
protected:
|
||||
mutable Fd m_fd;
|
||||
Fd m_fd;
|
||||
mutable BeesFileId m_fid;
|
||||
off_t m_begin = 0, m_end = 0;
|
||||
mutable off_t m_file_size = -1;
|
||||
@@ -291,35 +273,31 @@ public:
|
||||
bool is_same_file(const BeesFileRange &that) const;
|
||||
bool overlaps(const BeesFileRange &that) const;
|
||||
|
||||
// If file ranges overlap, extends this to include that.
|
||||
// Coalesce with empty bfr = non-empty bfr
|
||||
bool coalesce(const BeesFileRange &that);
|
||||
|
||||
// Remove that from this, creating 0, 1, or 2 new objects
|
||||
pair<BeesFileRange, BeesFileRange> subtract(const BeesFileRange &that) const;
|
||||
|
||||
off_t begin() const { return m_begin; }
|
||||
off_t end() const { return m_end; }
|
||||
off_t size() const;
|
||||
|
||||
// Lazy accessors
|
||||
/// @{ Lazy accessors
|
||||
off_t file_size() const;
|
||||
BeesFileId fid() const;
|
||||
/// @}
|
||||
|
||||
// Get the fd if there is one
|
||||
/// Get the fd if there is one
|
||||
Fd fd() const;
|
||||
|
||||
// Get the fd, opening it if necessary
|
||||
Fd fd(const shared_ptr<BeesContext> &ctx) const;
|
||||
/// Get the fd, opening it if necessary
|
||||
Fd fd(const shared_ptr<BeesContext> &ctx);
|
||||
|
||||
/// Copy the BeesFileId but not the Fd
|
||||
BeesFileRange copy_closed() const;
|
||||
|
||||
// Is it defined?
|
||||
/// Is it defined?
|
||||
operator bool() const { return !!m_fd || m_fid; }
|
||||
|
||||
// Make range larger
|
||||
/// @{ Make range larger
|
||||
off_t grow_end(off_t delta);
|
||||
off_t grow_begin(off_t delta);
|
||||
/// @}
|
||||
|
||||
friend ostream & operator<<(ostream &os, const BeesFileRange &bfr);
|
||||
};
|
||||
@@ -345,6 +323,7 @@ public:
|
||||
BeesAddress(Type addr = ZERO) : m_addr(addr) {}
|
||||
BeesAddress(MagicValue addr) : m_addr(addr) {}
|
||||
BeesAddress& operator=(const BeesAddress &that) = default;
|
||||
BeesAddress(const BeesAddress &that) = default;
|
||||
operator Type() const { return m_addr; }
|
||||
bool operator==(const BeesAddress &that) const;
|
||||
bool operator==(const MagicValue that) const { return *this == BeesAddress(that); }
|
||||
@@ -405,6 +384,7 @@ public:
|
||||
HashType e_hash;
|
||||
AddrType e_addr;
|
||||
Cell(const Cell &) = default;
|
||||
Cell &operator=(const Cell &) = default;
|
||||
Cell(HashType hash, AddrType addr) : e_hash(hash), e_addr(addr) { }
|
||||
bool operator==(const Cell &e) const { return tie(e_hash, e_addr) == tie(e.e_hash, e.e_addr); }
|
||||
bool operator!=(const Cell &e) const { return tie(e_hash, e_addr) != tie(e.e_hash, e.e_addr); }
|
||||
@@ -429,12 +409,14 @@ public:
|
||||
BeesHashTable(shared_ptr<BeesContext> ctx, string filename, off_t size = BLOCK_SIZE_HASHTAB_EXTENT);
|
||||
~BeesHashTable();
|
||||
|
||||
void stop();
|
||||
void stop_request();
|
||||
void stop_wait();
|
||||
|
||||
vector<Cell> find_cell(HashType hash);
|
||||
bool push_random_hash_addr(HashType hash, AddrType addr);
|
||||
void erase_hash_addr(HashType hash, AddrType addr);
|
||||
bool push_front_hash_addr(HashType hash, AddrType addr);
|
||||
bool flush_dirty_extent(uint64_t extent_index);
|
||||
|
||||
private:
|
||||
string m_filename;
|
||||
@@ -468,7 +450,7 @@ private:
|
||||
// Mutex/condvar for the writeback thread
|
||||
mutex m_dirty_mutex;
|
||||
condition_variable m_dirty_condvar;
|
||||
bool m_dirty;
|
||||
bool m_dirty = false;
|
||||
|
||||
// Mutex/condvar to stop
|
||||
mutex m_stop_mutex;
|
||||
@@ -494,7 +476,6 @@ private:
|
||||
void fetch_missing_extent_by_index(uint64_t extent_index);
|
||||
void set_extent_dirty_locked(uint64_t extent_index);
|
||||
size_t flush_dirty_extents(bool slowly);
|
||||
bool flush_dirty_extent(uint64_t extent_index);
|
||||
|
||||
size_t hash_to_extent_index(HashType ht);
|
||||
unique_lock<mutex> lock_extent_by_hash(HashType ht);
|
||||
@@ -502,6 +483,8 @@ private:
|
||||
|
||||
BeesHashTable(const BeesHashTable &) = delete;
|
||||
BeesHashTable &operator=(const BeesHashTable &) = delete;
|
||||
|
||||
static thread_local uniform_int_distribution<size_t> tl_distribution;
|
||||
};
|
||||
|
||||
ostream &operator<<(ostream &os, const BeesHashTable::Cell &bhte);
|
||||
@@ -521,43 +504,49 @@ class BeesCrawl {
|
||||
shared_ptr<BeesContext> m_ctx;
|
||||
|
||||
mutex m_mutex;
|
||||
set<BeesFileRange> m_extents;
|
||||
BtrfsTreeItem m_next_extent_data;
|
||||
bool m_deferred = false;
|
||||
bool m_finished = false;
|
||||
|
||||
mutex m_state_mutex;
|
||||
ProgressTracker<BeesCrawlState> m_state;
|
||||
|
||||
BtrfsTreeObjectFetcher m_btof;
|
||||
|
||||
bool fetch_extents();
|
||||
void fetch_extents_harder();
|
||||
bool next_transid();
|
||||
BeesFileRange bti_to_bfr(const BtrfsTreeItem &bti) const;
|
||||
|
||||
public:
|
||||
BeesCrawl(shared_ptr<BeesContext> ctx, BeesCrawlState initial_state);
|
||||
BeesFileRange peek_front();
|
||||
BeesFileRange pop_front();
|
||||
ProgressTracker<BeesCrawlState>::ProgressHolder hold_state(const BeesFileRange &bfr);
|
||||
ProgressTracker<BeesCrawlState>::ProgressHolder hold_state(const BeesCrawlState &bcs);
|
||||
BeesCrawlState get_state_begin();
|
||||
BeesCrawlState get_state_end();
|
||||
BeesCrawlState get_state_end() const;
|
||||
void set_state(const BeesCrawlState &bcs);
|
||||
void deferred(bool def_setting);
|
||||
};
|
||||
|
||||
class BeesScanMode;
|
||||
|
||||
class BeesRoots : public enable_shared_from_this<BeesRoots> {
|
||||
shared_ptr<BeesContext> m_ctx;
|
||||
|
||||
BtrfsRootFetcher m_root_fetcher;
|
||||
BeesStringFile m_crawl_state_file;
|
||||
map<uint64_t, shared_ptr<BeesCrawl>> m_root_crawl_map;
|
||||
mutex m_mutex;
|
||||
bool m_crawl_dirty = false;
|
||||
uint64_t m_crawl_dirty = 0;
|
||||
uint64_t m_crawl_clean = 0;
|
||||
Timer m_crawl_timer;
|
||||
BeesThread m_crawl_thread;
|
||||
BeesThread m_writeback_thread;
|
||||
RateEstimator m_transid_re;
|
||||
size_t m_transid_factor = BEES_TRANSID_FACTOR;
|
||||
Task m_crawl_task;
|
||||
bool m_workaround_btrfs_send = false;
|
||||
LRUCache<bool, uint64_t> m_root_ro_cache;
|
||||
|
||||
shared_ptr<BeesScanMode> m_scanner;
|
||||
|
||||
mutex m_tmpfiles_mutex;
|
||||
map<BeesFileId, Fd> m_tmpfiles;
|
||||
@@ -570,7 +559,6 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> {
|
||||
void insert_root(const BeesCrawlState &bcs);
|
||||
Fd open_root_nocache(uint64_t root);
|
||||
Fd open_root_ino_nocache(uint64_t root, uint64_t ino);
|
||||
bool is_root_ro_nocache(uint64_t root);
|
||||
uint64_t transid_min();
|
||||
uint64_t transid_max();
|
||||
uint64_t transid_max_nocache();
|
||||
@@ -586,41 +574,38 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> {
|
||||
uint64_t next_root(uint64_t root = 0);
|
||||
void current_state_set(const BeesCrawlState &bcs);
|
||||
RateEstimator& transid_re();
|
||||
size_t crawl_batch(shared_ptr<BeesCrawl> crawl);
|
||||
bool crawl_batch(shared_ptr<BeesCrawl> crawl);
|
||||
void clear_caches();
|
||||
void insert_tmpfile(Fd fd);
|
||||
void erase_tmpfile(Fd fd);
|
||||
|
||||
friend class BeesFdCache;
|
||||
friend class BeesCrawl;
|
||||
friend class BeesTempFile;
|
||||
friend class BeesFdCache;
|
||||
friend class BeesScanMode;
|
||||
|
||||
public:
|
||||
BeesRoots(shared_ptr<BeesContext> ctx);
|
||||
void start();
|
||||
void stop();
|
||||
void stop_request();
|
||||
void stop_wait();
|
||||
|
||||
void insert_tmpfile(Fd fd);
|
||||
void erase_tmpfile(Fd fd);
|
||||
|
||||
Fd open_root(uint64_t root);
|
||||
Fd open_root_ino(uint64_t root, uint64_t ino);
|
||||
Fd open_root_ino(const BeesFileId &bfi) { return open_root_ino(bfi.root(), bfi.ino()); }
|
||||
bool is_root_ro(uint64_t root);
|
||||
|
||||
// TODO: think of better names for these.
|
||||
// or TODO: do extent-tree scans instead
|
||||
// TODO: do extent-tree scans instead
|
||||
enum ScanMode {
|
||||
SCAN_MODE_ZERO,
|
||||
SCAN_MODE_ONE,
|
||||
SCAN_MODE_TWO,
|
||||
SCAN_MODE_LOCKSTEP,
|
||||
SCAN_MODE_INDEPENDENT,
|
||||
SCAN_MODE_SEQUENTIAL,
|
||||
SCAN_MODE_RECENT,
|
||||
SCAN_MODE_COUNT, // must be last
|
||||
};
|
||||
|
||||
void set_scan_mode(ScanMode new_mode);
|
||||
void set_workaround_btrfs_send(bool do_avoid);
|
||||
|
||||
private:
|
||||
ScanMode m_scan_mode = SCAN_MODE_ZERO;
|
||||
static string scan_mode_ntoa(ScanMode new_mode);
|
||||
|
||||
};
|
||||
|
||||
struct BeesHash {
|
||||
@@ -639,7 +624,7 @@ private:
|
||||
ostream & operator<<(ostream &os, const BeesHash &bh);
|
||||
|
||||
class BeesBlockData {
|
||||
using Blob = vector<uint8_t>;
|
||||
using Blob = ByteVector;
|
||||
|
||||
mutable Fd m_fd;
|
||||
off_t m_offset;
|
||||
@@ -723,13 +708,7 @@ struct BeesResolveAddrResult {
|
||||
bool is_toxic() const { return m_is_toxic; }
|
||||
};
|
||||
|
||||
// Exception type derived from `exception`.  Only what() is declared here;
// its definition is not part of this header.
// NOTE(review): presumably thrown to unwind when processing must halt --
// confirm against the throw/catch sites.
struct BeesHalt : exception {
|
||||
const char *what() const noexcept override;
|
||||
};
|
||||
|
||||
class BeesContext : public enable_shared_from_this<BeesContext> {
|
||||
shared_ptr<BeesContext> m_parent_ctx;
|
||||
|
||||
Fd m_home_fd;
|
||||
|
||||
shared_ptr<BeesFdCache> m_fd_cache;
|
||||
@@ -747,30 +726,25 @@ class BeesContext : public enable_shared_from_this<BeesContext> {
|
||||
|
||||
Timer m_total_timer;
|
||||
|
||||
LockSet<uint64_t> m_extent_lock_set;
|
||||
NamedPtr<Exclusion, uint64_t> m_extent_locks;
|
||||
NamedPtr<Exclusion, uint64_t> m_inode_locks;
|
||||
|
||||
mutable mutex m_stop_mutex;
|
||||
condition_variable m_stop_condvar;
|
||||
bool m_stop_requested = false;
|
||||
bool m_stop_status = false;
|
||||
|
||||
mutable mutex m_abort_mutex;
|
||||
condition_variable m_abort_condvar;
|
||||
bool m_abort_requested = false;
|
||||
|
||||
shared_ptr<BeesThread> m_progress_thread;
|
||||
shared_ptr<BeesThread> m_status_thread;
|
||||
|
||||
void set_root_fd(Fd fd);
|
||||
|
||||
BeesResolveAddrResult resolve_addr_uncached(BeesAddress addr);
|
||||
void wait_for_balance();
|
||||
|
||||
BeesFileRange scan_one_extent(const BeesFileRange &bfr, const Extent &e);
|
||||
void rewrite_file_range(const BeesFileRange &bfr);
|
||||
|
||||
public:
|
||||
BeesContext() = default;
|
||||
|
||||
void set_root_path(string path);
|
||||
|
||||
@@ -778,7 +752,7 @@ public:
|
||||
Fd home_fd();
|
||||
string root_path() const { return m_root_path; }
|
||||
|
||||
BeesFileRange scan_forward(const BeesFileRange &bfr);
|
||||
bool scan_forward(const BeesFileRange &bfr);
|
||||
|
||||
bool is_root_ro(uint64_t root);
|
||||
BeesRangePair dup_extent(const BeesFileRange &src, const shared_ptr<BeesTempFile> &tmpfile);
|
||||
@@ -788,8 +762,11 @@ public:
|
||||
void blacklist_erase(const BeesFileId &fid);
|
||||
bool is_blacklisted(const BeesFileId &fid) const;
|
||||
|
||||
shared_ptr<Exclusion> get_inode_mutex(uint64_t inode);
|
||||
|
||||
BeesResolveAddrResult resolve_addr(BeesAddress addr);
|
||||
void invalidate_addr(BeesAddress addr);
|
||||
void resolve_cache_clear();
|
||||
|
||||
void dump_status();
|
||||
void show_progress();
|
||||
@@ -804,7 +781,6 @@ public:
|
||||
shared_ptr<BeesTempFile> tmpfile();
|
||||
|
||||
const Timer &total_timer() const { return m_total_timer; }
|
||||
LockSet<uint64_t> &extent_lock_set() { return m_extent_lock_set; }
|
||||
};
|
||||
|
||||
class BeesResolver {
|
||||
@@ -812,7 +788,7 @@ class BeesResolver {
|
||||
BeesAddress m_addr;
|
||||
vector<BtrfsInodeOffsetRoot> m_biors;
|
||||
set<BeesFileRange> m_ranges;
|
||||
unsigned m_bior_count;
|
||||
size_t m_bior_count;
|
||||
|
||||
// We found matching data, so we can dedupe
|
||||
bool m_found_data = false;
|
||||
@@ -887,8 +863,8 @@ public:
|
||||
extern int bees_log_level;
|
||||
extern const char *BEES_USAGE;
|
||||
extern const char *BEES_VERSION;
|
||||
extern thread_local default_random_engine bees_generator;
|
||||
string pretty(double d);
|
||||
void bees_sync(int fd);
|
||||
void bees_readahead(int fd, off_t offset, size_t size);
|
||||
void bees_unreadahead(int fd, off_t offset, size_t size);
|
||||
string format_time(time_t t);
|
||||
|
@@ -1,53 +0,0 @@
|
||||
#include "crucible/fd.h"
|
||||
#include "crucible/fs.h"
|
||||
#include "crucible/error.h"
|
||||
#include "crucible/string.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
using namespace crucible;
|
||||
using namespace std;
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
catch_all([&]() {
|
||||
THROW_CHECK1(invalid_argument, argc, argc > 1);
|
||||
string filename = argv[1];
|
||||
|
||||
|
||||
cout << "File: " << filename << endl;
|
||||
Fd fd = open_or_die(filename, O_RDONLY);
|
||||
Fiemap fm;
|
||||
fm.fm_flags &= ~(FIEMAP_FLAG_SYNC);
|
||||
fm.m_max_count = 100;
|
||||
if (argc > 2) { fm.fm_start = stoull(argv[2], nullptr, 0); }
|
||||
if (argc > 3) { fm.fm_length = stoull(argv[3], nullptr, 0); }
|
||||
if (argc > 4) { fm.fm_flags = stoull(argv[4], nullptr, 0); }
|
||||
fm.fm_length = min(fm.fm_length, FIEMAP_MAX_OFFSET - fm.fm_start);
|
||||
uint64_t stop_at = fm.fm_start + fm.fm_length;
|
||||
uint64_t last_byte = fm.fm_start;
|
||||
do {
|
||||
fm.do_ioctl(fd);
|
||||
// cerr << fm;
|
||||
uint64_t last_logical = FIEMAP_MAX_OFFSET;
|
||||
for (auto &extent : fm.m_extents) {
|
||||
if (extent.fe_logical > last_byte) {
|
||||
cout << "Log " << to_hex(last_byte) << ".." << to_hex(extent.fe_logical) << " Hole" << endl;
|
||||
}
|
||||
cout << "Log " << to_hex(extent.fe_logical) << ".." << to_hex(extent.fe_logical + extent.fe_length)
|
||||
<< " Phy " << to_hex(extent.fe_physical) << ".." << to_hex(extent.fe_physical + extent.fe_length)
|
||||
<< " Flags " << fiemap_extent_flags_ntoa(extent.fe_flags) << endl;
|
||||
last_logical = extent.fe_logical + extent.fe_length;
|
||||
last_byte = last_logical;
|
||||
}
|
||||
fm.fm_start = last_logical;
|
||||
} while (fm.fm_start < stop_at);
|
||||
});
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
@@ -1,40 +0,0 @@
|
||||
#include "crucible/extentwalker.h"
|
||||
#include "crucible/error.h"
|
||||
#include "crucible/string.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
using namespace crucible;
|
||||
using namespace std;
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
catch_all([&]() {
|
||||
THROW_CHECK1(invalid_argument, argc, argc > 1);
|
||||
string filename = argv[1];
|
||||
|
||||
cout << "File: " << filename << endl;
|
||||
Fd fd = open_or_die(filename, O_RDONLY);
|
||||
BtrfsExtentWalker ew(fd);
|
||||
off_t pos = 0;
|
||||
if (argc > 2) { pos = stoull(argv[2], nullptr, 0); }
|
||||
ew.seek(pos);
|
||||
do {
|
||||
// cout << "\n\n>>>" << ew.current() << "<<<\n\n" << endl;
|
||||
cout << ew.current() << endl;
|
||||
} while (ew.next());
|
||||
#if 0
|
||||
cout << "\n\n\nAnd now, backwards...\n\n\n" << endl;
|
||||
do {
|
||||
cout << "\n\n>>>" << ew.current() << "<<<\n\n" << endl;
|
||||
} while (ew.prev());
|
||||
cout << "\n\n\nDone!\n\n\n" << endl;
|
||||
#endif
|
||||
});
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
@@ -7,6 +7,7 @@ PROGRAMS = \
|
||||
path \
|
||||
process \
|
||||
progress \
|
||||
seeker \
|
||||
task \
|
||||
|
||||
all: test
|
||||
@@ -20,17 +21,10 @@ include ../makeflags
|
||||
LIBS = -lcrucible -lpthread
|
||||
BEES_LDFLAGS = -L../lib $(LDFLAGS)
|
||||
|
||||
.depends:
|
||||
mkdir -p $@
|
||||
|
||||
.depends/%.dep: %.cc tests.h Makefile | .depends
|
||||
%.dep: %.cc tests.h Makefile
|
||||
$(CXX) $(BEES_CXXFLAGS) -M -MF $@ -MT $(<:.cc=.o) $<
|
||||
|
||||
depends.mk: $(PROGRAMS:%=.depends/%.dep)
|
||||
cat $^ > $@.new
|
||||
mv -f $@.new $@
|
||||
|
||||
include depends.mk
|
||||
include $(PROGRAMS:%=%.dep)
|
||||
|
||||
$(PROGRAMS:%=%.o): %.o: %.cc ../makeflags Makefile
|
||||
$(CXX) $(BEES_CXXFLAGS) -o $@ -c $<
|
||||
|
101
test/seeker.cc
Normal file
101
test/seeker.cc
Normal file
@@ -0,0 +1,101 @@
|
||||
#include "tests.h"
|
||||
|
||||
#include "crucible/seeker.h"
|
||||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
using namespace crucible;
|
||||
|
||||
// Fetch callback used by the seeker tests.
//
// Returns the elements of `vec` (deduplicated and sorted) starting at the
// first element >= lower.  The first two such elements are always included,
// even when they are greater than `upper`; after those, elements are
// included only while they are <= upper.
static
set<uint64_t>
seeker_finder(const vector<uint64_t> &vec, uint64_t lower, uint64_t upper)
{
	const set<uint64_t> sorted(vec.begin(), vec.end());
	const auto first = sorted.lower_bound(lower);
	auto last = first;
	// Unconditionally take up to two items, ignoring the upper bound
	for (int taken = 0; taken < 2 && last != sorted.end(); ++taken) {
		++last;
	}
	// Then extend the range for as long as items stay within the upper bound
	while (last != sorted.end() && *last <= upper) {
		++last;
	}
	return set<uint64_t>(first, last);
}
|
||||
|
||||
static bool test_fails = false;
|
||||
|
||||
static
|
||||
void
|
||||
seeker_test(const vector<uint64_t> &vec, size_t target)
|
||||
{
|
||||
cerr << "Find " << target << " in {";
|
||||
for (auto i : vec) {
|
||||
cerr << " " << i;
|
||||
}
|
||||
cerr << " } = ";
|
||||
size_t loops = 0;
|
||||
bool excepted = catch_all([&]() {
|
||||
auto found = seek_backward(target, [&](uint64_t lower, uint64_t upper) {
|
||||
++loops;
|
||||
return seeker_finder(vec, lower, upper);
|
||||
});
|
||||
cerr << found;
|
||||
size_t my_found = 0;
|
||||
for (auto i : vec) {
|
||||
if (i <= target) {
|
||||
my_found = i;
|
||||
}
|
||||
}
|
||||
if (found == my_found) {
|
||||
cerr << " (correct)";
|
||||
} else {
|
||||
cerr << " (INCORRECT - right answer is " << my_found << ")";
|
||||
test_fails = true;
|
||||
}
|
||||
});
|
||||
cerr << " (" << loops << " loops)" << endl;
|
||||
if (excepted) {
|
||||
test_fails = true;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void
|
||||
test_seeker()
|
||||
{
|
||||
seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 3);
|
||||
seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 5);
|
||||
seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 0);
|
||||
seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 1);
|
||||
seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 4);
|
||||
seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 2);
|
||||
|
||||
seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 2);
|
||||
seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 25);
|
||||
seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 52);
|
||||
seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 99);
|
||||
seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55, 56 }, 99);
|
||||
seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 1);
|
||||
seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 55);
|
||||
seeker_test(vector<uint64_t> { 11 }, 55);
|
||||
seeker_test(vector<uint64_t> { 11 }, 10);
|
||||
seeker_test(vector<uint64_t> { 55 }, 55);
|
||||
seeker_test(vector<uint64_t> { }, 55);
|
||||
seeker_test(vector<uint64_t> { 55 }, numeric_limits<uint64_t>::max());
|
||||
seeker_test(vector<uint64_t> { 55 }, numeric_limits<uint64_t>::max() - 1);
|
||||
seeker_test(vector<uint64_t> { }, numeric_limits<uint64_t>::max());
|
||||
seeker_test(vector<uint64_t> { 0, numeric_limits<uint64_t>::max() }, numeric_limits<uint64_t>::max());
|
||||
seeker_test(vector<uint64_t> { 0, numeric_limits<uint64_t>::max() }, numeric_limits<uint64_t>::max() - 1);
|
||||
seeker_test(vector<uint64_t> { 0, numeric_limits<uint64_t>::max() - 1 }, numeric_limits<uint64_t>::max());
|
||||
}
|
||||
|
||||
|
||||
int main(int, const char **)
|
||||
{
|
||||
|
||||
RUN_A_TEST(test_seeker());
|
||||
|
||||
return test_fails ? EXIT_FAILURE : EXIT_SUCCESS;
|
||||
}
|
57
test/task.cc
57
test/task.cc
@@ -90,47 +90,51 @@ test_barrier(size_t count)
|
||||
|
||||
mutex mtx;
|
||||
condition_variable cv;
|
||||
bool done_flag = false;
|
||||
|
||||
unique_lock<mutex> lock(mtx);
|
||||
|
||||
auto b = make_shared<Barrier>();
|
||||
Barrier b;
|
||||
|
||||
// Run several tasks in parallel
|
||||
for (size_t c = 0; c < count; ++c) {
|
||||
auto bl = b->lock();
|
||||
ostringstream oss;
|
||||
oss << "task #" << c;
|
||||
auto b_hold = b;
|
||||
Task t(
|
||||
oss.str(),
|
||||
[c, &task_done, &mtx, bl]() mutable {
|
||||
// cerr << "Task #" << c << endl;
|
||||
[c, &task_done, &mtx, b_hold]() mutable {
|
||||
// ostringstream oss;
|
||||
// oss << "Task #" << c << endl;
|
||||
unique_lock<mutex> lock(mtx);
|
||||
// cerr << oss.str();
|
||||
task_done.at(c) = true;
|
||||
bl.release();
|
||||
b_hold.release();
|
||||
}
|
||||
);
|
||||
t.run();
|
||||
}
|
||||
|
||||
// Need completed to go out of local scope so it will release b
|
||||
{
|
||||
Task completed(
|
||||
"Waiting for Barrier",
|
||||
[&mtx, &cv, &done_flag]() {
|
||||
unique_lock<mutex> lock(mtx);
|
||||
// cerr << "Running cv notify" << endl;
|
||||
done_flag = true;
|
||||
cv.notify_all();
|
||||
}
|
||||
);
|
||||
b.insert_task(completed);
|
||||
}
|
||||
|
||||
// Get current status
|
||||
ostringstream oss;
|
||||
TaskMaster::print_queue(oss);
|
||||
TaskMaster::print_workers(oss);
|
||||
// TaskMaster::print_queue(cerr);
|
||||
// TaskMaster::print_workers(cerr);
|
||||
|
||||
bool done_flag = false;
|
||||
|
||||
Task completed(
|
||||
"Waiting for Barrier",
|
||||
[&mtx, &cv, &done_flag]() {
|
||||
unique_lock<mutex> lock(mtx);
|
||||
// cerr << "Running cv notify" << endl;
|
||||
done_flag = true;
|
||||
cv.notify_all();
|
||||
}
|
||||
);
|
||||
b->insert_task(completed);
|
||||
|
||||
b.reset();
|
||||
// Release our b
|
||||
b.release();
|
||||
|
||||
while (true) {
|
||||
size_t tasks_done = 0;
|
||||
@@ -139,7 +143,7 @@ test_barrier(size_t count)
|
||||
++tasks_done;
|
||||
}
|
||||
}
|
||||
// cerr << "Tasks done: " << tasks_done << " done_flag " << done_flag << endl;
|
||||
cerr << "Tasks done: " << tasks_done << " done_flag " << done_flag << endl;
|
||||
if (tasks_done == count && done_flag) {
|
||||
break;
|
||||
}
|
||||
@@ -153,7 +157,7 @@ void
|
||||
test_exclusion(size_t count)
|
||||
{
|
||||
mutex only_one;
|
||||
auto excl = make_shared<Exclusion>("test_excl");
|
||||
auto excl = make_shared<Exclusion>();
|
||||
|
||||
mutex mtx;
|
||||
condition_variable cv;
|
||||
@@ -174,9 +178,8 @@ test_exclusion(size_t count)
|
||||
[c, &only_one, excl, &lock_success_count, &lock_failure_count, &pings, &tasks_running, &cv, &mtx]() mutable {
|
||||
// cerr << "Task #" << c << endl;
|
||||
(void)c;
|
||||
auto lock = excl->try_lock();
|
||||
auto lock = excl->try_lock(Task::current_task());
|
||||
if (!lock) {
|
||||
excl->insert_task(Task::current_task());
|
||||
++lock_failure_count;
|
||||
return;
|
||||
}
|
||||
@@ -196,7 +199,7 @@ test_exclusion(size_t count)
|
||||
t.run();
|
||||
}
|
||||
|
||||
// excl.reset();
|
||||
excl.reset();
|
||||
|
||||
unique_lock<mutex> lock(mtx);
|
||||
while (tasks_running) {
|
||||
|
Reference in New Issue
Block a user