
docs: finally concede that the consensus spelling is "dedupe"

Change documentation and comments to use the word "dedupe," not "dedup"
as found in circa-3.15 kernel sources.

No changes in code or program output: identifiers and messages that were
spelled "dedup" before are still spelled "dedup" now.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
Zygo Blaxell 2021-04-28 22:14:56 -04:00
parent 06a46e2736
commit 0bbaddd54c
9 changed files with 19 additions and 19 deletions


@@ -140,7 +140,7 @@ The `crawl` event group consists of operations related to scanning btrfs trees t
dedup
-----
-The `dedup` event group consists of operations that deduplicate data.
+The `dedup` (sic) event group consists of operations that deduplicate data.
* `dedup_bytes`: Total bytes in extent references deduplicated.
* `dedup_copy`: Total bytes copied to eliminate unique data in extents containing a mix of unique and duplicate data.


@@ -162,7 +162,7 @@ enum btrfs_compression_type {
__u64 bytes_deduped; /* out - total # of bytes we were able
* to dedupe from this file */
/* status of this dedupe operation:
-* 0 if dedup succeeds
+* 0 if dedupe succeeds
* < 0 for error
* == BTRFS_SAME_DATA_DIFFERS if data differs
*/
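The fields above are from the circa-3.15 BTRFS_IOC_FILE_EXTENT_SAME interface; since Linux 4.5 the same operation is available filesystem-independently as the FIDEDUPERANGE ioctl. A minimal sketch of a single-destination request (placeholder paths, offsets, and length; not bees code):

// Minimal sketch (not bees code): one-destination dedupe request via
// FIDEDUPERANGE, the VFS successor to BTRFS_IOC_FILE_EXTENT_SAME.
#include <linux/fs.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>
#include <cstdio>
#include <cstdlib>

int main()
{
    int src = open("/path/to/src", O_RDONLY);
    int dst = open("/path/to/dst", O_RDWR);
    if (src < 0 || dst < 0) { perror("open"); return 1; }

    // file_dedupe_range ends in a flexible array of per-destination
    // info structs; allocate room for exactly one destination.
    size_t sz = sizeof(file_dedupe_range) + sizeof(file_dedupe_range_info);
    file_dedupe_range *arg = static_cast<file_dedupe_range *>(calloc(1, sz));
    arg->src_offset = 0;
    arg->src_length = 128 * 1024;   // bytes to dedupe, clone-aligned
    arg->dest_count = 1;
    arg->info[0].dest_fd = dst;
    arg->info[0].dest_offset = 0;

    if (ioctl(src, FIDEDUPERANGE, arg) < 0) { perror("FIDEDUPERANGE"); return 1; }

    // status is FILE_DEDUPE_RANGE_SAME (0) on success and
    // FILE_DEDUPE_RANGE_DIFFERS if the ranges did not match.
    if (arg->info[0].status == FILE_DEDUPE_RANGE_SAME)
        printf("deduped %llu bytes\n", (unsigned long long)arg->info[0].bytes_deduped);
    else
        printf("status %d: data differs or error\n", (int)arg->info[0].status);

    free(arg);
    close(src);
    close(dst);
    return 0;
}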


@@ -23,7 +23,7 @@ UUID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
# Hash table entries are 16 bytes each
# (64-bit hash, 52-bit block number, and some metadata bits)
# Each entry represents a minimum of 4K on disk.
-# unique data size hash table size average dedup block size
+# unique data size hash table size average dedupe block size
# 1TB 4GB 4K
# 1TB 1GB 16K
# 1TB 256MB 64K
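The rows follow directly from the 16-byte entry size; a quick sketch of the arithmetic, assuming nothing beyond the numbers in the comment above:

// Sketch of the sizing rule above: each 16-byte hash table entry covers
// one dedupe block, so the average dedupe block size is
// unique_data / (table_size / 16).
#include <cstdio>

int main()
{
    const double KiB = 1024.0, MiB = KiB * KiB, GiB = MiB * KiB, TiB = GiB * KiB;
    const double unique_data = 1 * TiB;
    const double table_sizes[] = { 4 * GiB, 1 * GiB, 256 * MiB };
    for (double table : table_sizes) {
        double entries = table / 16;              // 16 bytes per entry
        double avg_block = unique_data / entries; // bytes covered per entry
        printf("table %4.0f MiB -> avg dedupe block %2.0f KiB\n",
               table / MiB, avg_block / KiB);
    }
    // Prints 4 KiB, 16 KiB, and 64 KiB: the three rows of the table above.
    return 0;
}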


@@ -328,7 +328,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
// Apparently they can both extend past EOF
BeesFileRange copy_bfr(bfr.fd(), e.begin(), e.begin() + extent_size);
BeesRangePair brp(prealloc_bfr, copy_bfr);
-// Raw dedup here - nothing else to do with this extent, nothing to merge with
+// Raw dedupe here - nothing else to do with this extent, nothing to merge with
if (m_ctx->dedup(brp)) {
BEESCOUNT(dedup_prealloc_hit);
BEESCOUNTADD(dedup_prealloc_bytes, e.size());
@@ -338,7 +338,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
}
}
).run();
-return bfr; // if dedup success, which we now blindly assume
+return bfr; // if dedupe success, which we now blindly assume
}
// OK we need to read extent now
@@ -596,7 +596,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
// If the extent contains obscured blocks, and we can find no
// other refs to the extent that reveal those blocks, nuke the incoming extent.
// Don't rewrite extents that are bigger than the maximum FILE_EXTENT_SAME size
-// because we can't make extents that large with dedup.
+// because we can't make extents that large with dedupe.
// Don't rewrite small extents because it is a waste of time without being
// able to combine them into bigger extents.
if (!rewrite_extent && (e.flags() & Extent::OBSCURED) && (e.physical_len() > BLOCK_SIZE_MAX_COMPRESSED_EXTENT) && (e.physical_len() < BLOCK_SIZE_MAX_EXTENT_SAME)) {


@@ -438,7 +438,7 @@ BeesResolver::replace_dst(const BeesFileRange &dst_bfr)
BEESCOUNT(replacedst_dedup_hit);
m_found_dup = true;
overlap_bfr = brp.second;
-// FIXME: find best range first, then dedup that
+// FIXME: find best range first, then dedupe that
return true; // i.e. break
} else {
BEESCOUNT(replacedst_dedup_miss);


@@ -820,7 +820,7 @@ BeesRoots::open_root_ino_nocache(uint64_t root, uint64_t ino)
for (auto file_path : ipa.m_paths) {
BEESTRACE("Looking up root " << root << " ino " << ino << " in dir " << name_fd(root_fd) << " path " << file_path);
BEESCOUNT(open_file);
-// Just open file RO. root can do the dedup ioctl without
+// Just open file RO. root can do the dedupe ioctl without
// opening in write mode, and if we do open in write mode,
// we can't exec the file while we have it open.
const char *fp_cstr = file_path.c_str();
@@ -864,19 +864,19 @@ BeesRoots::open_root_ino_nocache(uint64_t root, uint64_t ino)
break;
}
-// The kernel rejects dedup requests with
+// The kernel rejects dedupe requests with
// src and dst that have different datasum flags
// (datasum is a flag in the inode).
//
// We can detect the common case where a file is
// marked with nodatacow (which implies nodatasum).
-// nodatacow files are arguably out of scope for dedup,
-// since dedup would just make them datacow again.
+// nodatacow files are arguably out of scope for dedupe,
+// since dedupe would just make them datacow again.
// To handle these we pretend we couldn't open them.
//
// A less common case is nodatasum + datacow files.
-// Those are available for dedup but we have to solve
-// some other problems before we can dedup them. They
+// Those are available for dedupe but we have to solve
+// some other problems before we can dedupe them. They
// require a separate hash table namespace from datasum
// + datacow files, and we have to create nodatasum
// temporary files when we rewrite extents.
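For illustration, the common nodatacow case described here can be detected from userspace with the inode-flags ioctl; a hedged sketch, not bees' actual check:

// Sketch (not bees' actual code): detect the common nodatacow case the
// comment above describes by reading the inode attribute flags.
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <fcntl.h>
#include <unistd.h>
#include <cstdio>

static bool is_nodatacow(int fd)
{
    int attr = 0;
    if (ioctl(fd, FS_IOC_GETFLAGS, &attr) < 0) return false;
    return attr & FS_NOCOW_FL;  // nodatacow implies nodatasum on btrfs
}

int main(int argc, char **argv)
{
    if (argc < 2) { fprintf(stderr, "usage: %s FILE\n", argv[0]); return 1; }
    int fd = open(argv[1], O_RDONLY);
    if (fd < 0) { perror("open"); return 1; }
    printf("%s: %s\n", argv[1], is_nodatacow(fd) ? "nodatacow (skip)" : "datacow");
    close(fd);
    return 0;
}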
@@ -992,7 +992,7 @@ BeesCrawl::fetch_extents()
// Check for btrfs send workaround: don't scan RO roots at all, pretend
// they are just empty. We can't free any space there, and we
// don't have the necessary analysis logic to be able to use
-// them as dedup src extents (yet).
+// them as dedupe src extents (yet).
//
// This will keep the max_transid up to date so if the root
// is ever switched back to read-write, it won't trigger big


@@ -960,7 +960,7 @@ BeesHash
BeesBlockData::hash() const
{
if (!m_hash_done) {
-// We can only dedup unaligned EOF blocks against other unaligned EOF blocks,
+// We can only dedupe unaligned EOF blocks against other unaligned EOF blocks,
// so we do NOT round up to a full sum block size.
const Blob &blob = data();
m_hash = BeesHash(blob.data(), blob.size());
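To make the rule concrete: a small sketch that hashes only the bytes present in a file's unaligned tail block, with std::hash standing in for bees' real block hash:

// Sketch: hash only the bytes actually present in an unaligned EOF
// block, without rounding up to a full 4K sum block.
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <functional>
#include <string>
#include <cstdio>

int main(int argc, char **argv)
{
    if (argc < 2) return 1;
    const off_t BLOCK_SIZE_SUMS = 4096;
    int fd = open(argv[1], O_RDONLY);
    struct stat st;
    if (fd < 0 || fstat(fd, &st) < 0) { perror(argv[1]); return 1; }

    off_t tail_start = (st.st_size / BLOCK_SIZE_SUMS) * BLOCK_SIZE_SUMS;
    size_t tail_len = st.st_size - tail_start;  // NOT rounded up to 4K
    std::string buf(tail_len, '\0');
    if (tail_len && pread(fd, &buf[0], tail_len, tail_start) != (ssize_t)tail_len) {
        perror("pread");
        return 1;
    }

    size_t h = std::hash<std::string>{}(buf);   // placeholder hash
    printf("tail block at %lld: %zu bytes, hash %zx\n",
           (long long)tail_start, tail_len, h);
    close(fd);
    return 0;
}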


@@ -426,7 +426,7 @@ BeesStringFile::write(string contents)
write_or_die(ofd, contents);
#if 0
// This triggers too many btrfs bugs. I wish I was kidding.
-// Forget snapshots, balance, compression, and dedup:
+// Forget snapshots, balance, compression, and dedupe:
// the system call you have to fear on btrfs is fsync().
// Also note that when bees renames a temporary over an
// existing file, it flushes the temporary, so we get
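The surrounding code implements the classic write-temporary, fsync, rename-over pattern; a standalone sketch (helper and file names are illustrative, not bees'):

// Standalone sketch of the flush-then-rename pattern described above:
// write a temporary, fsync it, then rename it over the target.
#include <fcntl.h>
#include <unistd.h>
#include <cstdio>
#include <string>

static bool write_file_atomically(const std::string &path, const std::string &contents)
{
    const std::string tmp = path + ".tmp";
    int fd = open(tmp.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd < 0) return false;
    if (write(fd, contents.data(), contents.size()) != (ssize_t)contents.size()) {
        close(fd);
        return false;
    }
    // The fsync() the comment warns about: flushing the temporary is
    // what makes the rename-over safe against crashes.
    if (fsync(fd) < 0) { close(fd); return false; }
    close(fd);
    return rename(tmp.c_str(), path.c_str()) == 0;
}

int main()
{
    return write_file_atomically("example.txt", "hello\n") ? 0 : 1;
}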


@@ -29,7 +29,7 @@ using namespace std;
// Block size for clone alignment (FIXME: should read this from /sys/fs/btrfs/<FS-UUID>/clone_alignment)
const off_t BLOCK_SIZE_CLONE = 4096;
-// Block size for dedup checksums (arbitrary, but must be a multiple of clone alignment)
+// Block size for dedupe checksums (arbitrary, but must be a multiple of clone alignment)
const off_t BLOCK_SIZE_SUMS = 4096;
// Block size for memory allocations and file mappings (FIXME: should be CPU page size)
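A sketch of what that FIXME suggests: read the alignment from sysfs and fall back to 4096 when the attribute is unreadable (the UUID in the path is a placeholder):

// Sketch of what the FIXME above suggests: read clone alignment from
// sysfs instead of hard-coding 4096.
#include <fstream>
#include <iostream>

int main()
{
    const char *path =
        "/sys/fs/btrfs/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/clone_alignment";
    long long clone_alignment = 4096;  // default if sysfs is unreadable
    std::ifstream f(path);
    if (!(f >> clone_alignment)) {
        clone_alignment = 4096;        // stream failure zeroes the value
        std::cerr << "could not read " << path << ", using 4096\n";
    }
    std::cout << "clone alignment: " << clone_alignment << "\n";
    return 0;
}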
@@ -805,10 +805,10 @@ class BeesResolver {
set<BeesFileRange> m_ranges;
unsigned m_bior_count;
-// We found matching data, so we can dedup
+// We found matching data, so we can dedupe
bool m_found_data = false;
-// We found matching data, so we *did* dedup
+// We found matching data, so we *did* dedupe
bool m_found_dup = false;
// We found matching hash, so the hash table is still correct