From f9a697518d0f87fc69431cfc5720051378d10e1b Mon Sep 17 00:00:00 2001
From: Zygo Blaxell
Date: Mon, 27 Jan 2025 19:54:07 -0500
Subject: [PATCH] btrfs-tree: introduce BtrfsDataExtentTreeFetcher to read data extents without metadata

Binary searches can be extremely slow if the target bytenr is near a
metadata block group, because metadata items are not visible to the
binary search algorithm.  In a non-mixed-bg filesystem, there can be
hundreds of thousands of metadata items between data extent items, and
since the binary search algorithm can't see them, it will run searches
that iterate over hundreds of thousands of objects about a dozen times.

This is less of a problem for mixed-bg filesystems because the data and
metadata blocks are not isolated from each other.  The binary search
algorithm still can't see the metadata items, but there are usually
some data items close by to prevent the linear item filter from running
too long.

Introduce a new fetcher class (all the good names were taken) that
tracks where the end of the current block group is.  When the end of
the current block group is reached in the linear search, skip ahead to
a block group that can contain data items.

Signed-off-by: Zygo Blaxell
---
 include/crucible/btrfs-tree.h | 13 ++++++++
 lib/btrfs-tree.cc             | 61 +++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+)

diff --git a/include/crucible/btrfs-tree.h b/include/crucible/btrfs-tree.h
index 8599ae6..b4d14a1 100644
--- a/include/crucible/btrfs-tree.h
+++ b/include/crucible/btrfs-tree.h
@@ -203,6 +203,19 @@ namespace crucible {
 		BtrfsTreeItem root(uint64_t subvol);
 	};
 
+	/// Fetch data extent items from extent tree, skipping metadata-only block groups
+	class BtrfsDataExtentTreeFetcher : public BtrfsExtentItemFetcher {
+		/// Chunk item most recently looked up for the search position (empty if none)
+		BtrfsTreeItem m_current_bg;
+		/// Fetcher over the chunk tree, used to find the chunk containing a bytenr
+		BtrfsTreeOffsetFetcher m_chunk_tree;
+	protected:
+		/// Advance the search key, moving it past chunks that are not data block groups
+		virtual void next_sk(BtrfsIoctlSearchKey &key, const BtrfsIoctlSearchHeader &hdr) override;
+	public:
+		BtrfsDataExtentTreeFetcher(const Fd &fd);
+	};
+
 }
 
 #endif
diff --git a/lib/btrfs-tree.cc b/lib/btrfs-tree.cc
index 545d454..ed91f0a 100644
--- a/lib/btrfs-tree.cc
+++ b/lib/btrfs-tree.cc
@@ -707,4 +707,65 @@ namespace crucible {
 		}
 		return item;
 	}
+
+	/// Set up the extent tree search for EXTENT_ITEM keys, plus a second
+	/// fetcher over CHUNK_ITEM keys in the chunk tree for block group lookups.
+	BtrfsDataExtentTreeFetcher::BtrfsDataExtentTreeFetcher(const Fd &fd) :
+		BtrfsExtentItemFetcher(fd),
+		m_chunk_tree(fd)
+	{
+		tree(BTRFS_EXTENT_TREE_OBJECTID);
+		type(BTRFS_EXTENT_ITEM_KEY);
+		m_chunk_tree.tree(BTRFS_CHUNK_TREE_OBJECTID);
+		m_chunk_tree.type(BTRFS_CHUNK_ITEM_KEY);
+		m_chunk_tree.objectid(BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+	}
+
+	/// Compute the next search key after the item described by hdr.
+	/// The minimum objectid advances by scale_size(), saturating at the
+	/// uint64_t maximum.  If that lands outside the cached block group,
+	/// the chunk tree is consulted and the key is moved past any chunks
+	/// whose type does not include BTRFS_BLOCK_GROUP_DATA.
+	void
+	BtrfsDataExtentTreeFetcher::next_sk(BtrfsIoctlSearchKey &key, const BtrfsIoctlSearchHeader &hdr)
+	{
+		key.min_type = key.max_type = type();
+		key.max_objectid = key.max_offset = numeric_limits<uint64_t>::max();
+		key.min_offset = 0;
+		key.min_objectid = hdr.objectid;
+		const auto step = scale_size();
+		if (key.min_objectid < numeric_limits<uint64_t>::max() - step) {
+			key.min_objectid += step;
+		} else {
+			key.min_objectid = numeric_limits<uint64_t>::max();
+		}
+		// If we're still in our current block group, check here
+		if (!!m_current_bg) {
+			const auto bg_begin = m_current_bg.offset();
+			const auto bg_end = bg_begin + m_current_bg.chunk_length();
+			// If we are still in our current block group, return early
+			if (key.min_objectid >= bg_begin && key.min_objectid < bg_end) return;
+		}
+		// We don't have a current block group or we're out of range
+		// Find the chunk that this bytenr belongs to
+		m_current_bg = m_chunk_tree.rlower_bound(key.min_objectid);
+		// Make sure it's a data block group
+		while (!!m_current_bg) {
+			// Data block group, stop here
+			if (m_current_bg.chunk_type() & BTRFS_BLOCK_GROUP_DATA) break;
+			// Not a data block group, skip to end
+			key.min_objectid = m_current_bg.offset() + m_current_bg.chunk_length();
+			m_current_bg = m_chunk_tree.lower_bound(key.min_objectid);
+		}
+		if (!m_current_bg) {
+			// Ran out of data block groups, stop here
+			return;
+		}
+		// Check to see if bytenr is in the current data block group
+		const auto bg_begin = m_current_bg.offset();
+		if (key.min_objectid < bg_begin) {
+			// Move forward to start of data block group
+			key.min_objectid = bg_begin;
+		}
+	}
 }