From 7f660f50b892880f87b6e436be390f2f4dde74c1 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Mon, 22 Feb 2021 03:07:37 -0500 Subject: [PATCH] lib: fs: stop using libbtrfs-dev helper functions to re-enable buffer length checks The Linux kernel's btrfs headers are better than the libbtrfs-dev headers: - the libbtrfs-dev headers have C++ language compatibility issues - upstream version in Linux kernel is more accurate and up to date - macros in libbtrfs-dev's ctree.h hide information that would enable bees to perform runtime buffer length checking - the libbtrfs-dev headers contain enum types whose presence cannot be detected with #ifdef When accessing members of metadata items from the filesystem, we want to verify that the member we are accessing is within the boundaries of the item that was retrieved; otherwise, a memory access violation may occur or garbage may be returned to the caller. A simple C++ template, given a pointer to a structure member and a buffer, can determine that the buffer contains enough bytes to safely access a struct member. This was implemented back in 2016, but left unused due to ctree.h issues. Some btrfs metadata structures have variable length despite using a fixed-size in-memory structure. The members that appear earliest in the structure contain information about which following members of the structure are used. The item stored in the filesystem is truncated after the last used member, and all following members must not be accessed. 'btrfs_stack_*' accessor macros obscure the memory boundaries of the members they access, which makes it impossible for a C++ template to verify the memory access. If the template checks the length of the entire structure, it will find an access violation for variable-length metadata items because the item is rarely large enough for the entire structure. Get rid of all the libbtrfs-dev accessor macros and reimplement them with the necessary buffer length checks. 
Signed-off-by: Zygo Blaxell --- include/crucible/btrfs.h | 26 ++++++++++-------- include/crucible/endian.h | 58 +++++++++++++++++++++++++++++++++++++++ include/crucible/fs.h | 41 ++++++--------------------- lib/extentwalker.cc | 14 +++++----- lib/fs.cc | 2 +- src/bees-roots.cc | 16 +++++------ 6 files changed, 96 insertions(+), 61 deletions(-) create mode 100644 include/crucible/endian.h diff --git a/include/crucible/btrfs.h b/include/crucible/btrfs.h index 568ead4..63c67d6 100644 --- a/include/crucible/btrfs.h +++ b/include/crucible/btrfs.h @@ -13,20 +13,22 @@ // __u64 typedef and friends #include -// try Linux headers first -#include +// the btrfs headers +#include +#include -// Supply any missing definitions -#define mutex not_mutex -#include -// Repair the damage -#undef crc32c -#undef min -#undef max -#undef mutex -#undef swap +// And now all the things that have been missing in some version of +// the headers. -#ifndef BTRFS_FIRST_FREE_OBJECTID +enum btrfs_compression_type { + BTRFS_COMPRESS_NONE, + BTRFS_COMPRESS_ZLIB, + BTRFS_COMPRESS_LZO, + BTRFS_COMPRESS_ZSTD, +}; + +// BTRFS_CSUM_ITEM_KEY is not defined in include/uapi +#ifndef BTRFS_CSUM_ITEM_KEY #define BTRFS_ROOT_TREE_OBJECTID 1ULL #define BTRFS_EXTENT_TREE_OBJECTID 2ULL diff --git a/include/crucible/endian.h b/include/crucible/endian.h new file mode 100644 index 0000000..e4ca7eb --- /dev/null +++ b/include/crucible/endian.h @@ -0,0 +1,58 @@ +#ifndef CRUCIBLE_ENDIAN_H +#define CRUCIBLE_ENDIAN_H + +#include + +#include + +namespace crucible { + + template + struct le_to_cpu_helper { + T operator()(const T v); + }; + + template<> struct le_to_cpu_helper { + uint64_t operator()(const uint64_t v) { return le64toh(v); } + }; + +#if __SIZEOF_LONG__ == 8 + // uint64_t is unsigned long on LP64 platforms + template<> struct le_to_cpu_helper { + unsigned long long operator()(const unsigned long long v) { return le64toh(v); } + }; +#endif + + template<> struct le_to_cpu_helper { + uint32_t operator()(const 
uint32_t v) { return le32toh(v); } + }; + + template<> struct le_to_cpu_helper { + uint16_t operator()(const uint16_t v) { return le16toh(v); } + }; + + template<> struct le_to_cpu_helper { + uint8_t operator()(const uint8_t v) { return v; } + }; + + template + T + le_to_cpu(const T v) + { + return le_to_cpu_helper()(v); + } + + template + T + get_unaligned(const void *const p) + { + struct not_aligned { + T v; + } __attribute__((packed)); + const not_aligned *const nap = reinterpret_cast(p); + return nap->v; + } + +} + +#endif // CRUCIBLE_ENDIAN_H diff --git a/include/crucible/fs.h b/include/crucible/fs.h index 2e5890f..4ce5b94 100644 --- a/include/crucible/fs.h +++ b/include/crucible/fs.h @@ -1,6 +1,7 @@ #ifndef CRUCIBLE_FS_H #define CRUCIBLE_FS_H +#include "crucible/endian.h" #include "crucible/error.h" #include "crucible/spanner.h" @@ -206,47 +207,21 @@ namespace crucible { const T* get_struct_ptr(const V &v, size_t offset = 0) { - // OK so sometimes btrfs overshoots a little - // if (offset + sizeof(T) > v.size()) { - // v.resize(offset + sizeof(T), 0); - // } - // THROW_CHECK2(invalid_argument, v.size(), offset + sizeof(T), offset + sizeof(T) <= v.size()); - return reinterpret_cast(v.data() + offset); + THROW_CHECK2(out_of_range, v.size(), offset + sizeof(T), offset + sizeof(T) <= v.size()); + const uint8_t *const data_ptr = v.data(); + return reinterpret_cast(data_ptr + offset); } - template - R - call_btrfs_get(R (*func)(const A*), const V &v, size_t offset = 0) - { - return func(get_struct_ptr(v, offset)); - } - - template struct btrfs_get_le; - - template<> struct btrfs_get_le<__le64> { - uint64_t operator()(const void *p) { return get_unaligned_le64(p); } - }; - - template<> struct btrfs_get_le<__le32> { - uint32_t operator()(const void *p) { return get_unaligned_le32(p); } - }; - - template<> struct btrfs_get_le<__le16> { - uint16_t operator()(const void *p) { return get_unaligned_le16(p); } - }; - - template<> struct btrfs_get_le<__le8> { - uint8_t 
operator()(const void *p) { return get_unaligned_le8(p); } - }; - template T btrfs_get_member(T S::* member, V &v, size_t offset = 0) { const S *const sp = nullptr; const T *const spm = &(sp->*member); - auto member_offset = reinterpret_cast(spm) - reinterpret_cast(sp); - return btrfs_get_le()(get_struct_ptr(v, offset + member_offset)); + const auto member_offset = reinterpret_cast(spm) - reinterpret_cast(sp); + const void *struct_ptr = get_struct_ptr(v, offset + member_offset); + const T unaligned_t = get_unaligned(struct_ptr); + return le_to_cpu(unaligned_t); } struct Statvfs : public statvfs { diff --git a/lib/extentwalker.cc b/lib/extentwalker.cc index 1f6c232..df76f7c 100644 --- a/lib/extentwalker.cc +++ b/lib/extentwalker.cc @@ -512,20 +512,20 @@ namespace crucible { Extent e; e.m_begin = i.offset; - auto compressed = call_btrfs_get(btrfs_stack_file_extent_compression, i.m_data); + auto compressed = btrfs_get_member(&btrfs_file_extent_item::compression, i.m_data); // FIEMAP told us about compressed extents and we can too if (compressed) { e.m_flags |= FIEMAP_EXTENT_ENCODED; } - auto type = call_btrfs_get(btrfs_stack_file_extent_type, i.m_data); + auto type = btrfs_get_member(&btrfs_file_extent_item::type, i.m_data); off_t len = -1; switch (type) { default: cerr << "Unhandled file extent type " << type << " in root " << m_tree_id << " ino " << m_stat.st_ino << endl; break; case BTRFS_FILE_EXTENT_INLINE: - len = ranged_cast(call_btrfs_get(btrfs_stack_file_extent_ram_bytes, i.m_data)); + len = ranged_cast(btrfs_get_member(&btrfs_file_extent_item::ram_bytes, i.m_data)); e.m_flags |= FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED; // Inline extents are never obscured, so don't bother filling in m_physical_len, etc. 
break; @@ -533,17 +533,17 @@ namespace crucible { e.m_flags |= Extent::PREALLOC; // fallthrough case BTRFS_FILE_EXTENT_REG: { - e.m_physical = call_btrfs_get(btrfs_stack_file_extent_disk_bytenr, i.m_data); + e.m_physical = btrfs_get_member(&btrfs_file_extent_item::disk_bytenr, i.m_data); // This is the length of the full extent (decompressed) - off_t ram = ranged_cast(call_btrfs_get(btrfs_stack_file_extent_ram_bytes, i.m_data)); + off_t ram = ranged_cast(btrfs_get_member(&btrfs_file_extent_item::ram_bytes, i.m_data)); // This is the length of the part of the extent appearing in the file (decompressed) - len = ranged_cast(call_btrfs_get(btrfs_stack_file_extent_num_bytes, i.m_data)); + len = ranged_cast(btrfs_get_member(&btrfs_file_extent_item::num_bytes, i.m_data)); // This is the offset from start of on-disk extent to the part we see in the file (decompressed) // May be negative due to the kind of bug we're stuck with forever, so no cast range check - off_t offset = call_btrfs_get(btrfs_stack_file_extent_offset, i.m_data); + off_t offset = btrfs_get_member(&btrfs_file_extent_item::offset, i.m_data); // If there is a physical address there must be size too if (e.m_physical) { diff --git a/lib/fs.cc b/lib/fs.cc index 284ad83..4345025 100644 --- a/lib/fs.cc +++ b/lib/fs.cc @@ -1018,7 +1018,7 @@ namespace crucible { } if (i.objectid == root_id && i.type == BTRFS_ROOT_ITEM_KEY) { - rv = max(rv, uint64_t(call_btrfs_get(btrfs_root_generation, i.m_data))); + rv = max(rv, uint64_t(btrfs_get_member(&btrfs_root_item::generation, i.m_data))); } } if (sk.min_offset < numeric_limits::max()) { diff --git a/src/bees-roots.cc b/src/bees-roots.cc index 8f48301..05a812e 100644 --- a/src/bees-roots.cc +++ b/src/bees-roots.cc @@ -649,8 +649,8 @@ BeesRoots::open_root_nocache(uint64_t rootid) for (auto i : sk.m_result) { sk.next_min(i); if (i.type == BTRFS_ROOT_BACKREF_KEY && i.objectid == rootid) { - auto dirid = call_btrfs_get(btrfs_stack_root_ref_dirid, i.m_data); - auto name_len = 
call_btrfs_get(btrfs_stack_root_ref_name_len, i.m_data); + auto dirid = btrfs_get_member(&btrfs_root_ref::dirid, i.m_data); + auto name_len = btrfs_get_member(&btrfs_root_ref::name_len, i.m_data); auto name_start = sizeof(struct btrfs_root_ref); auto name_end = name_len + name_start; THROW_CHECK2(runtime_error, i.m_data.size(), name_end, i.m_data.size() >= name_end); @@ -1100,7 +1100,7 @@ BeesCrawl::fetch_extents() continue; } - auto gen = call_btrfs_get(btrfs_stack_file_extent_generation, i.m_data); + auto gen = btrfs_get_member(&btrfs_file_extent_item::generation, i.m_data); if (gen < get_state_end().m_min_transid) { BEESCOUNT(crawl_gen_low); ++count_low; @@ -1126,7 +1126,7 @@ BeesCrawl::fetch_extents() continue; } - auto type = call_btrfs_get(btrfs_stack_file_extent_type, i.m_data); + auto type = btrfs_get_member(&btrfs_file_extent_item::type, i.m_data); switch (type) { default: BEESLOGDEBUG("Unhandled file extent type " << type << " in root " << get_state_end().m_root << " ino " << i.objectid << " offset " << to_hex(i.offset)); @@ -1144,10 +1144,10 @@ BeesCrawl::fetch_extents() BEESCOUNT(crawl_prealloc); // fallthrough case BTRFS_FILE_EXTENT_REG: { - auto physical = call_btrfs_get(btrfs_stack_file_extent_disk_bytenr, i.m_data); - auto ram = call_btrfs_get(btrfs_stack_file_extent_ram_bytes, i.m_data); - auto len = call_btrfs_get(btrfs_stack_file_extent_num_bytes, i.m_data); - auto offset = call_btrfs_get(btrfs_stack_file_extent_offset, i.m_data); + auto physical = btrfs_get_member(&btrfs_file_extent_item::disk_bytenr, i.m_data); + auto ram = btrfs_get_member(&btrfs_file_extent_item::ram_bytes, i.m_data); + auto len = btrfs_get_member(&btrfs_file_extent_item::num_bytes, i.m_data); + auto offset = btrfs_get_member(&btrfs_file_extent_item::offset, i.m_data); BEESTRACE("Root " << get_state_end().m_root << " ino " << i.objectid << " physical " << to_hex(physical) << " logical " << to_hex(i.offset) << ".." << to_hex(i.offset + len) << " gen " << gen);