1
0
mirror of https://github.com/Zygo/bees.git synced 2025-05-17 13:25:45 +02:00

bees: remove local cruft, throw at github

This commit is contained in:
Zygo Blaxell 2016-11-15 23:32:44 -05:00
commit cca0ee26a8
66 changed files with 12785 additions and 0 deletions

11
.gitignore vendored Normal file
View File

@ -0,0 +1,11 @@
*.[ao]
*.bak
*.new
*.so*
Doxyfile
depends.mk
doxygen_*
html/
latex/
make.log
make.log.new

15
Makefile Normal file
View File

@ -0,0 +1,15 @@
# Top-level build: delegates to the Makefiles in lib/, src/ and test/.
default install all: lib src test

# Delete every untracked and ignored file and directory in the work tree.
clean:
	git clean -dfx

# None of these targets names a file produced by its rule, so declare all of
# them phony; otherwise a stray file called e.g. "clean" would disable the rule.
.PHONY: default install all clean lib src test

lib:
	$(MAKE) -C lib

# lib must be built before src.
src: lib
	$(MAKE) -C src

# The tests need both lib and src to be built first.
test: lib src
	$(MAKE) -C test

1
bin/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*

View File

@ -0,0 +1,29 @@
#ifndef CRUCIBLE_BACKTRACE_H
#define CRUCIBLE_BACKTRACE_H
#include <string>
#include <vector>
#include <execinfo.h>
namespace crucible {
using namespace std;
// Holds a stack trace and exposes it either as raw addresses or as strings.
// NOTE(review): all member functions are defined outside this header; given the
// <execinfo.h> include this presumably wraps backtrace()/backtrace_symbols() — confirm.
class Backtrace {
// Raw frame addresses (returned by voids()).
vector<void *> m_buffer;
// Declared mutable, so const member functions may modify these two members.
mutable vector<string> m_result_stringvec;
// NOTE(review): raw pointer member with a user-declared destructor but implicit
// copy construction/assignment — confirm that copying cannot double-free.
mutable char **m_result_cpp;
int m_result_size;
int m_desired_size;
public:
// size: presumably the maximum number of frames to record — TODO confirm.
Backtrace(int size = 99);
~Backtrace();
// String form of the trace.
const vector<string> &strings() const;
// Raw address form of the trace.
const vector<void *> &voids() const;
// Takes a file descriptor; presumably writes the symbolic trace to it — confirm.
void symbols_fd(int fd) const;
// NOTE(review): presumably true when the trace filled the requested size — confirm.
bool overflowed() const;
};
}
#endif // CRUCIBLE_BACKTRACE_H

View File

@ -0,0 +1,76 @@
#ifndef CRUCIBLE_BENCODE_H
#define CRUCIBLE_BENCODE_H
#include "crucible/error.h"
#include <cctype>
#include <fstream>
#include <map>
#include <memory>
#include <iostream>
#include <string>
#include <vector>
// Bencode value model: an abstract variant base class, one concrete struct per
// bencode type (int, string, list, dict), and a stream decoder entry point.
// All member functions are declared here and defined elsewhere.
namespace crucible {
using namespace std;
// So...much...forward declaration...
struct bencode_variant;
// Values are handled through shared pointers to the abstract base.
typedef shared_ptr<bencode_variant> bencode_variant_ptr;
// Abstract base of all bencode values. print() is pure virtual; the at()
// overloads and the string conversion are virtual with base implementations
// that derived types override only where they apply.
struct bencode_variant {
virtual ~bencode_variant();
virtual ostream& print(ostream &os, const string &parent = "") const = 0;
virtual bencode_variant_ptr at(size_t i) const;
virtual bencode_variant_ptr at(const string &s) const;
virtual operator string() const;
};
// Stream insertion for a (shared pointer to a) bencode value.
ostream& operator<<(ostream &os, const bencode_variant_ptr &p);
// i<base-10-ascii>e
struct bencode_int : public bencode_variant {
~bencode_int();
bencode_int(int64_t i);
ostream & print(ostream &os, const string &parent = "") const override;
private:
int64_t m_i;
};
// <length>:contents
struct bencode_string : public bencode_variant {
~bencode_string();
bencode_string(string s);
ostream & print(ostream &os, const string &parent = "") const override;
operator string() const override;
private:
string m_s;
};
// l<contents>e
struct bencode_list : public bencode_variant {
~bencode_list();
bencode_list(const vector<bencode_variant_ptr> &l);
ostream & print(ostream &os, const string &parent = "") const override;
// Keep the base class at(const string &) overload visible next to the override below.
using bencode_variant::at;
bencode_variant_ptr at(size_t i) const override;
private:
vector<bencode_variant_ptr> m_l;
};
// d<contents>e (lexicographically sorted pairs of <key><value>, key is a string)
struct bencode_dict : public bencode_variant {
~bencode_dict();
bencode_dict(const map<string, bencode_variant_ptr> &m);
ostream& print(ostream &os, const string &parent = "") const override;
// Keep the base class at(size_t) overload visible next to the override below.
using bencode_variant::at;
bencode_variant_ptr at(const string &key) const override;
private:
map<string, bencode_variant_ptr> m_m;
};
// Decode one bencode value from the stream.
bencode_variant_ptr bencode_decode_stream(istream &is);
// NOTE(review): the ';' after this closing brace is an empty declaration; it is
// harmless but unnecessary after a namespace.
};
#endif

13
include/crucible/bool.h Normal file
View File

@ -0,0 +1,13 @@
#ifndef CRUCIBLE_BOOL_H
#define CRUCIBLE_BOOL_H
namespace crucible {
// A bool wrapper that is never left uninitialized: it holds false unless a
// value is given. Converts implicitly to bool and can be assigned from bool.
struct DefaultBool {
	bool m_b;

	DefaultBool(bool init = false) :
		m_b(init)
	{
	}

	operator bool() const
	{
		return m_b;
	}

	// Stores the new value and returns a reference to the stored bool.
	bool &operator=(const bool &that)
	{
		m_b = that;
		return m_b;
	}
};
}
#endif // CRUCIBLE_BOOL_H

205
include/crucible/btrfs.h Normal file
View File

@ -0,0 +1,205 @@
#ifndef CRUCIBLE_BTRFS_H
#define CRUCIBLE_BTRFS_H
// Copied from Linux kernel sources as of 3.15 or so.
// These are probably missing from /usr/include at the moment.
// NULL
#include <cstdio>
// _IOWR macro and friends
#include <asm-generic/ioctl.h>
// __u64 typedef and friends
#include <linux/types.h>
// try Linux headers first
#include <btrfs/ioctl.h>
// Supply any missing definitions
#define mutex not_mutex
#include <btrfs/ctree.h>
// Repair the damage
#undef min
#undef max
#undef mutex
// Fallback definitions, compiled only when the headers included above did not
// define BTRFS_FIRST_FREE_OBJECTID.
#ifndef BTRFS_FIRST_FREE_OBJECTID
// Object IDs. The negative literals are unsigned long long, so e.g. -4ULL
// is the value 2^64 - 4.
#define BTRFS_ROOT_TREE_OBJECTID 1ULL
#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
#define BTRFS_CHUNK_TREE_OBJECTID 3ULL
#define BTRFS_DEV_TREE_OBJECTID 4ULL
#define BTRFS_FS_TREE_OBJECTID 5ULL
#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
#define BTRFS_CSUM_TREE_OBJECTID 7ULL
#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
#define BTRFS_UUID_TREE_OBJECTID 9ULL
#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
#define BTRFS_BALANCE_OBJECTID -4ULL
#define BTRFS_ORPHAN_OBJECTID -5ULL
#define BTRFS_TREE_LOG_OBJECTID -6ULL
#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL
#define BTRFS_TREE_RELOC_OBJECTID -8ULL
#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
#define BTRFS_FREE_SPACE_OBJECTID -11ULL
#define BTRFS_FREE_INO_OBJECTID -12ULL
#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
#define BTRFS_FIRST_FREE_OBJECTID 256ULL
#define BTRFS_LAST_FREE_OBJECTID -256ULL
#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
// Item key type codes.
#define BTRFS_INODE_ITEM_KEY 1
#define BTRFS_INODE_REF_KEY 12
#define BTRFS_INODE_EXTREF_KEY 13
#define BTRFS_XATTR_ITEM_KEY 24
#define BTRFS_ORPHAN_ITEM_KEY 48
#define BTRFS_DIR_LOG_ITEM_KEY 60
#define BTRFS_DIR_LOG_INDEX_KEY 72
#define BTRFS_DIR_ITEM_KEY 84
#define BTRFS_DIR_INDEX_KEY 96
#define BTRFS_EXTENT_DATA_KEY 108
#define BTRFS_CSUM_ITEM_KEY 120
#define BTRFS_EXTENT_CSUM_KEY 128
#define BTRFS_ROOT_ITEM_KEY 132
#define BTRFS_ROOT_BACKREF_KEY 144
#define BTRFS_ROOT_REF_KEY 156
#define BTRFS_EXTENT_ITEM_KEY 168
#define BTRFS_METADATA_ITEM_KEY 169
#define BTRFS_TREE_BLOCK_REF_KEY 176
#define BTRFS_EXTENT_DATA_REF_KEY 178
#define BTRFS_EXTENT_REF_V0_KEY 180
#define BTRFS_SHARED_BLOCK_REF_KEY 182
#define BTRFS_SHARED_DATA_REF_KEY 184
#define BTRFS_BLOCK_GROUP_ITEM_KEY 192
#define BTRFS_FREE_SPACE_INFO_KEY 198
#define BTRFS_FREE_SPACE_EXTENT_KEY 199
#define BTRFS_FREE_SPACE_BITMAP_KEY 200
#define BTRFS_DEV_EXTENT_KEY 204
#define BTRFS_DEV_ITEM_KEY 216
#define BTRFS_CHUNK_ITEM_KEY 228
// Note: 248 is listed here ahead of the 240..246 qgroup keys; the list is not
// strictly in numeric order.
#define BTRFS_BALANCE_ITEM_KEY 248
#define BTRFS_QGROUP_STATUS_KEY 240
#define BTRFS_QGROUP_INFO_KEY 242
#define BTRFS_QGROUP_LIMIT_KEY 244
#define BTRFS_QGROUP_RELATION_KEY 246
#define BTRFS_DEV_STATS_KEY 249
#define BTRFS_DEV_REPLACE_KEY 250
#define BTRFS_UUID_KEY_SUBVOL 251
#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252
#define BTRFS_STRING_ITEM_KEY 253
#endif
// Fallback definition of the defrag-range ioctl argument struct, compiled only
// when BTRFS_DEFRAG_RANGE_START_IO is not already defined.
#ifndef BTRFS_DEFRAG_RANGE_START_IO
// For some reason uapi has BTRFS_DEFRAG_RANGE_COMPRESS and
// BTRFS_DEFRAG_RANGE_START_IO but not btrfs_ioctl_defrag_range_args
// Never mind, it's too broken to be useful anyway
// NOTE(review): because the guard tests the flag macro, this struct is skipped
// exactly in the situation the comment above describes — confirm that is intended.
struct btrfs_ioctl_defrag_range_args {
/* start of the defrag operation */
__u64 start;
/* number of bytes to defrag, use (u64)-1 to say all */
__u64 len;
/*
* flags for the operation, which can include turning
* on compression for this one defrag
*/
__u64 flags;
/*
* any extent bigger than this will be considered
* already defragged. Use 0 to take the kernel default
* Use 1 to say every single extent must be rewritten
*/
__u32 extent_thresh;
/*
* which compression method to use if turning on compression
* for this defrag operation. If unspecified, zlib will
* be used
*/
__u32 compress_type;
/* spare for later */
__u32 unused[4];
};
#endif
// Fallback definitions for the clone ioctls, compiled only when
// BTRFS_IOC_CLONE_RANGE is not already defined.
#ifndef BTRFS_IOC_CLONE_RANGE
// Argument struct for BTRFS_IOC_CLONE_RANGE: a source fd, a source
// offset/length pair, and a destination offset.
struct btrfs_ioctl_clone_range_args {
__s64 src_fd;
__u64 src_offset, src_length;
__u64 dest_offset;
};
// We definitely have this
// NOTE(review): BTRFS_IOCTL_MAGIC is defined here without its own #ifndef;
// the later fallback ioctl definitions in this header also use it.
#define BTRFS_IOCTL_MAGIC 0x94
#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int)
#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \
struct btrfs_ioctl_clone_range_args)
#endif
// Fallback definitions for the extent-same (dedupe) ioctl, compiled only when
// BTRFS_SAME_DATA_DIFFERS is not already defined.
#ifndef BTRFS_SAME_DATA_DIFFERS
#define BTRFS_SAME_DATA_DIFFERS 1
/* For extent-same ioctl */
// One entry per destination file; see the in/out markers on each field.
struct btrfs_ioctl_same_extent_info {
__s64 fd; /* in - destination file */
__u64 logical_offset; /* in - start of extent in destination */
__u64 bytes_deduped; /* out - total # of bytes we were able
* to dedupe from this file */
/* status of this dedupe operation:
* 0 if dedup succeeds
* < 0 for error
* == BTRFS_SAME_DATA_DIFFERS if data differs
*/
__s32 status; /* out - see above description */
__u32 reserved;
};
// Request header; info[] is a zero-length trailing array, so the dest_count
// entries are expected to follow this struct directly in memory.
struct btrfs_ioctl_same_args {
__u64 logical_offset; /* in - start of extent in source */
__u64 length; /* in - length of extent */
__u16 dest_count; /* in - total elements in info array */
__u16 reserved1;
__u32 reserved2;
struct btrfs_ioctl_same_extent_info info[0];
};
#define BTRFS_IOC_FILE_EXTENT_SAME _IOWR(BTRFS_IOCTL_MAGIC, 54, \
struct btrfs_ioctl_same_args)
#endif
// 16 MiB fallback value, used when the macro is not already defined.
#ifndef BTRFS_MAX_DEDUPE_LEN
#define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024)
#endif
// Fallback definition of the TREE_SEARCH_V2 ioctl, compiled only when
// BTRFS_IOC_TREE_SEARCH_V2 is not already defined.
#ifndef BTRFS_IOC_TREE_SEARCH_V2
/*
* Extended version of TREE_SEARCH ioctl that can return more than 4k of bytes.
* The allocated size of the buffer is set in buf_size.
*/
// buf[] is a zero-length trailing array: the result buffer is expected to
// follow this struct directly in memory.
struct btrfs_ioctl_search_args_v2 {
struct btrfs_ioctl_search_key key; /* in/out - search parameters */
__u64 buf_size; /* in - size of buffer
* out - on EOVERFLOW: needed size
* to store item */
__u64 buf[0]; /* out - found items */
};
#define BTRFS_IOC_TREE_SEARCH_V2 _IOWR(BTRFS_IOCTL_MAGIC, 17, \
struct btrfs_ioctl_search_args_v2)
#endif
#endif // CRUCIBLE_BTRFS_H

221
include/crucible/cache.h Normal file
View File

@ -0,0 +1,221 @@
#ifndef CRUCIBLE_CACHE_H
#define CRUCIBLE_CACHE_H
#include "crucible/lockset.h"
#include <algorithm>
#include <functional>
#include <map>
#include <mutex>
#include <tuple>
namespace crucible {
using namespace std;
// Memoizing cache in front of a function Return(Arguments...).
// Results are stored in a map keyed by the argument tuple together with a
// counter-based timestamp; when the map grows beyond the configured maximum,
// check_overflow() drops the entries with the oldest timestamps.
template <class Return, class... Arguments>
class LRUCache {
public:
using Key = tuple<Arguments...>;
using Func = function<Return(Arguments...)>;
// Logical clock value: taken from m_ctr, not wall-clock time.
using Time = unsigned;
// (last-use timestamp, cached result)
using Value = pair<Time, Return>;
private:
// Function invoked on a cache miss.
Func m_fn;
// Next timestamp to hand out.
Time m_ctr;
map<Key, Value> m_map;
// Per-key lock set, used while a missing key is being filled in.
LockSet<Key> m_lockset;
size_t m_max_size;
// Protects m_fn, m_ctr, m_map and m_max_size.
mutex m_mutex;
// Does not lock m_mutex itself; every caller in this header holds it.
void check_overflow();
public:
LRUCache(Func f = Func(), size_t max_size = 100);
// Replace the function called on a miss.
void func(Func f);
// Change the size limit, evicting immediately if the cache is already over it.
void max_size(size_t new_max_size);
// Look up args, calling the function and caching its result on a miss.
Return operator()(Arguments... args);
// expire(args) followed by operator()(args).
Return refresh(Arguments... args);
// Drop the entry for args, if any.
void expire(Arguments... args);
// Drop entries selected by the predicate, which receives the cached value.
void prune(function<bool(const Return &)> predicate);
// Store r for args if no entry exists; an existing entry keeps its value.
void insert(const Return &r, Arguments... args);
// Drop every entry.
void clear();
};
// Build an empty cache around fn holding at most limit entries before
// eviction starts; the timestamp counter starts at zero.
template <class Return, class... Arguments>
LRUCache<Return, Arguments...>::LRUCache(Func fn, size_t limit) :
	m_fn(fn),
	m_ctr(0),
	m_max_size(limit)
{
}
// Evict the least recently used half of the cache once it holds more than
// m_max_size entries. Does nothing while the cache is within its limit.
// Does not take m_mutex: the caller must already hold it.
template <class Return, class... Arguments>
void
LRUCache<Return, Arguments...>::check_overflow()
{
	if (m_map.size() <= m_max_size) return;

	// Collect (key, timestamp) pairs. Iterate by const reference so that the
	// cached Return values are not copied just to read their timestamps.
	vector<pair<Key, Time>> map_contents;
	map_contents.reserve(m_map.size());
	for (const auto &i : m_map) {
		map_contents.emplace_back(i.first, i.second.first);
	}

	// Oldest timestamps first
	sort(map_contents.begin(), map_contents.end(), [](const pair<Key, Time> &a, const pair<Key, Time> &b) {
		return a.second < b.second;
	});

	// Drop the older half
	for (size_t i = 0; i < map_contents.size() / 2; ++i) {
		m_map.erase(map_contents[i].first);
	}
}
// Set a new size limit and, under the same lock, evict entries if the cache
// already exceeds it.
template <class Return, class... Arguments>
void
LRUCache<Return, Arguments...>::max_size(size_t new_max_size)
{
	lock_guard<mutex> guard(m_mutex);
	m_max_size = new_max_size;
	check_overflow();
}
// Replace the function that is called on a cache miss. Existing cached
// entries are left untouched.
template <class Return, class... Arguments>
void
LRUCache<Return, Arguments...>::func(Func new_fn)
{
	lock_guard<mutex> guard(m_mutex);
	m_fn = new_fn;
}
// Remove every cached entry while holding the cache lock.
template <class Return, class... Arguments>
void
LRUCache<Return, Arguments...>::clear()
{
	lock_guard<mutex> guard(m_mutex);
	m_map.clear();
}
// Remove every entry whose cached value satisfies pred.
// pred is called with the cache lock held, once per entry, in key order.
template <class Return, class... Arguments>
void
LRUCache<Return, Arguments...>::prune(function<bool(const Return &)> pred)
{
	unique_lock<mutex> lock(m_mutex);
	for (auto it = m_map.begin(); it != m_map.end(); ) {
		if (pred(it->second.second)) {
			// map::erase returns the iterator following the erased element,
			// so the walk continues without touching an invalidated iterator.
			it = m_map.erase(it);
		} else {
			++it;
		}
	}
}
// Return the cached result for args, computing and caching it on a miss.
//
// On a miss the cache lock is dropped while m_fn runs, so other keys stay
// usable; a per-key lock prevents two threads from computing the same key.
// Throws runtime_error (via THROW_CHECK0) if an internal invariant fails, and
// propagates whatever m_fn throws.
template<class Return, class... Arguments>
Return
LRUCache<Return, Arguments...>::operator()(Arguments... args)
{
	Key k(args...);

	// Do we have it cached?
	unique_lock<mutex> lock(m_mutex);
	auto found = m_map.find(k);
	if (found == m_map.end()) {
		// No, release cache lock and acquire key lock
		lock.unlock();
		typename LockSet<Key>::Lock key_lock(m_lockset, k);

		// Did item appear in cache while we were waiting for key?
		lock.lock();
		found = m_map.find(k);
		if (found == m_map.end()) {
			// No, we hold key and cache locks, but item not in cache.
			// Release cache lock and call function
			auto ctr_copy = m_ctr++;
			lock.unlock();
			Value v(ctr_copy, m_fn(args...));

			// Reacquire cache lock and insert return value
			lock.lock();
			bool inserted = false;
			tie(found, inserted) = m_map.insert(make_pair(k, v));

			// We hold a lock on this key so we are the ones to insert it
			THROW_CHECK0(runtime_error, inserted);

			// Release key lock and clean out overflow
			key_lock.unlock();
			check_overflow();

			// The timestamp was taken before m_fn ran, so entries touched in
			// the meantime are newer and check_overflow() may have evicted the
			// entry just inserted. Return the local copy rather than reading
			// through 'found', which may no longer be valid.
			return v.second;
		}
	}

	// Item was already in the cache
	THROW_CHECK0(runtime_error, found != m_map.end());

	// We are using this object so update the timestamp
	found->second.first = m_ctr++;
	return found->second.second;
}
// Forget the cached result for args. Does nothing if there is no such entry.
template<class Return, class... Arguments>
void
LRUCache<Return, Arguments...>::expire(Arguments... args)
{
	Key doomed(args...);
	lock_guard<mutex> guard(m_mutex);
	m_map.erase(doomed);
}
// Discard any cached result for args, then look args up again. The two steps
// lock the cache separately.
template<class Return, class... Arguments>
Return
LRUCache<Return, Arguments...>::refresh(Arguments... args)
{
	expire(args...);
	return (*this)(args...);
}
// Store r as the cached result for args without calling the cached function.
// If an entry for args already exists its value is NOT replaced; only its
// timestamp is refreshed.
// Throws runtime_error (via THROW_CHECK0) if an internal invariant fails.
template<class Return, class... Arguments>
void
LRUCache<Return, Arguments...>::insert(const Return &r, Arguments... args)
{
Key k(args...);
bool inserted = false;
// Do we have it cached?
unique_lock<mutex> lock(m_mutex);
auto found = m_map.find(k);
if (found == m_map.end()) {
// No, release cache lock and acquire key lock
lock.unlock();
typename LockSet<Key>::Lock key_lock(m_lockset, k);
// Did item appear in cache while we were waiting for key?
lock.lock();
found = m_map.find(k);
if (found == m_map.end()) {
// No, we hold key and cache locks, but item not in cache.
// Keep holding the cache lock (there is no function call to wait for)
// and insert the provided return value
auto ctr_copy = m_ctr++;
Value v(ctr_copy, r);
tie(found, inserted) = m_map.insert(make_pair(k, v));
// We hold a lock on this key so we are the ones to insert it
THROW_CHECK0(runtime_error, inserted);
// Release key lock and clean out overflow
key_lock.unlock();
check_overflow();
}
}
// Item should be in cache now
// NOTE(review): check_overflow() above may erase map entries; 'found' is only
// compared here, and is dereferenced below only when nothing was inserted.
THROW_CHECK0(runtime_error, found != m_map.end());
// We are using this object so update the timestamp
if (!inserted) {
found->second.first = m_ctr++;
}
}
}
#endif // CRUCIBLE_CACHE_H

156
include/crucible/chatter.h Normal file
View File

@ -0,0 +1,156 @@
#ifndef CRUCIBLE_CHATTER_H
#define CRUCIBLE_CHATTER_H
#include <functional>
#include <iostream>
#include <set>
#include <sstream>
#include <string>
#include <typeinfo>
/** \brief Chatter wraps a std::ostream reference with a destructor that
writes a newline, and inserts timestamp, pid, and tid prefixes on output.
Typical usage is expressions like the following:
int six = 6, nine = 9; \n
Chatter() << "What you get when you multiply" << six
<< "by" << nine << '?'; \n
Chatter() << "forty two!";
which results in output like the following:
What you get when you multiply 6 by 9 ?\n
forty two!
Note that newlines and timestamps are injected automatically in
the output by the Chatter destructor. You can also use std::endl
explicitly, although it will not have the effect of flushing the
buffer.
*/
namespace crucible {
using namespace std;
// One log message under construction: text is accumulated in a private
// ostringstream via operator<<. Member functions other than get_os() and
// operator<< are defined outside this header.
class Chatter {
string m_name;
// Destination stream, bound at construction.
ostream &m_os;
// Buffer that operator<< writes into.
ostringstream m_oss;
public:
// os defaults to cerr.
Chatter(string name, ostream &os = cerr);
// Move constructor; ChatterBox::operator<< returns a Chatter by value.
Chatter(Chatter &&c);
// Note: returns the internal buffer (m_oss), not the destination stream m_os.
ostream &get_os() { return m_oss; }
// Dispatches through ChatterTraits<T> so that formatting can be specialized per type.
template <class T> Chatter &operator<<(const T& arg);
~Chatter();
};
// Default formatting policy for Chatter: stream the argument into the
// Chatter's buffer with the type's own operator<<.
template <class Argument>
struct ChatterTraits {
	Chatter &operator()(Chatter &c, const Argument &arg)
	{
		ostream &out = c.get_os();
		out << arg;
		return c;
	}
};
// Append arg to this message using the formatting policy ChatterTraits<T>.
// Returns *this so that insertions can be chained.
template <class T>
Chatter &
Chatter::operator<<(const T& arg)
{
	ChatterTraits<T> formatter;
	return formatter(*this, arg);
}
// Formatting policy for pointers to const objects: print the pointee's
// typeid name and the address instead of streaming the pointer itself.
// A null pointer is reported with the typeid name of the pointer type.
template <class Argument>
struct ChatterTraits<const Argument *> {
	Chatter &operator()(Chatter &c, const Argument *arg)
	{
		ostream &out = c.get_os();
		if (!arg) {
			out << "(NULL pointer to " << typeid(arg).name() << ')';
			return c;
		}
		out << "(pointer to " << typeid(*arg).name() << ")(" << reinterpret_cast<const void *>(arg) << ")";
		return c;
	}
};
// Formatting policy for C strings: an explicit specialization, so that a
// const char * is streamed as text rather than handled by the generic
// pointer-to-const specialization.
template <>
struct ChatterTraits<const char *> {
	Chatter &
	operator()(Chatter &c, const char *arg)
	{
		ostream &out = c.get_os();
		out << arg;
		return c;
	}
};
// Formatting policy for an ostream reference: streams the argument into the
// Chatter's buffer.
// NOTE(review): Chatter::operator<< deduces T from 'const T &', which never
// yields a reference type, so confirm whether this specialization is reachable
// other than by naming ChatterTraits<ostream &> explicitly.
template <>
struct ChatterTraits<ostream &> {
	Chatter &
	operator()(Chatter &c, ostream & arg)
	{
		ostream &out = c.get_os();
		out << arg;
		return c;
	}
};
// A logging call site: records file, line and function name, holds an
// enabled flag, and creates a Chatter for each message. The CHATTER macros
// in this header create one static ChatterBox per call site.
class ChatterBox {
string m_file;
int m_line;
string m_pretty_function;
bool m_enabled;
ostream& m_os;
// Registry of ChatterBox pointers; see all_boxes().
static set<ChatterBox*> s_boxes;
public:
// os defaults to cerr.
ChatterBox(string file, int line, string pretty_function, ostream &os = cerr);
~ChatterBox();
// Start a new message named after the recorded function, write t into it,
// and return it by value so that the caller can keep appending.
// Does not check enabled(); the CHATTER macros test it before calling.
template <class T> Chatter operator<<(const T &t)
{
Chatter c(m_pretty_function, m_os);
c << t;
return c;
}
bool enabled() const { return m_enabled; }
void set_enable(bool en);
static set<ChatterBox*>& all_boxes();
};
// Holds a callable for the lifetime of the object; used by CHATTER_UNWIND.
// NOTE(review): presumably the destructor invokes m_func, possibly only while
// an exception is unwinding the stack — the destructor is defined elsewhere, confirm.
class ChatterUnwinder {
function<void()> m_func;
public:
ChatterUnwinder(function<void()> f);
~ChatterUnwinder();
};
};
// CHATTER(x): log the stream expression x (e.g. CHATTER("a = " << a)).
// Each expansion owns a function-local static ChatterBox recording __FILE__,
// __LINE__ and __func__; x is evaluated only when that box is enabled.
#define CHATTER(x) do { \
using namespace crucible; \
static ChatterBox crucible_chatterbox_cb(__FILE__, __LINE__, __func__); \
if (crucible_chatterbox_cb.enabled()) { \
crucible_chatterbox_cb << x; \
} \
} while (0)
// CHATTER_TRACE(x): like CHATTER, but the message starts with "file:line: ".
#define CHATTER_TRACE(x) do { \
using namespace crucible; \
static ChatterBox crucible_chatterbox_cb(__FILE__, __LINE__, __func__); \
if (crucible_chatterbox_cb.enabled()) { \
crucible_chatterbox_cb << __FILE__ << ":" << __LINE__ << ": " << x; \
} \
} while (0)
// Two-level token pasting: the extra level lets __LINE__ expand to its number
// before ## is applied.
#define WTF_C(x, y) x##y
#define SRSLY_WTF_C(x, y) WTF_C(x, y)
// CHATTER_UNWIND(x): declare a ChatterUnwinder named chatterUnwinder_<line>
// whose callable runs CHATTER_TRACE(x). The lambda captures by reference, so x
// may refer to locals of the enclosing scope.
#define CHATTER_UNWIND(x) \
crucible::ChatterUnwinder SRSLY_WTF_C(chatterUnwinder_, __LINE__) ([&]() { \
CHATTER_TRACE(x); \
})
#endif // CRUCIBLE_CHATTER_H

16
include/crucible/crc64.h Normal file
View File

@ -0,0 +1,16 @@
#ifndef CRUCIBLE_CRC64_H
#define CRUCIBLE_CRC64_H
#include <cstdint>
#include <cstdlib>
// CRC-64 digest functions (declared here, defined elsewhere).
// NOTE(review): the polynomial and initial value are not visible in this header.
namespace crucible {
namespace Digest {
namespace CRC {
// Takes a bare char pointer with no length; presumably a NUL-terminated
// string — TODO confirm.
uint64_t crc64(const char *s);
// Takes a pointer and a byte count.
uint64_t crc64(const void *p, size_t len);
};
};
};
#endif

161
include/crucible/error.h Normal file
View File

@ -0,0 +1,161 @@
#ifndef CRUCIBLE_ERROR_H
#define CRUCIBLE_ERROR_H
// Common error-handling idioms for C library calls
#include <cerrno>
#include <cstring>
#include <functional>
#include <sstream>
#include <stdexcept>
#include <system_error>
#include <unistd.h>
namespace crucible {
using namespace std;
// Common error-handling idioms for C library calls
// For calls that report failure as a negated errno value.
// Returns rv unchanged unless it is negative; otherwise throws system_error
// with error code -rv (system category) and expr as the message.
template <class T> T die_if_minus_errno(const char *expr, T rv)
{
	if (!(rv < 0)) {
		return rv;
	}
	const error_code failure(-rv, system_category());
	throw system_error(failure, expr);
}
// For calls that return -1 and set errno on failure.
// Returns rv unchanged unless it equals -1; otherwise throws system_error
// built from the current errno, with expr as the message.
template <class T> T die_if_minus_one(const char *expr, T rv)
{
	if (!(rv == -1)) {
		return rv;
	}
	const error_code failure(errno, system_category());
	throw system_error(failure, expr);
}
// For calls that return 0 (or a null pointer) and set errno on failure.
// Returns rv unchanged unless it compares equal to 0; otherwise throws
// system_error built from the current errno, with expr as the message.
template <class T> T die_if_zero(const char *expr, T rv)
{
	if (!(rv == 0)) {
		return rv;
	}
	const error_code failure(errno, system_category());
	throw system_error(failure, expr);
}
// For calls that return 0 on success and set errno on failure.
// Returns rv unchanged when it compares equal to 0; otherwise throws
// system_error built from the current errno (not from rv), with expr as the
// message.
template <class T> T die_if_non_zero(const char *expr, T rv)
{
	if (!(rv != 0)) {
		return rv;
	}
	const error_code failure(errno, system_category());
	throw system_error(failure, expr);
}
// The next comment block describes catch_all(), declared below.
// Usage: catch_all([&]() { /* insert body here */ } );
// Executes body with exceptions caught and reported to cerr.
// Returns:
// 0 if f() returns
// non-zero if f() throws an exception
// -1 for unknown exception
// 1 for std::exception or class derived thereof
// NOTE(review): presumably installs the explainer used when none is passed
// explicitly — defined elsewhere, confirm.
void set_catch_explainer(function<void(string s)> f);
// Default value of the 'explainer' parameter of the two functions below.
void default_catch_explainer(string s);
int catch_all(const function<void()> &f, const function<void(string)> &explainer = default_catch_explainer);
// catch_and_explain traps the exception, calls the explainer, then rethrows the original exception
void catch_and_explain(const function<void()> &f, const function<void(string)> &explainer = default_catch_explainer);
};
// Each macro below passes the stringified expression and its value to the
// matching crucible::die_if_* template, which throws std::system_error on
// failure and otherwise returns the value.
// 0 on success, -errno on error.
// Covers most pthread functions.
// NOTE(review): die_if_minus_errno only throws for negative values, while
// POSIX pthread functions report failure with a positive error number —
// confirm which calls this is meant for.
#define DIE_IF_MINUS_ERRNO(expr) crucible::die_if_minus_errno(#expr, expr)
// -1 on error, all other values mean success.
#define DIE_IF_MINUS_ONE(expr) crucible::die_if_minus_one(#expr, expr)
// 0 (or NULL) on error, all other values mean success.
#define DIE_IF_ZERO(expr) crucible::die_if_zero(#expr, expr)
// 0 (or NULL) on success, all other values mean error.
// The error code thrown comes from errno, not from the returned value.
#define DIE_IF_NON_ZERO(expr) crucible::die_if_non_zero(#expr, expr)
// macro for throwing an error
// 'expr' is a stream expression (e.g. "x = " << x); it is written into a
// std::ostringstream and the resulting string is passed to type's constructor.
#define THROW_ERROR(type, expr) do { \
	std::ostringstream _te_oss; \
	_te_oss << expr; \
	throw type(_te_oss.str()); \
} while (0)
// macro for throwing a system_error with errno
// errno is read after 'expr' has been formatted.
#define THROW_ERRNO(expr) do { \
	std::ostringstream _te_oss; \
	_te_oss << expr; \
	throw std::system_error(std::error_code(errno, std::system_category()), _te_oss.str()); \
} while (0)
// macro for throwing a system_error with some other variable
#define THROW_ERRNO_VALUE(value, expr) do { \
	std::ostringstream _te_oss; \
	_te_oss << expr; \
	throw std::system_error(std::error_code((value), std::system_category()), _te_oss.str()); \
} while (0)
// macros for checking a constraint
// Throws std::out_of_range naming 'value' and 'expr' when expr is false.
// The exception type is fully qualified, so the macro works without a
// using-directive at the call site, and 'value' is parenthesized so that
// operands such as (flags & mask) can be streamed.
#define CHECK_CONSTRAINT(value, expr) do { \
	if (!(expr)) { \
		THROW_ERROR(std::out_of_range, #value << " = " << (value) << " failed constraint check (" << #expr << ")"); \
	} \
} while(0)
// THROW_CHECKn(type, value..., expr): throw 'type' (via THROW_ERROR) when expr
// is false. The message contains the text of expr and, for n > 0, the name and
// current value of each listed 'value'. The values are evaluated again when
// the message is built, so they should be free of side effects.
#define THROW_CHECK0(type, expr) do { \
if (!(expr)) { \
THROW_ERROR(type, "failed constraint check (" << #expr << ")"); \
} \
} while(0)
#define THROW_CHECK1(type, value, expr) do { \
if (!(expr)) { \
THROW_ERROR(type, #value << " = " << (value) << " failed constraint check (" << #expr << ")"); \
} \
} while(0)
#define THROW_CHECK2(type, value1, value2, expr) do { \
if (!(expr)) { \
THROW_ERROR(type, #value1 << " = " << (value1) << ", " #value2 << " = " << (value2) \
<< " failed constraint check (" << #expr << ")"); \
} \
} while(0)
#define THROW_CHECK3(type, value1, value2, value3, expr) do { \
if (!(expr)) { \
THROW_ERROR(type, #value1 << " = " << (value1) << ", " #value2 << " = " << (value2) << ", " #value3 << " = " << (value3) \
<< " failed constraint check (" << #expr << ")"); \
} \
} while(0)
// Throw unless (value1 op value2) holds, e.g. THROW_CHECK_BIN_OP(runtime_error, a, <, b).
#define THROW_CHECK_BIN_OP(type, value1, op, value2) do { \
if (!((value1) op (value2))) { \
THROW_ERROR(type, "failed constraint check " << #value1 << " (" << (value1) << ") " << #op << " " << #value2 << " (" << (value2) << ")"); \
} \
} while(0)
// Throw unless (op value1) holds, e.g. THROW_CHECK_PREFIX_OP(runtime_error, !, flag).
#define THROW_CHECK_PREFIX_OP(type, op, value1) do { \
if (!(op (value1))) { \
THROW_ERROR(type, "failed constraint check " << #op << " " << #value1 << " (" << (value1) << ")"); \
} \
} while(0)
// Throw unless value_min <= value_test <= value_max (closed range).
// Only operator< is applied to the operands.
#define THROW_CHECK_RANGE(type, value_min, value_test, value_max) do { \
if ((value_test) < (value_min) || (value_max) < (value_test)) { \
THROW_ERROR(type, "failed constraint check " << #value_min << " (" << (value_min) << ") <= " #value_test << " (" << (value_test) \
<< ") <= " << #value_max << " (" << (value_max) << ")"); \
} \
} while(0)
// Throw unless value_min <= value_test < value_max (half-open range, as for
// an array index checked against its size).
#define THROW_CHECK_ARRAY_RANGE(type, value_min, value_test, value_max) do { \
if ((value_test) < (value_min) || !((value_test) < (value_max))) { \
THROW_ERROR(type, "failed constraint check " << #value_min << " (" << (value_min) << ") <= " #value_test << " (" << (value_test) \
<< ") < " << #value_max << " (" << (value_max) << ")"); \
} \
} while(0)
#endif // CRUCIBLE_ERROR_H

View File

@ -0,0 +1,28 @@
#ifndef CRUCIBLE_EXECPIPE_H
#define CRUCIBLE_EXECPIPE_H
#include "crucible/fd.h"
#include <functional>
#include <limits>
#include <string>
// Helpers for running code in a child process connected through an Fd.
// Everything here is declared only; the definitions live elsewhere.
namespace crucible {
using namespace std;
// Candidate values for popen()'s import_fd_fn parameter; each takes the
// child's end of the connection.
// NOTE(review): judging by the names, each attaches child_fd to the listed
// standard streams of the child — confirm against the implementation.
void redirect_stdin(const Fd &child_fd);
void redirect_stdin_stdout(const Fd &child_fd);
void redirect_stdin_stdout_stderr(const Fd &child_fd);
void redirect_stdout(const Fd &child_fd);
void redirect_stdout_stderr(const Fd &child_fd);
// Open a pipe (actually socketpair) to child process, then execute code in that process.
// e.g. popen([] () { return system("echo Hello, World!"); });
// (f must return int, because the parameter type is function<int()>.)
// Forked process will exit when function returns.
Fd popen(function<int()> f, function<void(const Fd &child_fd)> import_fd_fn = redirect_stdin_stdout);
// Read all the data from fd into a string
// max_bytes defaults to the largest size_t value; chunk_bytes defaults to 4096.
string read_all(Fd fd, size_t max_bytes = numeric_limits<size_t>::max(), size_t chunk_bytes = 4096);
};
#endif // CRUCIBLE_EXECPIPE_H

View File

@ -0,0 +1,101 @@
#ifndef CRUCIBLE_EXTENTWALKER_H
#define CRUCIBLE_EXTENTWALKER_H
#include "crucible/fd.h"
namespace crucible {
using namespace std;
// FIXME: ExtentCursor is probably a better name
// Describes one extent of a file: the byte range [m_begin, m_end) it covers
// (assumed half-open — TODO confirm against size()), its physical location,
// and flags.
struct Extent {
off_t m_begin;
off_t m_end;
uint64_t m_physical;
// fiemap flags in the low 32 bits, plus the crucible flags defined below.
uint64_t m_flags;
// Btrfs extent reference details
off_t m_physical_len;
off_t m_logical_len;
off_t m_offset;
// fiemap flags are uint32_t, so bits 32..63 are OK for us
// no extent here
static const uint64_t HOLE = (1ULL << 32);
// extent is physical space full of zeros
static const uint64_t PREALLOC = (1ULL << 33);
// extent's physical (RAM) size does not match logical (can we know this?)
static const uint64_t OBSCURED = (1ULL << 34);
// Defined elsewhere.
operator bool() const;
// Defined elsewhere; presumably m_end - m_begin — confirm.
off_t size() const;
// Plain accessors for the fields above.
off_t begin() const { return m_begin; }
off_t end() const { return m_end; }
uint64_t flags() const { return m_flags; }
uint64_t physical() const { return m_physical; }
off_t physical_len() const { return m_physical_len; }
off_t logical_len() const { return m_logical_len; }
off_t offset() const { return m_offset; }
bool operator==(const Extent &that) const;
bool operator!=(const Extent &that) const { return !(*this == that); }
Extent();
Extent(const Extent &e) = default;
};
// Cursor over the extents of an open file. Holds a vector of Extent objects
// (m_extents) and an iterator into it (m_current); subclasses can override
// get_extent_map() to supply the extent list differently.
class ExtentWalker {
public:
using Vec = vector<Extent>;
using Itr = Vec::iterator;
protected:
Fd m_fd;
Stat m_stat;
// Returns the extents around file position pos.
// NOTE(review): run_fiemap() below suggests the base implementation uses
// the FIEMAP ioctl — confirm.
virtual Vec get_extent_map(off_t pos);
// Bounds on the number of extents fetched per call (exact use is in the
// implementation).
static const unsigned sc_extent_fetch_max = 64;
static const unsigned sc_extent_fetch_min = 4;
// 0x1000 * 32 = 128 KiB.
static const off_t sc_step_size = 0x1000 * (sc_extent_fetch_max / 2);
private:
Vec m_extents;
Itr m_current;
Itr find_in_cache(off_t pos);
void run_fiemap(off_t pos);
public:
ExtentWalker(Fd fd = Fd());
ExtentWalker(Fd fd, off_t initial_pos);
virtual ~ExtentWalker();
void reset();
// Returns the current extent by value.
Extent current();
// NOTE(review): the bool result presumably reports whether the cursor moved — confirm.
bool next();
bool prev();
void seek(off_t new_pos);
friend ostream & operator<<(ostream &os, const ExtentWalker &ew);
};
// ExtentWalker variant that overrides get_extent_map().
// NOTE(review): m_tree_id and m_root_fd suggest the extent list is read from a
// btrfs tree through m_root_fd — confirm against the implementation.
class BtrfsExtentWalker : public ExtentWalker {
uint64_t m_tree_id;
Fd m_root_fd;
protected:
Vec get_extent_map(off_t pos) override;
public:
BtrfsExtentWalker(Fd fd);
BtrfsExtentWalker(Fd fd, off_t initial_pos);
BtrfsExtentWalker(Fd fd, off_t initial_pos, Fd root_fd);
void set_root_fd(Fd fd);
};
ostream &operator<<(ostream &os, const Extent &e);
};
#endif // CRUCIBLE_EXTENTWALKER_H

178
include/crucible/fd.h Normal file
View File

@ -0,0 +1,178 @@
#ifndef CRUCIBLE_FD_H
#define CRUCIBLE_FD_H
#include "crucible/resource.h"
#include <cstring>
#include <string>
#include <vector>
// open
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
// socket
#include <sys/socket.h>
// pread/pwrite
#include <unistd.h>
namespace crucible {
using namespace std;
// IOHandle is a file descriptor owner object. It closes them when destroyed.
// Most of the functions here don't use it because these functions don't own FDs.
// All good names for such objects are taken.
// Owner of a single file descriptor (see the comment above: it closes the
// descriptor when destroyed). Neither copyable nor movable; the Fd typedef
// below wraps it in a ResourceHandle so that it can be passed around.
class IOHandle {
// Copy and move operations are deleted.
IOHandle(const IOHandle &) = delete;
IOHandle(IOHandle &&) = delete;
IOHandle& operator=(IOHandle &&) = delete;
IOHandle& operator=(const IOHandle &) = delete;
protected:
int m_fd;
// Overwrites the stored descriptor without closing the previous one.
IOHandle& operator=(int that) { m_fd = that; return *this; }
public:
virtual ~IOHandle();
IOHandle(int fd);
IOHandle();
void close();
// Returns the descriptor without giving up ownership.
int get_fd() const { return m_fd; }
// NOTE(review): presumably returns the descriptor and gives up ownership so
// that the destructor no longer closes it — defined elsewhere, confirm.
int release_fd();
};
// Glue that lets ResourceHandle manage IOHandle objects keyed by their file
// descriptor number. Any negative descriptor counts as "no resource", and -1
// is the value used to represent it.
template <>
struct ResourceTraits<int, IOHandle> {
	int get_null_key() const
	{
		return -1;
	}

	bool is_null_key(const int &key) const
	{
		return key < 0;
	}

	int get_key(const IOHandle &res) const
	{
		return res.get_fd();
	}

	shared_ptr<IOHandle> make_resource(int fd) const
	{
		return make_shared<IOHandle>(fd);
	}
};
typedef ResourceHandle<int, IOHandle> Fd;
// Functions named "foo_or_die" throw exceptions on failure.
// Attempt to open the file with the given mode
int open_or_die(const string &file, int flags = O_RDONLY, mode_t mode = 0777);
int openat_or_die(int dir_fd, const string &file, int flags = O_RDONLY, mode_t mode = 0777);
// Decode open parameters
string o_flags_ntoa(int flags);
string o_mode_ntoa(mode_t mode);
// mmap with its one weird error case
void *mmap_or_die(void *addr, size_t length, int prot, int flags, int fd, off_t offset);
// Decode mmap parameters
string mmap_prot_ntoa(int prot);
string mmap_flags_ntoa(int flags);
// Unlink, rename
void unlink_or_die(const string &file);
void rename_or_die(const string &from, const string &to);
void renameat_or_die(int fromfd, const string &frompath, int tofd, const string &topath);
// Read or write structs:
// There is a template specialization to read or write strings
// Three-arg version of read_or_die/write_or_die throws an error on incomplete read/writes
// Four-arg version returns number of bytes read/written through reference arg
// Raw-buffer I/O primitives (defined elsewhere), each followed by a template
// wrapper that transfers the object representation of 'buf', i.e.
// sizeof(buf) bytes starting at &buf.

// Read 'size' bytes from fd into buf.
void read_or_die(int fd, void *buf, size_t size);
template <class T> void read_or_die(int fd, T& buf)
{
	void *const raw = static_cast<void *>(&buf);
	read_or_die(fd, raw, sizeof(buf));
}

// Read up to size_wanted bytes; the count actually read is stored in size_read.
void read_partial_or_die(int fd, void *buf, size_t size_wanted, size_t &size_read);
template <class T> void read_partial_or_die(int fd, T& buf, size_t &size_read)
{
	void *const raw = static_cast<void *>(&buf);
	read_partial_or_die(fd, raw, sizeof(buf), size_read);
}

// Read 'size' bytes from fd at the given file offset.
void pread_or_die(int fd, void *buf, size_t size, off_t offset);
template <class T> void pread_or_die(int fd, T& buf, off_t offset)
{
	void *const raw = static_cast<void *>(&buf);
	pread_or_die(fd, raw, sizeof(buf), offset);
}

// Write 'size' bytes from buf to fd.
void write_or_die(int fd, const void *buf, size_t size);
template <class T> void write_or_die(int fd, const T& buf)
{
	const void *const raw = static_cast<const void *>(&buf);
	write_or_die(fd, raw, sizeof(buf));
}

// Write up to size_wanted bytes; the count actually written is stored in size_written.
void write_partial_or_die(int fd, const void *buf, size_t size_wanted, size_t &size_written);
template <class T> void write_partial_or_die(int fd, const T& buf, size_t &size_written)
{
	const void *const raw = static_cast<const void *>(&buf);
	write_partial_or_die(fd, raw, sizeof(buf), size_written);
}

// Write 'size' bytes from buf to fd at the given file offset.
void pwrite_or_die(int fd, const void *buf, size_t size, off_t offset);
template <class T> void pwrite_or_die(int fd, const T& buf, off_t offset)
{
	const void *const raw = static_cast<const void *>(&buf);
	pwrite_or_die(fd, raw, sizeof(buf), offset);
}
// Specialization for strings which reads/writes the string content, not the struct string
template<> void write_or_die<string>(int fd, const string& str);
template<> void pread_or_die<string>(int fd, string& str, off_t offset);
template<> void pread_or_die<vector<char>>(int fd, vector<char>& str, off_t offset);
template<> void pread_or_die<vector<uint8_t>>(int fd, vector<uint8_t>& str, off_t offset);
// A different approach to reading a simple string
string read_string(int fd, size_t size);
// A lot of Unix API wants you to initialize a struct and call
// one function to fill it, another function to throw it away,
// and has some unknown third thing you have to do when there's
// an error. That's also a C++ object with an exception-throwing
// constructor.
// C++ wrapper that IS-A 'struct stat', so the st_* fields are directly
// accessible. Per the comment above, the constructors throw on failure.
struct Stat : public stat {
Stat();
// NOTE(review): presumably fills the struct via fstat(f) — confirm.
Stat(int f);
// NOTE(review): whether this follows symlinks (stat vs lstat) is not visible
// here — confirm.
Stat(const string &filename);
// Both return a reference to a Stat, presumably *this to allow chaining — confirm.
Stat &fstat(int fd);
Stat &lstat(const string &filename);
};
string st_mode_ntoa(mode_t mode);
// Because it's not trivial to do correctly
string readlink_or_die(const string &path);
// Determine the name of a FD by readlink through /proc/self/fd/
string name_fd(int fd);
// Returns Fd objects because it does own them.
pair<Fd, Fd> socketpair_or_die(int domain = AF_UNIX, int type = SOCK_STREAM, int protocol = 0);
// like unique_lock but for flock instead of mutexes...and not trying
// to hide the many and subtle differences between those two things *at all*.
// Lock-holder object for flock() on a file descriptor, modelled on
// unique_lock (see the comment above). Neither copyable nor movable.
// Stores the descriptor as a plain int (it is not an Fd).
class Flock {
int m_fd;
bool m_locked;
Flock(const Flock &) = delete;
Flock(Flock &&) = delete;
Flock &operator=(const Flock &) = delete;
Flock &operator=(Flock &&) = delete;
public:
Flock();
Flock(int fd);
// init_locked_state: initial value for the "locked" state — presumably to
// adopt a lock that is already held; confirm.
Flock(int fd, bool init_locked_state);
~Flock();
void lock();
// Note: returns void, unlike unique_lock::try_lock(); use owns_lock() to
// see whether the lock was obtained.
void try_lock();
void unlock();
bool owns_lock();
operator bool();
int fd();
};
// Doesn't use Fd objects because it's usually just used to replace stdin/stdout/stderr.
void dup2_or_die(int fd_in, int fd_out);
}
#endif // CRUCIBLE_FD_H

246
include/crucible/fs.h Normal file
View File

@ -0,0 +1,246 @@
#ifndef CRUCIBLE_FS_H
#define CRUCIBLE_FS_H
#include "crucible/error.h"
// Terribly Linux-specific FS-wrangling functions
// BTRFS
#include "crucible/btrfs.h"
// FIEMAP_* structs and flags
#include <linux/fiemap.h>
#include <cstdint>
#include <iosfwd>
#include <vector>
#include <fcntl.h>
#include <sys/statvfs.h>
namespace crucible {
using namespace std;
// wrapper around fallocate(...FALLOC_FL_PUNCH_HOLE...)
void punch_hole(int fd, off_t offset, off_t len);
// One destination entry (fd + offset) for an extent-same request;
// extends the kernel struct btrfs_ioctl_same_extent_info with a constructor.
struct BtrfsExtentInfo : public btrfs_ioctl_same_extent_info {
BtrfsExtentInfo(int dst_fd, off_t dst_offset);
};
// Extent-same request: the base struct btrfs_ioctl_same_args describes the
// source range, and destinations are collected in m_info via add().
struct BtrfsExtentSame : public btrfs_ioctl_same_args {
virtual ~BtrfsExtentSame();
BtrfsExtentSame(int src_fd, off_t src_offset, off_t src_length);
// Append one destination (fd, offset) to the request.
void add(int fd, off_t offset);
// Issue the request; virtual so subclasses can substitute another mechanism.
virtual void do_ioctl();
// Descriptor stored with the request (NOTE(review): presumably src_fd -- confirm).
int m_fd;
vector<BtrfsExtentInfo> m_info;
};
// Same interface and constructors as BtrfsExtentSame, with do_ioctl() overridden
// (NOTE(review): by its name it is implemented with clone operations -- confirm).
struct BtrfsExtentSameByClone : public BtrfsExtentSame {
using BtrfsExtentSame::BtrfsExtentSame;
void do_ioctl() override;
};
// Stream printers for the raw kernel structs (passed by pointer) and the wrapper.
ostream & operator<<(ostream &os, const btrfs_ioctl_same_extent_info *info);
ostream & operator<<(ostream &os, const btrfs_ioctl_same_args *info);
ostream & operator<<(ostream &os, const BtrfsExtentSame &bes);
// An (inode number, offset, root) triple; element type of
// BtrfsIoctlLogicalInoArgs::m_iors.
struct BtrfsInodeOffsetRoot {
uint64_t m_inum;
uint64_t m_offset;
uint64_t m_root;
};
ostream & operator<<(ostream &os, const BtrfsInodeOffsetRoot &p);
// btrfs_data_container header plus an owned byte buffer (m_data).
// The default buffer size is 64 KiB.
struct BtrfsDataContainer : public btrfs_data_container {
BtrfsDataContainer(size_t size = 64 * 1024);
// NOTE(review): presumably readies the buffer and returns the pointer to hand to an ioctl -- confirm.
void *prepare();
size_t get_size() const;
// Accessors typed after the corresponding btrfs_data_container fields.
decltype(bytes_left) get_bytes_left() const;
decltype(bytes_missing) get_bytes_missing() const;
decltype(elem_cnt) get_elem_cnt() const;
decltype(elem_missed) get_elem_missed() const;
vector<char> m_data;
};
// Argument wrapper for a logical-address-to-inode lookup
// (extends btrfs_ioctl_logical_ino_args).  Results are stored in m_iors.
struct BtrfsIoctlLogicalInoArgs : public btrfs_ioctl_logical_ino_args {
BtrfsIoctlLogicalInoArgs(uint64_t logical, size_t buf_size = 64 * 1024);
// NOTE(review): presumably do_ioctl() throws on failure while
// do_ioctl_nothrow() reports it through its bool result -- confirm.
virtual void do_ioctl(int fd);
virtual bool do_ioctl_nothrow(int fd);
BtrfsDataContainer m_container;
vector<BtrfsInodeOffsetRoot> m_iors;
};
ostream & operator<<(ostream &os, const BtrfsIoctlLogicalInoArgs &p);
// Argument wrapper for an inode-to-path lookup
// (extends btrfs_ioctl_ino_path_args).  Results are stored in m_paths.
struct BtrfsIoctlInoPathArgs : public btrfs_ioctl_ino_path_args {
BtrfsIoctlInoPathArgs(uint64_t inode, size_t buf_size = 64 * 1024);
// NOTE(review): presumably do_ioctl() throws on failure while
// do_ioctl_nothrow() reports it through its bool result -- confirm.
virtual void do_ioctl(int fd);
virtual bool do_ioctl_nothrow(int fd);
BtrfsDataContainer m_container;
vector<string> m_paths;
};
ostream & operator<<(ostream &os, const BtrfsIoctlInoPathArgs &p);
// Argument wrapper extending btrfs_ioctl_ino_lookup_args; adds no data members.
struct BtrfsIoctlInoLookupArgs : public btrfs_ioctl_ino_lookup_args {
BtrfsIoctlInoLookupArgs(uint64_t objectid);
virtual void do_ioctl(int fd);
virtual bool do_ioctl_nothrow(int fd);
// use objectid = BTRFS_FIRST_FREE_OBJECTID
// this->treeid is the rootid for the path (we get the path too)
};
// Argument wrapper extending btrfs_ioctl_defrag_range_args; the caller
// fills in the inherited fields before calling do_ioctl().
struct BtrfsIoctlDefragRangeArgs : public btrfs_ioctl_defrag_range_args {
BtrfsIoctlDefragRangeArgs();
virtual void do_ioctl(int fd);
virtual bool do_ioctl_nothrow(int fd);
};
// Note: this printer takes a pointer, not a reference.
ostream & operator<<(ostream &os, const BtrfsIoctlDefragRangeArgs *p);
// in btrfs/ctree.h, but that's a nightmare to #include here
// Local copy of the compression type codes.  BTRFS_COMPRESS_TYPES shares
// the value 2 with BTRFS_COMPRESS_LZO; BTRFS_COMPRESS_LAST is 3.
typedef enum {
BTRFS_COMPRESS_NONE = 0,
BTRFS_COMPRESS_ZLIB = 1,
BTRFS_COMPRESS_LZO = 2,
BTRFS_COMPRESS_TYPES = 2,
BTRFS_COMPRESS_LAST = 3,
} btrfs_compression_type;
// fiemap_extent with constructors and convenience accessors.
struct FiemapExtent : public fiemap_extent {
FiemapExtent();
// Copy-construct from the raw kernel struct.
FiemapExtent(const fiemap_extent &that);
// NOTE(review): the truthiness criterion is defined in the implementation -- not visible here.
operator bool() const;
// NOTE(review): presumably the first and one-past-last offsets covered by the extent -- confirm.
off_t begin() const;
off_t end() const;
};
// FIEMAP request wrapper (extends struct fiemap); extents are collected in m_extents.
struct Fiemap : public fiemap {
// Get entire file
Fiemap(uint64_t start = 0, uint64_t length = FIEMAP_MAX_OFFSET);
void do_ioctl(int fd);
vector<FiemapExtent> m_extents;
// Number of fiemap_extent records that fit in 4096 bytes after the fiemap header.
uint64_t m_min_count = (4096 - sizeof(fiemap)) / sizeof(fiemap_extent);
// Number of fiemap_extent records that fit in 16 MiB.
uint64_t m_max_count = 16 * 1024 * 1024 / sizeof(fiemap_extent);
};
// Stream printers: raw kernel structs are passed by pointer, wrappers by reference.
ostream & operator<<(ostream &os, const fiemap_extent *info);
ostream & operator<<(ostream &os, const FiemapExtent &info);
ostream & operator<<(ostream &os, const fiemap *info);
ostream & operator<<(ostream &os, const Fiemap &info);
// Render FIEMAP extent flag bits as text.
string fiemap_extent_flags_ntoa(unsigned long flags);
// Helper functions
// Both take a source range (fd, offset, length) and a destination (fd, offset).
// NOTE(review): the meaning of btrfs_extent_same()'s bool result is defined by the implementation -- confirm.
void btrfs_clone_range(int src_fd, off_t src_offset, off_t src_length, int dst_fd, off_t dst_offset);
bool btrfs_extent_same(int src_fd, off_t src_offset, off_t src_length, int dst_fd, off_t dst_offset);
// One search result: the btrfs_ioctl_search_header plus an owned byte buffer (m_data).
struct BtrfsIoctlSearchHeader : public btrfs_ioctl_search_header {
BtrfsIoctlSearchHeader();
vector<char> m_data;
// Load this object from buffer v starting at offset.
// NOTE(review): the size_t result is presumably the offset just past the consumed item -- confirm.
size_t set_data(const vector<char> &v, size_t offset);
};
ostream & operator<<(ostream &os, const btrfs_ioctl_search_header &hdr);
ostream & operator<<(ostream &os, const BtrfsIoctlSearchHeader &hdr);
// Tree-search request wrapper (extends btrfs_ioctl_search_key).
// Results are stored in m_result; the default buffer size is 1 MiB.
struct BtrfsIoctlSearchKey : public btrfs_ioctl_search_key {
BtrfsIoctlSearchKey(size_t buf_size = 1024 * 1024);
// NOTE(review): presumably do_ioctl() throws on failure while
// do_ioctl_nothrow() reports it through its bool result -- confirm.
virtual bool do_ioctl_nothrow(int fd);
virtual void do_ioctl(int fd);
// Copy objectid/type/offset so we move forward
void next_min(const BtrfsIoctlSearchHeader& ref);
size_t m_buf_size;
vector<BtrfsIoctlSearchHeader> m_result;
};
ostream & operator<<(ostream &os, const btrfs_ioctl_search_key &key);
ostream & operator<<(ostream &os, const BtrfsIoctlSearchKey &key);
// Text names for search item types / object ids.
string btrfs_search_type_ntoa(unsigned type);
string btrfs_search_objectid_ntoa(unsigned objectid);
// Queries about the filesystem object behind fd
// (NOTE(review): presumably the subvolume root id and its transaction id -- confirm).
uint64_t btrfs_get_root_id(int fd);
uint64_t btrfs_get_root_transid(int fd);
// Return a pointer to a T located `offset` bytes into v.
// If v is too short to contain a whole T at that position, v is first
// extended with zero bytes; because v may be resized, pointers obtained
// from it earlier can be invalidated by this call.
template<class T>
const T*
get_struct_ptr(vector<char> &v, size_t offset = 0)
{
	// OK so sometimes btrfs overshoots a little
	if (v.size() < offset + sizeof(T)) {
		v.resize(offset + sizeof(T), 0);
	}
	// Expected to hold after the resize above; kept as a sanity check.
	THROW_CHECK2(invalid_argument, v.size(), offset + sizeof(T), offset + sizeof(T) <= v.size());
	const char *const base = v.data();
	return reinterpret_cast<const T*>(base + offset);
}
// Locate an A at `offset` bytes into v (via get_struct_ptr) and pass it
// to func, returning whatever func returns.
template<class A, class R>
R
call_btrfs_get(R (*func)(const A*), vector<char> &v, size_t offset = 0)
{
	const A *const item = get_struct_ptr<A>(v, offset);
	return func(item);
}
// Functor that reads a little-endian field of type T from a possibly
// unaligned address.  Only the specializations below are defined; each one
// forwards to the get_unaligned_le* helper of matching width and returns
// the value as the corresponding fixed-width unsigned integer.
template <class T> struct btrfs_get_le;
template<> struct btrfs_get_le<__le64> {
uint64_t operator()(const void *p) { return get_unaligned_le64(p); }
};
template<> struct btrfs_get_le<__le32> {
uint32_t operator()(const void *p) { return get_unaligned_le32(p); }
};
template<> struct btrfs_get_le<__le16> {
uint16_t operator()(const void *p) { return get_unaligned_le16(p); }
};
template<> struct btrfs_get_le<__le8> {
uint8_t operator()(const void *p) { return get_unaligned_le8(p); }
};
// Read member `member` of an S stored `offset` bytes into v, decoding it
// with btrfs_get_le<T>.  v may be extended by get_struct_ptr().
template<class S, class T>
T
btrfs_get_member(T S::* member, vector<char> &v, size_t offset = 0)
{
	// Apply the member pointer to a null S to obtain the member's byte
	// offset within S (same idea as offsetof).
	const S *const null_struct = static_cast<const S*>(nullptr);
	const char *const struct_addr = reinterpret_cast<const char *>(null_struct);
	const char *const member_addr = reinterpret_cast<const char *>(&(null_struct->*member));
	const auto member_offset = member_addr - struct_addr;
	return btrfs_get_le<T>()(get_struct_ptr<S>(v, offset + member_offset));
}
// statvfs(3) result wrapper; inherits the f_* fields from struct statvfs.
struct Statvfs : public statvfs {
Statvfs();
// NOTE(review): presumably fill the object from a path / an open descriptor -- confirm.
Statvfs(string path);
Statvfs(int fd);
// NOTE(review): units (bytes vs. blocks) are decided by the implementation -- confirm.
unsigned long size() const;
unsigned long free() const;
unsigned long available() const;
};
// Write a hex dump of v to os and return os.
ostream &hexdump(ostream &os, const vector<char> &v);
// Filesystem-info request wrapper (extends btrfs_ioctl_fs_info_args).
struct BtrfsIoctlFsInfoArgs : public btrfs_ioctl_fs_info_args {
BtrfsIoctlFsInfoArgs();
void do_ioctl(int fd);
// Filesystem UUID rendered as a string.
string uuid() const;
};
ostream & operator<<(ostream &os, const BtrfsIoctlFsInfoArgs &a);
};
#endif // CRUCIBLE_FS_H

106
include/crucible/interp.h Normal file
View File

@ -0,0 +1,106 @@
#ifndef CRUCIBLE_INTERP_H
#define CRUCIBLE_INTERP_H
#include "crucible/error.h"
#include <map>
#include <memory>
#include <string>
#include <vector>
namespace crucible {
using namespace std;
// Argument vector: a vector<string> that can be built from a C argv array.
struct ArgList : public vector<string> {
// NOTE(review): argv is presumably null-terminated like main()'s argv -- confirm.
ArgList(const char **argv);
// using vector<string>::vector ... doesn't work:
// error: std::vector<std::basic_string<char> >::vector names constructor
// Still doesn't work in 4.9 because it can't manage a conversion
// Note: takes a *const* rvalue reference, so the argument cannot be moved from.
ArgList(const vector<string> &&that);
};
struct ArgActor {
struct ArgActorBase {
virtual void predicate(void *obj, string arg);
};
template <class T>
struct ArgActorDerived {
function<void(T, string)> m_func;
ArgActorDerived(decltype(m_func) func) :
m_func(func)
{
}
void predicate(void *obj, string arg) override
{
T &op = *(reinterpret_cast<T*>(obj));
m_func(op, obj);
}
};
template <class T>
ArgActor(T, function<void(T, string)> func) :
m_actor(make_shared(ArgActorDerived<T>(func)))
{
}
ArgActor() = default;
void predicate(void *t, string arg)
{
if (m_actor) {
m_actor->predicate(t, arg);
} else {
THROW_ERROR(invalid_argument, "null m_actor for predicate arg '" << arg << "'");
}
}
private:
shared_ptr<ArgActorBase> m_actor;
};
// Maps option strings to ArgActor handlers and applies them to an argument list.
struct ArgParser {
~ArgParser();
ArgParser();
// Register the handler for option string opt.
void add_opt(string opt, ArgActor actor);
// Parse args against an object of type T.  t is taken by value, so the
// backend receives the address of this function's local copy.
template <class T>
void
parse(T t, const ArgList &args)
{
void *vt = &t;
parse_backend(vt, args);
}
private:
// Type-erased worker behind parse().
void parse_backend(void *t, const ArgList &args);
map<string, ArgActor> m_string_opts;
};
// Abstract command: exec() receives the argument list and returns an int status.
struct Command {
virtual ~Command();
virtual int exec(const ArgList &args) = 0;
};
// Command that stores a function object (m_cmd) supplied at construction
// (NOTE(review): exec() presumably forwards to it -- confirm in the implementation).
struct Proc : public Command {
int exec(const ArgList &args) override;
Proc(const function<int(const ArgList &)> &f);
private:
function<int(const ArgList &)> m_cmd;
};
// Command interpreter: owns a name -> Command table.  Not copy-constructible.
struct Interp {
virtual ~Interp();
Interp(const map<string, shared_ptr<Command> > &cmdlist);
// Register a command under the given name.
void add_command(const string &name, const shared_ptr<Command> &command);
// Run a command from m_commands for the given argument list
// (NOTE(review): presumably selected by the first argument -- confirm).
int exec(const ArgList &args);
private:
Interp(const Interp &) = delete;
map<string, shared_ptr<Command> > m_commands;
};
};
#endif // CRUCIBLE_INTERP_H

51
include/crucible/limits.h Normal file
View File

@ -0,0 +1,51 @@
#ifndef CRUCIBLE_LIMITS_H
#define CRUCIBLE_LIMITS_H
#include "crucible/error.h"
#include <limits>
#include <typeinfo>
namespace crucible {
using namespace std;
template <class To, class From>
To
ranged_cast(From f)
{
if (typeid(From) == typeid(To)) {
return f;
}
To t;
static string f_info = typeid(f).name();
static string t_info = typeid(t).name();
if (numeric_limits<From>::max() > numeric_limits<To>::max() && numeric_limits<From>::max() < numeric_limits<To>::max()) {
THROW_ERROR(out_of_range,
"ranged_cast: can't compare limits of types " << f_info << " and " << t_info << ", template specialization required");
}
if (numeric_limits<From>::max() > numeric_limits<To>::max() && f > static_cast<From>(numeric_limits<To>::max())) {
THROW_ERROR(out_of_range,
"ranged_cast: " << f_info << "(" << f << ") out of range of target type " << t_info);
}
if (!numeric_limits<To>::is_signed && numeric_limits<From>::is_signed && f < 0) {
THROW_ERROR(out_of_range,
"ranged_cast: " << f_info << "(" << f << ") out of range of unsigned target type " << t_info);
}
t = static_cast<To>(f);
From f2 = static_cast<From>(t);
if (f2 != f) {
THROW_ERROR(out_of_range,
"ranged_cast: " << f_info << "(" << f << ") -> " << t_info << " failed: result value " << f2);
}
return t;
}
};
#endif // CRUCIBLE_LIMITS_H

210
include/crucible/lockset.h Normal file
View File

@ -0,0 +1,210 @@
#ifndef CRUCIBLE_LOCKSET_H
#define CRUCIBLE_LOCKSET_H
#include <crucible/error.h>
#include <cassert>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <set>
namespace crucible {
using namespace std;
// A set of named locks.  A key is "locked" while it is present in m_set;
// m_mutex protects the set and m_condvar is used to wait for changes.
template <class T>
class LockSet {
public:
using key_type = T;
using set_type = set<T>;
private:
set_type m_set;
mutex m_mutex;
condition_variable m_condvar;
public:
~LockSet();
LockSet() = default;
// Block until name is not in the set, then add it.
void lock(const key_type &name);
// Remove name from the set and wake waiters.
void unlock(const key_type &name);
// Add name if it is not already present; returns false instead of blocking.
bool try_lock(const key_type &name);
size_t size();
bool empty();
// Snapshot of the currently locked keys.
set_type copy();
// If any key is locked, wait up to `interval` seconds for a notification.
void wait_unlock(double interval);
// Scoped holder for one key of a LockSet: releases the key on destruction.
// Movable, not copyable.
class Lock {
LockSet &m_lockset;
key_type m_name;
bool m_locked;
Lock() = delete;
Lock(const Lock &) = delete;
Lock& operator=(const Lock &) = delete;
public:
~Lock();
// Acquires the key immediately unless start_locked is false.
Lock(LockSet &lockset, const key_type &m_name, bool start_locked = true);
Lock(Lock &&that);
Lock& operator=(Lock &&that);
void lock();
void unlock();
bool try_lock();
};
};
// Destroying a LockSet while keys are still locked is treated as a fatal
// programming error: it is reported on cerr and then asserted.
template <class T>
LockSet<T>::~LockSet()
{
if (!m_set.empty()) {
cerr << "ERROR: " << m_set.size() << " locked items still in set at destruction" << endl;
}
// We will crash later. Might as well crash now.
assert(m_set.empty());
}
// Acquire the lock for `name`, blocking while another holder has it.
template <class T>
void
LockSet<T>::lock(const key_type &name)
{
	unique_lock<mutex> lock(m_mutex);
	// Sleep until nobody holds this key.
	m_condvar.wait(lock, [this, &name]() { return m_set.count(name) == 0; });
	auto rv = m_set.insert(name);
	// The key was absent a moment ago, so the insert must have succeeded.
	THROW_CHECK0(runtime_error, rv.second);
}
// Try to acquire the lock for `name` without blocking.
// Returns true if the key was acquired, false if it is already held.
template <class T>
bool
LockSet<T>::try_lock(const key_type &name)
{
	unique_lock<mutex> lock(m_mutex);
	if (m_set.find(name) != m_set.end()) {
		// Somebody else holds it.
		return false;
	}
	auto rv = m_set.insert(name);
	THROW_CHECK1(runtime_error, name, rv.second);
	return true;
}
// Release the lock for `name`.
// Throws invalid_argument (via THROW_CHECK1) if the key was not locked.
template <class T>
void
LockSet<T>::unlock(const key_type &name)
{
unique_lock<mutex> lock(m_mutex);
// Notify before the erase: the mutex is still held, so waiters cannot
// re-examine the set until this function returns, and they are woken
// even if the check below throws.
m_condvar.notify_all();
auto erase_count = m_set.erase(name);
THROW_CHECK1(invalid_argument, erase_count, erase_count == 1);
}
// If at least one key is locked, wait for a notification on the condition
// variable, giving up after `interval` seconds.  Returns immediately when
// the set is empty.
template <class T>
void
LockSet<T>::wait_unlock(double interval)
{
	unique_lock<mutex> lock(m_mutex);
	if (!m_set.empty()) {
		const chrono::duration<double> timeout(interval);
		m_condvar.wait_for(lock, timeout);
	}
}
// Number of keys currently locked.
template <class T>
size_t
LockSet<T>::size()
{
	lock_guard<mutex> guard(m_mutex);
	return m_set.size();
}
// True when no key is locked.
template <class T>
bool
LockSet<T>::empty()
{
	lock_guard<mutex> guard(m_mutex);
	return m_set.empty();
}
// Snapshot (by value) of the keys locked at the time of the call.
template <class T>
typename LockSet<T>::set_type
LockSet<T>::copy()
{
	lock_guard<mutex> guard(m_mutex);
	return m_set;
}
// Acquire the key (blocking) unless this Lock already holds it.
template <class T>
void
LockSet<T>::Lock::lock()
{
	if (!m_locked) {
		m_lockset.lock(m_name);
		m_locked = true;
	}
}
// Try to acquire the key without blocking; returns whether it is held afterwards.
template <class T>
bool
LockSet<T>::Lock::try_lock()
{
	if (!m_locked) {
		m_locked = m_lockset.try_lock(m_name);
	}
	return m_locked;
}
// Release the key if this Lock holds it; otherwise do nothing.
template <class T>
void
LockSet<T>::Lock::unlock()
{
	if (m_locked) {
		m_lockset.unlock(m_name);
		m_locked = false;
	}
}
// Release the key on destruction if it is still held.
template <class T>
LockSet<T>::Lock::~Lock()
{
	// unlock() does nothing when the key is not held.
	unlock();
}
// Bind to a LockSet and key; the key is acquired right away (blocking)
// unless start_locked is false.
template <class T>
LockSet<T>::Lock::Lock(LockSet &lockset, const key_type &name, bool start_locked) :
	m_lockset(lockset),
	m_name(name),
	m_locked(false)
{
	if (!start_locked) {
		return;
	}
	lock();
}
// Move constructor: take over the key (and its locked state) from `that`.
// `that` is left unlocked so its destructor will not release the key.
template <class T>
LockSet<T>::Lock::Lock(Lock &&that) :
	m_lockset(that.m_lockset),
	m_name(that.m_name),
	m_locked(that.m_locked)
{
	that.m_locked = false;
}
// Move assignment: take over the key held by `that`.
// Both Locks must refer to the same LockSet (the reference member cannot
// be reseated); otherwise invalid_argument is thrown.
// Any key this Lock currently holds is released first, unless `that` holds
// that very same key.  Self-assignment is a no-op.
template <class T>
typename LockSet<T>::Lock &
LockSet<T>::Lock::operator=(Lock &&that)
{
	THROW_CHECK2(invalid_argument, &m_lockset, &that.m_lockset, &m_lockset == &that.m_lockset);
	if (this == &that) {
		// Without this guard the final `that.m_locked = false` would
		// clear our own flag and leak the key in the set.
		return *this;
	}
	// Release our key unless the incoming Lock is holding the same one.
	// (Previously an unlocked `that` with an equal name made us forget
	// our lock without releasing it.)
	if (m_locked && (!that.m_locked || that.m_name != m_name)) {
		unlock();
	}
	m_name = that.m_name;
	m_locked = that.m_locked;
	that.m_locked = false;
	return *this;
}
}
#endif // CRUCIBLE_LOCKSET_H

28
include/crucible/ntoa.h Normal file
View File

@ -0,0 +1,28 @@
#ifndef CRUCIBLE_NTOA_H
#define CRUCIBLE_NTOA_H
#include <string>
namespace crucible {
using namespace std;
// One row of a bits-to-name table consumed by bits_ntoa().
struct bits_ntoa_table {
// Value to match (after masking).
unsigned long n;
// Bits of the input that take part in the comparison.
unsigned long mask;
// Name to emit; nullptr marks the end of a table (see NTOA_TABLE_ENTRY_END).
const char *a;
};
// Render n as text using table a.
// NOTE(review): the table is presumably terminated by an NTOA_TABLE_ENTRY_END() row -- confirm.
string bits_ntoa(unsigned long n, const bits_ntoa_table *a);
};
// Combinations of bits (list multiple-bit entries first).
// The mask equals the value, so only the bits of x are compared; the name
// is the stringified macro argument.
#define NTOA_TABLE_ENTRY_BITS(x) { .n = (x), .mask = (x), .a = (#x) }
// Enumerations (entire value matches all bits): the mask is all ones.
#define NTOA_TABLE_ENTRY_ENUM(x) { .n = (x), .mask = ~0UL, .a = (#x) }
// End of table (sorry, gcc doesn't implement this)
// All fields zero / null.
#define NTOA_TABLE_ENTRY_END() { .n = 0, .mask = 0, .a = nullptr }
#endif // CRUCIBLE_NTOA_H

13
include/crucible/path.h Normal file
View File

@ -0,0 +1,13 @@
#ifndef CRUCIBLE_PATH_H
#define CRUCIBLE_PATH_H
#include <string>
namespace crucible {
using namespace std;
// Path helpers operating on std::string.
// NOTE(review): presumably the last path component of s -- confirm in the implementation.
string basename(string s);
// NOTE(review): presumably concatenates dir and base with a path separator -- confirm.
string join(string dir, string base);
};
#endif // CRUCIBLE_PATH_H

View File

@ -0,0 +1,78 @@
#ifndef CRUCIBLE_PROCESS_H
#define CRUCIBLE_PROCESS_H
#include "crucible/resource.h"
#include <functional>
#include <memory>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
namespace crucible {
using namespace std;
// Like thread, but for processes.
// TODO: thread has a few warts for this usage:
// - can't create one from its native_handle,
// - can't destroy one without joining/detaching it first
// - can't implement detach correctly without crossing threshold of insanity
// - WTF is native_handle() not const?
// A Process stores a single pid (m_pid).  It is movable but not copyable.
struct Process {
// These parts are for compatibility with std::thread
using id = ::pid_t;
using native_handle_type = ::pid_t;
~Process();
Process();
// Start a child via do_fork() that evaluates fn(args...).
// fn and args are captured by reference in a lambda that is handed to
// do_fork() before this constructor returns.
template <class Fn, class... Args>
Process(Fn fn, Args... args) :
Process()
{
do_fork(function<int()>([&]() { return fn(args...); }));
}
Process(const Process &) = delete;
Process(Process &&move_from);
bool joinable();
void detach();
native_handle_type native_handle();
id get_id();
// Modified thread members for Process
// join() calls waitpid(), returns status or exception (std::thread returns void)
using status_type = int;
status_type join();
// New members for Process
// kill() terminates a process in the usual Unix way
void kill(int sig = SIGTERM);
// take over ownership of an already-forked native process handle
Process(id pid);
private:
id m_pid;
// Fork helper used by the template constructor; receives the child's work as a callable.
void do_fork(function<int()>);
};
// ResourceTraits specialization that lets Process be managed through a
// ResourceHandle keyed by pid.  A pid of 0 is the null key.
template <>
struct ResourceTraits<Process::id, Process> {
// const_cast is needed because Process::native_handle() is not const.
Process::id get_key(const Process &res) const { return (const_cast<Process&>(res)).native_handle(); }
// Builds a Process that takes over an existing pid (Process(id) constructor).
shared_ptr<Process> make_resource(const Process::id &id) const { return make_shared<Process>(id); }
bool is_null_key(const Process::id &key) const { return !key; }
Process::id get_null_key() const { return 0; }
};
// Shared handle to a Process, keyed by pid.
typedef ResourceHandle<Process::id, Process> Pid;
// NOTE(review): by its name, returns the calling thread's id -- confirm in the implementation.
pid_t gettid();
}
#endif // CRUCIBLE_PROCESS_H

387
include/crucible/resource.h Normal file
View File

@ -0,0 +1,387 @@
#ifndef CRUCIBLE_RESOURCE_H
#define CRUCIBLE_RESOURCE_H
#include "crucible/error.h"
#include <cassert>
#include <map>
#include <memory>
#include <mutex>
#include <iostream>
namespace crucible {
using namespace std;
// Template classes for non-copiable resource owner objects
// for objects with process-wide unique names.
// Everything we need to know about Key and Resource.
// Specialize this template for your Resource class.
// Default implementations of all four members are provided further down
// in this header.
template <class Key, class Resource>
struct ResourceTraits {
// How to get the Key out of a Resource owner.
// If the owner owns no resource, returns "null" for "no Resource."
Key get_key(const Resource &res) const;
// How to construct a new Resource owner given _only_ the key.
// Usually just calls make_shared<Resource>(key).
shared_ptr<Resource> make_resource(const Key &key) const;
// Test a Key value to see if it is null (no active Resource has this Key value).
// Usually an equality test with get_null_key(), but sometimes many Key values are equivalent to null.
bool is_null_key(const Key &key) const;
// is_null_key(get_null_key()) == true
Key get_null_key() const;
};
// Shared handle to a Resource identified by a Key.
// Handles created for the same non-null key share one ResourceHolder,
// which is found through a static map of weak pointers guarded by a
// static mutex.
template <class Key, class Resource>
class ResourceHandle {
public:
using key_type = Key;
using resource_type = Resource;
using resource_ptr_type = shared_ptr<Resource>;
private:
using traits_type = ResourceTraits<Key, Resource>;
// Owns the Resource pointer; its destructor removes the map entry for
// the Resource's key.
class ResourceHolder {
resource_ptr_type m_ptr;
public:
~ResourceHolder();
ResourceHolder(resource_ptr_type that);
ResourceHolder(const ResourceHolder &that) = default;
ResourceHolder(ResourceHolder &&that) = default;
ResourceHolder& operator=(ResourceHolder &&that) = default;
ResourceHolder& operator=(const ResourceHolder &that) = default;
resource_ptr_type get_resource_ptr() const;
};
using holder_ptr_type = shared_ptr<ResourceHolder>;
using weak_holder_ptr_type = weak_ptr<ResourceHolder>;
using map_type = map<key_type, weak_holder_ptr_type>;
// The only instance variable
holder_ptr_type m_ptr;
// A bunch of static variables and functions
static mutex &s_mutex();
static shared_ptr<map_type> s_map();
// Find or create the holder for a key / for an existing Resource.
static holder_ptr_type insert(const key_type &key);
static holder_ptr_type insert(const resource_ptr_type &res);
// Drop the map entry for key (called from ~ResourceHolder).
static void erase(const key_type &key);
static ResourceTraits<Key, Resource> s_traits;
public:
// test for resource. A separate operator because key_type could be confused with bool.
bool operator!() const;
// get key_type for an active resource or null
key_type get_key() const;
// conversion/assignment to and from key_type
operator key_type() const;
ResourceHandle(const key_type &key);
ResourceHandle& operator=(const key_type &key);
// conversion to/from resource_ptr_type
ResourceHandle(const resource_ptr_type &res);
ResourceHandle& operator=(const resource_ptr_type &res);
// default constructor is public
ResourceHandle() = default;
// forward anything else to the Resource constructor
// if we can do so unambiguously
// (requires at least two arguments, so it cannot collide with the
// single-argument constructors above)
template<class A1, class A2, class... Args>
ResourceHandle(A1 a1, A2 a2, Args... args) : ResourceHandle( make_shared<Resource>(a1, a2, args...) )
{
}
// forward anything else to a Resource factory method
template<class... Args>
static
ResourceHandle
make(Args... args) {
return ResourceHandle( make_shared<Resource>(args...) );
}
// get pointer to Resource object (nothrow, result may be null)
resource_ptr_type get_resource_ptr() const;
// this version throws and is probably not thread safe
resource_ptr_type operator->() const;
// dynamic casting of the resource (throws if cast fails)
template <class T> shared_ptr<T> cast() const;
};
// Default ResourceTraits implementations, used when no specialization is
// provided for a given Key/Resource pair.
// Default: ask the Resource itself for its key.
template <class Key, class Resource>
Key
ResourceTraits<Key, Resource>::get_key(const Resource &res) const
{
return res.get_key();
}
// Default: construct the Resource from the key alone.
template <class Key, class Resource>
shared_ptr<Resource>
ResourceTraits<Key, Resource>::make_resource(const Key &key) const
{
return make_shared<Resource>(key);
}
// Default: a key is null when it tests false.
template <class Key, class Resource>
bool
ResourceTraits<Key, Resource>::is_null_key(const Key &key) const
{
return !key;
}
// Default: the null key is whatever NULL converts to for this Key type.
template <class Key, class Resource>
Key
ResourceTraits<Key, Resource>::get_null_key() const
{
return NULL;
}
// Wrap an existing Resource pointer.  Registration in the static map is
// done by the caller (see comment in the body).
template <class Key, class Resource>
ResourceHandle<Key, Resource>::ResourceHolder::ResourceHolder(resource_ptr_type that) :
m_ptr(that)
{
// Cannot insert ourselves here since our shared_ptr does not exist yet.
}
// The mutex guarding the static map, kept as a function-local static
// (one per Key/Resource instantiation).
template <class Key, class Resource>
mutex &
ResourceHandle<Key, Resource>::s_mutex()
{
static mutex gcc_won_t_instantiate_this_either;
return gcc_won_t_instantiate_this_either;
}
// The key -> weak holder map, created on first use and kept in a
// function-local static (one per Key/Resource instantiation).
template <class Key, class Resource>
shared_ptr<typename ResourceHandle<Key, Resource>::map_type>
ResourceHandle<Key, Resource>::s_map()
{
	static shared_ptr<map_type> registry;
	if (registry) {
		return registry;
	}
	// First call: create the map.
	registry = make_shared<map_type>();
	return registry;
}
// Remove the map entry for `key`.
// For a null key there is no entry to look up, so every expired weak
// pointer is swept out of the map instead.  For a non-null key a warning
// is printed on cerr if exactly one entry was not removed.
template <class Key, class Resource>
void
ResourceHandle<Key, Resource>::erase(const key_type &key)
{
	unique_lock<mutex> lock(s_mutex());
	const auto registry = s_map();
	if (!s_traits.is_null_key(key)) {
		auto erased = registry->erase(key);
		if (erased != 1) {
			cerr << __PRETTY_FUNCTION__ << ": WARNING: s_map()->erase(" << key << ") returned " << erased << " != 1" << endl;
		}
		return;
	}
	// Resources are allowed to set their Keys to null.
	// Clean out any dead weak_ptr objects.
	auto i = registry->begin();
	while (i != registry->end()) {
		if (i->second.expired()) {
			i = registry->erase(i);
		} else {
			++i;
		}
	}
}
// Unregister the held Resource's key from the static map.
// A holder without a Resource only produces a warning on cerr.
template <class Key, class Resource>
ResourceHandle<Key, Resource>::ResourceHolder::~ResourceHolder()
{
	if (m_ptr) {
		ResourceHandle::erase(s_traits.get_key(*m_ptr));
	} else {
		// Probably something harmless like a failed constructor.
		cerr << __PRETTY_FUNCTION__ << ": WARNING: destroying null m_ptr" << endl;
	}
}
// Return the holder registered for `key`, creating a new Resource (via
// the traits) and registering it if there is no live one.
// A null key yields an empty holder pointer.
template <class Key, class Resource>
typename ResourceHandle<Key, Resource>::holder_ptr_type
ResourceHandle<Key, Resource>::insert(const key_type &key)
{
	// no Resources for null keys
	if (s_traits.is_null_key(key)) {
		return holder_ptr_type();
	}
	unique_lock<mutex> lock(s_mutex());
	const auto registry = s_map();
	// Reuse a live holder if one is registered; a weak_ptr may have expired.
	const auto found = registry->find(key);
	if (found != registry->end()) {
		if (holder_ptr_type live = found->second.lock()) {
			return live;
		}
	}
	// not found or expired, throw any existing ref away and make a new one
	resource_ptr_type resource = s_traits.make_resource(key);
	holder_ptr_type fresh = make_shared<ResourceHolder>(resource);
	// The map only keeps a weak_ptr; the caller owns the shared_ptr.
	(*registry)[key] = fresh;
	return fresh;
};
// Return the holder for an existing Resource object, registering it under
// its key if no live holder is registered yet.
//  - A null pointer or a Resource with a null key yields an empty holder.
//  - If a live holder for the same key already wraps this Resource, that
//    holder is returned.
//  - If a live holder for the same key wraps a *different* Resource, the
//    process is aborted: duplicate Resources for one key are not allowed.
//  - Otherwise (no entry, expired entry, or an entry whose Resource has
//    since set its key to null) a new holder is created and stored,
//    replacing any stale entry.
template <class Key, class Resource>
typename ResourceHandle<Key, Resource>::holder_ptr_type
ResourceHandle<Key, Resource>::insert(const resource_ptr_type &res)
{
	// no Resource, no ResourceHolder.
	if (!res) {
		return holder_ptr_type();
	}
	// no ResourceHolders for null keys either.
	key_type key = s_traits.get_key(*res);
	if (s_traits.is_null_key(key)) {
		return holder_ptr_type();
	}
	unique_lock<mutex> lock(s_mutex());
	// find ResourceHolder for non-null key
	auto found = s_map()->find(key);
	if (found != s_map()->end()) {
		holder_ptr_type rv = (*found).second.lock();
		// The map doesn't own the ResourceHolders, the ResourceHandles do.
		// It's OK for the map to contain an expired weak_ptr to some dead ResourceHolder...
		if (rv) {
			// found ResourceHolder, look at pointer
			resource_ptr_type rp = rv->get_resource_ptr();
			// We do not store references to null Resources.
			assert(rp);
			// Key retrieved for an existing object must match key searched or be null.
			key_type found_key = s_traits.get_key(*rp);
			bool found_key_is_null = s_traits.is_null_key(found_key);
			assert(found_key_is_null || found_key == key);
			if (!found_key_is_null) {
				// We do not store references to duplicate resources.
				if (rp.owner_before(res) || res.owner_before(rp)) {
					cerr << "inserting new Resource with existing Key " << key << " not allowed at " << __PRETTY_FUNCTION__ << endl;
					abort();
					// THROW_ERROR(out_of_range, "inserting new Resource with existing Key " << key << " not allowed at " << __PRETTY_FUNCTION__);
				}
				// rv is good, return it
				return rv;
			}
		}
	}
	// not found or expired, make a new one
	holder_ptr_type rv = make_shared<ResourceHolder>(res);
	// Assign through operator[] so that a stale entry is really replaced:
	// map::insert() leaves an existing element untouched, which would leave
	// the new holder unregistered.
	(*s_map())[key] = rv;
	return rv;
}
// Construct a handle from a key: shares the registered holder or creates one.
template <class Key, class Resource>
ResourceHandle<Key, Resource>::ResourceHandle(const key_type &key) :
	m_ptr(insert(key))
{
}
// Rebind this handle to the holder for `key`.
template <class Key, class Resource>
ResourceHandle<Key, Resource>&
ResourceHandle<Key, Resource>::operator=(const key_type &key)
{
	holder_ptr_type holder = insert(key);
	m_ptr = holder;
	return *this;
}
// Construct a handle from an existing Resource pointer.
template <class Key, class Resource>
ResourceHandle<Key, Resource>::ResourceHandle(const resource_ptr_type &res) :
	m_ptr(insert(res))
{
}
// Rebind this handle to the holder for an existing Resource pointer.
template <class Key, class Resource>
ResourceHandle<Key, Resource>&
ResourceHandle<Key, Resource>::operator=(const resource_ptr_type &res)
{
	holder_ptr_type holder = insert(res);
	m_ptr = holder;
	return *this;
}
// The Resource pointer owned by this holder (may be null).
template <class Key, class Resource>
typename ResourceHandle<Key, Resource>::resource_ptr_type
ResourceHandle<Key, Resource>::ResourceHolder::get_resource_ptr() const
{
	return m_ptr;
}
// The Resource pointer behind this handle, or a null pointer when the
// handle has no holder.  Does not throw.
template <class Key, class Resource>
typename ResourceHandle<Key, Resource>::resource_ptr_type
ResourceHandle<Key, Resource>::get_resource_ptr() const
{
	return m_ptr ? m_ptr->get_resource_ptr() : resource_ptr_type();
}
// Member access on the Resource.  Throws out_of_range when the handle
// does not refer to a Resource.
template <class Key, class Resource>
typename ResourceHandle<Key, Resource>::resource_ptr_type
ResourceHandle<Key, Resource>::operator->() const
{
	resource_ptr_type resource = get_resource_ptr();
	const bool have_resource = static_cast<bool>(resource);
	if (!have_resource) {
		THROW_ERROR(out_of_range, __PRETTY_FUNCTION__ << " called on null Resource");
	}
	return resource;
}
// dynamic_pointer_cast the Resource to T.
// Returns a null pointer when the handle has no Resource; throws bad_cast
// when a Resource exists but is not a T.
template <class Key, class Resource>
template <class T>
shared_ptr<T>
ResourceHandle<Key, Resource>::cast() const
{
	resource_ptr_type resource = get_resource_ptr();
	if (!resource) {
		return shared_ptr<T>();
	}
	shared_ptr<T> converted = dynamic_pointer_cast<T>(resource);
	if (!converted) {
		throw bad_cast();
	}
	return converted;
}
// Key of the Resource behind this handle, or the traits' null key when
// there is no Resource.
template <class Key, class Resource>
typename ResourceHandle<Key, Resource>::key_type
ResourceHandle<Key, Resource>::get_key() const
{
	resource_ptr_type resource = get_resource_ptr();
	return resource ? s_traits.get_key(*resource) : s_traits.get_null_key();
}
// Implicit conversion to the key type; same value as get_key().
template <class Key, class Resource>
ResourceHandle<Key, Resource>::operator key_type() const
{
	return get_key();
}
// True when the handle's key is null, i.e. it refers to no active Resource.
template <class Key, class Resource>
bool
ResourceHandle<Key, Resource>::operator!() const
{
	const key_type current = operator key_type();
	return s_traits.is_null_key(current);
}
// Definition of the static traits object.
template <class Key, class Resource>
ResourceTraits<Key, Resource> ResourceHandle<Key, Resource>::s_traits;
}
#endif // RESOURCE_H

67
include/crucible/string.h Normal file
View File

@ -0,0 +1,67 @@
#ifndef CRUCIBLE_STRING_H
#define CRUCIBLE_STRING_H
#include "crucible/error.h"
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>
namespace crucible {
using namespace std;
// Zero-initialize a base class object (usually a C struct).
// Exactly sizeof(Base) bytes starting at `that` are set to zero, so when
// called with an explicit Base on a derived object only the base part is
// cleared.
template <class Base>
void
memset_zero(Base *that)
{
	void *const raw = static_cast<void *>(that);
	memset(raw, 0, sizeof(*that));
}
// Copy a base class object (usually a C struct) into a vector<char>.
// The result holds exactly sizeof(Base) bytes: the object representation
// of *that.
template <class Base>
vector<char>
vector_copy_struct(Base *that)
{
	vector<char> bytes(sizeof(Base));
	memcpy(bytes.data(), static_cast<const Base *>(that), sizeof(Base));
	return bytes;
}
// int->hex conversion with sprintf
string to_hex(uint64_t i);
// hex->int conversion with stoull
uint64_t from_hex(const string &s);
// asprintf with string output and exceptions
// The buffer allocated by asprintf() is copied into a std::string and then
// freed.  A -1 result from asprintf() is handled by DIE_IF_MINUS_ONE.
// NOTE(review): the local name `rv` is part of the expression passed to
// DIE_IF_MINUS_ONE, which may appear in its error text -- keep it stable.
template<class... Args>
string
astringprintf(const char *fmt, Args... args)
{
char *rv = NULL;
DIE_IF_MINUS_ONE(asprintf(&rv, fmt, args...));
string rv_string = rv;
free(rv);
return rv_string;
}
// Overload taking the format as a std::string; forwards to the const char * version.
template<class... Args>
string
astringprintf(const string &fmt, Args... args)
{
return astringprintf(fmt.c_str(), args...);
}
// Split s into pieces at delim.
// NOTE(review): handling of empty fields is defined by the implementation -- not visible here.
vector<string> split(string delim, string s);
// Shut up and give me the difference between two pointers.
// Returns (a - b) measured in bytes, regardless of the pointee types.
template <class P1, class P2>
ptrdiff_t
pointer_distance(const P1 *a, const P2 *b)
{
	const char *const a_bytes = reinterpret_cast<const char *>(a);
	const char *const b_bytes = reinterpret_cast<const char *>(b);
	return a_bytes - b_bytes;
}
};
#endif // CRUCIBLE_STRING_H

49
include/crucible/time.h Normal file
View File

@ -0,0 +1,49 @@
#ifndef CRUCIBLE_TIME_H
#define CRUCIBLE_TIME_H
#include "crucible/error.h"
#include <chrono>
#include <mutex>
#include <ostream>
namespace crucible {
// Sleep helper taking seconds as a double.
// NOTE(review): the meaning of the returned double is defined by the implementation -- confirm.
double nanosleep(double secs);
// Stopwatch based on chrono::high_resolution_clock; m_start is the reference time point.
class Timer {
chrono::high_resolution_clock::time_point m_start;
public:
Timer();
// NOTE(review): presumably seconds elapsed since m_start -- confirm.
double age() const;
double report(int precision = 1000) const;
void reset();
// Set the reference point, either to an absolute time point or from a double delta.
void set(const chrono::high_resolution_clock::time_point &start);
void set(double delta);
double lap();
// Comparisons against a double (NOTE(review): presumably compared with age() -- confirm).
bool operator<(double d) const;
bool operator>(double d) const;
};
ostream &operator<<(ostream &os, const Timer &t);
// Rate limiter with a token counter (m_tokens), a rate (m_rate) and a
// burst size (m_burst), protected by m_mutex.
// NOTE(review): the member names suggest a token-bucket scheme -- confirm in the implementation.
class RateLimiter {
Timer m_timer;
double m_rate;
double m_burst;
double m_tokens;
mutex m_mutex;
void update_tokens();
public:
RateLimiter(double rate, double burst);
RateLimiter(double rate);
// `cost` defaults to one unit.
void sleep_for(double cost = 1.0);
bool is_ready();
void borrow(double cost = 1.0);
};
}
#endif // CRUCIBLE_TIME_H

View File

@ -0,0 +1,188 @@
#ifndef CRUCIBLE_TIMEQUEUE_H
#define CRUCIBLE_TIMEQUEUE_H
#include <crucible/error.h>
#include <crucible/time.h>
#include <condition_variable>
#include <limits>
#include <list>
#include <memory>
#include <mutex>
#include <set>
namespace crucible {
using namespace std;
// Queue of Tasks ordered by the time at which each becomes due.
// Items live in an ordered set guarded by m_mutex; m_cond_empty is
// signalled when items are added and m_cond_full when items are removed.
template <class Task>
class TimeQueue {
public:
using Timestamp = chrono::high_resolution_clock::time_point;
private:
struct Item {
Timestamp m_time;
unsigned m_id;
Task m_task;
// Order by due time, then by id, so items with equal times keep
// their creation order and never compare equal.
bool operator<(const Item &that) const {
if (m_time < that.m_time) return true;
if (that.m_time < m_time) return false;
return m_id < that.m_id;
}
// Id counter, one per Task type (shared by all TimeQueue<Task> objects).
// NOTE(review): incremented without its own synchronization -- callers
// in this header construct Items while holding the queue's m_mutex.
static unsigned s_id;
Item(const Timestamp &time, const Task& task) :
m_time(time),
m_id(++s_id),
m_task(task)
{
}
};
set<Item> m_set;
mutable mutex m_mutex;
condition_variable m_cond_full, m_cond_empty;
size_t m_max_queue_depth;
public:
~TimeQueue();
// By default the queue depth is effectively unlimited.
TimeQueue(size_t max_queue_depth = numeric_limits<size_t>::max());
// Add a task due `delay` seconds from now; push() may block on the depth limit.
void push(const Task &task, double delay = 0);
void push_nowait(const Task &task, double delay = 0);
// Remove and return the earliest task once it is due; pop() blocks until then.
Task pop();
// Non-blocking variant: returns false when nothing is due yet.
bool pop_nowait(Task &t);
// Seconds until the earliest task is due (infinity when empty).
double when() const;
size_t size() const;
bool empty() const;
// Copy up to `count` of the earliest tasks without removing them.
list<Task> peek(size_t count) const;
};
template <class Task> unsigned TimeQueue<Task>::Item::s_id = 0;
// Reports on cerr if tasks are still queued when the queue is destroyed;
// the remaining items are then destroyed with the set.
template <class Task>
TimeQueue<Task>::~TimeQueue()
{
if (!m_set.empty()) {
cerr << "ERROR: " << m_set.size() << " locked items still in TimeQueue at destruction" << endl;
}
}
// Add `task`, due `delay` seconds from now.
// Blocks while the queue already holds m_max_queue_depth items, so the
// queue never grows beyond that depth through push().  (The earlier test
// `size() > max` admitted one item too many.)
template <class Task>
void
TimeQueue<Task>::push(const Task &task, double delay)
{
	const auto offset = chrono::duration_cast<chrono::high_resolution_clock::duration>(chrono::duration<double>(delay));
	const Timestamp time = chrono::high_resolution_clock::now() + offset;
	unique_lock<mutex> lock(m_mutex);
	// Wait for a consumer to make room.
	while (m_set.size() >= m_max_queue_depth) {
		m_cond_full.wait(lock);
	}
	m_set.insert(Item(time, task));
	// Wake consumers waiting for work (or for an earlier deadline).
	m_cond_empty.notify_all();
}
// Add `task`, due `delay` seconds from now, ignoring the depth limit
// (never blocks waiting for room).
template <class Task>
void
TimeQueue<Task>::push_nowait(const Task &task, double delay)
{
	const auto offset = chrono::duration_cast<chrono::high_resolution_clock::duration>(chrono::duration<double>(delay));
	const Timestamp due = chrono::high_resolution_clock::now() + offset;
	unique_lock<mutex> lock(m_mutex);
	m_set.insert(Item(due, task));
	// Wake consumers waiting for work.
	m_cond_empty.notify_all();
}
template <class Task>
Task
TimeQueue<Task>::pop()
{
unique_lock<mutex> lock(m_mutex);
while (1) {
while (m_set.empty()) {
m_cond_empty.wait(lock);
}
Timestamp now = chrono::high_resolution_clock::now();
if (now > m_set.begin()->m_time) {
Task rv = m_set.begin()->m_task;
m_set.erase(m_set.begin());
m_cond_full.notify_all();
return rv;
}
m_cond_empty.wait_until(lock, m_set.begin()->m_time);
}
}
template <class Task>
bool
TimeQueue<Task>::pop_nowait(Task &t)
{
unique_lock<mutex> lock(m_mutex);
if (m_set.empty()) {
return false;
}
Timestamp now = chrono::high_resolution_clock::now();
if (now <= m_set.begin()->m_time) {
return false;
}
t = m_set.begin()->m_task;
m_set.erase(m_set.begin());
m_cond_full.notify_all();
return true;
}
template <class Task>
double
TimeQueue<Task>::when() const
{
unique_lock<mutex> lock(m_mutex);
if (m_set.empty()) {
return numeric_limits<double>::infinity();
}
return chrono::duration<double>(m_set.begin()->m_time - chrono::high_resolution_clock::now()).count();
}
template <class Task>
size_t
TimeQueue<Task>::size() const
{
unique_lock<mutex> lock(m_mutex);
return m_set.size();
}
template <class Task>
bool
TimeQueue<Task>::empty() const
{
unique_lock<mutex> lock(m_mutex);
return m_set.empty();
}
template <class Task>
list<Task>
TimeQueue<Task>::peek(size_t count) const
{
unique_lock<mutex> lock(m_mutex);
list<Task> rv;
auto it = m_set.begin();
while (count-- && it != m_set.end()) {
rv.push_back(it->m_task);
++it;
}
return rv;
}
template <class Task>
TimeQueue<Task>::TimeQueue(size_t max_depth) :
m_max_queue_depth(max_depth)
{
}
}
#endif // CRUCIBLE_TIMEQUEUE_H

14
include/crucible/uuid.h Normal file
View File

@ -0,0 +1,14 @@
#ifndef CRUCIBLE_UUID_H
#define CRUCIBLE_UUID_H
#include <string>
#include <uuid/uuid.h>
namespace crucible {
using namespace std;
// Declared to take a 16-byte UUID and return a string.
// NOTE(review): presumably formats via libuuid's uuid_unparse(); the
// definition is not visible from this header -- confirm in lib/uuid.cc.
string uuid_unparse(const unsigned char a[16]);
}
#endif // CRUCIBLE_UUID_H

View File

@ -0,0 +1,189 @@
#ifndef CRUCIBLE_WORKQUEUE_H
#define CRUCIBLE_WORKQUEUE_H
#include <crucible/error.h>
#include <condition_variable>
#include <limits>
#include <list>
#include <memory>
#include <mutex>
#include <set>
namespace crucible {
using namespace std;
/// Thread-safe work queue backed by an ordered set.
///
/// Because the container is a set, pushing a task that is already
/// queued does not add a second entry, and tasks are popped in the
/// set's sort order rather than in insertion order.
template <class Task>
class WorkQueue {

public:
	using set_type = set<Task>;
	using key_type = Task;

private:

	set_type m_set;
	mutable mutex m_mutex;
	condition_variable m_cond_full, m_cond_empty;
	size_t m_max_queue_depth;

public:
	~WorkQueue();
	template <class... Args> WorkQueue(size_t max_queue_depth, Args... args);
	template <class... Args> WorkQueue(Args... args);

	void push(const key_type &name);
	void push_wait(const key_type &name, size_t limit);
	void push_nowait(const key_type &name);

	key_type pop();
	bool pop_nowait(key_type &rv);
	// The observers below only read m_set under the (mutable) mutex,
	// so they are const and usable through const references.
	key_type peek() const;

	size_t size() const;
	bool empty() const;
	set_type copy() const;
	list<Task> peek(size_t count) const;

};

/// Reports (on stderr) any tasks that were never popped.
template <class Task>
WorkQueue<Task>::~WorkQueue()
{
	if (!m_set.empty()) {
		cerr << "ERROR: " << m_set.size() << " locked items still in WorkQueue " << this << " at destruction" << endl;
	}
}

/// Insert `name`, blocking while it is not already queued and the
/// queue holds more than m_max_queue_depth items.
/// NOTE(review): this test uses '>' while push_wait() uses '>=', so
/// push() lets the queue reach max_queue_depth + 1 -- confirm intent.
template <class Task>
void
WorkQueue<Task>::push(const key_type &name)
{
	unique_lock<mutex> lock(m_mutex);
	while (!m_set.count(name) && m_set.size() > m_max_queue_depth) {
		m_cond_full.wait(lock);
	}
	m_set.insert(name);
	m_cond_empty.notify_all();
}

/// Insert `name`, blocking while it is not already queued and the
/// queue holds `limit` or more items.
template <class Task>
void
WorkQueue<Task>::push_wait(const key_type &name, size_t limit)
{
	unique_lock<mutex> lock(m_mutex);
	while (!m_set.count(name) && m_set.size() >= limit) {
		m_cond_full.wait(lock);
	}
	m_set.insert(name);
	m_cond_empty.notify_all();
}

/// Insert `name` immediately, ignoring any depth limit.
template <class Task>
void
WorkQueue<Task>::push_nowait(const key_type &name)
{
	unique_lock<mutex> lock(m_mutex);
	m_set.insert(name);
	m_cond_empty.notify_all();
}

/// Block until the queue is non-empty, then remove and return its
/// first element.
template <class Task>
typename WorkQueue<Task>::key_type
WorkQueue<Task>::pop()
{
	unique_lock<mutex> lock(m_mutex);
	while (m_set.empty()) {
		m_cond_empty.wait(lock);
	}
	key_type rv = *m_set.begin();
	m_set.erase(m_set.begin());
	m_cond_full.notify_all();
	return rv;
}

/// If the queue is non-empty, move its first element into `rv` and
/// return true; otherwise leave `rv` untouched and return false.
template <class Task>
bool
WorkQueue<Task>::pop_nowait(key_type &rv)
{
	unique_lock<mutex> lock(m_mutex);
	if (m_set.empty()) {
		return false;
	}
	rv = *m_set.begin();
	m_set.erase(m_set.begin());
	m_cond_full.notify_all();
	return true;
}

/// Copy of the first element, or a default-constructed key_type when
/// the queue is empty.  Nothing is removed.
template <class Task>
typename WorkQueue<Task>::key_type
WorkQueue<Task>::peek() const
{
	unique_lock<mutex> lock(m_mutex);
	if (m_set.empty()) {
		return key_type();
	} else {
		return *m_set.begin();
	}
}

/// Number of queued tasks.
template <class Task>
size_t
WorkQueue<Task>::size() const
{
	unique_lock<mutex> lock(m_mutex);
	return m_set.size();
}

/// True when no tasks are queued.
template <class Task>
bool
WorkQueue<Task>::empty() const
{
	unique_lock<mutex> lock(m_mutex);
	return m_set.empty();
}

/// Snapshot copy of the whole underlying set.
template <class Task>
typename WorkQueue<Task>::set_type
WorkQueue<Task>::copy() const
{
	unique_lock<mutex> lock(m_mutex);
	return m_set;
}

/// Copies of up to `count` elements from the front of the queue, in
/// queue order, without removing them.
template <class Task>
list<Task>
WorkQueue<Task>::peek(size_t count) const
{
	unique_lock<mutex> lock(m_mutex);
	list<Task> rv;
	// Iterate by reference so each element is copied only once.
	for (const auto &i : m_set) {
		if (count--) {
			rv.push_back(i);
		} else {
			break;
		}
	}
	return rv;
}

/// Unbounded queue; `args` are forwarded to the underlying set's
/// constructor.
template <class Task>
template <class... Args>
WorkQueue<Task>::WorkQueue(Args... args) :
	m_set(args...),
	m_max_queue_depth(numeric_limits<size_t>::max())
{
}

/// Queue whose push() blocks based on `max_depth`; `args` are
/// forwarded to the underlying set's constructor.
template <class Task>
template <class... Args>
WorkQueue<Task>::WorkQueue(size_t max_depth, Args... args) :
	m_set(args...),
	m_max_queue_depth(max_depth)
{
}
}
#endif // CRUCIBLE_WORKQUEUE_H

37
lib/Makefile Normal file
View File

@ -0,0 +1,37 @@
# Build the shared library libcrucible.so from the sources in this directory.
default: libcrucible.so

OBJS = \
	crc64.o \
	chatter.o \
	error.o \
	execpipe.o \
	extentwalker.o \
	fd.o \
	fs.o \
	interp.o \
	ntoa.o \
	path.o \
	process.o \
	string.o \
	time.o \
	uuid.o

include ../makeflags

# Linker mode flags only.  Libraries live in LIBS so they can be placed
# AFTER the objects on the link line: linkers that resolve symbols left
# to right (or run with --as-needed) may otherwise drop -luuid.
LDFLAGS = -shared
LIBS = -luuid

# Regenerate header dependencies; written to a temporary file first so a
# failed run does not leave a truncated depends.mk behind.
depends.mk: *.c *.cc
	for x in *.c; do $(CC) $(CFLAGS) -M "$$x"; done > depends.mk.new
	for x in *.cc; do $(CXX) $(CXXFLAGS) -M "$$x"; done >> depends.mk.new
	mv -fv depends.mk.new depends.mk

-include depends.mk

%.o: %.c
	$(CC) $(CFLAGS) -o $@ -c $<

%.o: %.cc ../include/crucible/%.h
	$(CXX) $(CXXFLAGS) -o $@ -c $<

libcrucible.so: $(OBJS) Makefile
	$(CXX) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)

140
lib/chatter.cc Normal file
View File

@ -0,0 +1,140 @@
#include "crucible/chatter.h"
#include "crucible/error.h"
#include "crucible/path.h"
#include "crucible/process.h"
#include <cassert>
#include <ctime>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <pthread.h>
namespace crucible {
using namespace std;
// Set of names enabled through the CRUCIBLE_CHATTER environment
// variable; null until init_chatter_names() has run.
// unique_ptr replaces the deprecated auto_ptr.  Every use in this file
// (get/reset/operator->) is supported by both.
static unique_ptr<set<string>> chatter_names;

// Characters that separate names inside CRUCIBLE_CHATTER.
static const char *SPACETAB = " \t";

// Parse CRUCIBLE_CHATTER into chatter_names on the first call; later
// calls are no-ops.  The variable's value and every parsed name are
// echoed to stderr.
static
void
init_chatter_names()
{
	if (!chatter_names.get()) {
		chatter_names.reset(new set<string>);
		const char *sp = ::getenv("CRUCIBLE_CHATTER");
		if (sp) {
			cerr << "CRUCIBLE_CHATTER = '" << sp << "'" << endl;
			string s(sp);
			while (!s.empty()) {
				// Drop leading separators.  If only separators
				// remain this erases everything and we stop.
				s.erase(0, s.find_first_not_of(SPACETAB));
				if (s.empty()) {
					break;
				}
				// `last` is npos for the final word; substr and
				// erase both accept that as "to end of string".
				size_t last = s.find_first_of(SPACETAB);
				string first_word = s.substr(0, last);
				cerr << "\t'" << first_word << "'" << endl;
				chatter_names->insert(first_word);
				s.erase(0, last);
			}
		}
	}
}
// Construct a Chatter that remembers `name` and the destination stream
// `os`; the destructor later uses both when emitting the message.
Chatter::Chatter(string name, ostream &os)
: m_name(name), m_os(os)
{
}
// Emit everything accumulated in m_oss to m_os, one line at a time.
// Each line is prefixed with "<local time> <pid>.<tid>[ <name>]: " and
// written (with its newline) as a single string followed by a flush.
Chatter::~Chatter()
{
	// Timestamp for the prefix; each libc call is checked.
	time_t ltime;
	DIE_IF_MINUS_ONE(time(&ltime));
	struct tm ltm;
	DIE_IF_ZERO(localtime_r(&ltime, &ltm));
	char buf[1024];
	DIE_IF_ZERO(strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", &ltm));

	ostringstream prefix_stream;
	prefix_stream << buf << " " << getpid() << "." << gettid();
	if (!m_name.empty()) {
		prefix_stream << " " << m_name;
	}
	prefix_stream << ": ";
	const string prefix = prefix_stream.str();

	const string body = m_oss.str();
	for (string::size_type pos = 0; pos < body.size(); ) {
		const size_t newline_at = body.find_first_of("\n", pos);
		if (newline_at == string::npos) {
			// Final fragment without a terminating newline: supply one.
			m_os << (prefix + body.substr(pos) + "\n") << flush;
			break;
		}
		assert(body[newline_at] == '\n');
		m_os << (prefix + body.substr(pos, newline_at - pos) + "\n") << flush;
		pos = newline_at + 1;
	}
}
// Move constructor: take over the text accumulated so far in `c` and
// leave `c` empty so its destructor prints nothing.
Chatter::Chatter(Chatter &&c)
	// Open the new buffer in "at end" mode.  A string stream built from a
	// string with the default mode starts writing at offset 0, so text
	// streamed in afterwards would overwrite the moved content.
	: m_name(c.m_name), m_os(c.m_os), m_oss(c.m_oss.str(), ios_base::out | ios_base::ate)
{
	c.m_oss.str("");
}
// Registry of ChatterBox objects; the constructor inserts `this` and the
// destructor erases it.
set<ChatterBox*> ChatterBox::s_boxes;
// Accessor returning a mutable reference to the registry.
set<ChatterBox*>& ChatterBox::all_boxes()
{
return s_boxes;
}
// Register this box and enable it when either its file basename or its
// pretty-function string is listed in CRUCIBLE_CHATTER.  When the list
// is non-empty but matches neither, say so on stderr.
ChatterBox::ChatterBox(string file, int line, string pretty_function, ostream &os)
	: m_file(basename(file)), m_line(line), m_pretty_function(pretty_function), m_enabled(false), m_os(os)
{
	s_boxes.insert(this);
	init_chatter_names();
	const bool file_listed = chatter_names->count(m_file) != 0;
	const bool function_listed = !file_listed && chatter_names->count(m_pretty_function) != 0;
	if (file_listed || function_listed) {
		m_enabled = true;
	} else if (!chatter_names->empty()) {
		cerr << "CRUCIBLE_CHATTER does not list '" << m_file << "' or '" << m_pretty_function << "'" << endl;
	}
	// cerr << "ChatterBox " << reinterpret_cast<void*>(this) << " constructed" << endl;
}
// Remove this box from the s_boxes registry.
ChatterBox::~ChatterBox()
{
s_boxes.erase(this);
// cerr << "ChatterBox " << reinterpret_cast<void*>(this) << " destructed" << endl;
}
// Override the enabled flag that the constructor derived from
// CRUCIBLE_CHATTER.
void
ChatterBox::set_enable(bool en)
{
m_enabled = en;
}
// Store the callback `f`; the destructor invokes it only when an
// exception is in flight.
ChatterUnwinder::ChatterUnwinder(function<void()> f) :
m_func(f)
{
}
// Run the stored callback only when this object is being destroyed
// while an exception is propagating; on normal scope exit do nothing.
ChatterUnwinder::~ChatterUnwinder()
{
	if (!uncaught_exception()) {
		return;
	}
	m_func();
}
};

59
lib/crc64.cc Normal file
View File

@ -0,0 +1,59 @@
#include "crucible/crc64.h"
#define POLY64REV 0xd800000000000000ULL
namespace crucible {
// Byte-indexed lookup table used by the crc64() overloads; filled in by
// init_crc64_table().
static uint64_t CRCTable[256];

// Fill CRCTable exactly once.
//
// The work is done by the initializer of a function-local static, which
// the language runs once even when several threads make their first
// call at the same time.  A plain unsynchronized "already initialized"
// flag gives no such guarantee.
static void init_crc64_table()
{
	static const bool table_ready = [] {
		for (int i = 0; i <= 255; i++) {
			uint64_t part = i;
			// Process the 8 bits of the index, LSB first, with the
			// reflected polynomial.
			for (int j = 0; j < 8; j++) {
				if (part & 1) {
					part = (part >> 1) ^ POLY64REV;
				} else {
					part >>= 1;
				}
			}
			CRCTable[i] = part;
		}
		return true;
	}();
	(void)table_ready;
}
// CRC of a NUL-terminated string; the terminator is not included.
// Starts from 0 and consumes one byte per step through CRCTable.
uint64_t
Digest::CRC::crc64(const char *s)
{
	init_crc64_table();
	uint64_t crc = 0;
	while (*s) {
		// Only the low 8 bits select the table slot.
		const uint64_t slot = (crc ^ static_cast<uint64_t>(*s)) & 0xff;
		crc = (crc >> 8) ^ CRCTable[slot];
		++s;
	}
	return crc;
}
// CRC of the `len` bytes starting at `p`.  Starts from 0 and consumes
// one byte per step through CRCTable.
uint64_t
Digest::CRC::crc64(const void *p, size_t len)
{
	init_crc64_table();
	const unsigned char *bytes = static_cast<const unsigned char *>(p);
	uint64_t crc = 0;
	for (size_t i = 0; i < len; ++i) {
		crc = (crc >> 8) ^ CRCTable[(crc ^ bytes[i]) & 0xff];
	}
	return crc;
}
};

74
lib/error.cc Normal file
View File

@ -0,0 +1,74 @@
#include "crucible/error.h"
#include <cstdarg>
#include <iostream>
#include <cxxabi.h>
namespace crucible {
using namespace std;
// Describe an exception as "exception type <type>: <what()>", using the
// demangled dynamic type name when demangling succeeds and the raw
// typeid name otherwise.
static
string
analyze_exception(const exception &e)
{
	// Let's ignore all the potential memory allocation exceptions for now, K?
	int status;
	char *realname = abi::__cxa_demangle(typeid(e).name(), 0, 0, &status);
	// This is questionable since anything that would cause
	// cxa_demangle to fail will probably cause an exception anyway.
	const char *shown_name = realname ? realname : typeid(e).name();

	ostringstream oss;
	oss << "exception type " << shown_name << ": " << e.what();

	// __cxa_demangle returns malloc'ed storage; free(NULL) is a no-op.
	free(realname);
	return oss.str();
}
// FIXME: could probably avoid some of these levels of indirection
// Process-wide sink for exception explanations; writes to stderr until
// replaced via set_catch_explainer().
// The lambda captures nothing: a capture-default such as [&] is not
// permitted on a lambda at namespace scope, where there are no
// enclosing locals to capture.
static
function<void(string s)> current_catch_explainer = [](string s) {
	cerr << s << endl;
};
// Replace the function that default_catch_explainer() forwards to.
void
set_catch_explainer(function<void(string s)> f)
{
current_catch_explainer = f;
}
// Forward `s` to whichever explainer is currently installed.
void
default_catch_explainer(string s)
{
current_catch_explainer(s);
}
// Run `f`.  Returns 0 when it completes normally.  When it throws
// something derived from std::exception, pass a description to
// `explainer` and return 1.  Other exception types are not caught here.
int
catch_all(const function<void()> &f, const function<void(string)> &explainer)
{
	int result = 0;
	try {
		f();
	} catch (const exception &e) {
		explainer(analyze_exception(e));
		result = 1;
	}
	return result;
}
// Run `f`.  When it throws something derived from std::exception, pass
// a description to `explainer` and then rethrow the same exception.
void
catch_and_explain(const function<void()> &f, const function<void(string)> &explainer)
{
try {
f();
} catch (const exception &e) {
explainer(analyze_exception(e));
// Bare throw: propagates the original exception object.
throw;
}
}
};

104
lib/execpipe.cc Normal file
View File

@ -0,0 +1,104 @@
#include "crucible/execpipe.h"
#include "crucible/chatter.h"
#include "crucible/error.h"
#include "crucible/process.h"
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <unistd.h>
namespace crucible {
using namespace std;
// Make stdin refer to child_fd (via dup2_or_die).
void
redirect_stdin(const Fd &child_fd)
{
dup2_or_die(child_fd, STDIN_FILENO);
}
// Make stdout, then stdin, refer to child_fd.
void
redirect_stdin_stdout(const Fd &child_fd)
{
	static const int targets[] = { STDOUT_FILENO, STDIN_FILENO };
	for (int target : targets) {
		dup2_or_die(child_fd, target);
	}
}
// Make stderr, then stdout, then stdin refer to child_fd.
void
redirect_stdin_stdout_stderr(const Fd &child_fd)
{
	static const int targets[] = { STDERR_FILENO, STDOUT_FILENO, STDIN_FILENO };
	for (int target : targets) {
		dup2_or_die(child_fd, target);
	}
}
// Make stderr, then stdout, refer to child_fd.
void
redirect_stdout_stderr(const Fd &child_fd)
{
	static const int targets[] = { STDERR_FILENO, STDOUT_FILENO };
	for (int target : targets) {
		dup2_or_die(child_fd, target);
	}
}
// Make stdout refer to child_fd (via dup2_or_die).
void
redirect_stdout(const Fd &child_fd)
{
dup2_or_die(child_fd, STDOUT_FILENO);
}
// Make stderr refer to child_fd (via dup2_or_die).
void
redirect_stderr(const Fd &child_fd)
{
dup2_or_die(child_fd, STDERR_FILENO);
}
// Fork a child connected to the caller through a socket pair.
//
// Parent: closes the child's end and returns its own end.
// Child: closes the parent's end, hands its own end to `import_fd_fn`
// (for example one of the redirect_* helpers), runs `f`, and leaves via
// _exit() with f's return value.  If anything in the child throws, the
// exit status is EXIT_FAILURE.
Fd popen(function<int()> f, function<void(const Fd &child_fd)> import_fd_fn)
{
	Fd parent_fd, child_fd;
	{
		// Scoped so the pair object is gone before fork().
		pair<Fd, Fd> fd_pair = socketpair_or_die();
		parent_fd = fd_pair.first;
		child_fd = fd_pair.second;
	}

	pid_t fv;
	DIE_IF_MINUS_ONE(fv = fork());

	if (fv != 0) {
		// Parent process.
		child_fd->close();
		return parent_fd;
	}

	// Child process from here on; this path never returns to the caller.
	int rv = EXIT_FAILURE;
	catch_all([&]() {
		parent_fd->close();
		import_fd_fn(child_fd);
		// system("ls -l /proc/$$/fd/ >&2");
		rv = f();
	});
	_exit(rv);
	// Paranoia: only reached if _exit() somehow returns.
	cerr << "PID " << getpid() << " TID " << gettid() << "STILL ALIVE" << endl;
	system("ls -l /proc/$$/task/ >&2");
	exit(EXIT_FAILURE);
}
// Read from `fd` until a read returns zero bytes and return everything
// read as one string.
//
// max_bytes   - upper bound on the result size; exceeding it throws
//               out_of_range (via THROW_ERROR).
// chunk_bytes - size of each individual read request.
string
read_all(Fd fd, size_t max_bytes, size_t chunk_bytes)
{
	// Heap-backed scratch buffer.  A runtime-sized local array is a
	// compiler extension in C++ and puts an unbounded, caller-chosen
	// amount of data on the stack.
	string buf(chunk_bytes, '\0');
	string str;
	size_t rv;
	while (1) {
		read_partial_or_die(fd, static_cast<void *>(&buf[0]), chunk_bytes, rv);
		if (rv == 0) {
			break;
		}
		// str.size() never exceeds max_bytes, so the subtraction
		// cannot wrap.
		if (max_bytes - str.size() < rv) {
			THROW_ERROR(out_of_range, "Output size limit " << max_bytes << " exceeded by appending " << rv << " bytes read to " << str.size() << " already in string");
		}
		str.append(buf.data(), rv);
	}
	return str;
}
}

630
lib/extentwalker.cc Normal file
View File

@ -0,0 +1,630 @@
#include "crucible/extentwalker.h"
#include "crucible/chatter.h"
#include "crucible/error.h"
#include "crucible/fs.h"
#include "crucible/limits.h"
#include "crucible/string.h"
namespace crucible {
using namespace std;
// Out-of-class definition for the static member declared in the class.
const off_t ExtentWalker::sc_step_size;

// fm_start, fm_length, fm_flags, m_extents
// fe_logical, fe_physical, fe_length, fe_flags

static const off_t MAX_OFFSET = numeric_limits<off_t>::max();
// Positions handed to run_fiemap() must be a multiple of this.
static const off_t FIEMAP_BLOCK_SIZE = 4096;

// True when EXTENTWALKER_DEBUG is set in the environment (any value).
// Named without a double underscore because such identifiers are
// reserved for the implementation.
static bool ew_do_log = (getenv("EXTENTWALKER_DEBUG") != nullptr);

// Emit `x` through CHATTER only when debug logging is enabled.
#define EWLOG(x) do { \
	if (ew_do_log) { \
		CHATTER(x); \
	} \
} while (0)
// Print an Extent as "Extent { begin = ..., end = ..., physical = ...,
// flags = ... }".  The three Extent-specific flags are spelled out by
// name and any remaining bits go through fiemap_extent_flags_ntoa().
// The length and offset fields are printed only when non-zero.
ostream &
operator<<(ostream &os, const Extent &e)
{
	os << "Extent {";
	os << " begin = " << to_hex(e.m_begin);
	os << ", end = " << to_hex(e.m_end);
	os << ", physical = " << to_hex(e.m_physical);
	os << ", flags = ";

	if (e.m_flags & Extent::HOLE) {
		os << "Extent::HOLE|";
	}
	if (e.m_flags & Extent::PREALLOC) {
		os << "Extent::PREALLOC|";
	}
	if (e.m_flags & Extent::OBSCURED) {
		os << "Extent::OBSCURED|";
	}

	// Whatever is left after removing the three flags handled above.
	const auto remaining_flags = e.m_flags & ~(Extent::HOLE | Extent::PREALLOC | Extent::OBSCURED);
	if (remaining_flags) {
		os << fiemap_extent_flags_ntoa(remaining_flags);
	}

	if (e.m_physical_len) {
		os << ", physical_len = " << to_hex(e.m_physical_len);
	}
	if (e.m_logical_len) {
		os << ", logical_len = " << to_hex(e.m_logical_len);
	}
	if (e.m_offset) {
		os << ", offset = " << to_hex(e.m_offset);
	}
	os << " }";
	return os;
}
// Print every Extent in the vector, each on its own tab-indented line.
ostream &
operator<<(ostream &os, const ExtentWalker::Vec &v)
{
	os << "ExtentWalker::Vec {";
	for (auto it = v.begin(); it != v.end(); ++it) {
		os << "\n\t" << *it;
	}
	os << "}";
	return os;
}
// Print the walker's fd name, cached file size, cached extents, and the
// index of the current extent within the cache.
ostream &
operator<<(ostream &os, const ExtentWalker &ew)
{
	os << "ExtentWalker {";
	os << " fd = " << name_fd(ew.m_fd);
	os << ", stat.st_size = " << to_hex(ew.m_stat.st_size);
	os << ", extents = " << ew.m_extents;
	os << ", current = [" << ew.m_current - ew.m_extents.begin() << "] }";
	return os;
}
// Default constructor: every field is zero, i.e. an empty extent with no
// flags.
Extent::Extent() :
m_begin(0),
m_end(0),
m_physical(0),
m_flags(0),
m_physical_len(0),
m_logical_len(0),
m_offset(0)
{
}
// True when the extent covers at least one byte.  The THROW_CHECK2
// below rejects (with invalid_argument) an extent whose end precedes
// its beginning.
Extent::operator bool() const
{
THROW_CHECK2(invalid_argument, m_begin, m_end, m_end >= m_begin);
return m_end > m_begin;
}
// Length of the extent in bytes (m_end - m_begin).  The THROW_CHECK2
// below rejects (with invalid_argument) an extent whose end precedes
// its beginning.
off_t
Extent::size() const
{
THROW_CHECK2(invalid_argument, m_begin, m_end, m_end >= m_begin);
return m_end - m_begin;
}
// Equality compares begin, end, physical and flags only; the
// m_physical_len, m_logical_len and m_offset fields are not compared.
bool
Extent::operator==(const Extent &that) const
{
return m_begin == that.m_begin && m_end == that.m_end && m_physical == that.m_physical && m_flags == that.m_flags;
}
// Walker over `fd` with an empty extent cache; m_current starts at the
// beginning of that empty cache.  No extents are fetched here.
ExtentWalker::ExtentWalker(Fd fd) :
m_fd(fd),
m_current(m_extents.begin())
{
}
// Walker over `fd` that immediately seeks to `initial_pos`.
ExtentWalker::ExtentWalker(Fd fd, off_t initial_pos) :
m_fd(fd),
m_current(m_extents.begin())
{
seek(initial_pos);
}
// Look for a cached extent containing `pos`.  Returns an iterator to it,
// or m_extents.end() when the cache cannot answer.
//
// A hit is only trusted when the cache also holds the neighbours on
// both sides of the extent, except where no neighbour can exist
// (extent begins at offset 0, or reaches the cached file size).
ExtentWalker::Itr
ExtentWalker::find_in_cache(off_t pos)
{
	EWLOG("find_in_cache " << to_hex(pos));

	const auto cache_begin = m_extents.begin();
	const auto cache_end = m_extents.end();

	// EOF is an annoying special case
	if (pos >= m_stat.st_size && !m_extents.empty() && m_extents.rbegin()->m_end == m_stat.st_size) {
		auto last = m_extents.end();
		--last;
		return last;
	}

	for (auto it = cache_begin; it != cache_end; ++it) {
		const bool contains_pos = pos >= it->m_begin && pos < it->m_end;
		if (!contains_pos) {
			continue;
		}
		EWLOG("pos " << to_hex(pos) << " in " << *it);

		// Must have an extent before pos, unless
		// there can be no extent before pos because pos == 0
		if (it == cache_begin && it->m_begin != 0) {
			EWLOG("can't match first unless begin is BOF");
			break;
		}

		// Must have an extent after pos, unless
		// there can be no extent after pos because pos >= EOF
		auto following = it;
		++following;
		if (following == cache_end && it->m_end < m_stat.st_size) {
			EWLOG("can't match last unless end past EOF " << to_hex(m_stat.st_size));
			break;
		}

		// Extent surrounded on either side by other known extents
		return it;
	}

	EWLOG("find_in_cache failed: " << *this);
	return m_extents.end();
}
// Rebuild the extent cache so that it covers `pos`.
//
// Phase 1 repeatedly calls get_extent_map() from a moving start offset
// until the result holds an extent ending at or before `pos` and one
// starting after it (or the start / end of the file is reached).
// Phase 2 converts that result into a gap-free vector by inserting
// HOLE extents, validates it, and installs it as m_extents with
// m_current reset to the first entry.
//
// `pos` must be a multiple of FIEMAP_BLOCK_SIZE.  The THROW_CHECK
// macros below are invoked with invalid_argument for a misaligned
// `pos` and with runtime_error when the extent data is inconsistent
// or the search does not converge.
void
ExtentWalker::run_fiemap(off_t pos)
{
	ostringstream log;
	CHATTER_UNWIND("Log of run_fiemap: " << log.str());

	EWLOG("pos = " << to_hex(pos));

	THROW_CHECK1(invalid_argument, pos, (pos & (FIEMAP_BLOCK_SIZE - 1)) == 0);

	Vec fm;

	// step_size is halved on every adjustment of `begin` below.
	off_t step_size = pos;
	off_t begin = pos - min(pos, sc_step_size);

	// This loop should not run forever
	int loop_count = 0;
	int loop_limit = 99;
	while (true) {
		if (loop_count == 90) {
			EWLOG(log.str());
		}

		THROW_CHECK1(runtime_error, loop_count, loop_count < loop_limit);
		++loop_count;

		// Get file size every time in case it changes under us
		m_stat.fstat(m_fd);

		// Get fiemap begin..EOF
		fm = get_extent_map(begin);
		EWLOG("fiemap result loop count #" << loop_count << ":" << fm);

		// This algorithm seeks at least three extents: one before,
		// one after, and one containing pos. Files which contain
		// two or fewer extents will cause an obvious problem with that,
		// so handle those cases separately.
		// FIEMAP lies, and we catch it in a lie about the size of the
		// second extent. To work around this, try getting more than 3.

		// 0..2(ish) extents
		if (fm.size() < sc_extent_fetch_min) {
			// If we are not at beginning of file, move backward
			if (begin > 0) {
				step_size /= 2;
				auto next_begin = (begin - min(step_size, begin)) & ~(FIEMAP_BLOCK_SIZE - 1);
				EWLOG("step backward " << to_hex(begin) << " -> " << to_hex(next_begin) << " extents size " << fm.size());
				if (begin == next_begin) {
					EWLOG("step backward stopped");
					break;
				}
				begin = next_begin;
				continue;
			}

			// We are at beginning of file and have too few extents.
			// Zero extents? Entire file is a hole.
			if (fm.empty()) {
				EWLOG("zero extents");
				break;
			}

			// We know we have the beginning of the file and at least
			// one extent. If the last extent is EOF then we have the
			// whole file in the buffer. If the last extent is NOT
			// EOF then fiemap did something we didn't expect.
			THROW_CHECK1(runtime_error, fm.rbegin()->flags(), fm.rbegin()->flags() & FIEMAP_EXTENT_LAST);
			break;
		}

		// We have at least three extents, so there is now a first and last.
		// We want pos to be between first and last. There doesn't have
		// to be an extent between these (it could be a hole).
		auto &first_extent = fm.at(sc_extent_fetch_min - 2);
		auto &last_extent = *fm.rbegin();

		EWLOG("first_extent = " << first_extent);
		EWLOG("last_extent = " << last_extent);

		// First extent must end on or before pos
		if (first_extent.end() > pos) {
			// Can we move backward?
			if (begin > 0) {
				step_size /= 2;
				auto next_begin = (begin - min(step_size, begin)) & ~(FIEMAP_BLOCK_SIZE - 1);
				EWLOG("step backward " << to_hex(begin) << " -> " << to_hex(next_begin) << " extents size " << fm.size());
				if (begin == next_begin) {
					EWLOG("step backward stopped");
					break;
				}
				begin = next_begin;
				continue;
			}

			// We are as far back as we can go, so there must be no
			// extent before pos (i.e. file starts with a hole).
			EWLOG("no extent before pos");
			break;
		}

		// First extent ends on or before pos.

		// If last extent is EOF then we have the entire file in the buffer.
		// pos could be in last extent, so skip the later checks that
		// insist pos be located prior to the last extent.
		if (last_extent.flags() & FIEMAP_EXTENT_LAST) {
			break;
		}

		// Don't have EOF, must have an extent after pos.
		if (last_extent.begin() <= pos) {
			step_size /= 2;
			auto new_begin = (begin + step_size) & ~(FIEMAP_BLOCK_SIZE - 1);
			EWLOG("step forward " << to_hex(begin) << " -> " << to_hex(new_begin));
			if (begin == new_begin) {
				EWLOG("step forward stopped");
				break;
			}
			begin = new_begin;
			continue;
		}

		// Last extent begins after pos, first extent ends on or before pos.
		// All other cases should have been handled before here.
		THROW_CHECK2(runtime_error, pos, first_extent, first_extent.end() <= pos);
		THROW_CHECK2(runtime_error, pos, last_extent, last_extent.begin() > pos);

		// We should probably stop now
		break;
	}

	// Fill in holes so there are Extent records over entire range
	auto fmi = fm.begin();
	off_t ipos = begin;
	Vec new_vec;
	// If we mapped the entire file and there are no extents,
	// the entire file is a hole.
	bool last_extent_is_last = (begin == 0 && fm.empty());
	while (fmi != fm.end()) {
		Extent new_extent(*fmi);
		THROW_CHECK2(runtime_error, ipos, new_extent.m_begin, ipos <= new_extent.m_begin);
		if (new_extent.m_begin > ipos) {
			Extent hole_extent;
			hole_extent.m_begin = ipos;
			hole_extent.m_end = fmi->begin();
			hole_extent.m_physical = 0;
			hole_extent.m_flags = Extent::HOLE;
			new_vec.push_back(hole_extent);
			ipos += hole_extent.size();
		}
		THROW_CHECK2(runtime_error, ipos, new_extent.m_begin, ipos == new_extent.m_begin);
		new_vec.push_back(new_extent);
		ipos += new_extent.size();
		last_extent_is_last = fmi->flags() & FIEMAP_EXTENT_LAST;
		++fmi;
	}
	// If we have run out of extents before EOF, insert a hole at the end
	if (last_extent_is_last && ipos < m_stat.st_size) {
		Extent hole_extent;
		hole_extent.m_begin = ipos;
		hole_extent.m_end = m_stat.st_size;
		hole_extent.m_physical = 0;
		hole_extent.m_flags = Extent::HOLE;
		// The LAST flag belongs on the final record, which is now the hole.
		if (!new_vec.empty() && new_vec.rbegin()->m_flags & FIEMAP_EXTENT_LAST) {
			new_vec.rbegin()->m_flags &= ~(FIEMAP_EXTENT_LAST);
			hole_extent.m_flags |= FIEMAP_EXTENT_LAST;
		}
		new_vec.push_back(hole_extent);
		// Advance by the hole's length in bytes, exactly as for the
		// holes inserted in the loop above.  Adding the vector's
		// element count would leave ipos at a meaningless offset and
		// could fail the range check below for a short trailing hole.
		ipos += hole_extent.size();
	}
	THROW_CHECK1(runtime_error, new_vec.size(), !new_vec.empty());

	// Allow last extent to extend beyond desired range (e.g. at EOF)
	THROW_CHECK2(runtime_error, ipos, new_vec.rbegin()->m_end, ipos <= new_vec.rbegin()->m_end);
	// If we have the last extent in the file, truncate it to the file size.
	if (ipos >= m_stat.st_size) {
		THROW_CHECK2(runtime_error, new_vec.rbegin()->m_begin, m_stat.st_size, m_stat.st_size > new_vec.rbegin()->m_begin);
		THROW_CHECK2(runtime_error, new_vec.rbegin()->m_end, m_stat.st_size, m_stat.st_size <= new_vec.rbegin()->m_end);
		new_vec.rbegin()->m_end = m_stat.st_size;
	}

	// Verify contiguous, ascending order, at least one Extent
	THROW_CHECK1(runtime_error, new_vec, !new_vec.empty());

	ipos = new_vec.begin()->m_begin;
	bool last_flag_last = false;
	for (auto e : new_vec) {
		THROW_CHECK1(runtime_error, new_vec, e.m_begin == ipos);
		THROW_CHECK1(runtime_error, e, e.size() > 0);
		// Nothing may follow an extent flagged LAST.
		THROW_CHECK1(runtime_error, new_vec, !last_flag_last);
		ipos += e.size();
		last_flag_last = e.m_flags & FIEMAP_EXTENT_LAST;
	}
	THROW_CHECK1(runtime_error, new_vec, !last_extent_is_last || new_vec.rbegin()->m_end == ipos);

	m_extents = new_vec;
	m_current = m_extents.begin();
}
// Drop every cached extent.  m_current is re-pointed at the (now empty)
// container so it does not refer to erased elements.
void
ExtentWalker::reset()
{
m_extents.clear();
m_current = m_extents.begin();
}
void
ExtentWalker::seek(off_t pos)
{
CHATTER_UNWIND("seek " << to_hex(pos));
THROW_CHECK1(out_of_range, pos, pos >= 0);
Itr rv = find_in_cache(pos);
if (rv != m_extents.end()) {
m_current = rv;
return;
}
run_fiemap(pos);
m_current = find_in_cache(pos);
}
// Return a copy of the extent m_current points at.  The THROW_CHECK2
// below rejects (with invalid_argument) a walker that is not positioned
// on any extent.
Extent
ExtentWalker::current()
{
THROW_CHECK2(invalid_argument, *this, m_extents.size(), m_current != m_extents.end());
CHATTER_UNWIND("current " << *m_current);
return *m_current;
}
// Advance to the extent that starts where the current one ends.
// Returns false, without moving, when the current extent already
// reaches the cached file size; returns true after a successful move.
bool
ExtentWalker::next()
{
CHATTER_UNWIND("next");
THROW_CHECK1(invalid_argument, (m_current != m_extents.end()), m_current != m_extents.end());
if (current().m_end >= m_stat.st_size) {
CHATTER_UNWIND("next EOF");
return false;
}
auto next_pos = current().m_end;
// NOTE(review): this repeats the test made just above with the same
// values, so the branch looks unreachable.
if (next_pos >= m_stat.st_size) {
CHATTER_UNWIND("next next_pos = " << next_pos << " m_stat.st_size = " << m_stat.st_size);
return false;
}
seek(next_pos);
THROW_CHECK1(runtime_error, (m_current != m_extents.end()), m_current != m_extents.end());
// FIEMAP is full of lies, so this check keeps failing
// THROW_CHECK2(runtime_error, current().m_begin, next_pos, current().m_begin == next_pos);
// Just ensure that pos is in the next extent somewhere.
THROW_CHECK2(runtime_error, current(), next_pos, current().m_begin <= next_pos);
THROW_CHECK2(runtime_error, current(), next_pos, current().m_end > next_pos);
return true;
}
// Move to the extent immediately before the current one.  Returns
// false, without moving, when the current extent begins at offset 0;
// returns true after a successful move.
bool
ExtentWalker::prev()
{
CHATTER_UNWIND("prev");
THROW_CHECK1(invalid_argument, (m_current != m_extents.end()), m_current != m_extents.end());
auto prev_iter = m_current;
if (prev_iter->m_begin == 0) {
CHATTER_UNWIND("prev BOF");
return false;
}
THROW_CHECK1(invalid_argument, (prev_iter != m_extents.begin()), prev_iter != m_extents.begin());
--prev_iter;
CHATTER_UNWIND("prev seeking to " << *prev_iter << "->m_begin");
// Remember where the current extent starts so the result can be
// verified to end exactly there.
auto prev_end = current().m_begin;
seek(prev_iter->m_begin);
THROW_CHECK1(runtime_error, (m_current != m_extents.end()), m_current != m_extents.end());
THROW_CHECK2(runtime_error, current().m_end, prev_end, current().m_end == prev_end);
return true;
}
// Nothing to release explicitly; members clean up after themselves.
ExtentWalker::~ExtentWalker()
{
}
// Walker over `fd`; the tree id starts at 0 and is looked up on the
// first get_extent_map() call.
BtrfsExtentWalker::BtrfsExtentWalker(Fd fd) :
ExtentWalker(fd),
m_tree_id(0)
{
}
// Walker over `fd` that immediately seeks to `initial_pos`.  The base
// class is built with the one-argument constructor and seek() is called
// from this constructor's body.
BtrfsExtentWalker::BtrfsExtentWalker(Fd fd, off_t initial_pos) :
ExtentWalker(fd),
m_tree_id(0)
{
seek(initial_pos);
}
// Choose the fd on which get_extent_map() issues its search ioctl.
// When none is set, get_extent_map() falls back to the file's own fd.
void
BtrfsExtentWalker::set_root_fd(Fd root_fd)
{
m_root_fd = root_fd;
}
// Walker over `fd` that records `root_fd` for its search ioctls and
// then seeks to `initial_pos`.
BtrfsExtentWalker::BtrfsExtentWalker(Fd fd, off_t initial_pos, Fd root_fd) :
ExtentWalker(fd),
m_tree_id(0)
{
set_root_fd(root_fd);
seek(initial_pos);
}
// Build the extent list for this file starting at offset `pos` from the
// results of a BtrfsIoctlSearchKey query for EXTENT_DATA items, rather
// than from FIEMAP.  Each item is translated into an Extent carrying
// FIEMAP-style flags plus the Extent-specific HOLE / PREALLOC /
// OBSCURED flags.  Items yielding a non-positive length are dropped.
BtrfsExtentWalker::Vec
BtrfsExtentWalker::get_extent_map(off_t pos)
{
BtrfsIoctlSearchKey sk;
// Lazily default the ioctl fd and look up the tree id on first use.
if (!m_root_fd) {
m_root_fd = m_fd;
}
if (!m_tree_id) {
m_tree_id = btrfs_get_root_id(m_fd);
}
sk.tree_id = m_tree_id;
// The object id range is open-ended upward; items from later objects
// are detected and cut off in the loop below.
sk.min_objectid = m_stat.st_ino;
sk.max_objectid = numeric_limits<uint64_t>::max();
sk.min_offset = ranged_cast<uint64_t>(pos);
sk.max_offset = numeric_limits<uint64_t>::max();
sk.min_transid = 0;
sk.max_transid = numeric_limits<uint64_t>::max();
sk.min_type = sk.max_type = BTRFS_EXTENT_DATA_KEY;
sk.nr_items = sc_extent_fetch_max;
CHATTER_UNWIND("sk " << sk << " root_fd " << name_fd(m_root_fd));
sk.do_ioctl(m_root_fd);
Vec rv;
bool past_eof = false;
for (auto i : sk.m_result) {
// If we're seeing extents from the next file then we're past EOF on this file
if (i.objectid > m_stat.st_ino) {
past_eof = true;
break;
}
// Ignore things that aren't EXTENT_DATA_KEY
if (i.type != BTRFS_EXTENT_DATA_KEY) {
continue;
}
// Hmmmkay we shouldn't be seeing these
if (i.objectid < m_stat.st_ino) {
THROW_ERROR(out_of_range, "objectid " << i.objectid << " < m_stat.st_ino " << m_stat.st_ino);
continue;
}
Extent e;
e.m_begin = i.offset;
auto compressed = call_btrfs_get(btrfs_stack_file_extent_compression, i.m_data);
// FIEMAP told us about compressed extents and we can too
if (compressed) {
e.m_flags |= FIEMAP_EXTENT_ENCODED;
}
auto type = call_btrfs_get(btrfs_stack_file_extent_type, i.m_data);
// len stays -1 for unhandled types, so those items are skipped below.
off_t len = -1;
switch (type) {
default:
cerr << "Unhandled file extent type " << type << " in root " << m_tree_id << " ino " << m_stat.st_ino << endl;
break;
case BTRFS_FILE_EXTENT_INLINE:
len = ranged_cast<off_t>(call_btrfs_get(btrfs_stack_file_extent_ram_bytes, i.m_data));
e.m_flags |= FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;
// Inline extents are never obscured, so don't bother filling in m_physical_len, etc.
break;
case BTRFS_FILE_EXTENT_PREALLOC:
e.m_flags |= Extent::PREALLOC;
// No break here: execution continues into the REG case below, so
// PREALLOC extents get the same field handling as regular ones.
case BTRFS_FILE_EXTENT_REG: {
e.m_physical = call_btrfs_get(btrfs_stack_file_extent_disk_bytenr, i.m_data);
// This is the length of the full extent (decompressed)
off_t ram = ranged_cast<off_t>(call_btrfs_get(btrfs_stack_file_extent_ram_bytes, i.m_data));
// This is the length of the part of the extent appearing in the file (decompressed)
len = ranged_cast<off_t>(call_btrfs_get(btrfs_stack_file_extent_num_bytes, i.m_data));
// This is the offset from start of on-disk extent to the part we see in the file (decompressed)
// May be negative due to the kind of bug we're stuck with forever, so no cast range check
off_t offset = call_btrfs_get(btrfs_stack_file_extent_offset, i.m_data);
// If there is a physical address there must be size too
if (e.m_physical) {
THROW_CHECK1(runtime_error, ram, ram > 0);
THROW_CHECK1(runtime_error, len, len > 0);
THROW_CHECK2(runtime_error, offset, ram, offset < ram);
} else {
// There are two kinds of hole in btrfs. This is the other one.
e.m_flags |= Extent::HOLE;
}
// Partially obscured extent
// FIXME: sometimes this happens:
// i.type == BTRFS_EXTENT_DATA_KEY
// type = 0x1
// compressed = 0x0
// REG start 0x0 offset 0x0 num 0x20000 ram 0x21000 gen 1101121
// btrfs_file_extent_item {
// generation = 1101121
// ram_bytes = 135168
// compression = 0x0
// encryption = 0x0
// other_encoding = 0x0
// type = 0x1
// disk_bytenr = 0x0
// disk_num_bytes = 0x0
// offset = 0x0
// num_bytes = 0x20000
// }
if (ram != len || offset != 0) {
e.m_flags |= Extent::OBSCURED;
// cerr << e << "\nram = " << ram << ", len = " << len << ", offset = " << offset << endl;
}
e.m_physical_len = ram;
e.m_logical_len = len;
e.m_offset = offset;
// To maintain compatibility with FIEMAP we ignore the offset for compressed extents.
// At some point we'll grow out of this.
if (!compressed) {
e.m_physical += offset;
}
break;
}
}
if (len > 0) {
e.m_end = e.m_begin + len;
// An extent reaching the cached file size is the last one.
if (e.m_end >= m_stat.st_size) {
e.m_flags |= FIEMAP_EXTENT_LAST;
}
// FIXME: no FIEMAP_EXTENT_SHARED
// WONTFIX: non-trivial to replicate LOGICAL_INO
rv.push_back(e);
}
}
// Plug a hole at EOF
if (past_eof && !rv.empty()) {
rv.rbegin()->m_flags |= FIEMAP_EXTENT_LAST;
}
return rv;
}
// Ask Fiemap for the extents from `pos` to the largest representable
// offset and convert each returned record into an Extent (begin, end,
// physical, flags).
ExtentWalker::Vec
ExtentWalker::get_extent_map(off_t pos)
{
	Fiemap fm;
	fm.fm_start = ranged_cast<uint64_t>(pos);
	fm.fm_length = ranged_cast<uint64_t>(numeric_limits<off_t>::max() - pos);
	// Same value for both counts, assigned min first and then max from it.
	fm.m_min_count = sc_extent_fetch_max;
	fm.m_max_count = fm.m_min_count;
	fm.do_ioctl(m_fd);

	Vec extents;
	for (const auto &raw : fm.m_extents) {
		Extent converted;
		converted.m_begin = ranged_cast<off_t>(raw.fe_logical);
		converted.m_end = ranged_cast<off_t>(raw.fe_logical + raw.fe_length);
		converted.m_physical = raw.fe_physical;
		converted.m_flags = raw.fe_flags;
		extents.push_back(converted);
	}
	return extents;
}
};

575
lib/fd.cc Normal file
View File

@ -0,0 +1,575 @@
#include "crucible/chatter.h"
#include "crucible/error.h"
#include "crucible/fd.h"
#include "crucible/ntoa.h"
#include "crucible/string.h"
#include <cstdio>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <vector>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
namespace crucible {
using namespace std;
// Name table for open(2) flag bits, consumed by o_flags_ntoa() through
// bits_ntoa().  The list ends with NTOA_TABLE_ENTRY_END().
static const struct bits_ntoa_table o_flags_table[] = {
NTOA_TABLE_ENTRY_BITS(O_APPEND),
NTOA_TABLE_ENTRY_BITS(O_ASYNC),
NTOA_TABLE_ENTRY_BITS(O_CLOEXEC),
NTOA_TABLE_ENTRY_BITS(O_CREAT),
NTOA_TABLE_ENTRY_BITS(O_DIRECT),
NTOA_TABLE_ENTRY_BITS(O_DIRECTORY),
NTOA_TABLE_ENTRY_BITS(O_EXCL),
NTOA_TABLE_ENTRY_BITS(O_LARGEFILE),
NTOA_TABLE_ENTRY_BITS(O_NOATIME),
NTOA_TABLE_ENTRY_BITS(O_NOCTTY),
NTOA_TABLE_ENTRY_BITS(O_NOFOLLOW),
NTOA_TABLE_ENTRY_BITS(O_NONBLOCK),
NTOA_TABLE_ENTRY_BITS(O_NDELAY), // NONBLOCK will prevent this
NTOA_TABLE_ENTRY_BITS(O_SYNC),
NTOA_TABLE_ENTRY_BITS(O_TRUNC),
// These aren't really bit values
NTOA_TABLE_ENTRY_BITS(O_RDWR),
NTOA_TABLE_ENTRY_BITS(O_WRONLY),
NTOA_TABLE_ENTRY_BITS(O_RDONLY),
NTOA_TABLE_ENTRY_END(),
};
// Name table for the file-type and permission constants of a mode_t;
// o_mode_ntoa() passes it to bits_ntoa().  Entries are tried in order, so the
// combined masks (S_IFMT, S_IRWXU, S_IRWXG, S_IRWXO) are listed before the
// individual values they contain.
static const struct bits_ntoa_table o_mode_table[] = {
NTOA_TABLE_ENTRY_BITS(S_IFMT),
NTOA_TABLE_ENTRY_BITS(S_IFSOCK),
NTOA_TABLE_ENTRY_BITS(S_IFLNK),
NTOA_TABLE_ENTRY_BITS(S_IFREG),
NTOA_TABLE_ENTRY_BITS(S_IFBLK),
NTOA_TABLE_ENTRY_BITS(S_IFDIR),
NTOA_TABLE_ENTRY_BITS(S_IFCHR),
NTOA_TABLE_ENTRY_BITS(S_IFIFO),
NTOA_TABLE_ENTRY_BITS(S_ISUID),
NTOA_TABLE_ENTRY_BITS(S_ISGID),
NTOA_TABLE_ENTRY_BITS(S_ISVTX),
NTOA_TABLE_ENTRY_BITS(S_IRWXU),
NTOA_TABLE_ENTRY_BITS(S_IRUSR),
NTOA_TABLE_ENTRY_BITS(S_IWUSR),
NTOA_TABLE_ENTRY_BITS(S_IXUSR),
NTOA_TABLE_ENTRY_BITS(S_IRWXG),
NTOA_TABLE_ENTRY_BITS(S_IRGRP),
NTOA_TABLE_ENTRY_BITS(S_IWGRP),
NTOA_TABLE_ENTRY_BITS(S_IXGRP),
NTOA_TABLE_ENTRY_BITS(S_IRWXO),
NTOA_TABLE_ENTRY_BITS(S_IROTH),
NTOA_TABLE_ENTRY_BITS(S_IWOTH),
NTOA_TABLE_ENTRY_BITS(S_IXOTH),
NTOA_TABLE_ENTRY_END(),
};
// Render open(2) flags as a '|'-separated list of names using o_flags_table.
string o_flags_ntoa(int flags)
{
return bits_ntoa(flags, o_flags_table);
}
// Render a mode_t as a '|'-separated list of names using o_mode_table.
string o_mode_ntoa(mode_t mode)
{
return bits_ntoa(mode, o_mode_table);
}
// Close the held descriptor, if any.  m_fd is reset to -1 before ::close()
// runs, so the handle no longer refers to the descriptor even when ::close()
// returns -1 (DIE_IF_MINUS_ONE presumably throws then - confirm in crucible/error.h).
void
IOHandle::close()
{
CHATTER_TRACE("close fd " << m_fd << " in " << this);
if (m_fd >= 0) {
// Assume that ::close always destroys the FD, even if errors are encountered;
int closing_fd = m_fd;
m_fd = -1;
CHATTER_UNWIND("closing fd " << closing_fd << " in " << this);
DIE_IF_MINUS_ONE(::close(closing_fd));
}
}
// Close the descriptor on destruction.  close() is run through catch_all()
// (presumably so that no exception leaves the destructor - confirm in crucible/error.h).
IOHandle::~IOHandle()
{
CHATTER_TRACE("destroy fd " << m_fd << " in " << this);
if (m_fd >= 0) {
catch_all([&](){
close();
});
}
}
// Construct an empty handle (m_fd == -1, nothing to close).
IOHandle::IOHandle() :
m_fd(-1)
{
CHATTER_TRACE("open fd " << m_fd << " in " << this);
}
// Construct a handle holding `fd`; the destructor closes it when fd >= 0.
IOHandle::IOHandle(int fd) :
m_fd(fd)
{
CHATTER_TRACE("open fd " << m_fd << " in " << this);
}
// Give up the descriptor without closing it: returns the stored value and
// resets m_fd to -1, so closing becomes the caller's job.
int
IOHandle::release_fd()
{
CHATTER_TRACE("release fd " << m_fd << " in " << this);
int rv = m_fd;
m_fd = -1;
return rv;
}
// XXX: necessary? useful?
// Chatter formatting for Fd: prints the address of the Fd object and the
// integer descriptor obtained by converting it to int.
template <>
struct ChatterTraits<Fd> {
Chatter &operator()(Chatter &c, const Fd &fd) const
{
c << "Fd {this=" << &fd << " fd=" << static_cast<int>(fd) << "}";
return c;
}
};
// open(2) wrapper: returns the new descriptor, or raises via THROW_ERRNO with
// the file name, octal mode and decoded flags in the message.
int
open_or_die(const string &file, int flags, mode_t mode)
{
	const int new_fd = ::open(file.c_str(), flags, mode);
	if (new_fd < 0) {
		THROW_ERRNO("open: name '" << file << "' mode " << oct << setfill('0') << setw(3) << mode << " flags " << o_flags_ntoa(flags));
	}
	return new_fd;
}
// openat(2) wrapper: returns the new descriptor, or raises via THROW_ERRNO.
// The message names the directory descriptor (number and resolved path), the
// file name, the octal mode and the decoded flags.
int
openat_or_die(int dir_fd, const string &file, int flags, mode_t mode)
{
	const int new_fd = ::openat(dir_fd, file.c_str(), flags, mode);
	if (new_fd < 0) {
		THROW_ERRNO("openat: dir_fd " << dir_fd << " " << name_fd(dir_fd) << " name '" << file << "' mode " << oct << setfill('0') << setw(3) << mode << " flags " << o_flags_ntoa(flags));
	}
	return new_fd;
}
// Name table for mmap(2) protection bits, used by mmap_prot_ntoa().
// NOTE(review): PROT_NONE is typically 0; how a zero-valued entry matches
// depends on NTOA_TABLE_ENTRY_BITS - confirm in crucible/ntoa.h.
static const struct bits_ntoa_table mmap_prot_table[] = {
NTOA_TABLE_ENTRY_BITS(PROT_EXEC),
NTOA_TABLE_ENTRY_BITS(PROT_READ),
NTOA_TABLE_ENTRY_BITS(PROT_WRITE),
NTOA_TABLE_ENTRY_BITS(PROT_NONE),
NTOA_TABLE_ENTRY_END(),
};
// Render mmap(2) protection bits as a '|'-separated list of names.
string mmap_prot_ntoa(int prot)
{
return bits_ntoa(prot, mmap_prot_table);
}
// Name table for mmap(2) flag bits, used by mmap_flags_ntoa().
static const struct bits_ntoa_table mmap_flags_table[] = {
NTOA_TABLE_ENTRY_BITS(MAP_SHARED),
NTOA_TABLE_ENTRY_BITS(MAP_PRIVATE),
NTOA_TABLE_ENTRY_BITS(MAP_32BIT),
NTOA_TABLE_ENTRY_BITS(MAP_ANONYMOUS),
NTOA_TABLE_ENTRY_BITS(MAP_DENYWRITE),
NTOA_TABLE_ENTRY_BITS(MAP_EXECUTABLE),
// MAP_FILE is listed only when it has a non-zero value
#if MAP_FILE
NTOA_TABLE_ENTRY_BITS(MAP_FILE),
#endif
NTOA_TABLE_ENTRY_BITS(MAP_FIXED),
NTOA_TABLE_ENTRY_BITS(MAP_GROWSDOWN),
NTOA_TABLE_ENTRY_BITS(MAP_HUGETLB),
NTOA_TABLE_ENTRY_BITS(MAP_LOCKED),
NTOA_TABLE_ENTRY_BITS(MAP_NONBLOCK),
NTOA_TABLE_ENTRY_BITS(MAP_NORESERVE),
NTOA_TABLE_ENTRY_BITS(MAP_POPULATE),
NTOA_TABLE_ENTRY_BITS(MAP_STACK),
// MAP_UNINITIALIZED is not defined by every libc, so it is optional here
#ifdef MAP_UNINITIALIZED
NTOA_TABLE_ENTRY_BITS(MAP_UNINITIALIZED),
#endif
NTOA_TABLE_ENTRY_END(),
};
// Render mmap(2) flags as a '|'-separated list of names.
string mmap_flags_ntoa(int flags)
{
return bits_ntoa(flags, mmap_flags_table);
}
// mmap(2) wrapper: returns the mapping address, or raises via THROW_ERRNO
// with every argument (prot and flags decoded to names) in the message.
void *
mmap_or_die(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
{
	void *const mapping = mmap(addr, length, prot, flags, fd, offset);
	if (MAP_FAILED == mapping) {
		THROW_ERRNO("mmap: addr " << addr << " length " << length
			<< " prot " << mmap_prot_ntoa(prot)
			<< " flags " << mmap_flags_ntoa(flags)
			<< " fd " << fd << " offset " << offset);
	}
	return mapping;
}
// rename(2) wrapper: raises via THROW_ERRNO, naming both paths, when the
// call reports failure.
void
rename_or_die(const string &from, const string &to)
{
	const int status = ::rename(from.c_str(), to.c_str());
	if (status != 0) {
		THROW_ERRNO("rename: " << from << " -> " << to);
	}
}
// renameat(2) wrapper: raises via THROW_ERRNO when the call reports failure.
// Both directory descriptors are resolved to names for the message.
void
renameat_or_die(int fromfd, const string &frompath, int tofd, const string &topath)
{
	const int status = ::renameat(fromfd, frompath.c_str(), tofd, topath.c_str());
	if (status != 0) {
		THROW_ERRNO("renameat: " << name_fd(fromfd) << "/" << frompath
			<< " -> " << name_fd(tofd) << "/" << topath);
	}
}
// Render a socket address family (the `domain` argument of socket(2)) as a name.
string
socket_domain_ntoa(int domain)
{
static const bits_ntoa_table table[] = {
NTOA_TABLE_ENTRY_ENUM(AF_UNIX),
NTOA_TABLE_ENTRY_ENUM(AF_LOCAL), // probably the same as AF_UNIX
NTOA_TABLE_ENTRY_ENUM(AF_INET),
NTOA_TABLE_ENTRY_ENUM(AF_INET6),
NTOA_TABLE_ENTRY_ENUM(AF_PACKET),
NTOA_TABLE_ENTRY_END()
};
return bits_ntoa(domain, table);
}
// Render the `type` argument of socket(2): the SOCK_CLOEXEC / SOCK_NONBLOCK
// modifier bits are listed first, followed by the socket type entries.
string
socket_type_ntoa(int type)
{
static const bits_ntoa_table table[] = {
NTOA_TABLE_ENTRY_BITS(SOCK_CLOEXEC),
NTOA_TABLE_ENTRY_BITS(SOCK_NONBLOCK),
NTOA_TABLE_ENTRY_ENUM(SOCK_STREAM),
NTOA_TABLE_ENTRY_ENUM(SOCK_DGRAM),
NTOA_TABLE_ENTRY_ENUM(SOCK_RAW),
NTOA_TABLE_ENTRY_ENUM(SOCK_PACKET),
NTOA_TABLE_ENTRY_END()
};
return bits_ntoa(type, table);
}
// Render the `protocol` argument of socket(2).  No names are defined, so
// bits_ntoa() falls back to its numeric output ("0x..." or "0").
string
socket_protocol_ntoa(int protocol)
{
static const bits_ntoa_table table[] = {
// an empty table just prints the number
NTOA_TABLE_ENTRY_END()
};
return bits_ntoa(protocol, table);
}
// socket(2) wrapper returning the new descriptor wrapped in an Fd.
// Raises via THROW_ERRNO, with domain, type and protocol decoded, when the
// result compares below zero.
// NOTE(review): `fd < 0` relies on Fd converting to its integer descriptor - confirm in crucible/fd.h.
Fd
socket_or_die(int domain, int type, int protocol)
{
Fd fd(::socket(domain, type, protocol));
if (fd < 0) {
THROW_ERRNO("socket: domain " << socket_domain_ntoa(domain)
<< " type " << socket_type_ntoa(type)
<< " protocol " << socket_protocol_ntoa(protocol));
}
return fd;
}
// Issue one write(2) call and report through `size_written` how many bytes
// were accepted, which may be fewer than `size_wanted`.
//
// Throws invalid_argument when size_wanted exceeds the largest signed size
// or fd is negative, and raises via THROW_ERRNO when write(2) fails.
void
write_or_die_partial(int fd, const void *buf, size_t size_wanted, size_t &size_written)
{
	// write() returns a signed count, so larger requests cannot be reported back
	if (size_wanted > (static_cast<size_t>(~0) >> 1)) {
		THROW_ERROR(invalid_argument, "write: cannot write " << size_wanted << ", more than signed size allows");
	}
	if (fd < 0) {
		THROW_ERROR(invalid_argument, "write: trying to write on a closed file descriptor");
	}
	// Keep the full ssize_t result: an int would truncate counts above INT_MAX
	const ssize_t rv = write(fd, buf, size_wanted);
	if (rv < 0) {
		THROW_ERRNO("write: " << size_wanted << " bytes returned " << rv);
	}
	size_written = static_cast<size_t>(rv);
}
// Write exactly `size` bytes with a single write_or_die_partial() call.
// A short write is treated as an error and reported as runtime_error.
void
write_or_die(int fd, const void *buf, size_t size)
{
	size_t accepted = 0;
	write_or_die_partial(fd, buf, size, accepted);
	const bool short_write = (accepted != size);
	if (short_write) {
		THROW_ERROR(runtime_error, "write: only " << accepted << " of " << size << " bytes written");
	}
}
void
pwrite_or_die(int fd, const void *buf, size_t size, off_t offset)
{
if (size > (static_cast<size_t>(~0) >> 1)) {
THROW_ERROR(invalid_argument, "pwrite: cannot write " << size << ", more than signed size allows");
}
if (fd < 0) {
THROW_ERROR(invalid_argument, "pwrite: trying to write on a closed file descriptor");
}
int rv = ::pwrite(fd, buf, size, offset);
if (rv != static_cast<int>(size)) {
THROW_ERROR(runtime_error, "pwrite: only " << rv << " of " << size << " bytes written at offset " << offset);
}
}
// Specialization for string: writes the string's bytes (data(), size()) to fd.
template<>
void
write_or_die<string>(int fd, const string &text)
{
return write_or_die(fd, text.data(), text.size());
}
void
read_partial_or_die(int fd, void *buf, size_t size, size_t &size_read)
{
if (size > (static_cast<size_t>(~0) >> 1)) {
THROW_ERROR(invalid_argument, "cannot read " << size << ", more than signed size allows");
}
if (fd < 0) {
THROW_ERROR(runtime_error, "read: trying to read on a closed file descriptor");
}
size_read = 0;
while (size) {
int rv = read(fd, buf, size);
if (rv < 0) {
if (errno == EINTR) {
CHATTER_TRACE("resuming after EINTR");
continue;
}
THROW_ERRNO("read: " << size << " bytes");
}
if (rv > static_cast<int>(size)) {
THROW_ERROR(runtime_error, "read: somehow read more bytes (" << rv << ") than requested (" << size << ")");
}
if (rv == 0) break;
size_read += rv;
size -= rv;
// CHATTER("read " << rv << " bytes from fd " << fd);
}
}
// Read up to `size` bytes from fd and return them as a string whose length
// is the number of bytes actually read.
string
read_string(int fd, size_t size)
{
	string buffer(size, '\0');
	size_t received = 0;
	// data() is const in C++11, hence the cast to obtain a writable pointer
	read_partial_or_die(fd, const_cast<char *>(buffer.data()), size, received);
	buffer.resize(received);
	return buffer;
}
// Read exactly `size` bytes from fd into buf; anything less (for example an
// early end of file) is reported as runtime_error.
void
read_or_die(int fd, void *buf, size_t size)
{
	size_t received = 0;
	read_partial_or_die(fd, buf, size, received);
	const bool incomplete = (received != size);
	if (incomplete) {
		THROW_ERROR(runtime_error, "read: " << received << " of " << size << " bytes");
	}
}
// Read exactly `size` bytes at `offset` from fd into buf using pread(2).
// A call interrupted by EINTR is retried; a short read is an error.
// Nothing is read when size is zero.
//
// Throws invalid_argument when size exceeds the largest signed size,
// runtime_error when fd is negative or the read comes up short, and raises
// via THROW_ERRNO when pread(2) fails for any reason other than EINTR.
void
pread_or_die(int fd, void *buf, size_t size, off_t offset)
{
	if (size > (static_cast<size_t>(~0) >> 1)) {
		THROW_ERROR(invalid_argument, "cannot read " << size << ", more than signed size allows");
	}
	if (fd < 0) {
		throw runtime_error("read: trying to read on a closed file descriptor");
	}
	while (size) {
		// Keep the full ssize_t result: an int would truncate counts above INT_MAX
		const ssize_t rv = pread(fd, buf, size, offset);
		if (rv < 0) {
			if (errno == EINTR) {
				CHATTER(__func__ << ": resuming after EINTR");
				continue;
			}
			THROW_ERRNO("pread: " << size << " bytes");
		}
		if (static_cast<size_t>(rv) != size) {
			THROW_ERROR(runtime_error, "pread: " << size << " bytes at offset " << offset << " returned " << rv);
		}
		break;
	}
}
// Container specializations of pread_or_die.  Each one fills the existing
// contents of the container: exactly text.size() bytes are read, so the
// caller has to size the container before the call.

// string: data() is const in C++11, hence the const_cast to write into it.
template<>
void
pread_or_die<string>(int fd, string &text, off_t offset)
{
return pread_or_die(fd, const_cast<char *>(text.data()), text.size(), offset);
}
// vector<char>
template<>
void
pread_or_die<vector<char>>(int fd, vector<char> &text, off_t offset)
{
return pread_or_die(fd, text.data(), text.size(), offset);
}
// vector<uint8_t>
template<>
void
pread_or_die<vector<uint8_t>>(int fd, vector<uint8_t> &text, off_t offset)
{
return pread_or_die(fd, text.data(), text.size(), offset);
}
// Default constructor: clears this object through memset_zero<stat>.
Stat::Stat()
{
memset_zero<stat>(this);
}
// Fill this object with ::lstat(filename) and return *this for chaining.
// Errors go through DIE_IF_MINUS_ONE.
Stat &
Stat::lstat(const string &filename)
{
CHATTER_UNWIND("lstat " << filename);
DIE_IF_MINUS_ONE(::lstat(filename.c_str(), this));
return *this;
}
// Fill this object with ::fstat(fd) and return *this for chaining.
// Errors go through DIE_IF_MINUS_ONE.
Stat &
Stat::fstat(int fd)
{
CHATTER_UNWIND("fstat " << fd);
DIE_IF_MINUS_ONE(::fstat(fd, this));
return *this;
}
// Construct from an open descriptor: clear the object, then fstat(fd).
Stat::Stat(int fd)
{
memset_zero<stat>(this);
fstat(fd);
}
// Construct from a path: clear the object, then lstat(filename), so a
// symbolic link is described itself rather than followed.
Stat::Stat(const string &filename)
{
memset_zero<stat>(this);
lstat(filename);
}
// Return the target of the symbolic link at `path`.
//
// The buffer starts at 4096 bytes and doubles until the target fits; the
// attempt is abandoned with runtime_error once the size reaches 1 MiB.
// readlink(2) failures are reported through DIE_IF_MINUS_ONE.
string
readlink_or_die(const string &path)
{
	// Start with a reasonable guess since it will usually work
	size_t size = 4096;
	while (size < 1048576) {
		// Heap storage instead of a variable-length array: VLAs are not
		// standard C++ and the later attempts are too large for the stack.
		// The extra byte lets a result of exactly `size` bytes be told
		// apart from a truncated one.
		vector<char> buf(size + 1);
		int rv;
		DIE_IF_MINUS_ONE(rv = readlink(path.c_str(), buf.data(), buf.size()));
		// No negative values allowed except -1
		THROW_CHECK1(runtime_error, rv, rv >= 0);
		if (static_cast<size_t>(rv) <= size) {
			// readlink() does not NUL-terminate, so use the returned length
			return string(buf.data(), rv);
		}
		// The Linux readlink(2) man page (release 3.44) suggests sizing the
		// buffer from lstat's st_size, but that only works when the
		// filesystem reports st_size accurately for symlinks, and at least
		// one doesn't, so we just grow the buffer instead.
		size *= 2;
	}
	THROW_ERROR(runtime_error, "readlink: maximum buffer size exceeded");
}
// Describe a descriptor for humans: the target of /proc/self/fd/<fd>, or,
// if resolving it throws, the text of that exception.
string
name_fd(int fd)
{
	try {
		ostringstream proc_path;
		proc_path << "/proc/self/fd/" << fd;
		const string target = readlink_or_die(proc_path.str());
		return target;
	} catch (exception &failure) {
		return string(failure.what());
	}
}
// Probe every descriptor number from 3 up to the RLIMIT_NOFILE soft limit
// with fstat() and log each one that turns out to be open.
// Returns true when none were found.
bool
assert_no_leaked_fds()
{
	struct rlimit limit;
	if (getrlimit(RLIMIT_NOFILE, &limit) != 0) {
		perror("getrlimit(RLIMIT_NOFILE)");
		// Well, that sucked. Guess.
		limit.rlim_cur = 1024;
	}
	CHATTER("Checking for leaked FDs in range 3.." << limit.rlim_cur);
	int open_count = 0;
	for (unsigned candidate = 3; candidate < limit.rlim_cur; ++candidate) {
		struct stat ignored;
		// fstat succeeding means the descriptor is open
		const bool is_open = (fstat(candidate, &ignored) == 0);
		if (is_open) {
			CHATTER("WARNING: fd " << candidate << " open at exit");
			++open_count;
		}
	}
	CHATTER(open_count << " leaked FD(s) found");
	return open_count == 0;
}
// socketpair(2) wrapper: returns both ends as a pair of Fd.  Errors go
// through DIE_IF_MINUS_ONE.
pair<Fd, Fd>
socketpair_or_die(int domain, int type, int protocol)
{
pair<Fd, Fd> rv;
int sv[2];
DIE_IF_MINUS_ONE(socketpair(domain, type, protocol, sv));
rv.first = sv[0];
rv.second = sv[1];
return rv;
}
// dup2(2) wrapper: duplicates fd_in onto fd_out.  Errors go through
// DIE_IF_MINUS_ONE; the descriptor number returned by dup2 is not reported.
void
dup2_or_die(int fd_in, int fd_out)
{
DIE_IF_MINUS_ONE(dup2(fd_in, fd_out));
}
// Render a stat st_mode value as a '|'-separated list of file-type and
// permission names.
//
// This used to carry a private copy of o_mode_table in which the first
// three entries (S_IFMT, S_IFSOCK, S_IFLNK) were listed twice.  bits_ntoa()
// clears the bits of an entry once it matches, so the repeated entries could
// never match again; sharing o_mode_table produces the same output without
// the duplicated data.
string
st_mode_ntoa(mode_t mode)
{
	return bits_ntoa(mode, o_mode_table);
}
};

1050
lib/fs.cc Normal file

File diff suppressed because it is too large Load Diff

96
lib/interp.cc Normal file
View File

@ -0,0 +1,96 @@
#include "crucible/interp.h"
#include "crucible/chatter.h"
namespace crucible {
using namespace std;
// Run the wrapped callable with `args` and return its result.
int
Proc::exec(const ArgList &args)
{
return m_cmd(args);
}
// Wrap a callable taking an ArgList and returning int; a copy is stored in m_cmd.
Proc::Proc(const function<int(const ArgList &)> &f) :
m_cmd(f)
{
}
// Out-of-line destructor definition; nothing to clean up here.
Command::~Command()
{
}
// Build the list from a null-terminated array of C strings, as found in
// main()'s argv.  A null `argv` produces an empty list.
ArgList::ArgList(const char **argv)
{
	if (!argv) {
		return;
	}
	for (const char **cursor = argv; *cursor; ++cursor) {
		push_back(*cursor);
	}
}
// Build the list from a temporary vector<string>.
// NOTE(review): `that` is a const rvalue reference and is passed on without
// std::move, so the base vector is copy-constructed rather than moved.
ArgList::ArgList(const vector<string> &&that) :
vector<string>(that)
{
}
// Out-of-line destructor definition; nothing to clean up here.
Interp::~Interp()
{
}
// Construct an interpreter whose command table is a copy of `cmdlist`
// (command name -> Command).
Interp::Interp(const map<string, shared_ptr<Command> > &cmdlist) :
m_commands(cmdlist)
{
}
// Register `command` under `name`, replacing any command already stored
// under that name.
void
Interp::add_command(const string &name, const shared_ptr<Command> &command)
{
m_commands[name] = command;
}
int
Interp::exec(const ArgList &args)
{
auto next_arg = args.begin();
++next_arg;
return m_commands.at(args[0])->exec(vector<string>(next_arg, args.end()));
}
// Out-of-line destructor definition; nothing to clean up here.
ArgParser::~ArgParser()
{
}
// Default constructor: starts with no registered options.
ArgParser::ArgParser()
{
}
// Register `actor` for the option `opt`, replacing any existing entry.
// parse_backend() looks options up by the text following the leading "--".
void
ArgParser::add_opt(string opt, ArgActor actor)
{
m_string_opts[opt] = actor;
}
void
ArgParser::parse_backend(void *t, const ArgList &args)
{
bool quote_args = false;
for (string arg : args) {
if (quote_args) {
cerr << "arg: '" << arg << "'" << endl;
continue;
}
if (arg == "--") {
quote_args = true;
continue;
}
if (arg.compare(0, 2, "--") == 0) {
auto found = m_string_opts.find(arg.substr(2, string::npos));
if (found != m_string_opts.end()) {
found->second.predicate(t, "foo");
}
(void)t;
}
}
}
};

40
lib/ntoa.cc Normal file
View File

@ -0,0 +1,40 @@
#include "crucible/ntoa.h"
#include <cassert>
#include <sstream>
#include <string>
namespace crucible {
using namespace std;
// Convert the number `n` to text using a name table.
//
// The table is scanned in order until an entry with a null name is found or
// no bits remain.  An entry matches when (n & mask) == entry value; its name
// is appended and its mask bits are removed from n.  Bits left over at the
// end are appended in hex, names are joined with '|', and "0" is returned
// when nothing was produced.
string bits_ntoa(unsigned long n, const bits_ntoa_table *table)
{
	string text;
	for (const bits_ntoa_table *entry = table; n && entry->a; ++entry) {
		// The entry's value must not have bits outside of its own mask
		assert( ((~entry->mask) & entry->n) == 0);
		if ( (n & entry->mask) != entry->n) {
			continue;
		}
		if (!text.empty()) {
			text += "|";
		}
		text += entry->a;
		n &= ~(entry->mask);
	}
	if (n) {
		// Whatever the table could not name is reported numerically
		ostringstream leftover;
		leftover << "0x" << hex << n;
		if (!text.empty()) {
			text += "|";
		}
		text += leftover.str();
	}
	if (text.empty()) {
		text = "0";
	}
	return text;
}
};

26
lib/path.cc Normal file
View File

@ -0,0 +1,26 @@
#include "crucible/path.h"
#include "crucible/error.h"
namespace crucible {
using namespace std;
// Return the final component of the path `s`.
//
// Trailing slashes are ignored, so "dir/file/" yields "file".  A string with
// no slash is returned unchanged; an empty string or one made only of
// slashes yields "".
string
basename(string s)
{
	// Last character that belongs to the final component
	const size_t last = s.find_last_not_of("/");
	if (last == string::npos) {
		return string();
	}
	// Last separator in front of that component, if there is one
	const size_t slash = s.find_last_of("/", last);
	if (slash == string::npos) {
		return s.substr(0, last + 1);
	}
	// substr takes a length, not an end position
	return s.substr(slash + 1, last - slash);
}
// Concatenate a directory and a name with a single "/" between them.
// No checking or normalisation is performed on either part.
string
join(string dir, string base)
{
	// TODO: a lot of sanity checking, maybe canonicalization
	dir += "/";
	dir += base;
	return dir;
}
};

121
lib/process.cc Normal file
View File

@ -0,0 +1,121 @@
#include "crucible/process.h"
#include "crucible/chatter.h"
#include "crucible/error.h"
#include <utility>
// for gettid()
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <unistd.h>
#include <sys/syscall.h>
namespace crucible {
using namespace std;
// True while this object holds a non-zero pid, i.e. after do_fork() and
// before join() or detach() reset it.
bool
Process::joinable()
{
return !!m_pid;
}
// Destroying an object that still holds a pid only logs a warning; the
// child is neither waited for nor signalled.
Process::~Process()
{
if (joinable()) {
// because it's just not the same without the word "zombie"...
CHATTER("ZOMBIE WARNING: joinable Process pid " << m_pid << " abandoned");
}
}
// Construct an object that holds no process (pid 0).
Process::Process() :
m_pid(0)
{
}
// Move constructor: takes over the pid and leaves `move_from` holding 0,
// so only one object remains joinable.
Process::Process(Process &&move_from) :
m_pid(0)
{
swap(m_pid, move_from.m_pid);
}
// Fork.  The parent stores the child's pid in m_pid and returns.  The child
// runs child_func and exits with its return value; it never returns from
// this function.  Raises via THROW_ERRNO when fork() fails.
void
Process::do_fork(function<int()> child_func)
{
int rv = fork();
if (rv < 0) {
THROW_ERRNO("fork failed");
}
m_pid = rv;
if (rv == 0) {
// child
catch_all([&]() {
int rv = child_func();
exit(rv);
});
// Only reached if exit() was not called, e.g. child_func threw
// (catch_all presumably swallows the exception - confirm in crucible/error.h)
terminate();
}
}
// Wait for the child to terminate and return the raw status reported by
// waitpid().  Afterwards the object holds no process.
//
// Throws invalid_argument when no process is held, raises via THROW_ERRNO
// when waitpid() fails, and throws runtime_error when waitpid() reports a
// different pid.
Process::status_type
Process::join()
{
	if (m_pid == 0) {
		THROW_ERROR(invalid_argument, "Process not created");
	}
	int status = 0;
	const pid_t reaped = waitpid(m_pid, &status, 0);
	if (reaped == -1) {
		THROW_ERRNO("waitpid failed, pid = " << m_pid);
	}
	if (reaped != m_pid) {
		THROW_ERROR(runtime_error, "waitpid failed, wanted pid = " << m_pid << ", got rv = " << reaped << ", status = " << status);
	}
	m_pid = 0;
	return status;
}
// Forget the child: the pid is reset to 0 without waiting for the process.
void
Process::detach()
{
m_pid = 0;
}
// Return the stored pid (0 when no process is held).
Process::native_handle_type
Process::native_handle()
{
return m_pid;
}
// Return the stored pid (0 when no process is held).
Process::id
Process::get_id()
{
return m_pid;
}
// Send signal `sig` to the child with ::kill().
// Throws invalid_argument when no process is held and raises via
// THROW_ERRNO when ::kill() fails.
void
Process::kill(int sig)
{
	if (m_pid == 0) {
		THROW_ERROR(invalid_argument, "Process not created");
	}
	const int status = ::kill(m_pid, sig);
	if (status != 0) {
		THROW_ERRNO("killing process " << m_pid << " with signal " << sig);
	}
}
// Declares (without defining) the explicit specialization of ResourceHandle
// for Process::id / Process.
// NOTE(review): nothing visible here defines or uses it - confirm it is still needed.
template<>
struct ResourceHandle<Process::id, Process>;
// Return the calling thread's kernel thread id using the raw SYS_gettid
// system call.
pid_t
gettid()
{
return syscall(SYS_gettid);
}
}

43
lib/string.cc Normal file
View File

@ -0,0 +1,43 @@
#include "crucible/string.h"
#include "crucible/error.h"
#include <inttypes.h>
namespace crucible {
using namespace std;
// Format `i` as lower-case hexadecimal with a "0x" prefix, e.g. 255 -> "0xff".
string
to_hex(uint64_t i)
{
return astringprintf("0x%" PRIx64, i);
}
// Parse an unsigned integer from `s` with stoull().
// Despite the name the base is auto-detected: "0x" selects hexadecimal, a
// leading "0" octal, anything else decimal.  Characters after the number
// are ignored.  Throws whatever stoull() throws for unparsable or
// out-of-range input.
uint64_t
from_hex(const string &s)
{
	size_t *const end_position_unused = nullptr;
	const int auto_detect_base = 0;
	return stoull(s, end_position_unused, auto_detect_base);
}
// Split `s` at every occurrence of `delim` and return the non-empty pieces
// in order; empty pieces (adjacent, leading or trailing delimiters) are
// dropped.  Throws invalid_argument when `delim` is empty.
vector<string>
split(string delim, string s)
{
	if (delim.empty()) {
		THROW_ERROR(invalid_argument, "delimiter empty when splitting '" << s << "'");
	}
	vector<string> pieces;
	const size_t step = delim.length();
	for (size_t start = 0; start < s.length(); ) {
		const size_t hit = s.find(delim, start);
		if (hit == string::npos) {
			// No more delimiters: the rest of the string is the last piece
			pieces.push_back(s.substr(start));
			break;
		}
		if (hit > start) {
			pieces.push_back(s.substr(start, hit - start));
		}
		start = hit + step;
	}
	return pieces;
}
};

158
lib/time.cc Normal file
View File

@ -0,0 +1,158 @@
#include "crucible/time.h"
#include "crucible/error.h"
#include <algorithm>
#include <cmath>
#include <ctime>
#include <thread>
namespace crucible {
// Sleep for `secs` seconds (fractions allowed) using ::nanosleep().
//
// Returns the remaining time reported by ::nanosleep().  Non-positive
// values, and values whose converted fields fall outside the accepted
// range, are returned unchanged without sleeping.  Raises via THROW_ERRNO
// when ::nanosleep() reports failure.
double
nanosleep(double secs)
{
	if (secs <= 0) return secs;

	const double whole_seconds = floor(secs);
	struct timespec request;
	request.tv_sec = time_t(whole_seconds);
	request.tv_nsec = long((secs - whole_seconds) * 1000000000);

	// Just silently ignore weirdo values for now
	const bool out_of_range =
		request.tv_sec < 0 ||
		request.tv_sec > 1000000000 ||
		request.tv_nsec < 0 ||
		request.tv_nsec > 1000000000;
	if (out_of_range) return secs;

	struct timespec remaining;
	remaining.tv_sec = 0;
	remaining.tv_nsec = 0;
	if (::nanosleep(&request, &remaining)) {
		THROW_ERRNO("nanosleep (" << secs << ") { tv_sec = " << request.tv_sec << ", tv_nsec = " << request.tv_nsec << " }");
	}
	return remaining.tv_sec + (double(remaining.tv_nsec) / 1000000000.0);
}
// Start the timer at the current high_resolution_clock time.
Timer::Timer() :
m_start(chrono::high_resolution_clock::now())
{
}
double
Timer::age() const
{
chrono::high_resolution_clock::time_point end = chrono::high_resolution_clock::now();
return chrono::duration<double>(end - m_start).count();
}
// Return age() rounded up to the next multiple of 1/precision
// (precision 1000 rounds up to the next millisecond).
double
Timer::report(int precision) const
{
return ceil(age() * precision) / precision;
}
// Restart the timer from the current time.
void
Timer::reset()
{
m_start = chrono::high_resolution_clock::now();
}
// Set the start point to an explicit time point.
void
Timer::set(const chrono::high_resolution_clock::time_point &start)
{
m_start = start;
}
// Shift the start point by `delta` seconds.  Despite the name this adds to
// the existing start point instead of replacing it: a positive delta moves
// the start later, which makes age() smaller.
void
Timer::set(double delta)
{
m_start += chrono::duration_cast<chrono::high_resolution_clock::duration>(chrono::duration<double>(delta));
}
double
Timer::lap()
{
auto end = chrono::high_resolution_clock::now();
double rv = chrono::duration<double>(end - m_start).count();
m_start = end;
return rv;
}
// Stream a Timer as the value of report() with its default precision.
ostream &
operator<<(ostream &os, const Timer &t)
{
return os << t.report();
}
// True when fewer than `d` seconds have elapsed.
bool
Timer::operator<(double d) const
{
return age() < d;
}
// True when more than `d` seconds have elapsed.
bool
Timer::operator>(double d) const
{
return age() > d;
}
// Construct a limiter that earns `rate` tokens per second and holds at most
// `burst` tokens.
// NOTE(review): m_tokens is not initialised here - confirm it has an in-class initialiser.
RateLimiter::RateLimiter(double rate, double burst) :
m_rate(rate),
m_burst(burst)
{
}
// Construct a limiter whose burst capacity equals its rate, i.e. at most
// one second's worth of tokens can accumulate.
RateLimiter::RateLimiter(double rate) :
m_rate(rate),
m_burst(rate)
{
}
// Credit the tokens earned since the previous call (elapsed time * rate)
// and cap the balance at the burst limit.  Takes no lock itself; the
// callers in this file hold m_mutex around it.
void
RateLimiter::update_tokens()
{
	const double elapsed = m_timer.lap();
	m_tokens += elapsed * m_rate;
	if (m_burst < m_tokens) {
		m_tokens = m_burst;
	}
}
// Charge `cost` tokens, then block until the token balance is no longer
// negative.  The mutex is released while sleeping so other threads can keep
// using the limiter.
void
RateLimiter::sleep_for(double cost)
{
borrow(cost);
while (1) {
unique_lock<mutex> lock(m_mutex);
update_tokens();
if (m_tokens >= 0) {
return;
}
// Time needed to earn back the current deficit at the configured rate
double sleep_time(-m_tokens / m_rate);
lock.unlock();
if (sleep_time > 0.0) {
nanosleep(sleep_time);
} else {
return;
}
}
}
// Refresh the token balance and report, without blocking, whether it is
// non-negative.
bool
RateLimiter::is_ready()
{
unique_lock<mutex> lock(m_mutex);
update_tokens();
return m_tokens >= 0;
}
// Charge `cost` tokens immediately without waiting; the balance may become
// negative, which makes later sleep_for() calls wait longer.
void
RateLimiter::borrow(double cost)
{
unique_lock<mutex> lock(m_mutex);
m_tokens -= cost;
}
}

16
lib/uuid.cc Normal file
View File

@ -0,0 +1,16 @@
#include "crucible/uuid.h"
namespace crucible {
using namespace std;
const size_t uuid_unparsed_size = 37; // "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx\0"
// Convert a 16-byte binary UUID into its textual form using the global
// ::uuid_unparse(), returning the result as a string.
string
uuid_unparse(const unsigned char in[16])
{
	// Sized for the 36 characters of text plus the terminating NUL
	char text[uuid_unparsed_size];
	::uuid_unparse(in, text);
	return text;
}
}

4
makeflags Normal file
View File

@ -0,0 +1,4 @@
CCFLAGS = -Wall -Wextra -Werror -O3 -I../include -ggdb -fpic
# CCFLAGS = -Wall -Wextra -Werror -O0 -I../include -ggdb -fpic
CFLAGS = $(CCFLAGS) -std=c99
CXXFLAGS = $(CCFLAGS) -std=c++11 -Wold-style-cast

39
src/Makefile Normal file
View File

@ -0,0 +1,39 @@
PROGRAMS = \
../bin/bees \
../bin/fiemap \
../bin/fiewalk \
all: $(PROGRAMS) depends.mk
include ../makeflags
LIBS = -lcrucible -lpthread
LDFLAGS = -L../lib -Wl,-rpath=$(shell realpath ../lib)
depends.mk: Makefile *.cc
for x in *.cc; do $(CXX) $(CXXFLAGS) -M "$$x"; done > depends.mk.new
mv -fv depends.mk.new depends.mk
-include depends.mk
%.o: %.cc %.h
$(CXX) $(CXXFLAGS) -o "$@" -c "$<"
../bin/%: %.o
@echo Implicit bin rule "$<" '->' "$@"
$(CXX) $(CXXFLAGS) -o "$@" "$<" $(LDFLAGS) $(LIBS)
BEES_OBJS = \
bees.o \
bees-context.o \
bees-hash.o \
bees-resolve.o \
bees-roots.o \
bees-thread.o \
bees-types.o \
../bin/bees: $(BEES_OBJS)
$(CXX) $(CXXFLAGS) -o "$@" $(BEES_OBJS) $(LDFLAGS) $(LIBS)
clean:
-rm -fv *.o

1009
src/bees-context.cc Normal file

File diff suppressed because it is too large Load Diff

682
src/bees-hash.cc Normal file
View File

@ -0,0 +1,682 @@
#include "bees.h"
#include "crucible/crc64.h"
#include "crucible/string.h"
#include <algorithm>
#include <random>
#include <sys/mman.h>
using namespace crucible;
using namespace std;
// Compile-time switch that is currently always true.
// NOTE(review): no caller is visible in this part of the file - confirm it is still used.
static inline
bool
using_any_madvise()
{
return true;
}
// Stream a BeesHash as a "0x"-prefixed hexadecimal number.
ostream &
operator<<(ostream &os, const BeesHash &bh)
{
return os << to_hex(BeesHash::Type(bh));
}
// Stream a hash table cell as its hash and address, each formatted through
// the stream operators of BeesHash and BeesAddress.
ostream &
operator<<(ostream &os, const BeesHashTable::Cell &bhte)
{
return os << "BeesHashTable::Cell { hash = " << BeesHash(bhte.e_hash) << ", addr = "
<< BeesAddress(bhte.e_addr) << " }";
}
// Log every cell in the half-open range [p, q) together with its index.
void
dump_bucket(BeesHashTable::Cell *p, BeesHashTable::Cell *q)
{
	// Must be called while holding m_bucket_mutex
	for (BeesHashTable::Cell *cell = p; cell < q; ++cell) {
		const auto index = cell - p;
		BEESLOG("Entry " << index << " " << *cell);
	}
}
const bool VERIFY_CLEARS_BUGS = false;
// Check the cells in [p, q) for two kinds of corruption: a non-zero address
// below 0x1000, and a cell that duplicates an earlier one in the range.
// Every problem is counted and logged; with `clear_bugs` set the offending
// cell is also zeroed.  Returns true when at least one problem was found.
bool
verify_cell_range(BeesHashTable::Cell *p, BeesHashTable::Cell *q, bool clear_bugs = VERIFY_CLEARS_BUGS)
{
	// Must be called while holding m_bucket_mutex
	set<BeesHashTable::Cell> cells_seen;
	bool any_bug = false;

	// Common handling for a cell that failed a check
	const auto record_bug = [&](BeesHashTable::Cell &bad) {
		if (clear_bugs) {
			bad.e_addr = 0;
			bad.e_hash = 0;
		}
		any_bug = true;
	};

	for (BeesHashTable::Cell *cell = p; cell < q; ++cell) {
		if (cell->e_addr && cell->e_addr < 0x1000) {
			BEESCOUNT(bug_hash_magic_addr);
			BEESINFO("Bad hash table address hash " << to_hex(cell->e_hash) << " addr " << to_hex(cell->e_addr));
			record_bug(*cell);
		}
		// A cell cleared by the check above has a zero address and is skipped here
		if (cell->e_addr && !cells_seen.insert(*cell).second) {
			BEESCOUNT(bug_hash_duplicate_cell);
			BEESINFO("Duplicate hash table entry: " << *cell);
			record_bug(*cell);
		}
	}
	return any_bug;
}
// Return the half-open range of cells [first, second) of the bucket that
// `hash` maps to (hash modulo the number of buckets).
// Throws runtime_error when the table is not set up and out_of_range when
// the computed pointers fall outside the cell array.
pair<BeesHashTable::Cell *, BeesHashTable::Cell *>
BeesHashTable::get_cell_range(HashType hash)
{
THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
THROW_CHECK1(runtime_error, m_bucket_ptr, m_bucket_ptr != nullptr);
Bucket *pp = &m_bucket_ptr[hash % m_buckets];
// The bucket ends where the following bucket begins
Cell *bp = pp[0].p_cells;
Cell *ep = pp[1].p_cells;
THROW_CHECK2(out_of_range, m_cell_ptr, bp, bp >= m_cell_ptr);
THROW_CHECK2(out_of_range, m_cell_ptr_end, ep, ep <= m_cell_ptr_end);
return make_pair(bp, ep);
}
// Return the half-open byte range [first, second) of the extent containing
// the bucket that `hash` maps to.
// Throws runtime_error when the table is not set up and out_of_range when
// the computed pointers fall outside the table's byte range.
pair<uint8_t *, uint8_t *>
BeesHashTable::get_extent_range(HashType hash)
{
THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
THROW_CHECK1(runtime_error, m_bucket_ptr, m_bucket_ptr != nullptr);
// bucket index -> extent index
Extent *iop = &m_extent_ptr[ (hash % m_buckets) / c_buckets_per_extent ];
// The extent ends where the following extent begins
uint8_t *bp = iop[0].p_byte;
uint8_t *ep = iop[1].p_byte;
THROW_CHECK2(out_of_range, m_byte_ptr, bp, bp >= m_byte_ptr);
THROW_CHECK2(out_of_range, m_byte_ptr_end, ep, ep <= m_byte_ptr_end);
return make_pair(bp, ep);
}
// Write every extent currently marked dirty back to the hash table file,
// pausing on the flush rate limiter after each one.  When nothing is dirty
// the call waits on the condition variable and then returns so the caller
// can call again.  Does nothing for a shared map.
void
BeesHashTable::flush_dirty_extents()
{
if (using_shared_map()) return;
THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
unique_lock<mutex> lock(m_extent_mutex);
// Take a private copy of the dirty set and clear the shared one, so the
// slow writes below happen without holding m_extent_mutex
auto dirty_extent_copy = m_buckets_dirty;
m_buckets_dirty.clear();
if (dirty_extent_copy.empty()) {
BEESNOTE("idle");
m_condvar.wait(lock);
return; // please call later, i.e. immediately
}
lock.unlock();
size_t extent_counter = 0;
for (auto extent_number : dirty_extent_copy) {
++extent_counter;
BEESNOTE("flush extent #" << extent_number << " (" << extent_counter << " of " << dirty_extent_copy.size() << ")");
// A failure on one extent is contained by catch_all (presumably logged
// and swallowed - confirm in crucible/error.h) so the loop continues
catch_all([&]() {
uint8_t *dirty_extent = m_extent_ptr[extent_number].p_byte;
uint8_t *dirty_extent_end = m_extent_ptr[extent_number + 1].p_byte;
THROW_CHECK1(out_of_range, dirty_extent, dirty_extent >= m_byte_ptr);
THROW_CHECK1(out_of_range, dirty_extent_end, dirty_extent_end <= m_byte_ptr_end);
// NOTE(review): this branch looks unreachable because of the early
// return at the top (unless using_shared_map() can change mid-call);
// the copy() has identical source and destination.
if (using_shared_map()) {
BEESTOOLONG("flush extent " << extent_number);
copy(dirty_extent, dirty_extent_end, dirty_extent);
} else {
BEESTOOLONG("pwrite(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
// Page locks slow us down more than copying the data does
vector<uint8_t> extent_copy(dirty_extent, dirty_extent_end);
pwrite_or_die(m_fd, extent_copy, dirty_extent - m_byte_ptr);
BEESCOUNT(hash_extent_out);
}
});
BEESNOTE("flush rate limited at extent #" << extent_number << " (" << extent_counter << " of " << dirty_extent_copy.size() << ")");
m_flush_rate_limit.sleep_for(BLOCK_SIZE_HASHTAB_EXTENT);
}
}
// Mark the extent containing `hash`'s bucket as needing writeback and wake
// one thread waiting on the condition variable.  The argument is a hash
// (it is reduced modulo the bucket count), not an extent number.
// Does nothing for a shared map.
void
BeesHashTable::set_extent_dirty(HashType hash)
{
if (using_shared_map()) return;
THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
auto pr = get_extent_range(hash);
// Recover the extent index from the extent's start address
uint64_t extent_number = reinterpret_cast<Extent *>(pr.first) - m_extent_ptr;
THROW_CHECK1(runtime_error, extent_number, extent_number < m_extents);
unique_lock<mutex> lock(m_extent_mutex);
m_buckets_dirty.insert(extent_number);
m_condvar.notify_one();
}
// Body of the writeback thread: flush dirty extents forever.
// Returns immediately for a shared map.
void
BeesHashTable::writeback_loop()
{
	if (using_shared_map()) {
		return;
	}
	for (;;) {
		flush_dirty_extents();
	}
}
// Format num/den as a whole-number percentage such as "42%".
// Returns "--%" when den is zero so callers never divide by zero.
static
string
percent(size_t num, size_t den)
{
	if (den) {
		// %zu is the conversion for size_t; %u expects unsigned int, which
		// is a different width on 64-bit targets
		return astringprintf("%zu%%", num * 100 / den);
	} else {
		return "--%";
	}
}
// Body of the prefetch/statistics thread.
//
// Locks the table into memory once, then loops forever: fetch every extent
// that is still missing, verify and count its cells, build an occupancy
// histogram and a text report, log the report and write it to the stats
// file, then sleep for BEES_HASH_TABLE_ANALYZE_INTERVAL seconds.
void
BeesHashTable::prefetch_loop()
{
	// Always do the mlock, whether shared or not
	THROW_CHECK1(runtime_error, m_size, m_size > 0);
	catch_all([&]() {
		BEESNOTE("mlock " << pretty(m_size));
		DIE_IF_NON_ZERO(mlock(m_byte_ptr, m_size));
	});

	while (1) {
		// Number of histogram columns
		size_t width = 64;
		vector<size_t> occupancy(width, 0);
		size_t occupied_count = 0;
		size_t total_count = 0;
		size_t compressed_count = 0;
		size_t compressed_offset_count = 0;
		size_t toxic_count = 0;
		size_t unaligned_eof_count = 0;

		for (uint64_t ext = 0; ext < m_extents; ++ext) {
			BEESNOTE("prefetching hash table extent " << ext << " of " << m_extent_ptr_end - m_extent_ptr);
			catch_all([&]() {
				// fetch_missing_extent() takes a hash; the first bucket
				// number of the extent selects that extent
				fetch_missing_extent(ext * c_buckets_per_extent);

				BEESNOTE("analyzing hash table extent " << ext << " of " << m_extent_ptr_end - m_extent_ptr);
				bool duplicate_bugs_found = false;
				unique_lock<mutex> lock(m_bucket_mutex);
				for (Bucket *bucket = m_extent_ptr[ext].p_buckets; bucket < m_extent_ptr[ext + 1].p_buckets; ++bucket) {
					if (verify_cell_range(bucket[0].p_cells, bucket[1].p_cells)) {
						duplicate_bugs_found = true;
					}
					size_t this_bucket_occupied_count = 0;
					for (Cell *cell = bucket[0].p_cells; cell < bucket[1].p_cells; ++cell) {
						if (cell->e_addr) {
							++this_bucket_occupied_count;
							BeesAddress a(cell->e_addr);
							if (a.is_compressed()) {
								++compressed_count;
								if (a.has_compressed_offset()) {
									++compressed_offset_count;
								}
							}
							if (a.is_toxic()) {
								++toxic_count;
							}
							if (a.is_unaligned_eof()) {
								++unaligned_eof_count;
							}
						}
						++total_count;
					}
					// Scale the bucket's fill level onto the histogram columns
					++occupancy.at(this_bucket_occupied_count * width / (1 + c_cells_per_bucket) );
					// Count these instead of calculating the number so we get better stats in case of exceptions
					occupied_count += this_bucket_occupied_count;
				}
				lock.unlock();
				if (duplicate_bugs_found) {
					// set_extent_dirty() also takes a hash, not an extent
					// number, so convert the same way as for the fetch above;
					// passing `ext` directly marks the wrong extent dirty
					set_extent_dirty(ext * c_buckets_per_extent);
				}
			});
		}

		BEESNOTE("calculating hash table statistics");

		// Build the histogram one row per threshold, doubling the threshold
		// each time, until a row has no column reaching it
		vector<string> histogram;
		vector<size_t> thresholds;
		size_t threshold = 1;
		bool threshold_exceeded = false;
		do {
			threshold_exceeded = false;
			histogram.push_back(string(width, ' '));
			thresholds.push_back(threshold);
			for (size_t x = 0; x < width; ++x) {
				if (occupancy.at(x) >= threshold) {
					histogram.back().at(x) = '#';
					threshold_exceeded = true;
				}
			}
			threshold *= 2;
		} while (threshold_exceeded);

		// Print the rows from the largest threshold down
		ostringstream out;
		size_t count = histogram.size();
		bool first_line = true;
		for (auto it = histogram.rbegin(); it != histogram.rend(); ++it) {
			out << *it << " " << thresholds.at(--count);
			if (first_line) {
				first_line = false;
				out << " pages";
			}
			out << "\n";
		}

		size_t uncompressed_count = occupied_count - compressed_count;
		size_t legacy_count = compressed_count - compressed_offset_count;

		ostringstream graph_blob;
		graph_blob << "Now: " << format_time(time(NULL)) << "\n";
		graph_blob << "Uptime: " << m_ctx->total_timer().age() << " seconds\n";
		// percent() guards against total_count == 0 (no extent could be
		// analyzed), where an inline division would divide by zero
		graph_blob
			<< "\nHash table page occupancy histogram (" << occupied_count << "/" << total_count << " cells occupied, " << percent(occupied_count, total_count) << ")\n"
			<< out.str() << "0% | 25% | 50% | 75% | 100% page fill\n"
			<< "compressed " << compressed_count << " (" << percent(compressed_count, occupied_count) << ")"
			<< " new-style " << compressed_offset_count << " (" << percent(compressed_offset_count, occupied_count) << ")"
			<< " old-style " << legacy_count << " (" << percent(legacy_count, occupied_count) << ")\n"
			<< "uncompressed " << uncompressed_count << " (" << percent(uncompressed_count, occupied_count) << ")"
			<< " unaligned_eof " << unaligned_eof_count << " (" << percent(unaligned_eof_count, occupied_count) << ")"
			<< " toxic " << toxic_count << " (" << percent(toxic_count, occupied_count) << ")";
		graph_blob << "\n\n";

		graph_blob << "TOTAL:\n";
		auto thisStats = BeesStats::s_global;
		graph_blob << "\t" << thisStats << "\n";

		graph_blob << "\nRATES:\n";
		auto avg_rates = thisStats / m_ctx->total_timer().age();
		graph_blob << "\t" << avg_rates << "\n";

		BEESLOG(graph_blob.str());
		catch_all([&]() {
			m_stats_file.write(graph_blob.str());
		});

		BEESNOTE("idle " << BEES_HASH_TABLE_ANALYZE_INTERVAL << "s");
		nanosleep(BEES_HASH_TABLE_ANALYZE_INTERVAL);
	}
}
// Make sure the extent containing `hash`'s bucket has been read from the
// hash table file, reading it now if it is still listed as missing.
// Does nothing for a shared map.
//
// Locking: m_extent_mutex protects m_buckets_missing and is never held
// across the read.  A per-extent lock from m_extent_lock_set is taken before
// the read, and the missing set is re-checked after acquiring it.
void
BeesHashTable::fetch_missing_extent(HashType hash)
{
BEESTOOLONG("fetch_missing_extent for hash " << to_hex(hash));
if (using_shared_map()) return;
THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
auto pr = get_extent_range(hash);
// Recover the extent index from the extent's start address
uint64_t extent_number = reinterpret_cast<Extent *>(pr.first) - m_extent_ptr;
THROW_CHECK1(runtime_error, extent_number, extent_number < m_extents);
unique_lock<mutex> lock(m_extent_mutex);
// Fast path: the extent is already loaded
if (!m_buckets_missing.count(extent_number)) {
return;
}
// Only used in the progress messages below
size_t missing_buckets = m_buckets_missing.size();
lock.unlock();
BEESNOTE("fetch waiting for hash extent #" << extent_number << ", " << missing_buckets << " left to fetch");
// Acquire blocking lock on this extent only
LockSet<uint64_t>::Lock extent_lock(m_extent_lock_set, extent_number);
// Check missing again because someone else might have fetched this
// extent for us while we didn't hold any locks
lock.lock();
if (!m_buckets_missing.count(extent_number)) {
BEESCOUNT(hash_extent_in_twice);
return;
}
lock.unlock();
// OK we have to read this extent
BEESNOTE("fetching hash extent #" << extent_number << ", " << missing_buckets << " left to fetch");
BEESTRACE("Fetching missing hash extent " << extent_number);
uint8_t *dirty_extent = m_extent_ptr[extent_number].p_byte;
uint8_t *dirty_extent_end = m_extent_ptr[extent_number + 1].p_byte;
{
BEESTOOLONG("pread(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
pread_or_die(m_fd, dirty_extent, dirty_extent_end - dirty_extent, dirty_extent - m_byte_ptr);
}
BEESCOUNT(hash_extent_in);
// We don't block when fetching an extent but we do slow down the
// prefetch thread.
m_prefetch_rate_limit.borrow(BLOCK_SIZE_HASHTAB_EXTENT);
// Re-take the mutex to record that the extent is now loaded
lock.lock();
m_buckets_missing.erase(extent_number);
}
// Report whether `hash` is a member of m_toxic_hashes.
bool
BeesHashTable::is_toxic_hash(BeesHashTable::HashType hash) const
{
	const bool is_listed = (m_toxic_hashes.count(hash) != 0);
	return is_listed;
}
// Return copies of every cell in the bucket for `hash` whose hash matches
// and whose address is at least 0x1000.  A toxic hash is answered with a
// single synthetic cell carrying a toxic address, without touching the table.
vector<BeesHashTable::Cell>
BeesHashTable::find_cell(HashType hash)
{
	// This saves a lot of time prefilling the hash table, and there's no risk of eviction
	if (is_toxic_hash(hash)) {
		BEESCOUNT(hash_toxic);
		BeesAddress toxic_addr(0x1000);
		toxic_addr.set_toxic();
		return vector<Cell>(1, Cell(hash, toxic_addr));
	}

	fetch_missing_extent(hash);
	BEESTOOLONG("find_cell hash " << BeesHash(hash));
	vector<Cell> matches;
	unique_lock<mutex> lock(m_bucket_mutex);
	auto bucket = get_cell_range(hash);
	// FIXME: Weed out zero addresses in the table due to earlier bugs
	for (auto cell = bucket.first; cell != bucket.second; ++cell) {
		if (cell->e_hash == hash && cell->e_addr >= 0x1000) {
			matches.push_back(*cell);
		}
	}
	BEESCOUNT(hash_lookup);
	return matches;
}
// Remove the entry (hash, addr) from its bucket if it is present, by
// overwriting the matching cell with the empty cell Cell(0, 0) and marking
// the extent dirty.  Used after an attempt to resolve an address in the
// hash table fails.  Nothing is changed when the entry is not found.
// NOTE(review): an earlier description said this moves the entry to the end
// of the list, and that shared hash tables never erase anything; the code
// below zeroes the cell, and the shared-table early return is commented out.
void
BeesHashTable::erase_hash_addr(HashType hash, AddrType addr)
{
// if (m_shared) return;
fetch_missing_extent(hash);
BEESTOOLONG("erase hash " << to_hex(hash) << " addr " << addr);
unique_lock<mutex> lock(m_bucket_mutex);
auto er = get_cell_range(hash);
Cell mv(hash, addr);
// find() returns er.second when no cell in the bucket equals mv
Cell *ip = find(er.first, er.second, mv);
bool found = (ip < er.second);
if (found) {
// Lookups on invalid addresses really hurt us. Kill it with fire!
*ip = Cell(0, 0);
set_extent_dirty(hash);
BEESCOUNT(hash_erase);
#if 0
if (verify_cell_range(er.first, er.second)) {
BEESINFO("while erasing hash " << hash << " addr " << addr);
}
#endif
}
}
// Put (hash, addr) at the front of its bucket.  When the entry is already
// in the bucket it is moved to the front and true is returned.  Otherwise
// it is inserted at the front, consuming the first empty cell or, when the
// bucket has no empty cell, pushing the last entry out, and false is
// returned.  Used to move duplicate hash blocks to the front of the list.
bool
BeesHashTable::push_front_hash_addr(HashType hash, AddrType addr)
{
	fetch_missing_extent(hash);
	BEESTOOLONG("push_front_hash_addr hash " << BeesHash(hash) <<" addr " << BeesAddress(addr));
	unique_lock<mutex> lock(m_bucket_mutex);
	auto bucket = get_cell_range(hash);
	Cell wanted(hash, addr);
	Cell *slot = find(bucket.first, bucket.second, wanted);
	bool found = (slot < bucket.second);
	if (!found) {
		// If no match found, get rid of an empty space instead
		// If no empty spaces, slot will point to end
		slot = find(bucket.first, bucket.second, Cell(0, 0));
	}
	if (slot > bucket.first) {
		// The cell at `hole` is overwritten: it is the matching entry,
		// the first empty entry, or the last entry whether empty or not
		Cell *hole = slot;
		if (hole == bucket.second) {
			// Nothing to reuse, so the last entry is dropped
			--hole;
			BEESCOUNT(hash_evict);
		}
		// Slide everything in front of the hole back by one cell
		move_backward(bucket.first, hole, hole + 1);
	}
	// There is now a space at the front, insert there if different
	if (bucket.first[0] != wanted) {
		bucket.first[0] = wanted;
		set_extent_dirty(hash);
		BEESCOUNT(hash_front);
	}
#if 0
	if (verify_cell_range(bucket.first, bucket.second)) {
		BEESINFO("while push_fronting hash " << hash << " addr " << addr);
	}
#endif
	return found;
}
// Insert (hash, addr) at a random position in its bucket.
//
// If the entry is already present, returns true; an entry found behind the
// random position is bumped forward to that position, otherwise the bucket
// is left alone.  If the entry is not present, returns false and stores it
// in the first empty cell at or after the random position, else the nearest
// empty cell before it, else at the random position itself, evicting the
// entry at the end of the bucket.  Used to insert new unique
// (not-yet-duplicate) blocks in random order.
bool
BeesHashTable::push_random_hash_addr(HashType hash, AddrType addr)
{
	fetch_missing_extent(hash);
	BEESTOOLONG("push_random_hash_addr hash " << BeesHash(hash) << " addr " << BeesAddress(addr));
	unique_lock<mutex> lock(m_bucket_mutex);
	auto er = get_cell_range(hash);
	Cell mv(hash, addr);
	Cell *ip = find(er.first, er.second, mv);
	bool found = (ip < er.second);

	thread_local default_random_engine generator;
	thread_local uniform_int_distribution<int> distribution(0, c_cells_per_bucket - 1);
	auto pos = distribution(generator);

	int case_cond = 0;

	// No snapshot of the bucket is taken here: it was only ever read by
	// commented-out debug code, and copying it meant a heap allocation
	// on every insert while m_bucket_mutex is held.

	if (found) {
		// If hash already exists after pos, move it up to pos
		if (ip > er.first + pos) {
			// Shift [pos, ip) back one cell; this overwrites the old copy at ip
			move_backward(er.first + pos, ip, ip + 1);
			er.first[pos] = mv;
			BEESCOUNT(hash_bump);
			case_cond = 1;
			goto ret_dirty;
		}
		// Hash already exists before (or at) pos, leave it there
		BEESCOUNT(hash_already);
		case_cond = 2;
		goto ret;
	}

	// Find an empty space to back of pos
	for (ip = er.first + pos; ip < er.second; ++ip) {
		if (*ip == Cell(0, 0)) {
			*ip = mv;
			case_cond = 3;
			goto ret_dirty;
		}
	}

	// Find an empty space to front of pos, if there is anything to front
	// of pos.  The pointer is decremented before it is examined so it
	// never moves in front of the first cell of the bucket.
	for (ip = er.first + pos; ip > er.first; ) {
		--ip;
		if (*ip == Cell(0, 0)) {
			*ip = mv;
			case_cond = 4;
			goto ret_dirty;
		}
	}

	// Evict something and insert at pos
	move_backward(er.first + pos, er.second - 1, er.second);
	er.first[pos] = mv;
	BEESCOUNT(hash_evict);
	case_cond = 5;
ret_dirty:
	BEESCOUNT(hash_insert);
	set_extent_dirty(hash);
ret:
#if 0
	if (verify_cell_range(er.first, er.second, false)) {
		BEESLOG("while push_randoming (case " << case_cond << ") pos " << pos
			<< " ip " << (ip - er.first) << " " << mv);
		// dump_bucket(er.first, er.second);
	}
#else
	(void)case_cond;
#endif
	return found;
}
void
BeesHashTable::try_mmap_flags(int flags)
{
if (!m_cell_ptr) {
THROW_CHECK1(out_of_range, m_size, m_size > 0);
Timer map_time;
catch_all([&]() {
BEESLOG("mapping hash table size " << m_size << " with flags " << mmap_flags_ntoa(flags));
void *ptr = mmap_or_die(nullptr, m_size, PROT_READ | PROT_WRITE, flags, flags & MAP_ANONYMOUS ? -1 : int(m_fd), 0);
BEESLOG("mmap done in " << map_time << " sec");
m_cell_ptr = static_cast<Cell *>(ptr);
void *ptr_end = static_cast<uint8_t *>(ptr) + m_size;
m_cell_ptr_end = static_cast<Cell *>(ptr_end);
});
}
}
void
BeesHashTable::set_shared(bool shared)
{
m_shared = shared;
}
// Open the hash table file `filename` relative to the context's home fd,
// derive the cell/bucket/extent counts from its size, map memory for it,
// start the writeback and prefetch threads, and precompute the toxic hashes.
//
// Throws if the layout constants are inconsistent or if no mapping could be
// established.
BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename) :
	m_ctx(ctx),
	m_size(0),
	m_void_ptr(nullptr),
	m_void_ptr_end(nullptr),
	m_buckets(0),
	m_cells(0),
	m_writeback_thread("hash_writeback"),
	m_prefetch_thread("hash_prefetch " + m_ctx->root_path()),
	m_flush_rate_limit(BEES_FLUSH_RATE),
	m_prefetch_rate_limit(BEES_FLUSH_RATE),
	m_stats_file(m_ctx->home_fd(), "beesstats.txt")
{
	BEESNOTE("opening hash table " << filename);

	m_fd = openat_or_die(m_ctx->home_fd(), filename, FLAGS_OPEN_FILE_RW, 0700);
	Stat st(m_fd);
	m_size = st.st_size;

	BEESTRACE("hash table size " << m_size);
	BEESTRACE("hash table bucket size " << BLOCK_SIZE_HASHTAB_BUCKET);
	BEESTRACE("hash table extent size " << BLOCK_SIZE_HASHTAB_EXTENT);

	THROW_CHECK2(invalid_argument, BLOCK_SIZE_HASHTAB_BUCKET, BLOCK_SIZE_HASHTAB_EXTENT, (BLOCK_SIZE_HASHTAB_EXTENT % BLOCK_SIZE_HASHTAB_BUCKET) == 0);

	// Does the union work?
	THROW_CHECK2(runtime_error, m_void_ptr, m_cell_ptr, m_void_ptr == m_cell_ptr);
	THROW_CHECK2(runtime_error, m_void_ptr, m_byte_ptr, m_void_ptr == m_byte_ptr);
	THROW_CHECK2(runtime_error, m_void_ptr, m_bucket_ptr, m_void_ptr == m_bucket_ptr);
	THROW_CHECK2(runtime_error, m_void_ptr, m_extent_ptr, m_void_ptr == m_extent_ptr);

	// There's more than one union
	THROW_CHECK2(runtime_error, sizeof(Bucket), BLOCK_SIZE_HASHTAB_BUCKET, BLOCK_SIZE_HASHTAB_BUCKET == sizeof(Bucket));
	THROW_CHECK2(runtime_error, sizeof(Bucket::p_byte), BLOCK_SIZE_HASHTAB_BUCKET, BLOCK_SIZE_HASHTAB_BUCKET == sizeof(Bucket::p_byte));
	THROW_CHECK2(runtime_error, sizeof(Extent), BLOCK_SIZE_HASHTAB_EXTENT, BLOCK_SIZE_HASHTAB_EXTENT == sizeof(Extent));
	THROW_CHECK2(runtime_error, sizeof(Extent::p_byte), BLOCK_SIZE_HASHTAB_EXTENT, BLOCK_SIZE_HASHTAB_EXTENT == sizeof(Extent::p_byte));

	BEESLOG("opened hash table filename '" << filename << "' length " << m_size);
	m_buckets = m_size / BLOCK_SIZE_HASHTAB_BUCKET;
	m_cells = m_buckets * c_cells_per_bucket;
	// Rounded up, so a trailing partial extent is counted here
	m_extents = (m_size + BLOCK_SIZE_HASHTAB_EXTENT - 1) / BLOCK_SIZE_HASHTAB_EXTENT;
	BEESLOG("\tcells " << m_cells << ", buckets " << m_buckets << ", extents " << m_extents);
	BEESLOG("\tflush rate limit " << BEES_FLUSH_RATE);

	if (using_shared_map()) {
		try_mmap_flags(MAP_SHARED);
	} else {
		try_mmap_flags(MAP_PRIVATE | MAP_ANONYMOUS);
	}

	if (!m_cell_ptr) {
		THROW_ERROR(runtime_error, "unable to mmap " << filename);
	}

	if (!using_shared_map()) {
		// madvise fails if MAP_SHARED
		if (using_any_madvise()) {
			// DONTFORK because we sometimes do fork,
			// but the child doesn't touch any of the many, many pages
			BEESTOOLONG("madvise(MADV_HUGEPAGE | MADV_DONTFORK)");
			// madvise advice values are enumerated, not bit flags:
			// OR-ing them yields the value of MADV_HUGEPAGE alone on
			// Linux, so each piece of advice needs its own call.
			DIE_IF_NON_ZERO(madvise(m_byte_ptr, m_size, MADV_HUGEPAGE));
			DIE_IF_NON_ZERO(madvise(m_byte_ptr, m_size, MADV_DONTFORK));
		}
		// Every whole extent starts out unread.
		// NOTE(review): this count rounds down while m_extents rounds up;
		// confirm the file size is always a multiple of the extent size.
		for (uint64_t i = 0; i < m_size / sizeof(Extent); ++i) {
			m_buckets_missing.insert(i);
		}
	}

	m_writeback_thread.exec([&]() {
		writeback_loop();
	});

	m_prefetch_thread.exec([&]() {
		prefetch_loop();
	});

	// Blacklist might fail if the hash table is not stored on a btrfs
	catch_all([&]() {
		m_ctx->blacklist_add(BeesFileId(m_fd));
	});

	// Skip zero because we already weed that out before it gets near a hash function
	for (unsigned i = 1; i < 256; ++i) {
		// A block filled with the single byte value i
		vector<uint8_t> v(BLOCK_SIZE_SUMS, i);
		HashType hash = Digest::CRC::crc64(v.data(), v.size());
		m_toxic_hashes.insert(hash);
	}
}
// Flush dirty extents and release the mapping, if there is one.
BeesHashTable::~BeesHashTable()
{
	if (!m_cell_ptr || !m_size) {
		// Nothing was mapped, so nothing to flush or unmap
		return;
	}
	flush_dirty_extents();
	// catch_all keeps an munmap failure from escaping the destructor
	catch_all([&]() {
		DIE_IF_NON_ZERO(munmap(m_cell_ptr, m_size));
		m_cell_ptr = nullptr;
		m_size = 0;
	});
}

487
src/bees-resolve.cc Normal file
View File

@ -0,0 +1,487 @@
#include "bees.h"
#include "crucible/limits.h"
#include "crucible/string.h"
using namespace crucible;
using namespace std;
// Point this resolver at `new_addr`: clear the results of any previous
// search, resolve the address through the context, and keep the resulting
// references and toxic flag.  Magic addresses are rejected.  Returns the
// address now in effect.
BeesAddress
BeesResolver::addr(BeesAddress new_addr)
{
	THROW_CHECK1(invalid_argument, new_addr, !new_addr.is_magic());

	// Forget everything learned about the previous address
	m_biors.clear();
	m_ranges.clear();
	m_bior_count = 0;
	m_wrong_data = false;
	m_found_hash = false;
	m_found_dup = false;
	m_found_data = false;

	m_addr = new_addr;

	auto resolved = m_ctx->resolve_addr(m_addr);
	m_biors = resolved.m_biors;
	m_is_toxic = resolved.m_is_toxic;
	m_bior_count = m_biors.size();
	return m_addr;
}
// Build a resolver bound to `ctx` and immediately resolve `new_addr`
// through addr(), which sets the remaining search state.
BeesResolver::BeesResolver(shared_ptr<BeesContext> ctx, BeesAddress new_addr) :
	m_ctx(ctx),
	m_bior_count(0)
{
	this->addr(new_addr);
}
// Search `haystack` for a block whose data equals `needle`.
//
// Returns the matching block, or a default-constructed BeesBlockData when
// nothing matched.  Side effects on the resolver: m_found_data and
// m_found_hash are set on a data match, m_found_hash alone when only the
// hash matched, and m_wrong_data when the aligned-block search ran out of
// offsets without a match.
// Both needle.begin() and haystack.begin() must be clone-block aligned.
BeesBlockData
BeesResolver::adjust_offset(const BeesFileRange &haystack, const BeesBlockData &needle)
{
BEESTRACE("Searching for needle " << needle << "\n\tin haystack " << haystack);
BEESCOUNT(adjust_try);
// Constraint checks
THROW_CHECK1(invalid_argument, needle.begin(), (needle.begin() & BLOCK_MASK_CLONE) == 0);
THROW_CHECK1(invalid_argument, haystack.begin(), (haystack.begin() & BLOCK_MASK_CLONE) == 0);
// Need to know the precise dimensions of the haystack and needle
off_t haystack_size = haystack.file_size();
// If the needle is not a full block then it can only match at EOF
off_t needle_len = needle.size();
bool is_unaligned_eof = needle_len & BLOCK_MASK_CLONE;
BEESTRACE("is_unaligned_eof = " << is_unaligned_eof << ", needle_len = " << to_hex(needle_len) << ", haystack_size = " << to_hex(haystack_size));
// Unaligned EOF can only match at EOF, so only check there
if (is_unaligned_eof) {
BEESTRACE("Construct needle_bfr from " << needle);
BeesFileRange needle_bfr(needle);
// Census
if (haystack_size & BLOCK_MASK_CLONE) {
BEESCOUNT(adjust_eof_haystack);
}
if (needle_bfr.end() & BLOCK_MASK_CLONE) {
BEESCOUNT(adjust_eof_needle);
}
// Non-aligned part of the lengths must be the same
if ( (haystack_size & BLOCK_MASK_CLONE) != (needle_bfr.end() & BLOCK_MASK_CLONE) ) {
BEESCOUNT(adjust_eof_fail);
return BeesBlockData();
}
// Read the haystack block
// (it starts at the last aligned offset of the file and covers the unaligned remainder)
BEESTRACE("Reading haystack (haystack_size = " << to_hex(haystack_size) << ")");
BeesBlockData straw(haystack.fd(), haystack_size & ~BLOCK_MASK_CLONE, haystack_size & BLOCK_MASK_CLONE);
// It either matches or it doesn't
BEESTRACE("Verifying haystack " << straw);
if (straw.is_data_equal(needle)) {
BEESCOUNT(adjust_eof_hit);
m_found_data = true;
m_found_hash = true;
return straw;
}
// Check for matching hash
BEESTRACE("Verifying haystack hash");
if (straw.hash() == needle.hash()) {
// OK at least the hash is still valid
m_found_hash = true;
}
BEESCOUNT(adjust_eof_miss);
// BEESLOG("adjust_eof_miss " << straw);
return BeesBlockData();
}
// Full-block needle: choose the range of haystack offsets to try.
// By default it is the haystack range itself.
off_t lower_offset = haystack.begin();
off_t upper_offset = haystack.end();
// Exactly one of these three flags is set below; they only select which counters are bumped
bool is_compressed_offset = false;
bool is_exact = false;
bool is_legacy = false;
if (m_addr.is_compressed()) {
BtrfsExtentWalker ew(haystack.fd(), haystack.begin(), m_ctx->root_fd());
BEESTRACE("haystack extent data " << ew);
Extent e = ew.current();
if (m_addr.has_compressed_offset()) {
// The address records an offset: narrow the search to a single block
off_t coff = m_addr.get_compressed_offset();
if (e.offset() > coff) {
// this extent begins after the target block
BEESCOUNT(adjust_offset_low);
return BeesBlockData();
}
coff -= e.offset();
if (e.size() <= coff) {
// this extent ends before the target block
BEESCOUNT(adjust_offset_high);
return BeesBlockData();
}
lower_offset = e.begin() + coff;
upper_offset = lower_offset + BLOCK_SIZE_CLONE;
BEESCOUNT(adjust_offset_hit);
is_compressed_offset = true;
} else {
// No recorded offset: every block of the current extent is a candidate
lower_offset = e.begin();
upper_offset = e.end();
BEESCOUNT(adjust_legacy);
is_legacy = true;
}
} else {
BEESCOUNT(adjust_exact);
is_exact = true;
}
BEESTRACE("Checking haystack " << haystack << " offsets " << to_hex(lower_offset) << ".." << to_hex(upper_offset));
// Check all the blocks in the list
for (off_t haystack_offset = lower_offset; haystack_offset < upper_offset; haystack_offset += BLOCK_SIZE_CLONE) {
THROW_CHECK1(out_of_range, haystack_offset, (haystack_offset & BLOCK_MASK_CLONE) == 0);
// Straw cannot extend beyond end of haystack
if (haystack_offset + needle.size() > haystack_size) {
BEESCOUNT(adjust_needle_too_long);
break;
}
// Read the haystack
BEESTRACE("straw " << name_fd(haystack.fd()) << ", offset " << to_hex(haystack_offset) << ", length " << needle.size());
BeesBlockData straw(haystack.fd(), haystack_offset, needle.size());
BEESTRACE("straw = " << straw);
// Stop if we find a match
if (straw.is_data_equal(needle)) {
BEESCOUNT(adjust_hit);
m_found_data = true;
m_found_hash = true;
if (is_compressed_offset) BEESCOUNT(adjust_compressed_offset_correct);
if (is_legacy) BEESCOUNT(adjust_legacy_correct);
if (is_exact) BEESCOUNT(adjust_exact_correct);
return straw;
}
if (straw.hash() != needle.hash()) {
// Not the same hash or data, try next block
BEESCOUNT(adjust_miss);
continue;
}
// Found the hash but not the data. Yay!
// The search keeps going with the next block after logging the collision
m_found_hash = true;
BEESLOG("HASH COLLISION\n"
<< "\tneedle " << needle << "\n"
<< "\tstraw " << straw);
BEESCOUNT(hash_collision);
}
// Ran out of offsets to try
BEESCOUNT(adjust_no_match);
if (is_compressed_offset) BEESCOUNT(adjust_compressed_offset_wrong);
if (is_legacy) BEESCOUNT(adjust_legacy_wrong);
if (is_exact) BEESCOUNT(adjust_exact_wrong);
m_wrong_data = true;
return BeesBlockData();
}
// Follow one (root, inode, offset) reference and check that the file it
// names still holds a copy of `needle_bbd` at the resolver's address.
//
// Returns the file range of the matching block, or a default-constructed
// BeesFileRange when the file cannot be opened, the address found there is
// magic or differs from m_addr, or adjust_offset() finds no matching data.
BeesFileRange
BeesResolver::chase_extent_ref(const BtrfsInodeOffsetRoot &bior, BeesBlockData &needle_bbd)
{
BEESTRACE("chase_extent_ref bior " << bior << " needle_bbd " << needle_bbd);
BEESNOTE("chase_extent_ref bior " << bior << " needle_bbd " << needle_bbd);
BEESCOUNT(chase_try);
Fd file_fd = m_ctx->roots()->open_root_ino(bior.m_root, bior.m_inum);
if (!file_fd) {
// Delete snapshots generate craptons of these
// BEESINFO("No FD in chase_extent_ref " << bior);
BEESCOUNT(chase_no_fd);
return BeesFileRange();
}
BEESNOTE("searching at offset " << to_hex(bior.m_offset) << " in file " << name_fd(file_fd) << "\n\tfor " << needle_bbd);
BEESTRACE("bior file " << name_fd(file_fd));
BEESTRACE("get file_addr " << bior);
// Look up the address of the block at the referenced offset in that file
BeesAddress file_addr(file_fd, bior.m_offset, m_ctx);
BEESTRACE("file_addr " << file_addr);
// Give up if the address found at this offset is a magic one
if (file_addr.is_magic()) {
BEESINFO("file_addr is magic: file_addr = " << file_addr << " bior = " << bior << " needle_bbd = " << needle_bbd);
BEESCOUNT(chase_wrong_magic);
return BeesFileRange();
}
THROW_CHECK1(invalid_argument, m_addr, !m_addr.is_magic());
// Did we get the physical block we asked for? The magic bits have to match too,
// but the compressed offset bits do not.
if (file_addr.get_physical_or_zero() != m_addr.get_physical_or_zero()) {
// BEESINFO("found addr " << file_addr << " at " << name_fd(file_fd) << " offset " << to_hex(bior.m_offset) << " but looking for " << m_addr);
// FIEMAP/resolve are working, but the data is old.
BEESCOUNT(chase_wrong_addr);
return BeesFileRange();
}
// Calculate end of range, which is a sum block or less
// It's a sum block because we have to compare content now
off_t file_size = Stat(file_fd).st_size;
off_t bior_offset = ranged_cast<off_t>(bior.m_offset);
// Never read past the end of the file
off_t end_offset = min(file_size, bior_offset + needle_bbd.size());
BeesBlockData haystack_bbd(file_fd, bior_offset, end_offset - bior_offset);
BEESTRACE("matched haystack_bbd " << haystack_bbd << " file_addr " << file_addr);
// If the data was compressed and no offset was captured then
// we won't get an exact address from resolve.
// Search near the resolved address for a matching data block.
// ...even if it's not compressed, we should do this sanity
// check before considering the block as a duplicate candidate.
auto new_bbd = adjust_offset(haystack_bbd, needle_bbd);
if (new_bbd.empty()) {
// matching offset search failed
BEESCOUNT(chase_wrong_data);
return BeesFileRange();
}
if (new_bbd.begin() == haystack_bbd.begin()) {
BEESCOUNT(chase_uncorrected);
} else {
// corrected the bfr
BEESCOUNT(chase_corrected);
haystack_bbd = new_bbd;
}
// We have found at least one duplicate block, so resolve was a success
BEESCOUNT(chase_hit);
// Matching block
BEESTRACE("Constructing dst_bfr { " << BeesFileId(haystack_bbd.fd()) << ", " << to_hex(haystack_bbd.begin()) << ".." << to_hex(haystack_bbd.end()) << " }");
BeesFileRange dst_bfr(BeesFileId(haystack_bbd.fd()), haystack_bbd.begin(), haystack_bbd.end());
return dst_bfr;
}
// Treat `src_bfr` as the source and try to dedup every other reference to
// the resolver's address against it.  Sets m_found_dup when any dedup
// succeeds.  Throws runtime_error if the address is toxic.
void
BeesResolver::replace_src(const BeesFileRange &src_bfr)
{
BEESTRACE("replace_src src_bfr " << src_bfr);
THROW_CHECK0(runtime_error, !m_is_toxic);
BEESCOUNT(replacesrc_try);
// Open src, reuse it for all dst
auto i_bfr = src_bfr;
BEESNOTE("Opening src bfr " << i_bfr);
BEESTRACE("Opening src bfr " << i_bfr);
i_bfr.fd(m_ctx);
BeesBlockData bbd(i_bfr);
// The visitor always returns false, so it never asks the walk to stop
for_each_extent_ref(bbd, [&](const BeesFileRange &j) -> bool {
// Open dst
auto j_bfr = j;
BEESNOTE("Opening dst bfr " << j_bfr);
BEESTRACE("Opening dst bfr " << j_bfr);
j_bfr.fd(m_ctx);
// Skip references that overlap the source range itself
if (i_bfr.overlaps(j_bfr)) {
BEESCOUNT(replacesrc_overlaps);
return false; // i.e. continue
}
// Make pair(src, dst)
BEESTRACE("creating brp (" << i_bfr << ", " << j_bfr << ")");
BeesRangePair brp(i_bfr, j_bfr);
BEESTRACE("Found matching range: " << brp);
// Extend range at beginning
BEESNOTE("Extending matching range: " << brp);
// No particular reason to be constrained?
if (brp.grow(m_ctx, true)) {
BEESCOUNT(replacesrc_grown);
}
// Dedup
BEESNOTE("dedup " << brp);
if (m_ctx->dedup(brp)) {
BEESCOUNT(replacesrc_dedup_hit);
m_found_dup = true;
} else {
BEESCOUNT(replacesrc_dedup_miss);
}
return false; // i.e. continue
});
}
// Chase the resolved (ino, offset, root) references and collect into
// m_ranges every file range whose data matches `bbd`.  Stops after the
// first hit when `just_one` is set, and stops as soon as m_wrong_data has
// been raised.  Throws runtime_error if the address is toxic.
void
BeesResolver::find_matches(bool just_one, BeesBlockData &bbd)
{
	// Walk through the (ino, offset, root) tuples until we find a match.
	BEESTRACE("finding all matches for " << bbd << " at " << m_addr << ": " << m_biors.size() << " found");
	THROW_CHECK0(runtime_error, !m_is_toxic);

	bool found_any = false;
	for (auto candidate : m_biors) {
		if (m_wrong_data) {
			return;
		}

		BEESTRACE("ino_off_root " << candidate);
		BeesFileId this_fid(candidate.m_root, candidate.m_inum);

		// Silently ignore blacklisted files, e.g. BeesTempFile files
		if (m_ctx->is_blacklisted(this_fid)) {
			continue;
		}

		// Look at the old data; an exception only skips this reference
		catch_all([&]() {
			BEESTRACE("chase_extent_ref ino " << candidate << " bbd " << bbd);
			auto new_range = chase_extent_ref(candidate, bbd);
			if (new_range) {
				m_ranges.insert(new_range.copy_closed());
				found_any = true;
			}
		});

		if (found_any && just_one) {
			break;
		}
	}
}
// Chase each resolved (ino, offset, root) reference and hand every range
// that matches `bbd` to `visitor`.  The walk ends when the visitor returns
// true or when a reference fails to chase.  Returns true if the walk was
// ended early, false if every reference was visited.  Throws runtime_error
// if the address is toxic.
bool
BeesResolver::for_each_extent_ref(BeesBlockData bbd, function<bool(const BeesFileRange &bfr)> visitor)
{
	// Walk through the (ino, offset, root) tuples until we are told to stop
	BEESTRACE("for_each_extent_ref " << bbd << " at " << m_addr << ": " << m_biors.size() << " found");
	THROW_CHECK0(runtime_error, !m_is_toxic);

	for (auto ref : m_biors) {
		BEESTRACE("ino_off_root " << ref);
		BeesFileId this_fid(ref.m_root, ref.m_inum);

		// Silently ignore blacklisted files, e.g. BeesTempFile files
		if (m_ctx->is_blacklisted(this_fid)) {
			continue;
		}

		// Stays false if an exception is swallowed by catch_all
		bool finished = false;

		// Look at the old data
		catch_all([&]() {
			BEESTRACE("chase_extent_ref ino " << ref << " bbd " << bbd);
			auto new_range = chase_extent_ref(ref, bbd);
			if (!new_range) {
				// We have reliable block addresses now, so we guarantee we can hit the desired block.
				// Failure in chase_extent_ref means we are done, and don't need to look up all the
				// other references.
				finished = true;
				return;
			}
			// XXX: should we catch visitor's exceptions here?
			finished = visitor(new_range);
		});

		if (finished) {
			return true;
		}
	}
	return false;
}
// Treat `dst_bfr` as the destination and try to replace it with a reference
// to another copy of the same data found at the resolver's address.  The
// walk stops at the first successful dedup.  Returns a closed copy of the
// destination range that was deduped, or of a default-constructed range
// when no dedup happened.  Sets m_found_dup on success.
BeesFileRange
BeesResolver::replace_dst(const BeesFileRange &dst_bfr)
{
BEESTRACE("replace_dst dst_bfr " << dst_bfr);
BEESCOUNT(replacedst_try);
// Open dst, reuse it for all src
BEESNOTE("Opening dst bfr " << dst_bfr);
BEESTRACE("Opening dst bfr " << dst_bfr);
dst_bfr.fd(m_ctx);
// Stays default-constructed unless a dedup succeeds below
BeesFileRange overlap_bfr;
BEESTRACE("overlap_bfr " << overlap_bfr);
BeesBlockData bbd(dst_bfr);
for_each_extent_ref(bbd, [&](const BeesFileRange &src_bfr) -> bool {
// Open src
BEESNOTE("Opening src bfr " << src_bfr);
BEESTRACE("Opening src bfr " << src_bfr);
src_bfr.fd(m_ctx);
// Skip candidate sources that overlap the destination range
if (dst_bfr.overlaps(src_bfr)) {
BEESCOUNT(replacedst_overlaps);
return false; // i.e. continue
}
// If dst is already occupying src, skip.
// FIXME: BeesContext::scan_one_extent should be weeding these out, but does not.
BeesBlockData src_bbd(src_bfr.fd(), src_bfr.begin(), min(BLOCK_SIZE_SUMS, src_bfr.size()));
if (bbd.addr().get_physical_or_zero() == src_bbd.addr().get_physical_or_zero()) {
BEESCOUNT(replacedst_same);
return false; // i.e. continue
}
// Make pair(src, dst)
BEESTRACE("creating brp (" << src_bfr << ", " << dst_bfr << ")");
BeesRangePair brp(src_bfr, dst_bfr);
BEESTRACE("Found matching range: " << brp);
// Extend range at beginning
BEESNOTE("Extending matching range: " << brp);
// 'false' Has nasty loops, and may not be faster.
// 'true' At best, keeps fragmentation constant...but can also make it worse
if (brp.grow(m_ctx, true)) {
BEESCOUNT(replacedst_grown);
}
// Dedup
BEESNOTE("dedup " << brp);
if (m_ctx->dedup(brp)) {
BEESCOUNT(replacedst_dedup_hit);
m_found_dup = true;
// Remember the destination half of the pair that was deduped
overlap_bfr = brp.second;
// FIXME: find best range first, then dedup that
return true; // i.e. break
} else {
BEESCOUNT(replacedst_dedup_miss);
return false; // i.e. continue
}
});
// BEESLOG("overlap_bfr after " << overlap_bfr);
return overlap_bfr.copy_closed();
}
// Search for a single range matching `bbd` and return the first element of
// m_ranges, or a default-constructed range if the set is empty.  Throws
// runtime_error if the address is toxic.
BeesFileRange
BeesResolver::find_one_match(BeesBlockData &bbd)
{
	THROW_CHECK0(runtime_error, !m_is_toxic);
	find_matches(true, bbd);
	auto first = m_ranges.begin();
	if (first == m_ranges.end()) {
		return BeesFileRange();
	}
	return *first;
}
// Search every reference for ranges matching `bbd` and return a copy of the
// accumulated m_ranges set.  Throws runtime_error if the address is toxic.
set<BeesFileRange>
BeesResolver::find_all_matches(BeesBlockData &bbd)
{
	THROW_CHECK0(runtime_error, !m_is_toxic);
	const bool just_one = false;
	find_matches(just_one, bbd);
	return m_ranges;
}
// Order resolvers by descending reference count, breaking ties by
// ascending address.
bool
BeesResolver::operator<(const BeesResolver &that) const
{
	if (m_bior_count != that.m_bior_count) {
		// The resolver with more references sorts first
		return that.m_bior_count < m_bior_count;
	}
	return m_addr < that.m_addr;
}

823
src/bees-roots.cc Normal file
View File

@ -0,0 +1,823 @@
#include "bees.h"
#include "crucible/cache.h"
#include "crucible/string.h"
#include <fstream>
#include <tuple>
using namespace crucible;
using namespace std;
// Format `t` as local time in the form YYYY-MM-DD-HH-MM-SS.
//
// localtime_r() is used instead of localtime() because this function is
// called from several threads and localtime() returns a pointer to storage
// shared by all of them.  If the time cannot be converted or formatted, the
// decimal value of `t` is returned instead so callers still get a non-empty
// token.
std::string
format_time(time_t t)
{
	struct tm broken_down;
	if (localtime_r(&t, &broken_down) == nullptr) {
		return std::to_string(static_cast<long long>(t));
	}
	char buf[64];
	// strftime returns 0 when the result does not fit, leaving buf unspecified
	const size_t len = strftime(buf, sizeof(buf), "%Y-%m-%d-%H-%M-%S", &broken_down);
	if (len == 0) {
		return std::to_string(static_cast<long long>(t));
	}
	return std::string(buf, len);
}
// Write a one-line description of a crawl state: root, objectid, offset,
// transid range, start time, and the age of the start time in seconds.
ostream &
operator<<(ostream &os, const BeesCrawlState &bcs)
{
	const time_t now = time(NULL);
	const auto age = now - bcs.m_started;
	os << "BeesCrawlState ";
	os << bcs.m_root << ":" << bcs.m_objectid << " offset " << to_hex(bcs.m_offset);
	os << " transid " << bcs.m_min_transid << ".." << bcs.m_max_transid;
	os << " started " << format_time(bcs.m_started) << " (" << age << "s ago)";
	return os;
}
// Default state: every position and transid field is zero, and the start
// time is the moment of construction.
BeesCrawlState::BeesCrawlState() :
m_root(0),
m_objectid(0),
m_offset(0),
m_min_transid(0),
m_max_transid(0),
m_started(time(NULL))
{
}
// Lexicographic ordering on (root, objectid, offset, min_transid,
// max_transid).  The start time does not take part in the comparison.
bool
BeesCrawlState::operator<(const BeesCrawlState &that) const
{
	const auto sort_key = [](const BeesCrawlState &s) {
		return tie(s.m_root, s.m_objectid, s.m_offset, s.m_min_transid, s.m_max_transid);
	};
	return sort_key(*this) < sort_key(that);
}
// Name of the crawl state file: "beescrawl.", then the context's root UUID,
// then ".dat".
string
BeesRoots::crawl_state_filename() const
{
	string rv("beescrawl.");
	rv.append(m_ctx->root_uuid());
	rv.append(".dat");
	return rv;
}
void
BeesRoots::state_save()
{
// Make sure we have a full complement of crawlers
insert_new_crawl();
BEESNOTE("saving crawl state");
BEESLOG("Saving crawl state");
BEESTOOLONG("Saving crawl state");
Timer save_time;
unique_lock<mutex> lock(m_mutex);
// We don't have ofstreamat or ofdstream in C++11, so we're building a string and writing it with raw syscalls.
ostringstream ofs;
if (!m_crawl_dirty) {
BEESLOG("Nothing to save");
return;
}
for (auto i : m_root_crawl_map) {
auto ibcs = i.second->get_state();
if (ibcs.m_max_transid) {
ofs << "root " << ibcs.m_root << " ";
ofs << "objectid " << ibcs.m_objectid << " ";
ofs << "offset " << ibcs.m_offset << " ";
ofs << "min_transid " << ibcs.m_min_transid << " ";
ofs << "max_transid " << ibcs.m_max_transid << " ";
ofs << "started " << ibcs.m_started << " ";
ofs << "start_ts " << format_time(ibcs.m_started) << "\n";
}
}
if (ofs.str().empty()) {
BEESLOG("Crawl state empty!");
m_crawl_dirty = false;
return;
}
lock.unlock();
m_crawl_state_file.write(ofs.str());
BEESNOTE("relocking crawl state");
lock.lock();
// Not really correct but probably close enough
m_crawl_dirty = false;
BEESLOG("Saved crawl state in " << save_time << "s");
}
// Return a copy of the crawl state registered for `rootid`.  The map's at()
// throws when no crawler exists for that root, and a runtime_error is
// thrown if the stored state names a different root.
BeesCrawlState
BeesRoots::crawl_state_get(uint64_t rootid)
{
	lock_guard<mutex> lock(m_mutex);
	const auto &crawl = m_root_crawl_map.at(rootid);
	auto rv = crawl->get_state();
	THROW_CHECK2(runtime_error, rv.m_root, rootid, rv.m_root == rootid);
	return rv;
}
void
BeesRoots::crawl_state_set_dirty()
{
unique_lock<mutex> lock(m_mutex);
m_crawl_dirty = true;
}
// Drop the crawler for the root named in `bcs`, unless it is the only
// crawler left.  The state is marked dirty only when an entry was removed.
void
BeesRoots::crawl_state_erase(const BeesCrawlState &bcs)
{
	unique_lock<mutex> lock(m_mutex);

	// Do not delete the last entry, it holds our max_transid
	if (m_root_crawl_map.size() < 2) {
		BEESCOUNT(crawl_no_empty);
		return;
	}

	// erase() by key reports how many entries it removed
	const auto erased = m_root_crawl_map.erase(bcs.m_root);
	if (erased) {
		m_crawl_dirty = true;
	}
}
// Smallest min_transid over all crawlers, or 0 when there are no crawlers.
uint64_t
BeesRoots::transid_min()
{
	BEESNOTE("Calculating transid_min");
	unique_lock<mutex> lock(m_mutex);
	if (m_root_crawl_map.empty()) {
		return 0;
	}
	uint64_t lowest = numeric_limits<uint64_t>::max();
	for (auto entry : m_root_crawl_map) {
		const uint64_t candidate = entry.second->get_state().m_min_transid;
		if (candidate < lowest) {
			lowest = candidate;
		}
	}
	return lowest;
}
// Largest transid reported by btrfs_get_root_transid() over every root that
// next_root() enumerates, or 0 if there are none.  A root whose lookup
// throws is skipped.
uint64_t
BeesRoots::transid_max()
{
	BEESNOTE("Calculating transid_max");
	uint64_t rv = 0;
	uint64_t root = 0;
	BEESTRACE("Calculating transid_max...");
	// next_root() returning 0 ends the enumeration
	while ((root = next_root(root)) != 0) {
		catch_all([&]() {
			auto transid = btrfs_get_root_transid(open_root(root));
			rv = max(rv, transid);
			// BEESLOG("\troot " << root << " transid " << transid << " max " << rv);
		});
	}
	return rv;
}
// Run one step of the crawl: pick the crawler whose next range sorts lowest
// by (inode, root, offset), scan that range, and pop it from its crawler.
// When no crawler has a range to offer, wait on m_condvar until woken.
void
BeesRoots::crawl_roots()
{
BEESNOTE("Crawling roots");
unique_lock<mutex> lock(m_mutex);
if (m_root_crawl_map.empty()) {
BEESNOTE("idle, crawl map is empty");
// NOTE(review): waits without a predicate, so the map may still be empty after waking
m_condvar.wait(lock);
// Don't count the time we were waiting as part of the crawl time
m_crawl_timer.reset();
}
// Work from a copy because BeesCrawl might change the world under us
auto crawl_map_copy = m_root_crawl_map;
lock.unlock();
// Track the lowest-sorting range seen so far and the crawler it came from
BeesFileRange first_range;
shared_ptr<BeesCrawl> first_crawl;
for (auto i : crawl_map_copy) {
auto this_crawl = i.second;
auto this_range = this_crawl->peek_front();
if (this_range) {
auto tuple_this = make_tuple(this_range.fid().ino(), this_range.fid().root(), this_range.begin());
auto tuple_first = make_tuple(first_range.fid().ino(), first_range.fid().root(), first_range.begin());
// Take this range if there is no candidate yet or it sorts before the current one
if (!first_range || tuple_this < tuple_first) {
first_crawl = this_crawl;
first_range = this_range;
}
}
}
if (first_range) {
// A scan failure is contained here; the range is still popped below
catch_all([&]() {
// BEESINFO("scan_forward " << first_range);
m_ctx->scan_forward(first_range);
});
BEESCOUNT(crawl_scan);
m_crawl_current = first_crawl->get_state();
auto first_range_popped = first_crawl->pop_front();
// The range popped must be the one that was peeked and scanned
THROW_CHECK2(runtime_error, first_range, first_range_popped, first_range == first_range_popped);
return;
}
BEESLOG("Crawl ran out of data after " << m_crawl_timer.lap() << "s, waiting for more...");
BEESCOUNT(crawl_done);
BEESNOTE("idle, waiting for more data");
// The condition variable must be waited on with m_mutex held
lock.lock();
m_condvar.wait(lock);
// Don't count the time we were waiting as part of the crawl time
m_crawl_timer.reset();
}
// Body of the crawl thread: call crawl_roots() forever.  Exceptions from a
// single pass are contained by catch_all so the loop keeps running.
void
BeesRoots::crawl_thread()
{
	BEESNOTE("crawling");
	for (;;) {
		catch_all([&]() {
			crawl_roots();
		});
	}
}
// Body of the crawl writeback thread: save the crawl state, sleep for
// BEES_WRITEBACK_INTERVAL, and repeat forever.  A failed save is contained
// by catch_all and retried on the next pass.
void
BeesRoots::writeback_thread()
{
	for (;;) {
		// Status while sleeping shows the current crawl position
		BEESNOTE(m_crawl_current << (m_crawl_dirty ? " (dirty)" : ""));

		catch_all([&]() {
			BEESNOTE("saving crawler state");
			state_save();
		});

		nanosleep(BEES_WRITEBACK_INTERVAL);
	}
}
// Register a crawler for the root named in `new_bcs` unless one already
// exists.  The state is marked dirty only when a crawler was added.
void
BeesRoots::insert_root(const BeesCrawlState &new_bcs)
{
	unique_lock<mutex> lock(m_mutex);
	if (m_root_crawl_map.count(new_bcs.m_root)) {
		// Already crawling this root; do not construct another crawler
		return;
	}
	auto new_bcp = make_shared<BeesCrawl>(m_ctx, new_bcs);
	m_root_crawl_map.insert(make_pair(new_bcs.m_root, new_bcp));
	m_crawl_dirty = true;
}
// Bring the set of crawlers in line with the roots that next_root()
// enumerates, starting from BTRFS_FS_TREE_OBJECTID: make sure a crawler
// exists for each enumerated root, erase crawlers for roots that were not
// enumerated, then wake anything waiting on m_condvar.
void
BeesRoots::insert_new_crawl()
{
	BEESNOTE("adding crawlers for new subvols and removing crawlers for removed subvols");

	BeesCrawlState new_bcs;
	// Avoid a wasted loop iteration by starting from root 5
	new_bcs.m_root = BTRFS_FS_TREE_OBJECTID;
	new_bcs.m_min_transid = transid_min();
	new_bcs.m_max_transid = transid_max();

	// Start by assuming every existing crawler is excess
	unique_lock<mutex> lock(m_mutex);
	set<uint64_t> excess_roots;
	for (auto known : m_root_crawl_map) {
		excess_roots.insert(known.first);
	}
	lock.unlock();

	// Every root that still exists is removed from the excess set
	for (; new_bcs.m_root; new_bcs.m_root = next_root(new_bcs.m_root)) {
		excess_roots.erase(new_bcs.m_root);
		insert_root(new_bcs);
		BEESCOUNT(crawl_create);
	}

	for (auto stale_root : excess_roots) {
		new_bcs.m_root = stale_root;
		crawl_state_erase(new_bcs);
	}

	// Wake up crawl_roots if sleeping
	lock.lock();
	m_condvar.notify_all();
}
// Read the crawl state file and register a crawler for each line in it.
//
// Each line is split on spaces into alternating key / value words, with the
// values parsed by from_hex().  The keys root, objectid and offset are
// required; gen_current and gen_next are accepted in place of min_transid
// and max_transid; started is optional.
// Throws if a key has no value, a key is repeated, or a required key is
// missing from a line.
void
BeesRoots::state_load()
{
BEESNOTE("loading crawl state");
BEESLOG("loading crawl state");
string crawl_data = m_crawl_state_file.read();
for (auto line : split("\n", crawl_data)) {
BEESLOG("Read line: " << line);
map<string, uint64_t> d;
auto words = split(" ", line);
// Consume the words two at a time: it1 is the key, it is its value
for (auto it = words.begin(); it < words.end(); ++it) {
auto it1 = it;
++it;
// A key without a following value is an error
THROW_CHECK1(out_of_range, words.size(), it < words.end());
string key = *it1;
uint64_t val = from_hex(*it);
BEESTRACE("key " << key << " val " << val);
auto result = d.insert(make_pair(key, val));
// insert() reports false when the key was already present
THROW_CHECK0(runtime_error, result.second);
}
BeesCrawlState loaded_state;
loaded_state.m_root = d.at("root");
loaded_state.m_objectid = d.at("objectid");
loaded_state.m_offset = d.at("offset");
// gen_current / gen_next take precedence over min_transid / max_transid when present
loaded_state.m_min_transid = d.count("gen_current") ? d.at("gen_current") : d.at("min_transid");
loaded_state.m_max_transid = d.count("gen_next") ? d.at("gen_next") : d.at("max_transid");
// Without a started key the state keeps the start time set by its constructor
if (d.count("started")) {
loaded_state.m_started = d.at("started");
}
BEESLOG("loaded_state " << loaded_state);
insert_root(loaded_state);
}
}
// Bind to `ctx`, open the crawl state file in the context's home directory,
// and start the crawl thread.  The crawl thread loads the saved state
// first, then starts the writeback thread, then enters crawl_thread().
BeesRoots::BeesRoots(shared_ptr<BeesContext> ctx) :
m_ctx(ctx),
m_crawl_state_file(ctx->home_fd(), crawl_state_filename()),
m_crawl_thread("crawl " + ctx->root_path()),
m_writeback_thread("crawl_writeback " + ctx->root_path())
{
m_crawl_thread.exec([&]() {
// A failure to load the saved state is contained; crawling still starts
catch_all([&]() {
state_load();
});
// Started only after the load attempt has finished
m_writeback_thread.exec([&]() {
writeback_thread();
});
crawl_thread();
});
}
// Open a directory Fd for the top of the subvolume (tree) with ID rootid,
// without consulting the Fd cache for rootid itself.
//
// For BTRFS_FS_TREE_OBJECTID the context's root Fd is returned directly.
// Otherwise the ROOT_BACKREF items for rootid are searched in the root tree;
// each backref names a parent root, a directory inode in that parent, and the
// entry name.  The parent is opened via open_root() (which may recurse back
// here), the directory is resolved with an inode-to-path lookup, and the
// entry is opened with openat().
//
// Returns the first candidate that opens successfully; an empty Fd when no
// backref yields a usable path.
// Throws (via THROW_CHECK*) if the opened directory reports a different root
// ID or an inode other than BTRFS_FIRST_FREE_OBJECTID, or if the backref item
// is too short for its name.
Fd
BeesRoots::open_root_nocache(uint64_t rootid)
{
BEESTRACE("open_root_nocache " << rootid);
BEESNOTE("open_root_nocache " << rootid);
// Stop recursion at the root of the filesystem tree
if (rootid == BTRFS_FS_TREE_OBJECTID) {
return m_ctx->root_fd();
}
// Find backrefs for this rootid and follow up to root
BtrfsIoctlSearchKey sk;
sk.tree_id = BTRFS_ROOT_TREE_OBJECTID;
sk.min_objectid = sk.max_objectid = rootid;
sk.min_type = sk.max_type = BTRFS_ROOT_BACKREF_KEY;
BEESTRACE("sk " << sk);
// next_min() below advances the search key; the loop ends when the key has
// moved past rootid or a search returns nothing
while (sk.min_objectid <= rootid) {
sk.nr_items = 1024;
sk.do_ioctl(m_ctx->root_fd());
if (sk.m_result.empty()) {
break;
}
for (auto i : sk.m_result) {
sk.next_min(i);
if (i.type == BTRFS_ROOT_BACKREF_KEY && i.objectid == rootid) {
auto dirid = call_btrfs_get(btrfs_stack_root_ref_dirid, i.m_data);
auto name_len = call_btrfs_get(btrfs_stack_root_ref_name_len, i.m_data);
// The entry name follows the fixed-size btrfs_root_ref header in the item data
auto name_start = sizeof(struct btrfs_root_ref);
auto name_end = name_len + name_start;
THROW_CHECK2(runtime_error, i.m_data.size(), name_end, i.m_data.size() >= name_end);
string name(i.m_data.data() + name_start, i.m_data.data() + name_end);
// For a ROOT_BACKREF item the key offset is used as the parent root ID
auto parent_rootid = i.offset;
// BEESLOG("parent_rootid " << parent_rootid << " dirid " << dirid << " name " << name);
BEESTRACE("parent_rootid " << parent_rootid << " dirid " << dirid << " name " << name);
Fd parent_fd = open_root(parent_rootid);
if (!parent_fd) {
BEESLOGTRACE("no parent_fd");
continue;
}
// If the containing directory is not the top of the parent subvolume,
// resolve its path inside the parent and open it
if (dirid != BTRFS_FIRST_FREE_OBJECTID) {
BEESTRACE("dirid " << dirid << " root " << rootid << " INO_PATH");
BtrfsIoctlInoPathArgs ino(dirid);
if (!ino.do_ioctl_nothrow(parent_fd)) {
BEESINFO("dirid " << dirid << " inode path lookup failed in parent_fd " << name_fd(parent_fd));
continue;
}
if (ino.m_paths.empty()) {
BEESINFO("dirid " << dirid << " inode has no paths in parent_fd " << name_fd(parent_fd));
continue;
}
BEESTRACE("dirid " << dirid << " path " << ino.m_paths.at(0));
parent_fd = openat(parent_fd, ino.m_paths.at(0).c_str(), FLAGS_OPEN_DIR);
if (!parent_fd) {
BEESLOGTRACE("no parent_fd from dirid");
continue;
}
}
// BEESLOG("openat(" << name_fd(parent_fd) << ", " << name << ")");
BEESTRACE("openat(" << name_fd(parent_fd) << ", " << name << ")");
Fd rv = openat(parent_fd, name.c_str(), FLAGS_OPEN_DIR);
if (!rv) {
BEESLOGTRACE("open failed for name " << name);
continue;
}
BEESCOUNT(root_found);
// Verify correct root ID
auto new_root_id = btrfs_get_root_id(rv);
THROW_CHECK2(runtime_error, new_root_id, rootid, new_root_id == rootid);
// The opened directory must be the first inode of its tree
Stat st(rv);
THROW_CHECK1(runtime_error, st.st_ino, st.st_ino == BTRFS_FIRST_FREE_OBJECTID);
BEESINFO("open_root_nocache " << rootid << ": " << name_fd(rv));
return rv;
}
}
}
// No backref produced an openable path
BEESINFO("No path for rootid " << rootid);
BEESCOUNT(root_notfound);
return Fd();
}
Fd
BeesRoots::open_root(uint64_t rootid)
{
	// LOGICAL_INO results can name the root tree itself; there is nothing
	// to open for it, so answer with an empty Fd.
	const bool is_root_tree = (rootid == BTRFS_ROOT_TREE_OBJECTID);
	if (is_root_tree) {
		return Fd();
	}

	// Everything else goes through the context's Fd cache
	auto &&cache = m_ctx->fd_cache();
	return cache->open_root(m_ctx, rootid);
}
uint64_t
BeesRoots::next_root(uint64_t root)
{
	BEESNOTE("Next root from " << root);
	BEESTRACE("Next root from " << root);

	// BTRFS_FS_TREE_OBJECTID has no backref keys so we can't find it that way;
	// it is always the first root handed out.
	if (root < BTRFS_FS_TREE_OBJECTID) {
		return BTRFS_FS_TREE_OBJECTID;
	}

	// Search the root tree for the first ROOT_BACKREF item above `root`
	BtrfsIoctlSearchKey sk;
	sk.tree_id = BTRFS_ROOT_TREE_OBJECTID;
	sk.min_type = sk.max_type = BTRFS_ROOT_BACKREF_KEY;
	sk.min_objectid = root + 1;

	for (;;) {
		sk.nr_items = 1024;
		sk.do_ioctl(m_ctx->root_fd());

		// An empty result means there are no more roots: report 0
		if (sk.m_result.empty()) {
			return 0;
		}

		for (auto item : sk.m_result) {
			sk.next_min(item);
			if (item.type != BTRFS_ROOT_BACKREF_KEY) {
				continue;
			}
			return item.objectid;
		}
	}
}
// Open the file with inode number `ino` in subvolume `root`, bypassing the
// (root, ino) Fd cache.
//
// The inode is resolved to its paths relative to the subvolume root, and each
// path is tried in turn with a read-only open.  A candidate is accepted only
// if it has the expected inode number, the expected root ID, and the same
// st_dev as the subvolume root; a mismatch on any of these abandons the
// search.  Paths that fail to open are skipped.
//
// Returns the opened Fd, or an empty Fd if the root cannot be opened, the
// path lookup fails, or no path yields an acceptable file.
Fd
BeesRoots::open_root_ino_nocache(uint64_t root, uint64_t ino)
{
	BEESTRACE("opening root " << root << " ino " << ino);

	Fd root_fd = open_root(root);
	if (!root_fd) {
		return root_fd;
	}

	BEESTOOLONG("open_root_ino(root " << root << ", ino " << ino << ")");

	BEESTRACE("looking up ino " << ino);
	BtrfsIoctlInoPathArgs ipa(ino);
	if (!ipa.do_ioctl_nothrow(root_fd)) {
		// Save errno before the logging machinery runs: the rate limiter
		// and stream setup are allowed to overwrite it.
		const int lookup_errno = errno;
		BEESINFO("Lookup root " << root << " ino " << ino << " failed: " << strerror(lookup_errno));
		return Fd();
	}

	BEESTRACE("searching paths for root " << root << " ino " << ino);
	Fd rv;
	if (ipa.m_paths.empty()) {
		BEESLOG("No paths for root " << root << " ino " << ino);
	}

	for (const auto &file_path : ipa.m_paths) {
		BEESTRACE("Looking up root " << root << " ino " << ino << " in dir " << name_fd(root_fd) << " path " << file_path);
		BEESCOUNT(open_file);

		// Files are opened read-only (FLAGS_OPEN_FILE)
		const char *fp_cstr = file_path.c_str();
		const int raw_fd = openat(root_fd, fp_cstr, FLAGS_OPEN_FILE);
		// Capture errno immediately: the Fd assignment and BEESCOUNT below
		// may allocate or lock and are permitted to clobber it.
		const int open_errno = errno;
		rv = raw_fd;
		if (!rv) {
			BEESCOUNT(open_fail);
			// errno == ENOENT is common during snapshot delete, ignore it
			if (open_errno != ENOENT) {
				BEESLOG("Could not open path '" << file_path << "' at root " << root << " " << name_fd(root_fd) << ": " << strerror(open_errno));
				BEESNOTE("ipa" << ipa);
			}
			continue;
		}

		// Correct inode?
		Stat file_stat(rv);
		if (file_stat.st_ino != ino) {
			BEESLOG("Opening " << name_fd(root_fd) << "/" << file_path << " found wrong inode " << file_stat.st_ino << " instead of " << ino);
			rv = Fd();
			BEESCOUNT(open_wrong_ino);
			break;
		}

		// Correct root?
		auto file_root = btrfs_get_root_id(rv);
		if (file_root != root) {
			BEESLOG("Opening " << name_fd(root_fd) << "/" << file_path << " found wrong root " << file_root << " instead of " << root);
			rv = Fd();
			BEESCOUNT(open_wrong_root);
			break;
		}

		// Same filesystem?
		Stat root_stat(root_fd);
		if (root_stat.st_dev != file_stat.st_dev) {
			BEESLOG("Opening root " << name_fd(root_fd) << " path " << file_path << " found path st_dev " << file_stat.st_dev << " but root st_dev is " << root_stat.st_dev);
			rv = Fd();
			BEESCOUNT(open_wrong_dev);
			break;
		}

		BEESTRACE("mapped " << BeesFileId(root, ino));
		BEESTRACE("\tto " << name_fd(rv));
		BEESCOUNT(open_hit);
		return rv;
	}

	// Odd, we didn't find a path.
	return Fd();
}
Fd
BeesRoots::open_root_ino(uint64_t root, uint64_t ino)
{
	// Cached front end: the context's Fd cache performs the lookup
	auto &&cache = m_ctx->fd_cache();
	return cache->open_root_ino(m_ctx, root, ino);
}
// Create a crawler bound to ctx that starts from initial_state.
// No searching happens here; extents are fetched on demand by
// peek_front()/pop_front().
BeesCrawl::BeesCrawl(shared_ptr<BeesContext> ctx, BeesCrawlState initial_state) :
m_ctx(ctx),
m_state(initial_state)
{
}
// Advance this crawl to the next transid window.
//
// If less than BEES_COMMIT_INTERVAL has elapsed since the current crawl
// started, the restart is deferred: m_deferred is set, the deferral is logged
// only on the first occurrence, and false is returned.
//
// Otherwise a new crawl begins: the previous max_transid becomes the new
// min_transid, max_transid is refreshed from the roots object, the search
// position is reset to the beginning, the start time is updated, and true is
// returned.
bool
BeesCrawl::next_transid()
{
	// If this crawl is recently empty, quickly and _silently_ bail out
	auto current_time = time(nullptr);
	auto crawl_state = get_state();
	auto elapsed_time = current_time - crawl_state.m_started;
	if (elapsed_time < BEES_COMMIT_INTERVAL) {
		if (!m_deferred) {
			BEESLOG("Deferring next transid in " << get_state());
		}
		m_deferred = true;
		BEESCOUNT(crawl_defer);
		return false;
	}

	// Log performance stats from the old crawl
	BEESLOG("Next transid in " << get_state());

	// Start new crawl
	m_deferred = false;
	auto roots = m_ctx->roots();
	crawl_state.m_min_transid = crawl_state.m_max_transid;
	crawl_state.m_max_transid = roots->transid_max();
	crawl_state.m_objectid = 0;
	crawl_state.m_offset = 0;
	crawl_state.m_started = current_time;
	// Log the state we are about to install; get_state() would still
	// return the old crawl here and merely repeat the line above.
	BEESLOG("Restarting crawl " << crawl_state);
	BEESCOUNT(crawl_restart);
	set_state(crawl_state);
	return true;
}
// Run one tree search for the current crawl position and queue the data
// extents it finds into m_extents.
//
// Precondition: m_extents is empty (enforced with THROW_CHECK1).
//
// Returns:
//   - the result of next_transid() when there is nothing to crawl (crawl is
//     deferred, the transid window is empty, the search ioctl fails, or the
//     search returns no items);
//   - true after processing a non-empty search result.  m_extents may still
//     be empty in that case if every item was filtered out.
//
// The saved crawl position is advanced after every item, before the item is
// examined (see the comment inside the loop for the trade-off).
bool
BeesCrawl::fetch_extents()
{
THROW_CHECK1(runtime_error, m_extents.size(), m_extents.empty());
auto old_state = get_state();
if (m_deferred || old_state.m_max_transid <= old_state.m_min_transid) {
BEESTRACE("Nothing to crawl in " << get_state());
return next_transid();
}
BEESNOTE("crawling " << get_state());
BEESLOG("Crawling " << get_state());
Timer crawl_timer;
// Search this crawl's tree for EXTENT_DATA items from the saved position,
// restricted to the crawl's transid window
BtrfsIoctlSearchKey sk;
sk.tree_id = old_state.m_root;
sk.min_objectid = old_state.m_objectid;
sk.min_type = sk.max_type = BTRFS_EXTENT_DATA_KEY;
sk.min_offset = old_state.m_offset;
sk.min_transid = old_state.m_min_transid;
sk.max_transid = old_state.m_max_transid;
sk.nr_items = BEES_MAX_CRAWL_SIZE;
// Lock in the old state
set_state(old_state);
BEESTRACE("Searching crawl sk " << static_cast<btrfs_ioctl_search_key&>(sk));
bool ioctl_ok = false;
{
// Scoped so the note and the too-long timer cover only the ioctl
BEESNOTE("searching crawl sk " << static_cast<btrfs_ioctl_search_key&>(sk));
BEESTOOLONG("Searching crawl sk " << static_cast<btrfs_ioctl_search_key&>(sk));
ioctl_ok = sk.do_ioctl_nothrow(m_ctx->root_fd());
}
if (ioctl_ok) {
BEESCOUNT(crawl_search);
} else {
BEESLOG("Search ioctl failed: " << strerror(errno));
BEESCOUNT(crawl_fail);
}
// A failed or empty search ends this pass over the tree
if (!ioctl_ok || sk.m_result.empty()) {
BEESCOUNT(crawl_empty);
BEESLOG("Crawl empty " << get_state());
return next_transid();
}
BEESLOG("Crawling " << sk.m_result.size() << " results from " << get_state());
auto results_left = sk.m_result.size();
BEESNOTE("crawling " << results_left << " results from " << get_state());
// Per-call tallies, used only for the summary log line at the end
size_t count_other = 0;
size_t count_inline = 0;
size_t count_unknown = 0;
size_t count_data = 0;
size_t count_low = 0;
size_t count_high = 0;
BeesFileRange last_bfr;
for (auto i : sk.m_result) {
sk.next_min(i);
--results_left;
BEESCOUNT(crawl_items);
BEESTRACE("i = " << i);
#if 1
// We need the "+ 1" and objectid rollover that next_min does.
auto new_state = get_state();
new_state.m_objectid = sk.min_objectid;
new_state.m_offset = sk.min_offset;
// Saving state here means we can skip a search result
// if we are interrupted. Not saving state here means we
// can fail to make forward progress in cases where there
// is a lot of metadata we can't process. Favor forward
// progress over losing search results.
set_state(new_state);
#endif
// Ignore things that aren't EXTENT_DATA_KEY
if (i.type != BTRFS_EXTENT_DATA_KEY) {
++count_other;
BEESCOUNT(crawl_nondata);
continue;
}
// Extents outside the transid window are counted but still processed
// (both `continue` statements below are commented out)
auto gen = call_btrfs_get(btrfs_stack_file_extent_generation, i.m_data);
if (gen < get_state().m_min_transid) {
BEESCOUNT(crawl_gen_low);
++count_low;
// We probably want (need?) to scan these anyway.
// continue;
}
if (gen > get_state().m_max_transid) {
BEESCOUNT(crawl_gen_high);
++count_high;
// This shouldn't ever happen
// continue;
}
auto type = call_btrfs_get(btrfs_stack_file_extent_type, i.m_data);
switch (type) {
default:
BEESINFO("Unhandled file extent type " << type << " in root " << get_state().m_root << " ino " << i.objectid << " offset " << to_hex(i.offset));
++count_unknown;
BEESCOUNT(crawl_unknown);
break;
case BTRFS_FILE_EXTENT_INLINE:
// Ignore these for now.
// BEESINFO("Ignored file extent type INLINE in root " << get_state().m_root << " ino " << i.objectid << " offset " << to_hex(i.offset));
++count_inline;
// TODO: replace with out-of-line dup extents
BEESCOUNT(crawl_inline);
break;
case BTRFS_FILE_EXTENT_PREALLOC:
BEESCOUNT(crawl_prealloc);
// Deliberate fall through: PREALLOC extents are handled like REG below
case BTRFS_FILE_EXTENT_REG: {
auto physical = call_btrfs_get(btrfs_stack_file_extent_disk_bytenr, i.m_data);
auto ram = call_btrfs_get(btrfs_stack_file_extent_ram_bytes, i.m_data);
auto len = call_btrfs_get(btrfs_stack_file_extent_num_bytes, i.m_data);
auto offset = call_btrfs_get(btrfs_stack_file_extent_offset, i.m_data);
BEESTRACE("Root " << get_state().m_root << " ino " << i.objectid << " physical " << to_hex(physical)
<< " logical " << to_hex(i.offset) << ".." << to_hex(i.offset + len)
<< " gen " << gen);
++count_data;
// A zero disk_bytenr is counted as a hole and not queued
if (physical) {
THROW_CHECK1(runtime_error, ram, ram > 0);
THROW_CHECK1(runtime_error, len, len > 0);
THROW_CHECK2(runtime_error, offset, ram, offset < ram);
BeesFileId bfi(get_state().m_root, i.objectid);
if (m_ctx->is_blacklisted(bfi)) {
BEESCOUNT(crawl_blacklisted);
} else {
// Queue the logical file range [i.offset, i.offset + len)
BeesFileRange bfr(bfi, i.offset, i.offset + len);
// BEESNOTE("pushing bfr " << bfr << " limit " << BEES_MAX_QUEUE_SIZE);
m_extents.insert(bfr);
BEESCOUNT(crawl_push);
}
} else {
BEESCOUNT(crawl_hole);
}
break;
}
}
}
BEESLOG("Crawled inline " << count_inline << " data " << count_data << " other " << count_other << " unknown " << count_unknown << " gen_low " << count_low << " gen_high " << count_high << " " << get_state() << " in " << crawl_timer << "s");
return true;
}
void
BeesCrawl::fetch_extents_harder()
{
	BEESNOTE("fetch_extents_harder " << get_state() << " with " << m_extents.size() << " extents");
	// Keep searching until an extent is queued or a fetch reports that
	// no progress was made.
	for (;;) {
		if (!m_extents.empty()) {
			break;
		}
		if (!fetch_extents()) {
			break;
		}
	}
}
BeesFileRange
BeesCrawl::peek_front()
{
	unique_lock<mutex> lock(m_mutex);
	// Refill the queue if needed, then report the first extent without
	// removing it; a default-constructed range means nothing is available.
	fetch_extents_harder();
	if (!m_extents.empty()) {
		return *m_extents.begin();
	}
	return BeesFileRange();
}
BeesFileRange
BeesCrawl::pop_front()
{
	unique_lock<mutex> lock(m_mutex);
	// Refill the queue if needed, then remove and return the first extent;
	// a default-constructed range means nothing is available.
	fetch_extents_harder();
	auto front_it = m_extents.begin();
	if (front_it == m_extents.end()) {
		return BeesFileRange();
	}
	auto rv = *front_it;
	m_extents.erase(front_it);
#if 0
	auto state = get_state();
	state.m_objectid = rv.fid().ino();
	state.m_offset = rv.begin();
	set_state(state);
#endif
	return rv;
}
BeesCrawlState
BeesCrawl::get_state()
{
	// Return a copy taken under the state mutex
	lock_guard<mutex> guard(m_state_mutex);
	return m_state;
}
void
BeesCrawl::set_state(const BeesCrawlState &bcs)
{
unique_lock<mutex> lock(m_state_mutex);
m_state = bcs;
lock.unlock();
m_ctx->roots()->crawl_state_set_dirty();
}

91
src/bees-thread.cc Normal file
View File

@ -0,0 +1,91 @@
#include "bees.h"
using namespace crucible;
using namespace std;
// Create a named thread object without starting a thread; call exec() to run
// something.  Throws invalid_argument (via THROW_CHECK1) if name is empty.
BeesThread::BeesThread(string name) :
m_name(name)
{
THROW_CHECK1(invalid_argument, name, !name.empty());
}
// Start a new thread running func.
//
// The thread sets its BeesNote name and its OS-level thread name, then runs
// func inside catch_all(), and logs its lifetime on exit.  The thread object
// is stored in m_thread_ptr for join() and the destructor.
//
// The thread body reads m_name through `this`, so this object must outlive
// the thread (the destructor cancels and joins it).
void
BeesThread::exec(function<void()> func)
{
	m_timer.reset();
	BEESLOG("BeesThread exec " << m_name);
	m_thread_ptr = make_shared<thread>([this, func]() {
		BEESLOG("Starting thread " << m_name);
		BeesNote::set_name(m_name);
		BEESNOTE("thread function");
		Timer thread_time;
		catch_all([&]() {
			// pthread_setname_np() accepts at most 15 bytes plus the
			// terminating NUL and fails with ERANGE for anything longer,
			// so pass a truncated copy; the full name stays in m_name.
			// NOTE(review): pthread_setname_np() returns a positive error
			// number on failure; confirm DIE_IF_MINUS_ERRNO reports that.
			const string kernel_name = m_name.substr(0, 15);
			DIE_IF_MINUS_ERRNO(pthread_setname_np(pthread_self(), kernel_name.c_str()));
		});
		catch_all([&]() {
			func();
		});
		BEESLOG("Exiting thread " << m_name << ", " << thread_time << " sec");
	});
}
// Create a named thread and immediately start it running func (see exec()).
// Throws invalid_argument (via THROW_CHECK1) if name is empty.
BeesThread::BeesThread(string name, function<void()> func) :
m_name(name)
{
THROW_CHECK1(invalid_argument, name, !name.empty());
BEESLOG("BeesThread construct " << m_name);
exec(func);
}
void
BeesThread::join()
{
	// Nothing was ever started
	if (!m_thread_ptr) {
		BEESLOG("Thread " << m_name << " no thread ptr");
		return;
	}

	BEESLOG("BeesThread::join " << m_name);

	// A thread that is not joinable is only reported
	if (!m_thread_ptr->joinable()) {
		if (m_name.empty()) {
			BEESLOG("BeesThread else " << m_name);
		} else {
			BEESLOG("BeesThread " << m_name << " not joinable");
		}
		return;
	}

	// Wait for the thread and log how long the wait took
	BEESLOG("Joining thread " << m_name);
	Timer thread_time;
	m_thread_ptr->join();
	BEESLOG("Waited for " << m_name << ", " << thread_time << " sec");
}
// Replace the stored thread name.  Only m_name is changed here; a thread that
// is already running is not renamed by this call.
void
BeesThread::set_name(const string &name)
{
m_name = name;
}
// Stop and reap the thread, if one was started.
// A joinable thread is sent pthread_cancel() and then joined, so destruction
// blocks until the thread has actually exited.
BeesThread::~BeesThread()
{
	if (!m_thread_ptr) {
		BEESLOG("Thread " << m_name << " no thread ptr");
		return;
	}
	BEESLOG("BeesThread destructor " << m_name);
	if (m_thread_ptr->joinable()) {
		BEESLOG("Cancelling thread " << m_name);
		int rv = pthread_cancel(m_thread_ptr->native_handle());
		if (rv) {
			// pthread_cancel() returns a positive error number on
			// failure, so it is passed to strerror() without negation.
			BEESLOG("pthread_cancel returned " << strerror(rv));
		}
		BEESLOG("Waiting for thread " << m_name);
		Timer thread_time;
		m_thread_ptr->join();
		BEESLOG("Waited for " << m_name << ", " << thread_time << " sec");
	} else if (!m_name.empty()) {
		BEESLOG("Thread " << m_name << " not joinable");
	} else {
		BEESLOG("Thread destroy else " << m_name);
	}
}

1006
src/bees-types.cc Normal file

File diff suppressed because it is too large Load Diff

599
src/bees.cc Normal file
View File

@ -0,0 +1,599 @@
#include "bees.h"
#include "crucible/interp.h"
#include "crucible/limits.h"
#include "crucible/process.h"
#include "crucible/string.h"
#include <cctype>
#include <cmath>
#include <iostream>
#include <memory>
// PRIx64
#include <inttypes.h>
#include <sched.h>
#include <sys/fanotify.h>
#include <linux/fs.h>
#include <sys/ioctl.h>
using namespace crucible;
using namespace std;
int
do_cmd_help(const ArgList &argv)
{
	// Everything after the "Usage:" line is fixed text
	static const char *const usage_body =
		"Performs best-effort extent-same deduplication on btrfs.\n"
		"\n"
		"fs-root-path MUST be the root of a btrfs filesystem tree (id 5).\n"
		"Other directories will be rejected.\n"
		"\n"
		"Multiple filesystems can share a single hash table (BEESHOME)\n"
		"but this only works well if the content of each filesystem\n"
		"is distinct from all the others.\n"
		"\n"
		"Required environment variables:\n"
		"\tBEESHOME\tPath to hash table and configuration files\n"
		"\n"
		"Optional environment variables:\n"
		"\tBEESSTATUS\tFile to write status to (tmpfs recommended, e.g. /run)\n"
		"\n";

	// argv[0] is shown as the program name
	cerr << "Usage: " << argv[0] << " fs-root-path [fs-root-path-2...]\n" << usage_body << endl;
	return 0;
}
// tracing ----------------------------------------
// Shared rate limiter consulted by the BEESINFO macro before it logs
RateLimiter bees_info_rate_limit(BEES_INFO_RATE, BEES_INFO_BURST);
// Per-thread head of the linked list of live BeesTracer objects (newest first)
thread_local BeesTracer *BeesTracer::s_next_tracer = nullptr;
BeesTracer::~BeesTracer()
{
	// Tracers only speak while an exception is unwinding the stack
	const bool unwinding = uncaught_exception();
	if (unwinding) {
		m_func();
	}
	// The outermost tracer of this thread closes the trace
	if (unwinding && !m_next_tracer) {
		BEESLOG("--- END TRACE --- exception ---");
	}
	// Unlink this tracer from the per-thread list
	s_next_tracer = m_next_tracer;
}
BeesTracer::BeesTracer(function<void()> f) :
	m_func(f),
	// Remember the previous head of this thread's tracer list...
	m_next_tracer(s_next_tracer)
{
	// ...and become the new head
	s_next_tracer = this;
}
void
BeesTracer::trace_now()
{
BeesTracer *tp = s_next_tracer;
BEESLOG("--- BEGIN TRACE ---");
while (tp) {
tp->m_func();
tp = tp->m_next_tracer;
}
BEESLOG("--- END TRACE ---");
}
// Per-thread pointer to the innermost live BeesNote
thread_local BeesNote *BeesNote::s_next = nullptr;
// Taken by every BeesNote member function defined in this file
mutex BeesNote::s_mutex;
// Current innermost note of each thread, keyed by gettid()
map<pid_t, BeesNote*> BeesNote::s_status;
// Per-thread name; get_name() substitutes "bees" while it is empty
thread_local string BeesNote::s_name;
BeesNote::~BeesNote()
{
	unique_lock<mutex> lock(s_mutex);
	// Pop this note: the previous note (if any) becomes current again
	s_next = m_prev;
	const auto tid = gettid();
	if (!s_next) {
		// That was the last note of this thread
		s_status.erase(tid);
	} else {
		s_status[tid] = s_next;
	}
}
BeesNote::BeesNote(function<void(ostream &os)> f) :
	m_func(f)
{
	unique_lock<mutex> lock(s_mutex);
	// Push this note on top of the calling thread's note stack
	m_prev = s_next;
	m_name = s_name;
	s_next = this;
	// ...and publish it as the thread's current status
	s_status[gettid()] = this;
}
void
BeesNote::set_name(const string &name)
{
unique_lock<mutex> lock(s_mutex);
s_name = name;
}
string
BeesNote::get_name()
{
	lock_guard<mutex> guard(s_mutex);
	// Threads that never set a name report as "bees"
	return s_name.empty() ? string("bees") : s_name;
}
BeesNote::ThreadStatusMap
BeesNote::get_status()
{
	unique_lock<mutex> lock(s_mutex);
	ThreadStatusMap rv;
	// Render the current note of every thread into one status string each
	for (const auto &entry : s_status) {
		BeesNote *note = entry.second;
		ostringstream oss;
		// Optional "name: " prefix
		const bool named = !note->m_name.empty();
		if (named) {
			oss << note->m_name << ": ";
		}
		// Show the age of notes that have been current for too long
		if (note->m_timer.age() > BEES_TOO_LONG) {
			oss << "[" << note->m_timer << "s] ";
		}
		note->m_func(oss);
		rv[entry.first] = oss.str();
	}
	return rv;
}
// static inline helpers ----------------------------------------
// True when the top bit (bit 63) of v is clear.
static inline
bool
bees_addr_check(uint64_t v)
{
	return (v >> 63) == 0;
}
// Signed overload: true when v is not negative (sign bit clear).
static inline
bool
bees_addr_check(int64_t v)
{
	return v >= 0;
}
// Format d with a binary-scaled suffix: d is divided by 1024 until it drops
// below 1024 or the largest suffix ("E") is reached, then rounded to three
// decimal places and printed followed by the suffix.
string
pretty(double d)
{
	static const char *const suffixes[] = { "", "K", "M", "G", "T", "P", "E" };
	const size_t last_suffix = sizeof(suffixes) / sizeof(suffixes[0]) - 1;

	size_t idx = 0;
	for (; d >= 1024 && idx < last_suffix; ++idx) {
		d /= 1024;
	}

	ostringstream oss;
	oss << (round(d * 1000.0) / 1000.0) << suffixes[idx];
	return oss.str();
}
// ostream operators ----------------------------------------
// Print every non-zero statistic as "name=value".  Names are grouped by the
// part before the first '_': entries of one group are separated by spaces and
// a new group starts on a new, tab-indented line.
template <class T>
ostream &
operator<<(ostream &os, const BeesStatTmpl<T> &bs)
{
	unique_lock<mutex> lock(bs.m_mutex);
	bool first = true;
	string last_tag;
	for (const auto &stat : bs.m_stats_map) {
		if (stat.second == 0) {
			continue;
		}
		const string tag = stat.first.substr(0, stat.first.find_first_of("_"));
		const bool new_group = !last_tag.empty() && tag != last_tag;
		if (new_group) {
			os << "\n\t";
		} else if (!first) {
			os << " ";
		}
		last_tag = tag;
		first = false;
		os << stat.first << "=" << stat.second;
	}
	return os;
}
// other ----------------------------------------
// Mutable access to the counter named idx, creating it with value 0 if it
// does not exist yet.  The reference is handed out after the lock is dropped.
template <class T>
T&
BeesStatTmpl<T>::at(string idx)
{
	unique_lock<mutex> lock(m_mutex);
	auto slot = m_stats_map.find(idx);
	if (slot == m_stats_map.end()) {
		slot = m_stats_map.insert(make_pair(idx, T(0))).first;
	}
	return slot->second;
}
// Read-only access: returns a copy of the counter named idx.
// Unlike the non-const overload this never creates an entry; map::at()
// throws out_of_range for an unknown name.
template <class T>
T
BeesStatTmpl<T>::at(string idx) const
{
	lock_guard<mutex> guard(m_mutex);
	return m_stats_map.at(idx);
}
// Add amount to the counter named idx, creating the counter at 0 first if it
// does not exist yet.
template <class T>
void
BeesStatTmpl<T>::add_count(string idx, size_t amount)
{
	unique_lock<mutex> lock(m_mutex);
	auto slot = m_stats_map.find(idx);
	if (slot == m_stats_map.end()) {
		slot = m_stats_map.insert(make_pair(idx, T(0))).first;
	}
	slot->second += amount;
}
// Copy constructor: copies the counter map while holding both objects'
// mutexes.  The mutex itself is not copied.
template <class T>
BeesStatTmpl<T>::BeesStatTmpl(const BeesStatTmpl &that)
{
	if (this != &that) {
		lock_guard<mutex> self_guard(m_mutex);
		lock_guard<mutex> that_guard(that.m_mutex);
		m_stats_map = that.m_stats_map;
	}
}
// Copy assignment: replaces this object's counters with a copy of that's.
// Self-assignment is a no-op.
//
// Both mutexes are acquired together with std::lock(), which avoids the
// lock-order deadlock that "lock this, then lock that" allows when two
// threads concurrently run `a = b` and `b = a`.
template <class T>
BeesStatTmpl<T> &
BeesStatTmpl<T>::operator=(const BeesStatTmpl<T> &that)
{
	if (&that == this) return *this;
	unique_lock<mutex> this_lock(m_mutex, defer_lock);
	unique_lock<mutex> that_lock(that.m_mutex, defer_lock);
	std::lock(this_lock, that_lock);
	m_stats_map = that.m_stats_map;
	return *this;
}
// Process-wide counter set; the BEESCOUNT and BEESCOUNTADD macros add to it
BeesStats BeesStats::s_global;
// Per-counter difference: returns a copy of this object's counters with each
// non-zero counter of `that` subtracted (counters missing on this side start
// at 0).  Subtracting an object from itself yields an empty BeesStats.
//
// Both operands are snapshotted while both mutexes are held (acquired
// together with std::lock() so that concurrent `a - b` and `b - a` cannot
// deadlock).  The subtraction then works only on the private snapshots, so
// `that` is never read after its lock has been released.
BeesStats
BeesStats::operator-(const BeesStats &that) const
{
	if (&that == this) return BeesStats();

	unique_lock<mutex> this_lock(m_mutex, defer_lock);
	unique_lock<mutex> that_lock(that.m_mutex, defer_lock);
	std::lock(this_lock, that_lock);
	BeesStats this_copy;
	this_copy.m_stats_map = m_stats_map;
	BeesStats that_copy;
	that_copy.m_stats_map = that.m_stats_map;
	this_lock.unlock();
	that_lock.unlock();

	// Iterate the snapshot, not that.m_stats_map, which may be changing
	for (const auto &i : that_copy.m_stats_map) {
		if (i.second != 0) {
			this_copy.at(i.first) -= i.second;
		}
	}
	return this_copy;
}
// Convert counters to rates: every counter is divided by d and the result is
// rounded up to three decimal places.
BeesRates
BeesStats::operator/(double d) const
{
	BeesRates rv;
	unique_lock<mutex> lock(m_mutex);
	for (const auto &stat : m_stats_map) {
		const double per_unit = stat.second / d;
		rv.m_stats_map[stat.first] = ceil(per_unit * 1000) / 1000;
	}
	return rv;
}
// True when at least one counter is non-zero.
BeesStats::operator bool() const
{
	unique_lock<mutex> lock(m_mutex);
	for (const auto &stat : m_stats_map) {
		if (stat.second == 0) {
			continue;
		}
		return true;
	}
	return false;
}
// Watchdog with a fixed message: check() reports s if the object's age
// exceeds limit.  The string is copied into the stored callback.
BeesTooLong::BeesTooLong(const string &s, double limit) :
m_limit(limit),
m_func([s](ostream &os) { os << s; })
{
}
// Watchdog with a message callback: func is invoked to produce the message
// only when check() finds that the age exceeds limit.
BeesTooLong::BeesTooLong(const func_type &func, double limit) :
m_limit(limit),
m_func(func)
{
}
void
BeesTooLong::check() const
{
	// Quiet unless the age is over the limit
	if (!(age() > m_limit)) {
		return;
	}
	// Build the message lazily, only when it is going to be logged
	ostringstream oss;
	m_func(oss);
	BEESLOG("PERFORMANCE: " << *this << " sec: " << oss.str());
}
// Report on scope exit if the guarded region took longer than the limit.
BeesTooLong::~BeesTooLong()
{
check();
}
// Replace the message callback; the limit is left unchanged by this function.
BeesTooLong &
BeesTooLong::operator=(const func_type &f)
{
m_func = f;
return *this;
}
// fsync() fd, failing via DIE_IF_NON_ZERO if fsync() returns non-zero.
// On success the sync_count counter is incremented and the elapsed time in
// milliseconds is added to sync_ms.
void
bees_sync(int fd)
{
Timer sync_timer;
BEESNOTE("syncing " << name_fd(fd));
BEESTOOLONG("syncing " << name_fd(fd));
DIE_IF_NON_ZERO(fsync(fd));
BEESCOUNT(sync_count);
// Timer::age() is scaled by 1000 to record milliseconds
BEESCOUNTADD(sync_ms, sync_timer.age() * 1000);
}
// Describe a small text file called name inside directory dir_fd, with limit
// as the exclusive upper bound on content size enforced by read() and write().
// Nothing is opened or created here.
BeesStringFile::BeesStringFile(Fd dir_fd, string name, size_t limit) :
m_dir_fd(dir_fd),
m_name(name),
m_limit(limit)
{
BEESLOG("BeesStringFile " << name_fd(m_dir_fd) << "/" << m_name << " max size " << pretty(m_limit));
}
// Read the whole file and return its contents.
// Returns an empty string if the file cannot be opened (for any reason).
// Throws out_of_range (via THROW_CHECK1) if the file exists but is empty, or
// if its size is not below m_limit.
string
BeesStringFile::read()
{
BEESNOTE("opening " << m_name << " in " << name_fd(m_dir_fd));
Fd fd(openat(m_dir_fd, m_name.c_str(), FLAGS_OPEN_FILE));
if (!fd) {
return string();
}
BEESNOTE("sizing " << m_name << " in " << name_fd(m_dir_fd));
Stat st(fd);
THROW_CHECK1(out_of_range, st.st_size, st.st_size > 0);
THROW_CHECK1(out_of_range, st.st_size, st.st_size < ranged_cast<off_t>(m_limit));
BEESNOTE("reading " << m_name << " in " << name_fd(m_dir_fd));
return read_string(fd, st.st_size);
}
// Replace the file's contents with `contents` by writing a temporary file
// "<name>.tmp" in the same directory, fsync()ing it, and renaming it over the
// real name.
// Throws out_of_range (via THROW_CHECK2) if contents.size() is not below
// m_limit; the *_or_die helpers and DIE_IF_NON_ZERO report I/O failures.
void
BeesStringFile::write(string contents)
{
THROW_CHECK2(out_of_range, contents.size(), m_limit, contents.size() < m_limit);
auto tmpname = m_name + ".tmp";
// Remove any leftover temporary file so that the O_EXCL create below succeeds
BEESNOTE("unlinking " << tmpname << " in " << name_fd(m_dir_fd));
unlinkat(m_dir_fd, tmpname.c_str(), 0);
// ignore error
// This note becomes the current one again when the notes declared inside
// the block below are destroyed, i.e. while ofd goes out of scope
BEESNOTE("closing " << tmpname << " in " << name_fd(m_dir_fd));
{
Fd ofd = openat_or_die(m_dir_fd, tmpname, FLAGS_CREATE_FILE, S_IRUSR | S_IWUSR);
BEESNOTE("writing " << tmpname << " in " << name_fd(m_dir_fd));
write_or_die(ofd, contents);
BEESNOTE("fsyncing " << tmpname << " in " << name_fd(m_dir_fd));
DIE_IF_NON_ZERO(fsync(ofd));
}
BEESNOTE("renaming " << tmpname << " to " << m_name << " in FD " << name_fd(m_dir_fd));
BEESTRACE("renaming " << tmpname << " to " << m_name << " in FD " << name_fd(m_dir_fd));
renameat_or_die(m_dir_fd, tmpname, m_dir_fd, m_name);
}
// (Re)create the temporary file: open a new O_TMPFILE file in the context's
// root directory (replacing m_fd), register it with the context, set the
// FS_COMPR_FL inode flag on it, and reset the write position to one clone
// block so that the first block of the file stays empty.
// The DIE_IF_MINUS_ONE checks report a failed open or ioctl.
void
BeesTempFile::create()
{
// BEESLOG("creating temporary file in " << m_ctx->root_path());
BEESNOTE("creating temporary file in " << m_ctx->root_path());
BEESTOOLONG("creating temporary file in " << m_ctx->root_path());
DIE_IF_MINUS_ONE(m_fd = openat(m_ctx->root_fd(), ".", FLAGS_OPEN_TMPFILE, S_IRUSR | S_IWUSR));
BEESCOUNT(tmp_create);
// Can't reopen this file, so don't allow any resolves there
// Resolves won't work there anyway. There are lots of tempfiles
// and they're short-lived, so this ends up being just a memory leak
// m_ctx->blacklist_add(BeesFileId(m_fd));
m_ctx->insert_root_ino(m_fd);
// Set compression attribute
int flags = 0;
BEESTRACE("Getting FS_COMPR_FL on m_fd " << name_fd(m_fd) << " flags " << to_hex(flags));
DIE_IF_MINUS_ONE(ioctl(m_fd, FS_IOC_GETFLAGS, &flags));
flags |= FS_COMPR_FL;
BEESTRACE("Setting FS_COMPR_FL on m_fd " << name_fd(m_fd) << " flags " << to_hex(flags));
DIE_IF_MINUS_ONE(ioctl(m_fd, FS_IOC_SETFLAGS, &flags));
// Always leave first block empty to avoid creating a file with an inline extent
m_end_offset = BLOCK_SIZE_CLONE;
}
// Grow the temporary file to `offset` bytes with ftruncate() and record the
// new end offset.
// Throws invalid_argument (via THROW_CHECK2) unless offset is strictly
// greater than the current end offset, so the file is never shrunk here.
void
BeesTempFile::resize(off_t offset)
{
BEESTOOLONG("Resizing temporary file to " << to_hex(offset));
BEESNOTE("Resizing temporary file " << name_fd(m_fd) << " to " << to_hex(offset));
BEESTRACE("Resizing temporary file " << name_fd(m_fd) << " to " << to_hex(offset));
// Ensure that file covers m_end_offset..offset
THROW_CHECK2(invalid_argument, m_end_offset, offset, m_end_offset < offset);
// Truncate
DIE_IF_NON_ZERO(ftruncate(m_fd, offset));
BEESCOUNT(tmp_resize);
// Success
m_end_offset = offset;
}
// Create a temporary file for ctx.  The file is opened immediately by
// create(), which also sets the initial end offset.
BeesTempFile::BeesTempFile(shared_ptr<BeesContext> ctx) :
m_ctx(ctx),
m_end_offset(0)
{
create();
}
void
BeesTempFile::realign()
{
if (m_end_offset > BLOCK_SIZE_MAX_TEMP_FILE) {
BEESLOG("temporary file size " << to_hex(m_end_offset) << " > max " << BLOCK_SIZE_MAX_TEMP_FILE);
BEESCOUNT(tmp_trunc);
return create();
}
if (m_end_offset & BLOCK_MASK_CLONE) {
// BEESTRACE("temporary file size " << to_hex(m_end_offset) << " not aligned");
BEESCOUNT(tmp_realign);
return create();
}
// OK as is
BEESCOUNT(tmp_aligned);
}
// Append a hole of `count` bytes to the temporary file and return the range
// it occupies.  The file is extended with resize() (ftruncate); no data is
// written.
// Throws invalid_argument (via THROW_CHECK1) unless count > 0.
BeesFileRange
BeesTempFile::make_hole(off_t count)
{
THROW_CHECK1(invalid_argument, count, count > 0);
// May replace the file with a fresh one, resetting m_end_offset
realign();
BEESTRACE("make hole at " << m_end_offset);
auto end = m_end_offset + count;
BeesFileRange rv(m_fd, m_end_offset, end);
resize(end);
BEESTRACE("created temporary hole " << rv);
BEESCOUNT(tmp_hole);
return rv;
}
// Copy the data of src into the temporary file and return the range of the
// copy.
//
// The file is extended by src.size() bytes, then src is read in chunks of at
// most BLOCK_SIZE_CLONE bytes.  Chunks whose data is all zero are skipped
// (left as holes); all other chunks are written with pwrite.  If anything was
// written, the file is fsync()ed before returning.
//
// Throws invalid_argument (via THROW_CHECK1) unless
// 0 < src.size() < BLOCK_SIZE_MAX_TEMP_FILE.
BeesFileRange
BeesTempFile::make_copy(const BeesFileRange &src)
{
BEESLOG("copy: " << src);
BEESNOTE("Copying " << src);
BEESTRACE("Copying " << src);
THROW_CHECK1(invalid_argument, src, src.size() > 0);
// FIXME: don't know where these come from, but we can't handle them.
// Grab a trace for the log.
THROW_CHECK1(invalid_argument, src, src.size() < BLOCK_SIZE_MAX_TEMP_FILE);
// May replace the file with a fresh one, resetting m_end_offset
realign();
auto begin = m_end_offset;
auto end = m_end_offset + src.size();
resize(end);
BeesFileRange rv(m_fd, begin, end);
BEESTRACE("copying to: " << rv);
BEESNOTE("copying " << src << " to " << rv);
// src_p and dst_p advance in lockstep through source and destination
auto src_p = src.begin();
auto dst_p = begin;
bool did_block_write = false;
while (dst_p < end) {
// The last chunk may be shorter than a full clone block
auto len = min(BLOCK_SIZE_CLONE, end - dst_p);
BeesBlockData bbd(src.fd(), src_p, len);
// Don't fill in holes
if (bbd.is_data_zero()) {
BEESCOUNT(tmp_block_zero);
} else {
BEESNOTE("copying " << src << " to " << rv << "\n"
"\tpwrite " << bbd << " to " << name_fd(m_fd) << " offset " << to_hex(dst_p) << " len " << len);
pwrite_or_die(m_fd, bbd.data().data(), len, dst_p);
did_block_write = true;
BEESCOUNT(tmp_block);
BEESCOUNTADD(tmp_bytes, len);
}
src_p += len;
dst_p += len;
}
// We seem to get lockups without this!
if (did_block_write) {
bees_sync(m_fd);
}
BEESCOUNT(tmp_copy);
return rv;
}
// Program body: create one BeesContext per filesystem root given in args,
// start the status thread, and run the progress loop of the last context
// created.
//
// Each context is constructed with the previous one as its argument.  A
// failure while adding one filesystem is handled by catch_all() and does not
// stop the remaining arguments from being processed.
//
// Returns 0 when show_progress() returns, or 1 if no context could be
// created at all.
int
bees_main(ArgList args)
{
	set_catch_explainer([&](string s) {
		BEESLOG("\n\n*** EXCEPTION ***\n\t" << s << "\n***\n");
		BEESCOUNT(exception_caught);
	});

	BEESNOTE("main");
	BeesNote::set_name("main");

	shared_ptr<BeesContext> bc;

	// Subscribe to fanotify events
	bool did_subscription = false;
	for (string arg : args) {
		catch_all([&]() {
			bc = make_shared<BeesContext>(bc);
			bc->set_root_path(arg);
			did_subscription = true;
		});
	}

	if (!did_subscription) {
		BEESLOG("WARNING: no filesystems added");
	}

	// If every context construction failed there is nothing to run, and
	// dereferencing bc below would be a null pointer access.
	if (!bc) {
		BEESLOG("ERROR: no filesystem context could be created, exiting");
		return 1;
	}

	BeesThread status_thread("status", [&]() {
		bc->dump_status();
	});

	// Now we just wait forever
	bc->show_progress();

	// That is all.
	return 0;
}
int
main(int argc, const char **argv)
{
	// With at least one filesystem argument, run the program body
	if (argc >= 2) {
		ArgList args(argv + 1);
		// Stays 1 if bees_main() exits by exception
		int rv = 1;
		catch_and_explain([&]() {
			rv = bees_main(args);
		});
		return rv;
	}

	// No arguments: print usage and fail
	do_cmd_help(argv);
	return 2;
}
// instantiate templates for linkage ----------------------------------------
// The member templates are defined in this .cc file, so the two element
// types in use are instantiated explicitly here: uint64_t (the base of
// BeesStats) and double (BeesRates).
template class BeesStatTmpl<uint64_t>;
template ostream & operator<<(ostream &os, const BeesStatTmpl<uint64_t> &bs);
template class BeesStatTmpl<double>;
template ostream & operator<<(ostream &os, const BeesStatTmpl<double> &bs);

828
src/bees.h Normal file
View File

@ -0,0 +1,828 @@
#ifndef BEES_H
#define BEES_H
#include "crucible/bool.h"
#include "crucible/cache.h"
#include "crucible/chatter.h"
#include "crucible/error.h"
#include "crucible/extentwalker.h"
#include "crucible/fd.h"
#include "crucible/fs.h"
#include "crucible/lockset.h"
#include "crucible/time.h"
#include "crucible/timequeue.h"
#include "crucible/workqueue.h"
#include <array>
#include <functional>
#include <list>
#include <mutex>
#include <string>
#include <thread>
#include <endian.h>
using namespace crucible;
using namespace std;
// All BLOCK_SIZE_* and BLOCK_MASK_* values below are byte counts.
// Block size for clone alignment (FIXME: should read this from /sys/fs/btrfs/<FS-UUID>/clone_alignment)
const off_t BLOCK_SIZE_CLONE = 4096;
// Block size for dedup checksums (arbitrary, but must be a multiple of clone alignment)
const off_t BLOCK_SIZE_SUMS = 4096;
// Block size for memory allocations and file mappings (FIXME: should be CPU page size)
const off_t BLOCK_SIZE_MMAP = 4096;
// Maximum length parameter to extent-same ioctl (FIXME: hardcoded in kernel)
const off_t BLOCK_SIZE_MAX_EXTENT_SAME = 4096 * 4096;
// Maximum length of a compressed extent in bytes
const off_t BLOCK_SIZE_MAX_COMPRESSED_EXTENT = 128 * 1024;
// Try to combine smaller extents into larger ones
const off_t BLOCK_SIZE_MIN_EXTENT_DEFRAG = BLOCK_SIZE_MAX_COMPRESSED_EXTENT;
// Avoid splitting extents that are already too small
const off_t BLOCK_SIZE_MIN_EXTENT_SPLIT = BLOCK_SIZE_MAX_COMPRESSED_EXTENT;
// const off_t BLOCK_SIZE_MIN_EXTENT_SPLIT = 1024LL * 1024 * 1024 * 1024;
// Maximum length of any extent in bytes
// except we've seen 1.03G extents...
// ...FIEMAP is slow and full of lies
const off_t BLOCK_SIZE_MAX_EXTENT = 128 * 1024 * 1024;
// Masks, so we don't have to write "(BLOCK_SIZE_CLONE - 1)" everywhere
const off_t BLOCK_MASK_CLONE = BLOCK_SIZE_CLONE - 1;
const off_t BLOCK_MASK_SUMS = BLOCK_SIZE_SUMS - 1;
const off_t BLOCK_MASK_MMAP = BLOCK_SIZE_MMAP - 1;
// NOTE(review): unlike the masks above this one is (2 * size - 1), not
// (size - 1) -- confirm that covering two compressed extents is intended
const off_t BLOCK_MASK_MAX_COMPRESSED_EXTENT = BLOCK_SIZE_MAX_COMPRESSED_EXTENT * 2 - 1;
// Maximum temporary file size
const off_t BLOCK_SIZE_MAX_TEMP_FILE = 1024 * 1024 * 1024;
// Bucket size for hash table (size of one hash bucket)
const off_t BLOCK_SIZE_HASHTAB_BUCKET = BLOCK_SIZE_MMAP;
// Extent size for hash table (since the nocow file attribute does not seem to be working today)
const off_t BLOCK_SIZE_HASHTAB_EXTENT = 16 * 1024 * 1024;
// Bytes per second we want to flush (8GB every two hours)
const double BEES_FLUSH_RATE = 8.0 * 1024 * 1024 * 1024 / 7200.0;
// Interval between writing non-hash-table things to disk (15 minutes)
const int BEES_WRITEBACK_INTERVAL = 900;
// Statistics reports while scanning
const int BEES_STATS_INTERVAL = 3600;
// Progress shows instantaneous rates and thread status
const int BEES_PROGRESS_INTERVAL = 3600;
// The status file is rewritten every second, so keep it on a ramdisk (tmpfs)
const int BEES_STATUS_INTERVAL = 1;
// Log warnings when an operation takes too long
// (default BeesTooLong limit; also the age at which get_status() shows a note's timer)
const double BEES_TOO_LONG = 2.5;
// Avoid any extent where LOGICAL_INO takes this long
const double BEES_TOXIC_DURATION = 9.9;
// How long we should wait for new btrfs transactions
// (a crawl is not restarted until this much time has passed since it started)
const double BEES_COMMIT_INTERVAL = 900;
// How long between hash table histograms
const double BEES_HASH_TABLE_ANALYZE_INTERVAL = 3600;
// Rate limiting of informational messages (see BEESINFO)
const double BEES_INFO_RATE = 10.0;
const double BEES_INFO_BURST = 1.0;
// After we have this many events queued, wait
const size_t BEES_MAX_QUEUE_SIZE = 1024;
// Read this many items at a time in SEARCHv2
const size_t BEES_MAX_CRAWL_SIZE = 4096;
// If an extent has this many refs, pretend it does not exist
// to avoid a crippling btrfs performance bug
// The actual limit in LOGICAL_INO seems to be 2730, but let's leave a little headroom
const size_t BEES_MAX_EXTENT_REF_COUNT = 2560;
// Flags
// open(2) flag sets; every set except FLAGS_OPEN_FANOTIFY builds on FLAGS_OPEN_COMMON
const int FLAGS_OPEN_COMMON = O_NOFOLLOW | O_NONBLOCK | O_CLOEXEC | O_NOATIME | O_LARGEFILE | O_NOCTTY;
// Directories, read-only
const int FLAGS_OPEN_DIR = FLAGS_OPEN_COMMON | O_RDONLY | O_DIRECTORY;
// Regular files, read-only
const int FLAGS_OPEN_FILE = FLAGS_OPEN_COMMON | O_RDONLY;
// Regular files, read-write
const int FLAGS_OPEN_FILE_RW = FLAGS_OPEN_COMMON | O_RDWR;
// Unnamed temporary files (O_TMPFILE), read-write
const int FLAGS_OPEN_TMPFILE = FLAGS_OPEN_FILE_RW | O_TMPFILE | O_TRUNC | O_EXCL;
// New files, write-only; O_EXCL makes the open fail if the name already exists
const int FLAGS_CREATE_FILE = FLAGS_OPEN_COMMON | O_WRONLY | O_CREAT | O_EXCL;
// Fanotify allows O_APPEND, O_DSYNC, O_NOATIME, O_NONBLOCK, O_CLOEXEC, O_LARGEFILE
const int FLAGS_OPEN_FANOTIFY = O_RDWR | O_NOATIME | O_CLOEXEC | O_LARGEFILE;
// macros ----------------------------------------
// In all of these, x is a chain of stream insertions (e.g. "foo " << bar)
// that is pasted after a "<<" operator.
// BEESLOG: stream x into a temporary Chatter named after BeesNote::get_name().
#define BEESLOG(x) do { Chatter c(BeesNote::get_name()); c << x; } while (0)
// BEESLOGTRACE: BEESLOG(x), then call BeesTracer::trace_now().
#define BEESLOGTRACE(x) do { BEESLOG(x); BeesTracer::trace_now(); } while (0)
// BEESTRACE: declare a local BeesTracer holding a lambda that logs x.
// The variable name is built by SRSLY_WTF_C from a prefix and __LINE__
// (presumably token pasting, so several can share a scope -- verify at its definition).
// The lambda captures by reference, so x is evaluated when the lambda runs.
#define BEESTRACE(x) BeesTracer SRSLY_WTF_C(beesTracer_, __LINE__) ([&]() { BEESLOG(x); })
// BEESTOOLONG: declare a local BeesTooLong whose description lambda streams x.
#define BEESTOOLONG(x) BeesTooLong SRSLY_WTF_C(beesTooLong_, __LINE__) ([&](ostream &_btl_os) { _btl_os << x; })
// BEESNOTE: declare a local BeesNote whose description lambda streams x.
#define BEESNOTE(x) BeesNote SRSLY_WTF_C(beesNote_, __LINE__) ([&](ostream &_btl_os) { _btl_os << x; })
// BEESINFO: like BEESLOG, but only when bees_info_rate_limit reports ready;
// one unit is then borrowed from the limiter before logging.
// x is not evaluated when the message is suppressed.
#define BEESINFO(x) do { \
if (bees_info_rate_limit.is_ready()) { \
bees_info_rate_limit.borrow(1); \
Chatter c(BeesNote::get_name()); \
c << x; \
} \
} while (0)
// BEESCOUNT: call add_count on the global stats with the stringified name of stat
// (add_count's amount defaults to 1).
#define BEESCOUNT(stat) do { \
BeesStats::s_global.add_count(#stat); \
} while (0)
// BEESCOUNTADD: same as BEESCOUNT with an explicit amount.
#define BEESCOUNTADD(stat, amount) do { \
BeesStats::s_global.add_count(#stat, (amount)); \
} while (0)
// ----------------------------------------
// Forward declarations so the templated stream operator can be named as a friend below.
template <class T> class BeesStatTmpl;
template <class T> ostream& operator<<(ostream &os, const BeesStatTmpl<T> &bs);
// A set of named statistics: a map from string key to a value of type T,
// stored next to a mutex (mutable, so const members can lock it).
template <class T>
class BeesStatTmpl {
map<string, T> m_stats_map;
mutable mutex m_mutex;
public:
BeesStatTmpl() = default;
// Copy operations are user-declared (defined elsewhere); a std::mutex member
// is not copyable, so the defaults would not be available.
BeesStatTmpl(const BeesStatTmpl &that);
BeesStatTmpl &operator=(const BeesStatTmpl &that);
// Presumably adds amount (default 1) to the entry named idx -- see definition.
void add_count(string idx, size_t amount = 1);
// Access the entry named idx: by reference (non-const) or by value (const).
T& at(string idx);
T at(string idx) const;
friend ostream& operator<< <>(ostream &os, const BeesStatTmpl<T> &bs);
friend class BeesStats;
};
// Statistics with floating-point values; this is the result type of BeesStats::operator/.
using BeesRates = BeesStatTmpl<double>;
// Statistics with 64-bit unsigned counters, plus the process-wide instance
// s_global used by the BEESCOUNT/BEESCOUNTADD macros.
struct BeesStats : public BeesStatTmpl<uint64_t> {
static BeesStats s_global;
// Difference between two sets of counters (defined elsewhere).
BeesStats operator-(const BeesStats &that) const;
// Counters divided by d, yielding BeesRates (defined elsewhere).
BeesRates operator/(double d) const;
// Explicit truth test; the exact criterion is in the definition (not visible here).
explicit operator bool() const;
};
// Forward declarations for classes referenced before their definitions.
class BeesContext;
class BeesBlockData;
// Holds a callback (normally created by the BEESTRACE macro) and a pointer to
// another BeesTracer, with a per-thread static pointer s_next_tracer.
// NOTE(review): this looks like a per-thread linked list of active tracers that
// trace_now() walks -- confirm against the definitions, which are not visible here.
class BeesTracer {
function<void()> m_func;
BeesTracer *m_next_tracer = 0;
thread_local static BeesTracer *s_next_tracer;
public:
BeesTracer(function<void()> f);
~BeesTracer();
static void trace_now();
};
// A status note: a callback that writes a description to a stream, plus a
// timer and a name.  Normally created by the BEESNOTE macro.
// Static state: a mutex, a map from pid_t to BeesNote*, and per-thread
// (thread_local) s_next and s_name.
// NOTE(review): m_prev/s_next suggest notes nest per thread -- confirm in the definitions.
class BeesNote {
function<void(ostream &)> m_func;
BeesNote *m_prev;
Timer m_timer;
string m_name;
static mutex s_mutex;
static map<pid_t, BeesNote*> s_status;
thread_local static BeesNote *s_next;
thread_local static string s_name;
public:
BeesNote(function<void(ostream &)> f);
~BeesNote();
// Status snapshot type: one string per pid_t key.
using ThreadStatusMap = map<pid_t, string>;
static ThreadStatusMap get_status();
// Name accessors; get_name() is what the BEESLOG/BEESINFO macros pass to Chatter.
static void set_name(const string &name);
static string get_name();
};
// C++ threads dumbed down even further
// A named wrapper around a shared_ptr<thread>, with a Timer member.
class BeesThread {
string m_name;
Timer m_timer;
shared_ptr<thread> m_thread_ptr;
public:
~BeesThread();
// Construct with a name only; the function to run is supplied later via exec().
BeesThread(string name);
// Construct with a name and the function to run.
BeesThread(string name, function<void()> args);
// Run args (presumably on a newly started thread -- see definition).
void exec(function<void()> args);
void join();
void set_name(const string &name);
};
// Identifies a file by a pair of 64-bit numbers: root and ino
// (presumably btrfs subvolume root ID and inode number -- confirm in the definitions).
class BeesFileId {
uint64_t m_root;
uint64_t m_ino;
public:
uint64_t root() const { return m_root; }
uint64_t ino() const { return m_ino; }
// Ordering and equality, so the ID can be used in sets (see BeesContext::m_blacklist).
bool operator<(const BeesFileId &that) const;
bool operator!=(const BeesFileId &that) const;
bool operator==(const BeesFileId &that) const;
// Truth test (non-explicit); the criterion is in the definition, not visible here.
operator bool() const;
// Constructible from a BtrfsInodeOffsetRoot, an open fd, an explicit
// (root, ino) pair, or nothing.
BeesFileId(const BtrfsInodeOffsetRoot &bior);
BeesFileId(int fd);
BeesFileId(uint64_t root, uint64_t ino);
BeesFileId();
};
ostream& operator<<(ostream &os, const BeesFileId &bfi);
// A byte range [begin, end) of a file, where the file is referenced by an
// open Fd and/or a BeesFileId.  m_fd, m_fid and m_file_size are mutable
// because the "lazy accessors" below may fill them in from const methods.
class BeesFileRange {
protected:
static mutex s_mutex;
mutable Fd m_fd;
mutable BeesFileId m_fid;
off_t m_begin, m_end;
mutable off_t m_file_size;
public:
BeesFileRange();
BeesFileRange(Fd fd, off_t begin, off_t end);
BeesFileRange(const BeesFileId &fid, off_t begin, off_t end);
// Conversions to and from BeesBlockData
BeesFileRange(const BeesBlockData &bbd);
operator BeesBlockData() const;
bool operator<(const BeesFileRange &that) const;
bool operator==(const BeesFileRange &that) const;
bool operator!=(const BeesFileRange &that) const;
bool empty() const;
bool is_same_file(const BeesFileRange &that) const;
bool overlaps(const BeesFileRange &that) const;
// If file ranges overlap, extends this to include that.
// Coalesce with empty bfr = non-empty bfr
bool coalesce(const BeesFileRange &that);
// Remove that from this, creating 0, 1, or 2 new objects
pair<BeesFileRange, BeesFileRange> subtract(const BeesFileRange &that) const;
off_t begin() const { return m_begin; }
off_t end() const { return m_end; }
off_t size() const;
// Lazy accessors
off_t file_size() const;
BeesFileId fid() const;
// Get the fd if there is one
Fd fd() const;
// Get the fd, opening it if necessary
Fd fd(const shared_ptr<BeesContext> &ctx) const;
BeesFileRange copy_closed() const;
// Is it defined?
// True when either the fd or the file ID tests true.
operator bool() const { return !!m_fd || m_fid; }
// Make range larger
off_t grow_end(off_t delta);
off_t grow_begin(off_t delta);
friend ostream & operator<<(ostream &os, const BeesFileRange &bfr);
};
// A 64-bit block address.  Values below MagicValue::LAST are "magic"
// placeholders with no physical location; for all other values some low
// bits are used as flags (see the c_*_mask constants).
class BeesAddress {
public:
using Type = uint64_t;
private:
Type m_addr = ZERO;
bool magic_check(uint64_t flags);
public:
// Blocks with no physical address (not yet allocated, hole, or "other").
// PREALLOC blocks have a physical address so they're not magic enough to be handled here.
// Compressed blocks have a physical address but it's two-dimensional.
enum MagicValue {
ZERO, // BeesAddress uninitialized
DELALLOC, // delayed allocation
HOLE, // no extent present, no space allocated
UNUSABLE, // inline extent or unrecognized FIEMAP flags
LAST, // all further values are non-magic
};
// Implicit conversions from a raw value or a magic value, and back to the raw value.
BeesAddress(Type addr = ZERO) : m_addr(addr) {}
BeesAddress(MagicValue addr) : m_addr(addr) {}
BeesAddress& operator=(const BeesAddress &that) = default;
operator Type() const { return m_addr; }
// Comparisons; the MagicValue overloads wrap the value in a BeesAddress first.
bool operator==(const BeesAddress &that) const;
bool operator==(const MagicValue that) const { return *this == BeesAddress(that); }
bool operator!=(const BeesAddress &that) const { return !(*this == that); }
bool operator!=(const MagicValue that) const { return *this != BeesAddress(that); }
bool operator<(const BeesAddress &that) const;
// Range of the offset field: 1 .. (max compressed extent size / clone block size)
static const Type c_offset_min = 1;
static const Type c_offset_max = BLOCK_SIZE_MAX_COMPRESSED_EXTENT / BLOCK_SIZE_CLONE;
// if this isn't 0x3f we will have problems
// (the mask is 0x3f exactly when c_offset_max is 32)
static const Type c_offset_mask = (c_offset_max - 1) | (c_offset_max);
// Flag bits: bit 11 = compressed, bit 10 = unaligned EOF, bit 9 = toxic
static const Type c_compressed_mask = 1 << 11;
static const Type c_eof_mask = 1 << 10;
static const Type c_toxic_mask = 1 << 9;
static const Type c_all_mask = c_compressed_mask | c_eof_mask | c_offset_mask | c_toxic_mask;
// Flag tests: each is false for magic values, because the address must be >= LAST.
bool is_compressed() const { return m_addr >= MagicValue::LAST && (m_addr & c_compressed_mask); }
bool has_compressed_offset() const { return m_addr >= MagicValue::LAST && (m_addr & c_compressed_mask) && (m_addr & c_offset_mask); }
bool is_toxic() const { return m_addr >= MagicValue::LAST && (m_addr & c_toxic_mask); }
bool is_unaligned_eof() const { return m_addr >= MagicValue::LAST && (m_addr & c_eof_mask); }
bool is_magic() const { return m_addr < MagicValue::LAST; }
Type get_compressed_offset() const;
Type get_physical_or_zero() const;
void set_toxic();
// Construct from a position in an open file (optionally with a context),
// or from a position within an Extent.  Defined elsewhere.
BeesAddress(int fd, off_t offset);
BeesAddress(int fd, off_t offset, shared_ptr<BeesContext> ctx);
BeesAddress(const Extent &e, off_t offset);
};
ostream & operator<<(ostream &os, const BeesAddress &ba);
// A file, named relative to a directory fd, whose whole contents are read or
// written as a single string.
class BeesStringFile {
Fd m_dir_fd;
string m_name;
size_t m_limit;
public:
// limit defaults to 1 MiB (presumably a cap on the size read -- see definition).
BeesStringFile(Fd dir_fd, string name, size_t limit = 1024 * 1024);
string read();
void write(string contents);
};
// The hash table: an array of (hash, address) cells, grouped into buckets
// and extents.  The same memory region is addressed through a union of
// differently-typed pointers.  Non-copyable.
class BeesHashTable {
shared_ptr<BeesContext> m_ctx;
public:
using HashType = uint64_t;
using AddrType = uint64_t;
// One table entry: a hash and an address, packed with no padding.
// Comparisons are lexicographic on (e_hash, e_addr).
struct Cell {
HashType e_hash;
AddrType e_addr;
Cell(const Cell &) = default;
Cell(HashType hash, AddrType addr) : e_hash(hash), e_addr(addr) { }
bool operator==(const Cell &e) const { return tie(e_hash, e_addr) == tie(e.e_hash, e.e_addr); }
bool operator!=(const Cell &e) const { return tie(e_hash, e_addr) != tie(e.e_hash, e.e_addr); }
bool operator<(const Cell &e) const { return tie(e_hash, e_addr) < tie(e.e_hash, e.e_addr); }
} __attribute__((packed));
private:
// Geometry derived from the BLOCK_SIZE_HASHTAB_* constants (defined elsewhere).
static const uint64_t c_cells_per_bucket = BLOCK_SIZE_HASHTAB_BUCKET / sizeof(Cell);
static const uint64_t c_buckets_per_extent = BLOCK_SIZE_HASHTAB_EXTENT / BLOCK_SIZE_HASHTAB_BUCKET;
public:
// A bucket viewed either as an array of cells or as raw bytes.
union Bucket {
Cell p_cells[c_cells_per_bucket];
uint8_t p_byte[BLOCK_SIZE_HASHTAB_BUCKET];
} __attribute__((packed));
// An extent viewed either as an array of buckets or as raw bytes.
union Extent {
Bucket p_buckets[BLOCK_SIZE_HASHTAB_EXTENT / BLOCK_SIZE_HASHTAB_BUCKET];
uint8_t p_byte[BLOCK_SIZE_HASHTAB_EXTENT];
} __attribute__((packed));
BeesHashTable(shared_ptr<BeesContext> ctx, string filename);
~BeesHashTable();
// Lookup and update operations (defined elsewhere).
vector<Cell> find_cell(HashType hash);
bool push_random_hash_addr(HashType hash, AddrType addr);
void erase_hash_addr(HashType hash, AddrType addr);
bool push_front_hash_addr(HashType hash, AddrType addr);
void set_shared(bool shared);
private:
string m_filename;
Fd m_fd;
uint64_t m_size;
// Start of the table, viewed at each granularity.
union {
void *m_void_ptr; // Save some casting
uint8_t *m_byte_ptr; // for pointer arithmetic
Cell *m_cell_ptr; // pointer to one table cell (entry)
Bucket *m_bucket_ptr; // all cells in one LRU unit
Extent *m_extent_ptr; // all buckets in one I/O unit
};
// Matching "end" pointers for the same views.
union {
void *m_void_ptr_end;
uint8_t *m_byte_ptr_end;
Cell *m_cell_ptr_end;
Bucket *m_bucket_ptr_end;
Extent *m_extent_ptr_end;
};
// Counts at each granularity
uint64_t m_buckets;
uint64_t m_extents;
uint64_t m_cells;
// Sets of indexes: dirty ones and missing ones
// (presumably awaiting writeback and not yet loaded, respectively -- TODO confirm)
set<uint64_t> m_buckets_dirty;
set<uint64_t> m_buckets_missing;
// Background threads (their bodies are presumably writeback_loop/prefetch_loop below)
BeesThread m_writeback_thread;
BeesThread m_prefetch_thread;
RateLimiter m_flush_rate_limit;
RateLimiter m_prefetch_rate_limit;
mutex m_extent_mutex;
mutex m_bucket_mutex;
condition_variable m_condvar;
set<HashType> m_toxic_hashes;
BeesStringFile m_stats_file;
LockSet<uint64_t> m_extent_lock_set;
DefaultBool m_shared;
void writeback_loop();
void prefetch_loop();
void try_mmap_flags(int flags);
pair<Cell *, Cell *> get_cell_range(HashType hash);
pair<uint8_t *, uint8_t *> get_extent_range(HashType hash);
void fetch_missing_extent(HashType hash);
void set_extent_dirty(HashType hash);
void flush_dirty_extents();
bool is_toxic_hash(HashType h) const;
// Always false in this version.
bool using_shared_map() const { return false; }
// Not copyable
BeesHashTable(const BeesHashTable &) = delete;
BeesHashTable &operator=(const BeesHashTable &) = delete;
};
ostream &operator<<(ostream &os, const BeesHashTable::Cell &bhte);
// Plain record describing the position of one crawl: a root, an
// (objectid, offset) position, a transaction ID window, and a start time.
// Field semantics are defined by the code that fills them in (not visible here).
struct BeesCrawlState {
uint64_t m_root;
uint64_t m_objectid;
uint64_t m_offset;
uint64_t m_min_transid;
uint64_t m_max_transid;
time_t m_started;
BeesCrawlState();
// Ordering, so states can be kept in ordered containers.
bool operator<(const BeesCrawlState &that) const;
};
// One crawl in progress: a BeesCrawlState plus a set of BeesFileRange
// extents, each with its own mutex.
class BeesCrawl {
shared_ptr<BeesContext> m_ctx;
mutex m_mutex;
set<BeesFileRange> m_extents;
DefaultBool m_deferred;
mutex m_state_mutex;
BeesCrawlState m_state;
// Internal helpers (defined elsewhere)
bool fetch_extents();
void fetch_extents_harder();
bool next_transid();
public:
BeesCrawl(shared_ptr<BeesContext> ctx, BeesCrawlState initial_state);
// Queue-style access to the extents
// (presumably peek returns the next range without removing it -- see definitions)
BeesFileRange peek_front();
BeesFileRange pop_front();
BeesCrawlState get_state();
void set_state(const BeesCrawlState &bcs);
};
// Owns one BeesCrawl per root (m_root_crawl_map, keyed by a 64-bit root
// number), a state file, and two BeesThread members.
// Public interface: opening a root, or a file within a root, as an Fd.
class BeesRoots {
shared_ptr<BeesContext> m_ctx;
BeesStringFile m_crawl_state_file;
BeesCrawlState m_crawl_current;
map<uint64_t, shared_ptr<BeesCrawl>> m_root_crawl_map;
mutex m_mutex;
condition_variable m_condvar;
DefaultBool m_crawl_dirty;
Timer m_crawl_timer;
BeesThread m_crawl_thread;
BeesThread m_writeback_thread;
void insert_new_crawl();
void insert_root(const BeesCrawlState &bcs);
// Uncached variants of the public open_* methods
// (BeesFdCache is a friend and presumably calls these -- TODO confirm)
Fd open_root_nocache(uint64_t root);
Fd open_root_ino_nocache(uint64_t root, uint64_t ino);
uint64_t transid_min();
uint64_t transid_max();
void state_load();
void state_save();
void crawl_roots();
string crawl_state_filename() const;
BeesCrawlState crawl_state_get(uint64_t root);
void crawl_state_set_dirty();
void crawl_state_erase(const BeesCrawlState &bcs);
// Thread bodies (presumably run by m_crawl_thread and m_writeback_thread)
void crawl_thread();
void writeback_thread();
uint64_t next_root(uint64_t root = 0);
void current_state_set(const BeesCrawlState &bcs);
friend class BeesFdCache;
friend class BeesCrawl;
public:
BeesRoots(shared_ptr<BeesContext> ctx);
Fd open_root(uint64_t root);
Fd open_root_ino(uint64_t root, uint64_t ino);
// Convenience overload: unpacks the file ID into (root, ino).
Fd open_root_ino(const BeesFileId &bfi) { return open_root_ino(bfi.root(), bfi.ino()); }
};
// Thin value wrapper around a 64-bit hash.  Converts implicitly to and
// from its underlying integer type; a default-constructed hash is zero.
struct BeesHash {
	using Type = uint64_t;

private:
	Type m_hash;

public:
	// Zero hash
	BeesHash() : m_hash{0} {}
	// Wrap an existing raw value
	BeesHash(Type that) : m_hash{that} {}

	// Replace the stored value with a raw integer
	BeesHash& operator=(const Type that)
	{
		m_hash = that;
		return *this;
	}

	// Read the stored value back as a raw integer
	operator Type() const
	{
		return m_hash;
	}
};
// Stream output for BeesHash (defined elsewhere).
ostream & operator<<(ostream &os, const BeesHash &bh);
// A block of file data identified by (fd, offset, length).  The address,
// data bytes and hash are mutable members that the const "lazy accessors"
// may fill in.
class BeesBlockData {
using Blob = vector<char>;
mutable Fd m_fd;
off_t m_offset;
off_t m_length;
mutable BeesAddress m_addr;
mutable Blob m_data;
mutable BeesHash m_hash;
mutable DefaultBool m_hash_done;
public:
// Constructor with the immutable fields
// (read_length defaults to BLOCK_SIZE_SUMS, which is defined elsewhere)
BeesBlockData(Fd fd, off_t offset, size_t read_length = BLOCK_SIZE_SUMS);
BeesBlockData();
// Non-lazy accessors
Fd fd() const { return m_fd; }
// Renaming
// (range-style names for offset and length: begin, end, size, empty)
off_t begin() const { return m_offset; }
off_t end() const { return m_offset + m_length; }
off_t size() const { return m_length; }
bool empty() const { return !m_length; }
// Lazy accessors may modify const things
const Blob &data() const;
BeesHash hash() const;
BeesAddress addr() const;
bool is_data_zero() const;
bool is_data_equal(const BeesBlockData &that) const;
// Setters
// (returns a reference to a BeesBlockData, presumably *this so calls can be chained)
BeesBlockData &addr(const BeesAddress &a);
friend ostream &operator<<(ostream &, const BeesBlockData &);
};
// A pair of file ranges; the constructor names them src and dst
// (so presumably first = source, second = destination -- see definition).
class BeesRangePair : public pair<BeesFileRange, BeesFileRange> {
public:
BeesRangePair(const BeesFileRange &src, const BeesFileRange &dst);
// Try to enlarge the pair; the meaning of the return value and of
// `constrained` is set by the definition (not visible here).
bool grow(shared_ptr<BeesContext> ctx, bool constrained);
BeesRangePair copy_closed() const;
bool operator<(const BeesRangePair &that) const;
friend ostream & operator<<(ostream &os, const BeesRangePair &brp);
};
// Abstract, named base class for work queues.  A static set of pointers
// (s_all_workers, next to s_mutex) lets for_each_work_queue() visit queues
// (presumably every live queue, registered by the constructor -- TODO confirm).
class BeesWorkQueueBase {
string m_name;
protected:
static mutex s_mutex;
static set<BeesWorkQueueBase *> s_all_workers;
public:
virtual ~BeesWorkQueueBase();
BeesWorkQueueBase(const string &name);
// Name getter and setter
string name() const;
void name(const string &new_name);
// Implemented by the concrete queue
virtual size_t active_size() const = 0;
virtual list<string> peek_active(size_t count) const = 0;
// Call f with work queue pointers
static void for_each_work_queue(function<void(BeesWorkQueueBase *)> f);
};
// Concrete work queue of Task objects, backed by a WorkQueue<Task>.
template <class Task>
class BeesWorkQueue : public BeesWorkQueueBase {
WorkQueue<Task> m_active_queue;
public:
BeesWorkQueue(const string &name);
~BeesWorkQueue();
// Enqueue a task, with or without a size limit
// (how the limit is enforced is in the definition, not visible here).
void push_active(const Task &task, size_t limit);
void push_active(const Task &task);
size_t active_size() const override;
list<string> peek_active(size_t count) const override;
// Take one task out of the queue
Task pop();
};
// A temporary file (an Fd plus the current end offset) used to produce
// file ranges: either a hole of a given size or a copy of an existing range.
class BeesTempFile {
shared_ptr<BeesContext> m_ctx;
Fd m_fd;
off_t m_end_offset;
// Internal helpers (defined elsewhere)
void create();
void realign();
void resize(off_t new_end_offset);
public:
BeesTempFile(shared_ptr<BeesContext> ctx);
// Both return a BeesFileRange (presumably a range within the temporary file -- see definitions)
BeesFileRange make_hole(off_t count);
BeesFileRange make_copy(const BeesFileRange &src);
};
// Two LRU caches of open Fds: one looked up by (context, root) and one by
// (context, root, ino).
class BeesFdCache {
LRUCache<Fd, shared_ptr<BeesContext>, uint64_t> m_root_cache;
LRUCache<Fd, shared_ptr<BeesContext>, uint64_t, uint64_t> m_file_cache;
Timer m_root_cache_timer;
public:
BeesFdCache();
Fd open_root(shared_ptr<BeesContext> ctx, uint64_t root);
Fd open_root_ino(shared_ptr<BeesContext> ctx, uint64_t root, uint64_t ino);
// Insert an already-open fd (presumably into the file cache -- see definition).
void insert_root_ino(shared_ptr<BeesContext> ctx, Fd fd);
};
// Result of resolving an address: a list of BtrfsInodeOffsetRoot references
// and a "toxic" flag.  This is the value type of BeesContext::m_resolve_cache.
struct BeesResolveAddrResult {
BeesResolveAddrResult();
vector<BtrfsInodeOffsetRoot> m_biors;
DefaultBool m_is_toxic;
bool is_toxic() const { return m_is_toxic; }
};
// Central object tying the components together: it holds shared pointers to
// the fd cache, hash table and roots, per-thread temporary files, a cache of
// resolved addresses, the root path and fd, and a blacklist of file IDs.
// Derives from enable_shared_from_this so members can obtain a shared_ptr to it.
class BeesContext : public enable_shared_from_this<BeesContext> {
shared_ptr<BeesContext> m_parent_ctx;
Fd m_home_fd;
shared_ptr<BeesFdCache> m_fd_cache;
shared_ptr<BeesHashTable> m_hash_table;
shared_ptr<BeesRoots> m_roots;
// One temporary file per thread, keyed by thread ID
map<thread::id, shared_ptr<BeesTempFile>> m_tmpfiles;
// Cache of resolve results, keyed by BeesAddress
LRUCache<BeesResolveAddrResult, BeesAddress> m_resolve_cache;
string m_root_path;
Fd m_root_fd;
string m_root_uuid;
// mutable so the const is_blacklisted() can lock it
mutable mutex m_blacklist_mutex;
set<BeesFileId> m_blacklist;
string m_uuid;
Timer m_total_timer;
void set_root_fd(Fd fd);
BeesResolveAddrResult resolve_addr_uncached(BeesAddress addr);
BeesFileRange scan_one_extent(const BeesFileRange &bfr, const Extent &e);
void rewrite_file_range(const BeesFileRange &bfr);
public:
BeesContext(shared_ptr<BeesContext> parent_ctx = nullptr);
void set_root_path(string path);
// Simple accessors
Fd root_fd() const { return m_root_fd; }
Fd home_fd() const { return m_home_fd; }
string root_path() const { return m_root_path; }
string root_uuid() const { return m_root_uuid; }
// Scanning and deduplication operations (defined elsewhere)
BeesFileRange scan_forward(const BeesFileRange &bfr);
BeesRangePair dup_extent(const BeesFileRange &src);
bool dedup(const BeesRangePair &brp);
void blacklist_add(const BeesFileId &fid);
bool is_blacklisted(const BeesFileId &fid) const;
// Address resolution (presumably served from m_resolve_cache, given the
// private resolve_addr_uncached counterpart -- TODO confirm)
BeesResolveAddrResult resolve_addr(BeesAddress addr);
void invalidate_addr(BeesAddress addr);
void dump_status();
void show_progress();
// Component accessors (may construct on demand; see definitions)
shared_ptr<BeesFdCache> fd_cache();
shared_ptr<BeesHashTable> hash_table();
shared_ptr<BeesRoots> roots();
shared_ptr<BeesTempFile> tmpfile();
const Timer &total_timer() const { return m_total_timer; }
// TODO: move the rest of the FD cache methods here
void insert_root_ino(Fd fd);
};
// Works on one BeesAddress: holds the address, a list of
// BtrfsInodeOffsetRoot references, the set of matching file ranges found so
// far, and flags recording what the search found.
class BeesResolver {
shared_ptr<BeesContext> m_ctx;
BeesAddress m_addr;
vector<BtrfsInodeOffsetRoot> m_biors;
set<BeesFileRange> m_ranges;
unsigned m_bior_count;
// We found matching data, so we can dedup
DefaultBool m_found_data;
// We found matching data, so we *did* dedup
DefaultBool m_found_dup;
// We found matching hash, so the hash table is still correct
DefaultBool m_found_hash;
// We found matching physical address, so the hash table isn't totally wrong
DefaultBool m_found_addr;
// We found matching physical address, but data did not match
DefaultBool m_wrong_data;
// The whole thing is a placebo to avoid crippling btrfs performance bugs
DefaultBool m_is_toxic;
// Helpers that search with a full block (BeesBlockData) as the needle
BeesFileRange chase_extent_ref(const BtrfsInodeOffsetRoot &bior, BeesBlockData &needle_bbd);
BeesBlockData adjust_offset(const BeesFileRange &haystack, const BeesBlockData &needle);
void find_matches(bool just_one, BeesBlockData &bbd);
// FIXME: Do we need these? We probably always have at least one BBD
// (same helpers, searching by hash only)
BeesFileRange chase_extent_ref(const BtrfsInodeOffsetRoot &bior, BeesHash hash);
BeesBlockData adjust_offset(const BeesFileRange &haystack, bool inexact, BeesHash needle);
void find_matches(bool just_one, BeesHash hash);
public:
BeesResolver(shared_ptr<BeesContext> ctx, BeesAddress addr);
// Address setter; the no-argument addr() further down is the getter.
BeesAddress addr(BeesAddress new_addr);
// visitor returns true to stop loop, false to continue
bool for_each_extent_ref(BeesBlockData bbd, function<bool(const BeesFileRange &bfr)> visitor);
set<BeesFileRange> find_all_matches(BeesBlockData &bbd);
set<BeesFileRange> find_all_matches(BeesHash hash);
// TODO: Replace these with "for_each_extent_ref"
BeesFileRange find_one_match(BeesBlockData &bbd);
BeesFileRange find_one_match(BeesHash hash);
void replace_src(const BeesFileRange &src_bfr);
BeesFileRange replace_dst(const BeesFileRange &dst_bfr);
// Read-only views of the result flags.  m_wrong_data has no accessor here.
bool found_addr() const { return m_found_addr; }
bool found_data() const { return m_found_data; }
bool found_dup() const { return m_found_dup; }
bool found_hash() const { return m_found_hash; }
bool is_toxic() const { return m_is_toxic; }
// Returns m_bior_count, not m_biors.size().
size_t count() const { return m_bior_count; }
BeesAddress addr() const { return m_addr; }
bool operator<(const BeesResolver &that) const;
};
// A Timer with a limit and a description callback.  Normally created by the
// BEESTOOLONG macro; the default limit is BEES_TOO_LONG.
// Presumably logs the description when the elapsed time exceeds the limit
// (compare the comment on BEES_TOO_LONG) -- the check is in the definitions,
// which are not visible here.
class BeesTooLong : public Timer {
using func_type = function<void(ostream &)>;
double m_limit;
func_type m_func;
public:
// NOTE(review): __PRETTY_FUNCTION__ in this default argument sits inside the
// lambda body, so it likely names the lambda rather than the caller -- confirm.
BeesTooLong(const func_type &func = [](ostream &os) { os << __PRETTY_FUNCTION__; }, double limit = BEES_TOO_LONG);
// Construct with a fixed description string instead of a callback.
BeesTooLong(const string &s, double limit = BEES_TOO_LONG);
// Replace the description callback.
BeesTooLong &operator=(const func_type &s);
~BeesTooLong();
void check() const;
};
// And now, a giant pile of extern declarations
// Format a double as a string (exact format is set by the definition).
string pretty(double d);
// Rate limiter consulted by the BEESINFO macro.
extern RateLimiter bees_info_rate_limit;
// Sync the given fd (mechanism is in the definition, not visible here).
void bees_sync(int fd);
// Format a time_t as a string.
string format_time(time_t t);
#endif

52
src/fiemap.cc Normal file
View File

@ -0,0 +1,52 @@
#include "crucible/fd.h"
#include "crucible/fs.h"
#include "crucible/error.h"
#include "crucible/string.h"
#include <iostream>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
using namespace crucible;
using namespace std;
// Print the FIEMAP extent map of a file, including holes between extents.
//
// Usage: fiemap FILE [START [LENGTH [FLAGS]]]
// The numeric arguments are parsed with base auto-detection (stoull base 0),
// so decimal, octal (0...) and hex (0x...) are all accepted.
//
// Exits with EXIT_FAILURE if anything threw (missing argument, open or ioctl
// failure), and EXIT_SUCCESS otherwise.
int
main(int argc, char **argv)
{
	// catch_all reports any exception thrown by the body and returns a
	// true value in that case; use it to set the exit status.
	const bool failed = catch_all([&]() {
		THROW_CHECK1(invalid_argument, argc, argc > 1);
		string filename = argv[1];

		cout << "File: " << filename << endl;
		Fd fd = open_or_die(filename, O_RDONLY);
		Fiemap fm;
		fm.m_max_count = 100;
		if (argc > 2) { fm.fm_start = stoull(argv[2], nullptr, 0); }
		if (argc > 3) { fm.fm_length = stoull(argv[3], nullptr, 0); }
		if (argc > 4) { fm.fm_flags = stoull(argv[4], nullptr, 0); }
		// Clamp the length so that start + length cannot pass the maximum offset
		fm.fm_length = min(fm.fm_length, FIEMAP_MAX_OFFSET - fm.fm_start);
		uint64_t stop_at = fm.fm_start + fm.fm_length;
		// End of the last extent printed; a gap before the next extent is a hole
		uint64_t last_byte = fm.fm_start;
		do {
			fm.do_ioctl(fd);
			// cerr << fm;
			// If no extents come back, this sentinel ends the loop
			uint64_t last_logical = FIEMAP_MAX_OFFSET;
			for (auto &extent : fm.m_extents) {
				if (extent.fe_logical > last_byte) {
					cout << "Log " << to_hex(last_byte) << ".." << to_hex(extent.fe_logical) << " Hole" << endl;
				}
				cout << "Log " << to_hex(extent.fe_logical) << ".." << to_hex(extent.fe_logical + extent.fe_length)
					<< " Phy " << to_hex(extent.fe_physical) << ".." << to_hex(extent.fe_physical + extent.fe_length)
					<< " Flags " << fiemap_extent_flags_ntoa(extent.fe_flags) << endl;
				last_logical = extent.fe_logical + extent.fe_length;
				last_byte = last_logical;
			}
			// Continue from the end of the last extent seen
			fm.fm_start = last_logical;
		} while (fm.fm_start < stop_at);
	});
	exit(failed ? EXIT_FAILURE : EXIT_SUCCESS);
}

40
src/fiewalk.cc Normal file
View File

@ -0,0 +1,40 @@
#include "crucible/extentwalker.h"
#include "crucible/error.h"
#include "crucible/string.h"
#include <iostream>
#include <fcntl.h>
#include <unistd.h>
using namespace crucible;
using namespace std;
// Walk the extents of a file with BtrfsExtentWalker and print each one.
//
// Usage: fiewalk FILE [START]
// START is parsed with base auto-detection (stoull base 0) and defaults to 0.
//
// Exits with EXIT_FAILURE if anything threw (missing argument, open failure,
// walker error), and EXIT_SUCCESS otherwise.
int
main(int argc, char **argv)
{
	// catch_all reports any exception thrown by the body and returns a
	// true value in that case; use it to set the exit status.
	const bool failed = catch_all([&]() {
		THROW_CHECK1(invalid_argument, argc, argc > 1);
		string filename = argv[1];

		cout << "File: " << filename << endl;
		Fd fd = open_or_die(filename, O_RDONLY);
		BtrfsExtentWalker ew(fd);
		off_t pos = 0;
		if (argc > 2) { pos = stoull(argv[2], nullptr, 0); }
		ew.seek(pos);
		// Print the current extent, then advance until next() reports no more
		do {
			// cout << "\n\n>>>" << ew.current() << "<<<\n\n" << endl;
			cout << ew.current() << endl;
		} while (ew.next());
#if 0
		cout << "\n\n\nAnd now, backwards...\n\n\n" << endl;
		do {
			cout << "\n\n>>>" << ew.current() << "<<<\n\n" << endl;
		} while (ew.prev());
		cout << "\n\n\nDone!\n\n\n" << endl;
#endif
	});
	exit(failed ? EXIT_FAILURE : EXIT_SUCCESS);
}

5
test/.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
*
!Makefile
!*.c
!*.cc
!*.h

36
test/Makefile Normal file
View File

@ -0,0 +1,36 @@
# Unit test programs; each one is built from the .cc file of the same name.
PROGRAMS = \
	chatter \
	crc64 \
	execpipe \
	fd \
	interp \
	limits \
	path \
	process \

# These targets do not produce files of the same name.
.PHONY: all test clean

all: test

# Build every test program, then run them in order, stopping at the first failure.
test: $(PROGRAMS)
	set -x; for prog in $(PROGRAMS); do ./$$prog || exit 1; done

include ../makeflags

LIBS = -lcrucible
# Link against ../lib and embed its absolute path as the runtime library path.
LDFLAGS = -L../lib -Wl,-rpath=$(shell realpath ../lib)

# Regenerate header dependencies.  The output goes to a temporary file that is
# truncated first (not appended to), so leftovers from an interrupted run
# cannot end up in depends.mk.
depends.mk: *.cc
	for x in *.cc; do $(CXX) $(CXXFLAGS) -M "$$x"; done > depends.mk.new
	mv -fv depends.mk.new depends.mk

-include depends.mk

# NOTE(review): this rule only applies when a matching %.h exists next to the
# .cc file; otherwise make presumably falls back to its built-in rule -- confirm
# whether the %.h prerequisite is intended.
%.o: %.cc %.h ../makeflags
	-echo "Implicit rule %.o: %.cc" >&2
	$(CXX) $(CXXFLAGS) -o "$@" -c "$<"

%: %.o ../makeflags
	-echo "Implicit rule %: %.o" >&2
	$(CXX) $(CXXFLAGS) -o "$@" "$<" $(LDFLAGS) $(LIBS)

# Remove object files only.
clean:
	-rm -fv *.o

49
test/chatter.cc Normal file
View File

@ -0,0 +1,49 @@
#include "tests.h"
#include "crucible/chatter.h"
#include <ios>
#include <cassert>
#include <cstring>
#include <cstdlib>
#include <unistd.h>
using namespace crucible;
// Emit a single-line message through the CHATTER macro.
// There are no assertions; the output on stderr is meant to be inspected.
static
void
test_chatter_one()
{
cerr << endl;
CHATTER("simple chatter case");
}
// Emit a message containing an embedded newline through the CHATTER macro.
// No assertions; output is meant to be inspected.
static
void
test_chatter_two()
{
cerr << endl;
CHATTER("two lines\nof chatter");
}
// Build a message from several insertions into one named Chatter object:
// text without a newline, text with leading and trailing newlines, and text
// containing a literal backslash-n (not a newline).
// No assertions; output is meant to be inspected.
static
void
test_chatter_three()
{
cerr << endl;
Chatter c("tct");
c << "More complicated";
c << "\ncase with\n";
c << "some \\ns";
}
// Run the chatter tests in order via RUN_A_TEST, then exit successfully.
int
main(int, char**)
{
RUN_A_TEST(test_chatter_one());
RUN_A_TEST(test_chatter_two());
RUN_A_TEST(test_chatter_three());
exit(EXIT_SUCCESS);
}

39
test/crc64.cc Normal file
View File

@ -0,0 +1,39 @@
#include "tests.h"
#include "crucible/crc64.h"
#include <cassert>
using namespace crucible;
// Check the single-argument crc64 overload against known values, including
// the empty string (expected 0) and a string of non-ASCII bytes.
static
void
test_getcrc64_strings()
{
assert(Digest::CRC::crc64("John") == 5942451273432301568);
assert(Digest::CRC::crc64("Paul") == 5838402100630913024);
assert(Digest::CRC::crc64("George") == 6714394476893704192);
assert(Digest::CRC::crc64("Ringo") == 6038837226071130112);
assert(Digest::CRC::crc64("") == 0);
// This expected value exceeds the signed 64-bit range, hence the ULL suffix
assert(Digest::CRC::crc64("\377\277\300\200") == 15615382887346470912ULL);
}
// Check the (pointer, length) crc64 overload with the same inputs and
// expected values as the string test, passing explicit byte counts.
static
void
test_getcrc64_byte_arrays()
{
assert(Digest::CRC::crc64("John", 4) == 5942451273432301568);
assert(Digest::CRC::crc64("Paul", 4) == 5838402100630913024);
assert(Digest::CRC::crc64("George", 6) == 6714394476893704192);
assert(Digest::CRC::crc64("Ringo", 5) == 6038837226071130112);
assert(Digest::CRC::crc64("", 0) == 0);
// This expected value exceeds the signed 64-bit range, hence the ULL suffix
assert(Digest::CRC::crc64("\377\277\300\200", 4) == 15615382887346470912ULL);
}
// Run both crc64 tests via RUN_A_TEST, then exit successfully.
int
main(int, char**)
{
RUN_A_TEST(test_getcrc64_strings());
RUN_A_TEST(test_getcrc64_byte_arrays());
exit(EXIT_SUCCESS);
}

64
test/execpipe.cc Normal file
View File

@ -0,0 +1,64 @@
#include "tests.h"
#include "crucible/execpipe.h"
#include <ios>
#include <cassert>
#include <cstring>
#include <cstdlib>
#include <stdexcept>
#include <unistd.h>
using namespace crucible;
using namespace std;
#if 1 // Needs rework
// Run "echo Hello, World!" through crucible's popen and check that reading
// from the returned fd yields that text.
static inline
void
test_hello_world()
{
// alarm(9);
Fd fd = popen([]() { return system("echo Hello, World!"); });
char buf[1024];
// rv receives the number of bytes read; it starts at (size_t)-1 so that an
// untouched value would be visible
size_t rv = -1;
read_partial_or_die(fd, buf, rv);
assert(rv > 0);
// Drop the last byte read (the newline written by echo).
// NOTE(review): assumes the whole line arrives in a single read -- confirm.
string b(buf, buf + rv - 1);
// cerr << "hello_world says: '" << b << "'" << endl;
assert(b == "Hello, World!");
}
static inline
void
test_read_limit(size_t limit = 4096)
{
alarm(9);
Fd fd = popen([]() { return system("yes Hello!"); });
try {
string b = read_all(fd, limit);
} catch (out_of_range &re) {
return;
}
assert(!"no exception thrown by read_all");
}
#endif
// Declared here because it is not declared in the headers this test includes.
// Returns bool; main() below asserts that it is true.
namespace crucible {
extern bool assert_no_leaked_fds();
};
// Run the execpipe tests, asserting after each group that
// assert_no_leaked_fds() returns true.  The read limit test runs with limits
// of 4095, 4096 and 4097, i.e. just below, at, and just above 4096.
int
main(int, char**)
{
#if 1
RUN_A_TEST(test_hello_world());
assert(assert_no_leaked_fds());
RUN_A_TEST(test_read_limit(4095));
RUN_A_TEST(test_read_limit(4096));
RUN_A_TEST(test_read_limit(4097));
assert(assert_no_leaked_fds());
#endif
exit(EXIT_SUCCESS);
}

393
test/fd.cc Normal file
View File

@ -0,0 +1,393 @@
// TEST DATA DO NOT REMOVE THIS LINE
#include "tests.h"
#include "crucible/chatter.h"
#include "crucible/error.h"
#include "crucible/fd.h"
#include <cassert>
#include <cstring>
#include <cstdlib>
#include <ios>
#include <map>
#include <string>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
using namespace crucible;
// A default-constructed Fd must be constructible and destructible without
// ever holding a descriptor.
static
void
test_default_constructor_and_destructor()
{
Fd f;
}
// Open this source file (fd.cc, relative to the current directory) and check
// that it begins with the marker comment on its first line.
static
void
test_basic_read()
{
Fd f = open_or_die("fd.cc");
const char test_string[] = "// TEST DATA DO NOT REMOVE THIS LINE";
// Length without the terminating NUL
const int test_string_len = sizeof(test_string) - 1;
char read_buf[test_string_len];
// Presumably reads exactly sizeof(read_buf) bytes -- see read_or_die
read_or_die(f, read_buf);
// read_buf is not NUL-terminated, so compare with an explicit length
assert(!strncmp(read_buf, test_string, test_string_len));
f->close();
}
// Create (or truncate) tmp/fd-read-write, check that a partial read of the
// empty file returns zero bytes and leaves the destination untouched, then
// write a struct with pwrite_or_die and read it back with pread_or_die.
// Requires a tmp/ directory under the current directory.
static
void
test_create_read_write()
{
Fd f = open_or_die("tmp/fd-read-write", O_CREAT | O_RDWR | O_TRUNC);
// tso is the value written out; tsi receives what is read back
struct test_str_out {
int i;
float f;
} tso = {
.i = 5,
.f = 3.14159,
}, tsi = {
.i = 0,
.f = 0,
};
size_t bytes_read = 0;
// The file was just truncated, so there is nothing to read
read_partial_or_die(f, tsi, bytes_read);
assert(bytes_read == 0);
assert(tsi.i == 0);
assert(tsi.f == 0);
// Same value (1024) for the write and the read (presumably the file offset)
pwrite_or_die(f, tso, 1024);
pread_or_die(f, tsi, 1024);
assert(!memcmp(&tsi, &tso, sizeof(tsi)));
}
// Smoke test for o_flags_ntoa and o_mode_ntoa: print the result for a range
// of open flags and permission bits.  There are no assertions; the output on
// stderr is meant to be inspected.
static
void
test_flags()
{
	// Print the expression text and flush before evaluating the expression,
	// so the label is already out if evaluation fails.  Wrapped in
	// do/while so it expands to a single statement, and undefined at the
	// end of the function so it does not leak into the rest of the file.
#define FLAG_TEST(x) do { cerr << #x << ": " << flush; cerr << x << endl; } while (0)
	FLAG_TEST(o_flags_ntoa(O_RDONLY));
	FLAG_TEST(o_flags_ntoa(O_WRONLY));
	FLAG_TEST(o_flags_ntoa(O_RDWR));
	FLAG_TEST(o_flags_ntoa(O_CREAT|O_WRONLY|O_TRUNC));
	// Each single mode bit in turn
	FLAG_TEST(o_mode_ntoa(0001));
	FLAG_TEST(o_mode_ntoa(0002));
	FLAG_TEST(o_mode_ntoa(0004));
	FLAG_TEST(o_mode_ntoa(0010));
	FLAG_TEST(o_mode_ntoa(0020));
	FLAG_TEST(o_mode_ntoa(0040));
	FLAG_TEST(o_mode_ntoa(0100));
	FLAG_TEST(o_mode_ntoa(0200));
	FLAG_TEST(o_mode_ntoa(0400));
	FLAG_TEST(o_mode_ntoa(01000));
	FLAG_TEST(o_mode_ntoa(02000));
	FLAG_TEST(o_mode_ntoa(04000));
	FLAG_TEST(o_mode_ntoa(010000));
	FLAG_TEST(o_mode_ntoa(020000));
	FLAG_TEST(o_mode_ntoa(040000));
	// Common combinations
	FLAG_TEST(o_mode_ntoa(0777));
	FLAG_TEST(o_mode_ntoa(02775));
	FLAG_TEST(o_mode_ntoa(01777));
	FLAG_TEST(o_mode_ntoa(022));
	FLAG_TEST(o_mode_ntoa(077));
#undef FLAG_TEST
}
// Test code
// Declared here because it is not declared in the headers this test includes.
namespace crucible {
extern bool assert_no_leaked_fds();
};
// Calls assert_no_leaked_fds() from its destructor.  The static instance
// below is destroyed during static destruction, so the check runs at exit.
// NOTE(review): the bool result is ignored here -- confirm that is intended.
struct FdChecker {
~FdChecker()
{
assert_no_leaked_fds();
}
};
static FdChecker fd_destructor_check;
// Assert that descriptor number i is closed in this process (the default),
// or that it is open when closed == false.  "Open" means that the entry
// /proc/<pid>/fd/<i> exists.
static inline void assert_is_closed(int i, bool closed = true)
{
	char proc_path[1024];
	const pid_t pid = getpid();
	snprintf(proc_path, sizeof(proc_path), "/proc/%d/fd/%d", pid, i);

	// access() returns 0 only when the /proc entry exists
	const bool is_open = (access(proc_path, F_OK) == 0);

	// The observed state must be the opposite of "closed"
	assert(is_open != closed);
}
// An Fd constructed from an open descriptor must close that descriptor when
// the Fd goes out of scope.
static void test_construct_destroy()
{
int i;
{
Fd fd(open("fd.cc", O_RDONLY));
// Remember the raw descriptor number so it can be checked after destruction
i = fd;
}
assert_is_closed(i);
}
// A copy-constructed Fd must report the same descriptor number as the
// original, and the descriptor must be closed once both are destroyed.
static void test_construct_copy()
{
int i;
{
Fd fd(open("fd.cc", O_RDONLY));
i = fd;
Fd fd2(fd);
int j = fd2;
assert(i == j);
}
assert_is_closed(i);
}
// Default-construct two Fds, assign a raw descriptor to the first and the
// first to the second.  Both must report the same descriptor number, and the
// descriptor must be closed after both are destroyed.
static void test_construct_default_assign()
{
int i;
{
i = open("fd.cc", O_RDONLY);
Fd fd;
fd = i;
Fd fd2;
fd2 = fd;
int j = fd2;
assert(i == j);
}
assert_is_closed(i);
}
// Assign the same raw descriptor number to two separate Fds (fd2 is assigned
// from the int, not from fd).  Both must report that number, and the
// descriptor must be closed after both are destroyed.
static void test_assign_int()
{
int i;
{
i = open("fd.cc", O_RDONLY);
Fd fd;
fd = i;
Fd fd2;
fd2 = i;
int j = fd2;
assert(i == j);
}
assert_is_closed(i);
}
// Assign the same raw descriptor to an inner-scope Fd and an outer-scope Fd.
// The descriptor must stay open after the inner Fd is destroyed and be
// closed only after the outer Fd is destroyed as well.
static void test_assign_int_survives_scope()
{
int i, j;
{
Fd fd2;
{
i = open("fd.cc", O_RDONLY);
Fd fd;
fd = i;
fd2 = i;
j = fd2;
assert(i == j);
}
// Inner fd is gone, fd2 is still alive
assert_is_closed(i, false);
}
// Both are gone
assert_is_closed(i, true);
}
// Assigning -1 to an Fd must close the descriptor it held.  Afterwards the
// Fd converts to -1, and dereferencing it with -> must throw
// (catch_all returns a true value when it caught an exception).
static void test_assign_int_close()
{
int i;
{
Fd fd(open("fd.cc", O_RDONLY));
i = fd;
assert_is_closed(i, false);
fd = -1;
assert_is_closed(i, true);
int j = fd;
assert(j == -1);
// Bonus conversion operator tests
assert(fd == -1);
// Chasing a closed ref now triggers an exception
assert(catch_all([&]() { return fd->get_fd() == -1; }));
}
assert_is_closed(i, true);
}
// Same as test_assign_int_close, but assigns -2: the descriptor must be
// closed, and the Fd must then read back as -1 rather than -2.
static void test_assign_int_close_2()
{
int i;
{
Fd fd(open("fd.cc", O_RDONLY));
i = fd;
assert_is_closed(i, false);
// -2 is null...
fd = -2;
assert_is_closed(i, true);
int j = fd;
// ...but it will come back as -1
assert(j == -1);
// Bonus conversion operator tests
assert(fd == -1);
// Chasing a closed ref now triggers an exception
assert(catch_all([&]() { return fd->get_fd() == -1; }));
}
assert_is_closed(i, true);
}
// Store Fds in a std::map and check that the descriptors stay open after the
// original Fd objects go out of scope, because the map (declared in the
// outer scope) still holds copies.  Also checks that operator[] on a missing
// key yields an Fd that converts to -1.
static void test_map()
{
int a, b, c;
map<string, Fd> fds;
{
Fd fd_dot_cc = open("fd.cc", O_RDONLY);
a = fd_dot_cc;
assert_is_closed(a, false);
Fd fd_tests_h = open("tests.h", O_RDONLY);
b = fd_tests_h;
assert_is_closed(b, false);
Fd fd_makefile = open("Makefile", O_RDONLY);
c = fd_makefile;
assert_is_closed(c, false);
// Insert by operator[] assignment and by insert()
fds["fd.cc"] = fd_dot_cc;
fds.insert(make_pair("tests.h", fd_tests_h));
// Not inserted yet: operator[] default-constructs an Fd
int j = fds["Makefile"];
assert(j == -1);
fds["Makefile"] = fd_makefile;
assert_is_closed(a, false);
assert_is_closed(b, false);
assert_is_closed(c, false);
}
// The local Fds are gone, but the map still holds all three
assert_is_closed(a, false);
assert_is_closed(b, false);
assert_is_closed(c, false);
}
// Calling close() through the Fd must close the descriptor immediately.
static void test_close_method()
{
Fd fd = open("fd.cc", O_RDONLY);
int i = fd;
assert_is_closed(i, false);
fd->close();
assert_is_closed(i, true);
}
// Calling close() through one Fd must also close the descriptor as seen
// through a copy of that Fd.
static void test_shared_close_method()
{
Fd fd = open("fd.cc", O_RDONLY);
int i = fd;
Fd fd2 = fd;
assert_is_closed(i, false);
// fd2 converts to its descriptor number here
assert_is_closed(fd2, false);
fd->close();
assert_is_closed(i, true);
// After the close, fd2 converts to a number that is not an open descriptor
assert_is_closed(fd2, true);
}
// A resource type derived from Fd::resource_type that also remembers the
// name of the file it opened.  The constructor opens the file read-only,
// stores the descriptor through the base class assignment operator, and
// asserts that the descriptor is open.
struct DerivedFdResource : public Fd::resource_type {
string m_name;
DerivedFdResource(string name) : m_name(name) {
Fd::resource_type::operator=(open(name.c_str(), O_RDONLY));
assert_is_closed(this->get_fd(), false);
}
const string &name() const { return m_name; }
};
// An Fd whose resource is a DerivedFdResource.  operator-> returns the
// resource as the derived type so that name() is reachable.  Default
// construction is private, so a DerivedFd is always created from a name.
struct DerivedFd : public Fd {
using resource_type = DerivedFdResource;
DerivedFd(string name) {
shared_ptr<DerivedFdResource> ptr = make_shared<DerivedFdResource>(name);
// Store the derived resource through the base class pointer type
Fd::operator=(static_pointer_cast<Fd::resource_type>(ptr));
}
shared_ptr<DerivedFdResource> operator->() const {
shared_ptr<DerivedFdResource> rv = cast<DerivedFdResource>();
// Throw out_of_range if the cast yielded an empty pointer
THROW_CHECK1(out_of_range, rv, rv);
return rv;
}
private:
DerivedFd() = default;
};
// A DerivedFd must expose the derived resource's name(), and must be
// copyable both to another DerivedFd and to a plain Fd while the descriptor
// stays open.
static void test_derived_resource_type()
{
DerivedFd fd("fd.cc");
assert_is_closed(fd, false);
assert(fd->name() == "fd.cc");
DerivedFd fd3(fd);
assert_is_closed(fd, false);
assert_is_closed(fd3, false);
// Copy to the base type
Fd fd2(fd3);
assert_is_closed(fd, false);
assert_is_closed(fd2, false);
assert_is_closed(fd3, false);
}
// Check Fd::cast<>() in both directions:
// - fd (a DerivedFd) and fd2 (a plain Fd copied from it) cast to both the
//   base and the derived resource type;
// - fd3 (a plain Fd from open()) casts to the base resource type, while
//   casting it to the derived type must throw (catch_all returns a true
//   value when it caught an exception).
static void test_derived_cast()
{
DerivedFd fd("fd.cc");
assert_is_closed(fd, false);
Fd fd2(fd);
Fd fd3 = open("fd.cc", O_RDONLY);
assert(fd->name() == "fd.cc");
assert(fd.cast<Fd::resource_type>());
assert(fd.cast<DerivedFd::resource_type>());
assert(fd2.cast<Fd::resource_type>());
assert(fd2.cast<DerivedFd::resource_type>());
assert(fd3.cast<Fd::resource_type>());
assert(catch_all([&](){ assert(!fd3.cast<DerivedFd::resource_type>()); } ));
}
// Stores a mix of DerivedFd and plain Fd handles in a map<string, Fd> and
// checks that the descriptors remain open after the original handles go out
// of scope, i.e. while only the copies held by the map remain.
static void test_derived_map()
{
// Plain integer values of the three descriptors, kept for checks outside the inner scope.
int a, b, c;
map<string, Fd> fds;
{
DerivedFd fd_dot_cc("fd.cc");
a = fd_dot_cc;
assert_is_closed(a, false);
Fd fd_tests_h = open("tests.h", O_RDONLY);
b = fd_tests_h;
assert_is_closed(b, false);
DerivedFd fd_makefile("Makefile");
c = fd_makefile;
assert_is_closed(c, false);
// Assigning a DerivedFd into a map slot of type Fd.
fds["fd.cc"] = fd_dot_cc;
fds.insert(make_pair("tests.h", fd_tests_h));
// operator[] default-constructs the missing entry; a default Fd is expected to convert to -1.
int j = fds["Makefile"];
assert(j == -1);
fds["Makefile"] = fd_makefile;
assert_is_closed(a, false);
assert_is_closed(b, false);
assert_is_closed(c, false);
}
// The local handles are gone; the map still holds copies, so nothing should be closed yet.
assert_is_closed(a, false);
assert_is_closed(b, false);
assert_is_closed(c, false);
}
// Runs every Fd test in sequence and then checks that no descriptors were
// leaked.  The signature uses the standard `char **` form: ISO C++ only
// guarantees `int main()` and `int main(int, char **)`, and the other test
// programs in this project use the same form.
// NOTE(review): assert_no_leaked_fds() is defined outside this excerpt.
int main(int, char **)
{
	RUN_A_TEST(test_default_constructor_and_destructor());
	RUN_A_TEST(test_basic_read());
	RUN_A_TEST(test_create_read_write());
	RUN_A_TEST(test_flags());
	RUN_A_TEST(test_construct_destroy());
	RUN_A_TEST(test_construct_copy());
	RUN_A_TEST(test_construct_default_assign());
	RUN_A_TEST(test_assign_int());
	RUN_A_TEST(test_assign_int_survives_scope());
	RUN_A_TEST(test_assign_int_close());
	RUN_A_TEST(test_assign_int_close_2());
	RUN_A_TEST(test_map());
	RUN_A_TEST(test_close_method());
	RUN_A_TEST(test_shared_close_method());
	RUN_A_TEST(test_derived_resource_type());
	RUN_A_TEST(test_derived_map());
	RUN_A_TEST(test_derived_cast());
	// Must run last: every handle created by the tests above is out of scope by now.
	assert_no_leaked_fds();
	return 0;
}

88
test/interp.cc Normal file
View File

@ -0,0 +1,88 @@
#include "tests.h"
#include "crucible/interp.h"
using namespace crucible;
using namespace std;
/***********************************************************************
How this should work:
Interpreter reads an arg list:
argv[0] --method0args --method1arg arg1 --method1arg=arg1 -- args...
argv[0] should look up a shared_ptr<Command> which creates an object of
type shared_ptr<Process>. This object is used to receive args by
method calls or one at a time.
<Command> and <Process> can be the same object, or not.
Process p methods:
p->spawn(Interp*) -> Process
p->exec(ArgList) -> Process / Result
p->method (from ArgParser<>)
p->finish() -> void (destroys object without early destruction warnings...?)
p->~Process() -> complains loudly if finish() not called first...?
Result might be a pair of Process, string. Or just string.
ArgParser should be more like GetOpt:
build a dictionary and an arg list from arguments
Process methods should interrogate ArgParser
ArgParser might have a table of boolean and string option names so it can reject invalid options
but if it had that, we could also pass in Process and have it call methods on it
...but that is a _lot_ of pointer-hiding when we could KISS
...but if we had that solved, argparser tables look like lists of method names
ArgParser<T> has a table of names and methods on object of type T
ArgParser hides everything behind void* and hands off to a compiled implementation to do callbacks
Extreme simplification: arguments are themselves executable
so '--method_foo arg' really means construct MethodFoo(arg) and cast to shared_ptr<ProcArg>
then Process->invokeSomething(ProcArg)
too extreme, use argparser instead
***********************************************************************/
// Smoke test for ArgParser: builds a parser with no options registered and
// asks it to parse a three-element argument list with a null first argument.
// Nothing is asserted about the outcome, so this only checks that parse()
// returns.
void
test_arg_parser()
{
ArgParser ap;
ArgList al( { "abc", "--def", "ghi" } );
ap.parse(NULL, al);
}
// Simple target object for the member-function-pointer experiment below:
// three fields, each with a setter that logs the new value to stderr before
// storing it.
struct Thing {
	// Numeric members get explicit initial values so a default-constructed
	// Thing never exposes indeterminate data.
	int m_i = 0;
	double m_d = 0.0;
	std::string m_s;
	// Each setter logs "<field> = <value>" to stderr, then stores the value.
	void set_i(int i) { std::cerr << "i = " << i << std::endl; m_i = i; }
	void set_d(double d) { std::cerr << "d = " << d << std::endl; m_d = d; }
	void set_s(std::string s) { std::cerr << "s = " << s << std::endl; m_s = s; }
};
// Calls the member function `f` on object `t` with argument `a`, after
// logging this instantiation's signature and the argument value to stderr.
//   t - object to invoke the member function on
//   f - pointer to a member function of T accepting something A converts to
//   a - argument forwarded (by copy) to the member function
template <typename F, typename T, typename A>
void
assign(T& t, F f, A a)
{
	// Trace line: "<pretty function> - a = <value>"
	std::ostream &log = std::cerr;
	log << __PRETTY_FUNCTION__;
	log << " - a = " << a;
	log << std::endl;

	// Dispatch through the pointer-to-member.
	(t.*f)(a);
}
// Runs the ArgParser smoke test, then demonstrates assign() by calling
// Thing::set_i through a pointer-to-member and printing the stored value.
int
main(int, char**)
{
RUN_A_TEST(test_arg_parser());
Thing p;
// Only m_i is written (to 5) and read back here; m_d and m_s are not used.
assign(p, &Thing::set_i, 5);
cerr << "p.m_i = " << p.m_i << endl;
exit(EXIT_SUCCESS);
}

325
test/limits.cc Normal file
View File

@ -0,0 +1,325 @@
#include "tests.h"
#include "crucible/error.h"
#include "crucible/limits.h"
#include <cassert>
using namespace crucible;
// Quiet counterpart of catch_all: runs f and reports how it finished without
// printing anything.
// Returns 0 if f returned normally, 1 if it threw an exception derived from
// std::exception, and -1 if it threw anything else.
static
int
silent_catch_all(const std::function<void()> &f)
{
	int outcome = 0;
	try {
		f();
	} catch (const std::exception &) {
		outcome = 1;
	} catch (...) {
		outcome = -1;
	}
	return outcome;
}
// SHOULD_FAIL(expr): asserts that evaluating expr throws an exception derived
// from std::exception (silent_catch_all returns 1 only in that case).
#define SHOULD_FAIL(expr) assert(1 == silent_catch_all([&]() { (expr); }))
// SHOULD_PASS(expr, result): asserts that expr evaluates without throwing and
// compares equal to result.  A value mismatch trips the inner assert; an
// exception makes silent_catch_all return nonzero and trips the outer assert.
#define SHOULD_PASS(expr, result) assert(0 == silent_catch_all([&]() { assert((result) == (expr)); }))
// Checks that ranged_cast refuses (by throwing) to convert the negative value
// -1, held in an off_t, to each of the unsigned integer types listed below.
static
void
test_cast_signed_negative_to_unsigned()
{
off_t tv = -1;
SHOULD_FAIL(ranged_cast<uint64_t>(tv));
SHOULD_FAIL(ranged_cast<uint32_t>(tv));
SHOULD_FAIL(ranged_cast<uint16_t>(tv));
SHOULD_FAIL(ranged_cast<uint8_t>(tv));
SHOULD_FAIL(ranged_cast<unsigned long long>(tv));
SHOULD_FAIL(ranged_cast<unsigned long>(tv));
SHOULD_FAIL(ranged_cast<unsigned int>(tv));
SHOULD_FAIL(ranged_cast<unsigned short>(tv));
SHOULD_FAIL(ranged_cast<unsigned char>(tv));
}
// Checks that the int value 1 passes through ranged_cast unchanged for every
// integer type listed below, signed and unsigned, fixed-width and built-in.
static
void
test_cast_1_to_things()
{
auto tv = 1;
SHOULD_PASS(ranged_cast<off_t>(tv), 1);
SHOULD_PASS(ranged_cast<uint64_t>(tv), 1);
SHOULD_PASS(ranged_cast<uint32_t>(tv), 1);
SHOULD_PASS(ranged_cast<uint16_t>(tv), 1);
SHOULD_PASS(ranged_cast<uint8_t>(tv), 1);
SHOULD_PASS(ranged_cast<int64_t>(tv), 1);
SHOULD_PASS(ranged_cast<int32_t>(tv), 1);
SHOULD_PASS(ranged_cast<int16_t>(tv), 1);
SHOULD_PASS(ranged_cast<int8_t>(tv), 1);
SHOULD_PASS(ranged_cast<unsigned long long>(tv), 1);
SHOULD_PASS(ranged_cast<unsigned long>(tv), 1);
SHOULD_PASS(ranged_cast<unsigned int>(tv), 1);
SHOULD_PASS(ranged_cast<unsigned short>(tv), 1);
SHOULD_PASS(ranged_cast<unsigned char>(tv), 1);
SHOULD_PASS(ranged_cast<signed long long>(tv), 1);
SHOULD_PASS(ranged_cast<signed long>(tv), 1);
SHOULD_PASS(ranged_cast<signed int>(tv), 1);
SHOULD_PASS(ranged_cast<signed short>(tv), 1);
SHOULD_PASS(ranged_cast<signed char>(tv), 1);
}
// Checks ranged_cast with the int value 128: one more than the largest value
// of a signed 8-bit type.  The cast is expected to throw for int8_t and
// signed char and to succeed for every other type listed.
static
void
test_cast_128_to_things()
{
auto tv = 128;
SHOULD_PASS(ranged_cast<off_t>(tv), 128);
SHOULD_PASS(ranged_cast<uint64_t>(tv), 128);
SHOULD_PASS(ranged_cast<uint32_t>(tv), 128);
SHOULD_PASS(ranged_cast<uint16_t>(tv), 128);
SHOULD_PASS(ranged_cast<uint8_t>(tv), 128);
SHOULD_PASS(ranged_cast<int64_t>(tv), 128);
SHOULD_PASS(ranged_cast<int32_t>(tv), 128);
SHOULD_PASS(ranged_cast<int16_t>(tv), 128);
SHOULD_FAIL(ranged_cast<int8_t>(tv));
SHOULD_PASS(ranged_cast<unsigned long long>(tv), 128);
SHOULD_PASS(ranged_cast<unsigned long>(tv), 128);
SHOULD_PASS(ranged_cast<unsigned int>(tv), 128);
SHOULD_PASS(ranged_cast<unsigned short>(tv), 128);
SHOULD_PASS(ranged_cast<unsigned char>(tv), 128);
SHOULD_PASS(ranged_cast<signed long long>(tv), 128);
SHOULD_PASS(ranged_cast<signed long>(tv), 128);
SHOULD_PASS(ranged_cast<signed int>(tv), 128);
SHOULD_PASS(ranged_cast<signed short>(tv), 128);
SHOULD_FAIL(ranged_cast<signed char>(tv));
}
// Checks ranged_cast with the int value 256: one more than the largest value
// of an unsigned 8-bit type.  The cast is expected to throw for all four
// 8-bit types (uint8_t, int8_t, unsigned char, signed char) and to succeed
// for every wider type listed.
static
void
test_cast_256_to_things()
{
auto tv = 256;
SHOULD_PASS(ranged_cast<off_t>(tv), 256);
SHOULD_PASS(ranged_cast<uint64_t>(tv), 256);
SHOULD_PASS(ranged_cast<uint32_t>(tv), 256);
SHOULD_PASS(ranged_cast<uint16_t>(tv), 256);
SHOULD_FAIL(ranged_cast<uint8_t>(tv));
SHOULD_PASS(ranged_cast<int64_t>(tv), 256);
SHOULD_PASS(ranged_cast<int32_t>(tv), 256);
SHOULD_PASS(ranged_cast<int16_t>(tv), 256);
SHOULD_FAIL(ranged_cast<int8_t>(tv));
SHOULD_PASS(ranged_cast<unsigned long long>(tv), 256);
SHOULD_PASS(ranged_cast<unsigned long>(tv), 256);
SHOULD_PASS(ranged_cast<unsigned int>(tv), 256);
SHOULD_PASS(ranged_cast<unsigned short>(tv), 256);
SHOULD_FAIL(ranged_cast<unsigned char>(tv));
SHOULD_PASS(ranged_cast<signed long long>(tv), 256);
SHOULD_PASS(ranged_cast<signed long>(tv), 256);
SHOULD_PASS(ranged_cast<signed int>(tv), 256);
SHOULD_PASS(ranged_cast<signed short>(tv), 256);
SHOULD_FAIL(ranged_cast<signed char>(tv));
}
// Checks ranged_cast with 0x80000000 (2^31): one more than the largest value
// of a signed 32-bit type.  sv holds the value as long long, uv as unsigned
// long long.  Expectations for `signed int` and `signed long` depend on the
// platform's type widths, so both are selected by sizeof, in the same way the
// larger-value tests in this file handle `long`: a 32-bit `long` cannot
// represent 2^31.
// NOTE(review): the off_t expectation assumes a 64-bit off_t - confirm for
// 32-bit builds without large-file support.
static
void
test_cast_0x80000000_to_things()
{
	auto sv = 0x80000000LL;
	auto uv = 0x80000000ULL;
	SHOULD_PASS(ranged_cast<off_t>(sv), sv);
	SHOULD_PASS(ranged_cast<uint64_t>(uv), uv);
	SHOULD_PASS(ranged_cast<uint32_t>(uv), uv);
	SHOULD_FAIL(ranged_cast<uint16_t>(uv));
	SHOULD_FAIL(ranged_cast<uint8_t>(uv));
	SHOULD_PASS(ranged_cast<int64_t>(sv), sv);
	SHOULD_FAIL(ranged_cast<int32_t>(sv));
	SHOULD_FAIL(ranged_cast<int16_t>(sv));
	SHOULD_FAIL(ranged_cast<int8_t>(sv));
	SHOULD_PASS(ranged_cast<unsigned long long>(uv), uv);
	// 2^31 fits in unsigned long whether long is 32 or 64 bits wide.
	SHOULD_PASS(ranged_cast<unsigned long>(uv), uv);
	SHOULD_PASS(ranged_cast<unsigned int>(uv), uv);
	SHOULD_FAIL(ranged_cast<unsigned short>(uv));
	SHOULD_FAIL(ranged_cast<unsigned char>(uv));
	SHOULD_PASS(ranged_cast<signed long long>(sv), sv);
	SHOULD_FAIL(ranged_cast<signed short>(sv));
	SHOULD_FAIL(ranged_cast<signed char>(sv));
	if (sizeof(int) == 4) {
		SHOULD_FAIL(ranged_cast<signed int>(sv));
	} else if (sizeof(int) == 8) {
		SHOULD_PASS(ranged_cast<signed int>(sv), sv);
	} else {
		assert(!"unhandled case, please add code here");
	}
	// signed long holds 2^31 only when long is wider than 32 bits.
	if (sizeof(long) == 4) {
		SHOULD_FAIL(ranged_cast<signed long>(sv));
	} else if (sizeof(long) == 8) {
		SHOULD_PASS(ranged_cast<signed long>(sv), sv);
	} else {
		assert(!"unhandled case, please add code here");
	}
}
// Checks ranged_cast with 0xffffffff (2^32 - 1): the largest value of an
// unsigned 32-bit type, and too large for a signed 32-bit type.  sv holds the
// value as long long, uv as unsigned long long.  Expectations for
// `signed int` and `signed long` depend on the platform's type widths, so
// both are selected by sizeof, in the same way the larger-value tests in this
// file handle `long`: a 32-bit `long` cannot represent this value.
// NOTE(review): the off_t expectation assumes a 64-bit off_t - confirm for
// 32-bit builds without large-file support.
static
void
test_cast_0xffffffff_to_things()
{
	auto sv = 0xffffffffLL;
	auto uv = 0xffffffffULL;
	SHOULD_PASS(ranged_cast<off_t>(sv), sv);
	SHOULD_PASS(ranged_cast<uint64_t>(uv), uv);
	SHOULD_PASS(ranged_cast<uint32_t>(uv), uv);
	SHOULD_FAIL(ranged_cast<uint16_t>(uv));
	SHOULD_FAIL(ranged_cast<uint8_t>(uv));
	SHOULD_PASS(ranged_cast<int64_t>(sv), sv);
	SHOULD_FAIL(ranged_cast<int32_t>(sv));
	SHOULD_FAIL(ranged_cast<int16_t>(sv));
	SHOULD_FAIL(ranged_cast<int8_t>(sv));
	SHOULD_PASS(ranged_cast<unsigned long long>(uv), uv);
	// 2^32 - 1 fits in unsigned long whether long is 32 or 64 bits wide.
	SHOULD_PASS(ranged_cast<unsigned long>(uv), uv);
	SHOULD_PASS(ranged_cast<unsigned int>(uv), uv);
	SHOULD_FAIL(ranged_cast<unsigned short>(uv));
	SHOULD_FAIL(ranged_cast<unsigned char>(uv));
	SHOULD_PASS(ranged_cast<signed long long>(sv), sv);
	SHOULD_FAIL(ranged_cast<signed short>(sv));
	SHOULD_FAIL(ranged_cast<signed char>(sv));
	if (sizeof(int) == 4) {
		SHOULD_FAIL(ranged_cast<signed int>(sv));
	} else if (sizeof(int) == 8) {
		SHOULD_PASS(ranged_cast<signed int>(sv), sv);
	} else {
		assert(!"unhandled case, please add code here");
	}
	// signed long holds 2^32 - 1 only when long is wider than 32 bits.
	if (sizeof(long) == 4) {
		SHOULD_FAIL(ranged_cast<signed long>(sv));
	} else if (sizeof(long) == 8) {
		SHOULD_PASS(ranged_cast<signed long>(sv), sv);
	} else {
		assert(!"unhandled case, please add code here");
	}
}
// Checks ranged_cast with 0xfffffffff (nine hex digits, 2^36 - 1): a value
// that needs more than 32 bits.  The fixed-width 64-bit types and long long
// are expected to accept it, the 32-bit and narrower fixed-width types are
// expected to reject it, and the expectations for int and long are selected
// at run time from sizeof(int) and sizeof(long).
static
void
test_cast_0xfffffffff_to_things()
{
auto sv = 0xfffffffffLL;
auto uv = 0xfffffffffULL;
SHOULD_PASS(ranged_cast<off_t>(sv), sv);
SHOULD_PASS(ranged_cast<uint64_t>(uv), uv);
SHOULD_FAIL(ranged_cast<uint32_t>(uv));
SHOULD_FAIL(ranged_cast<uint16_t>(uv));
SHOULD_FAIL(ranged_cast<uint8_t>(uv));
SHOULD_PASS(ranged_cast<int64_t>(sv), sv);
SHOULD_FAIL(ranged_cast<int32_t>(sv));
SHOULD_FAIL(ranged_cast<int16_t>(sv));
SHOULD_FAIL(ranged_cast<int8_t>(sv));
SHOULD_PASS(ranged_cast<unsigned long long>(uv), uv);
SHOULD_FAIL(ranged_cast<unsigned short>(uv));
SHOULD_FAIL(ranged_cast<unsigned char>(uv));
SHOULD_PASS(ranged_cast<signed long long>(sv), sv);
SHOULD_FAIL(ranged_cast<signed short>(sv));
SHOULD_FAIL(ranged_cast<signed char>(sv));
// int: rejected when 4 bytes wide, accepted when 8 bytes wide.
if (sizeof(int) == 4) {
SHOULD_FAIL(ranged_cast<signed int>(sv));
SHOULD_FAIL(ranged_cast<unsigned int>(uv));
} else if (sizeof(int) == 8) {
SHOULD_PASS(ranged_cast<signed int>(sv), sv);
SHOULD_PASS(ranged_cast<unsigned int>(uv), uv);
} else {
assert(!"unhandled case, please add code here");
}
// long: rejected when 4 bytes wide, accepted when 8 bytes wide.
if (sizeof(long) == 4) {
SHOULD_FAIL(ranged_cast<signed long>(sv));
SHOULD_FAIL(ranged_cast<unsigned long>(uv));
} else if (sizeof(long) == 8) {
SHOULD_PASS(ranged_cast<signed long>(sv), sv);
SHOULD_PASS(ranged_cast<unsigned long>(uv), uv);
} else {
assert(!"unhandled case, please add code here");
}
}
// Checks ranged_cast with 0x8000000000000000 (2^63): one more than the
// largest value of a signed 64-bit type.  Every signed target is expected to
// reject it; among unsigned targets only the 64-bit-wide ones are expected to
// accept it, with int and long chosen at run time from their sizeof.
static
void
test_cast_0x8000000000000000_to_things()
{
// Where long long is 64 bits this hexadecimal literal does not fit in
// long long, so despite the LL suffix `auto` deduces unsigned long long for sv.
auto sv = 0x8000000000000000LL;
auto uv = 0x8000000000000000ULL;
SHOULD_FAIL(ranged_cast<off_t>(sv));
SHOULD_PASS(ranged_cast<uint64_t>(uv), uv);
SHOULD_FAIL(ranged_cast<uint32_t>(uv));
SHOULD_FAIL(ranged_cast<uint16_t>(uv));
SHOULD_FAIL(ranged_cast<uint8_t>(uv));
SHOULD_FAIL(ranged_cast<int64_t>(sv));
SHOULD_FAIL(ranged_cast<int32_t>(sv));
SHOULD_FAIL(ranged_cast<int16_t>(sv));
SHOULD_FAIL(ranged_cast<int8_t>(sv));
SHOULD_PASS(ranged_cast<unsigned long long>(uv), uv);
SHOULD_FAIL(ranged_cast<unsigned short>(uv));
SHOULD_FAIL(ranged_cast<unsigned char>(uv));
SHOULD_FAIL(ranged_cast<signed long long>(sv));
SHOULD_FAIL(ranged_cast<signed long>(sv));
SHOULD_FAIL(ranged_cast<signed int>(sv));
SHOULD_FAIL(ranged_cast<signed short>(sv));
SHOULD_FAIL(ranged_cast<signed char>(sv));
// unsigned int: rejected when 4 bytes wide, accepted when 8 bytes wide.
if (sizeof(int) == 4) {
SHOULD_FAIL(ranged_cast<unsigned int>(uv));
} else if (sizeof(int) == 8) {
SHOULD_PASS(ranged_cast<unsigned int>(uv), uv);
} else {
assert(!"unhandled case, please add code here");
}
// unsigned long: rejected when 4 bytes wide, accepted when 8 bytes wide.
if (sizeof(long) == 4) {
SHOULD_FAIL(ranged_cast<unsigned long>(uv));
} else if (sizeof(long) == 8) {
SHOULD_PASS(ranged_cast<unsigned long>(uv), uv);
} else {
assert(!"unhandled case, please add code here");
}
}
// Checks ranged_cast with 0xffffffffffffffff (2^64 - 1): the largest value of
// an unsigned 64-bit type.  Every signed target is expected to reject it;
// among unsigned targets only the 64-bit-wide ones are expected to accept it,
// with int and long chosen at run time from their sizeof.
static
void
test_cast_0xffffffffffffffff_to_things()
{
// Where long long is 64 bits this hexadecimal literal does not fit in
// long long, so despite the LL suffix `auto` deduces unsigned long long for sv.
auto sv = 0xffffffffffffffffLL;
auto uv = 0xffffffffffffffffULL;
SHOULD_FAIL(ranged_cast<off_t>(sv));
SHOULD_PASS(ranged_cast<uint64_t>(uv), uv);
SHOULD_FAIL(ranged_cast<uint32_t>(uv));
SHOULD_FAIL(ranged_cast<uint16_t>(uv));
SHOULD_FAIL(ranged_cast<uint8_t>(uv));
SHOULD_FAIL(ranged_cast<int64_t>(sv));
SHOULD_FAIL(ranged_cast<int32_t>(sv));
SHOULD_FAIL(ranged_cast<int16_t>(sv));
SHOULD_FAIL(ranged_cast<int8_t>(sv));
SHOULD_PASS(ranged_cast<unsigned long long>(uv), uv);
SHOULD_FAIL(ranged_cast<unsigned short>(uv));
SHOULD_FAIL(ranged_cast<unsigned char>(uv));
SHOULD_FAIL(ranged_cast<signed long long>(sv));
SHOULD_FAIL(ranged_cast<signed long>(sv));
SHOULD_FAIL(ranged_cast<signed int>(sv));
SHOULD_FAIL(ranged_cast<signed short>(sv));
SHOULD_FAIL(ranged_cast<signed char>(sv));
// unsigned int: rejected when 4 bytes wide, accepted when 8 bytes wide.
if (sizeof(int) == 4) {
SHOULD_FAIL(ranged_cast<unsigned int>(uv));
} else if (sizeof(int) == 8) {
SHOULD_PASS(ranged_cast<unsigned int>(uv), uv);
} else {
assert(!"unhandled case, please add code here");
}
// unsigned long: rejected when 4 bytes wide, accepted when 8 bytes wide.
if (sizeof(long) == 4) {
SHOULD_FAIL(ranged_cast<unsigned long>(uv));
} else if (sizeof(long) == 8) {
SHOULD_PASS(ranged_cast<unsigned long>(uv), uv);
} else {
assert(!"unhandled case, please add code here");
}
}
// OK enough with the small values. We want to know if 32-bit machines break.
// Runs every ranged_cast test in order of increasing magnitude of the tested
// value, then exits successfully.  A failed expectation trips an assert()
// inside the test, so reaching exit() means all of them held.
int
main(int, char**)
{
RUN_A_TEST(test_cast_signed_negative_to_unsigned());
RUN_A_TEST(test_cast_1_to_things());
RUN_A_TEST(test_cast_128_to_things());
RUN_A_TEST(test_cast_256_to_things());
RUN_A_TEST(test_cast_0x80000000_to_things());
RUN_A_TEST(test_cast_0xffffffff_to_things());
RUN_A_TEST(test_cast_0xfffffffff_to_things());
RUN_A_TEST(test_cast_0x8000000000000000_to_things());
RUN_A_TEST(test_cast_0xffffffffffffffff_to_things());
exit(EXIT_SUCCESS);
}

40
test/path.cc Normal file
View File

@ -0,0 +1,40 @@
#include "tests.h"
#include "crucible/path.h"
#include <ios>
#include <cassert>
#include <cstring>
#include <cstdlib>
#include <unistd.h>
using namespace crucible;
// Number of basename() mismatches recorded by test_path_basename(); main()
// asserts that it is still zero after all cases have run.
unsigned failures = 0;
// Runs basename() on `input` and compares the outcome with `expected`.
// A mismatch does not abort: the actual result is printed to stderr and the
// file-level `failures` counter is incremented, leaving the final verdict to
// the caller.
static
void
test_path_basename(string input, string expected)
{
	const string actual = basename(input);
	if (actual == expected) {
		return;
	}
	std::cerr << "result was \"" << actual << "\"" << std::endl;
	++failures;
}
// Runs the basename() cases and fails (via assert) if any of them recorded a
// mismatch.  The expected values show that a path ending in '/' yields an
// empty basename.
// Note that test_path_basename() only counts mismatches, so RUN_A_TEST still
// prints "OK" for a mismatching case; the assert on `failures` below is what
// actually fails the program.
int
main(int, char**)
{
RUN_A_TEST(test_path_basename("/foo/bar.c", "bar.c"));
RUN_A_TEST(test_path_basename("/foo/bar/", ""));
RUN_A_TEST(test_path_basename("/foo/", ""));
RUN_A_TEST(test_path_basename("/", ""));
RUN_A_TEST(test_path_basename("foo/bar.c", "bar.c"));
RUN_A_TEST(test_path_basename("bar.c", "bar.c"));
RUN_A_TEST(test_path_basename("", ""));
assert(!failures);
exit(EXIT_SUCCESS);
}

65
test/process.cc Normal file
View File

@ -0,0 +1,65 @@
#include "tests.h"
#include "crucible/process.h"
#include <ios>
#include <cassert>
#include <cstring>
#include <cstdlib>
#include <stdexcept>
#include <unistd.h>
using namespace crucible;
using namespace std;
// Identity helper: hands back the value it was given.  Used as the function
// passed to Pid in the single-argument fork test.
static inline
int
return_value(int val)
{
	// Debug aid, normally disabled:
	// cerr << "pid " << getpid() << " returning " << val << endl;
	const int result = val;
	return result;
}
// Two-argument helper: returns the sum of its arguments.  Used as the
// function passed to Pid in the two-argument fork test.
static inline
int
return_value_2(int val, int val2)
{
	const int sum = val + val2;
	return sum;
}
// Starts a Pid running return_value(val), waits for it with join(), and
// checks that the wait status reports a normal exit whose exit code is val.
// Also checks that the Pid handle compares equal to both get_id() and
// native_handle().
// NOTE(review): Pid is declared in crucible/process.h (not shown); presumably
// it runs the function in a forked child and exits with its return value.
// Only the low 8 bits of an exit code are reported, so val must be in 0..255.
static inline
void
test_fork_return(int val)
{
	Pid child(return_value, val);
	assert(child == child->get_id());
	assert(child == child->native_handle());
	int status = child->join();
	// POSIX defines WEXITSTATUS only when WIFEXITED is true, so confirm the
	// normal exit before extracting the exit code.
	assert(WIFEXITED(status));
	int rv_status = WEXITSTATUS(status);
	assert(rv_status == val);
}
// Two-argument variant: starts a Pid running return_value_2(val, val2), waits
// for it with join(), and checks that the wait status reports a normal exit
// whose exit code is val + val2.
// NOTE(review): Pid is declared in crucible/process.h (not shown); presumably
// it runs the function in a forked child and exits with its return value.
// Only the low 8 bits of an exit code are reported, so the sum must be in
// 0..255.
static inline
void
test_fork_return(int val, int val2)
{
	Pid child(return_value_2, val, val2);
	int status = child->join();
	// POSIX defines WEXITSTATUS only when WIFEXITED is true, so confirm the
	// normal exit before extracting the exit code.
	assert(WIFEXITED(status));
	int rv_status = WEXITSTATUS(status);
	assert(rv_status == val + val2);
}
// Runs the fork/exit-status tests: three single-argument cases (0, 1, 9) and
// two two-argument cases whose sums (5 and 16) are the expected exit codes.
int
main(int, char**)
{
RUN_A_TEST(test_fork_return(0));
RUN_A_TEST(test_fork_return(1));
RUN_A_TEST(test_fork_return(9));
RUN_A_TEST(test_fork_return(2, 3));
RUN_A_TEST(test_fork_return(7, 9));
exit(EXIT_SUCCESS);
}

14
test/tests.h Normal file
View File

@ -0,0 +1,14 @@
#ifndef CRUCIBLE_TESTS_H
#define CRUCIBLE_TESTS_H
// Keep assert() active in test programs: with NDEBUG undefined, any <cassert>
// included after this header expands assert() to a real check.
#undef NDEBUG
#include <iostream>
// RUN_A_TEST(test): prints "Testing <source text of test>..." to std::cerr,
// evaluates `test`, then prints "OK".  Nothing is caught here, so "OK" only
// means the statement returned; a failed assert() or an exception leaving
// `test` skips the "OK" line.
#define RUN_A_TEST(test) do { \
std::cerr << "Testing " << #test << "..." << std::flush; \
do { test ; } while (0); \
std::cerr << "OK" << std::endl; \
} while (0)
#endif // CRUCIBLE_TESTS_H

1
test/tmp/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*