mirror of
https://github.com/Zygo/bees.git
synced 2025-05-17 21:35:45 +02:00
hash: prepare for user-selectable hash functions
Localize the hash function in bees to a single spot to make it easier to change later (or at runtime). Remove some code that was using a property of CRC as an optimization. The optimization doesn't work for other hash functions, and running the CRC function takes more CPU time than the optimization saved. Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
parent
b3a8fcb553
commit
7117cb40c5
@ -1,5 +1,6 @@
|
|||||||
#include "bees.h"
|
#include "bees.h"
|
||||||
|
|
||||||
|
#include "crucible/city.h"
|
||||||
#include "crucible/crc64.h"
|
#include "crucible/crc64.h"
|
||||||
#include "crucible/string.h"
|
#include "crucible/string.h"
|
||||||
|
|
||||||
@ -11,6 +12,12 @@
|
|||||||
using namespace crucible;
|
using namespace crucible;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
// Build a BeesHash from the len bytes starting at ptr.
// This constructor is the single spot that picks the hash function:
// CRC64 is active; the CityHash64 alternative is kept commented out below.
BeesHash::BeesHash(const uint8_t *ptr, size_t len) :
|
||||||
|
// m_hash(CityHash64(reinterpret_cast<const char *>(ptr), len))
|
||||||
|
m_hash(Digest::CRC::crc64(ptr, len))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
ostream &
|
ostream &
|
||||||
operator<<(ostream &os, const BeesHash &bh)
|
operator<<(ostream &os, const BeesHash &bh)
|
||||||
{
|
{
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
#include "bees.h"
|
#include "bees.h"
|
||||||
|
|
||||||
#include "crucible/crc64.h"
|
|
||||||
#include "crucible/limits.h"
|
#include "crucible/limits.h"
|
||||||
#include "crucible/ntoa.h"
|
#include "crucible/ntoa.h"
|
||||||
#include "crucible/string.h"
|
#include "crucible/string.h"
|
||||||
@ -964,11 +963,7 @@ BeesBlockData::hash() const
|
|||||||
// We can only dedup unaligned EOF blocks against other unaligned EOF blocks,
|
// We can only dedup unaligned EOF blocks against other unaligned EOF blocks,
|
||||||
// so we do NOT round up to a full sum block size.
|
// so we do NOT round up to a full sum block size.
|
||||||
const Blob &blob = data();
|
const Blob &blob = data();
|
||||||
// TODO: It turns out that file formats with 4K block
|
m_hash = BeesHash(blob.data(), blob.size());
|
||||||
// alignment and embedded CRC64 do exist, and every block
|
|
||||||
// of such files has the same hash. Could use a subset
|
|
||||||
// of SHA1 here instead.
|
|
||||||
m_hash = Digest::CRC::crc64(blob.data(), blob.size());
|
|
||||||
m_hash_done = true;
|
m_hash_done = true;
|
||||||
BEESCOUNT(block_hash);
|
BEESCOUNT(block_hash);
|
||||||
}
|
}
|
||||||
@ -980,9 +975,8 @@ bool
|
|||||||
BeesBlockData::is_data_zero() const
|
BeesBlockData::is_data_zero() const
|
||||||
{
|
{
|
||||||
// The CRC64 of zero is zero, so skip some work if we already know the CRC
|
// The CRC64 of zero is zero, so skip some work if we already know the CRC
|
||||||
if (m_hash_done && m_hash != 0) {
|
// ...but that doesn't work for any other hash function, and it
|
||||||
return false;
|
// saves us next to nothing.
|
||||||
}
|
|
||||||
|
|
||||||
// OK read block (maybe) and check every byte
|
// OK read block (maybe) and check every byte
|
||||||
for (auto c : data()) {
|
for (auto c : data()) {
|
||||||
|
@ -605,6 +605,7 @@ struct BeesHash {
|
|||||||
BeesHash(Type that) : m_hash(that) { }
|
BeesHash(Type that) : m_hash(that) { }
|
||||||
operator Type() const { return m_hash; }
|
operator Type() const { return m_hash; }
|
||||||
BeesHash& operator=(const Type that) { m_hash = that; return *this; }
|
BeesHash& operator=(const Type that) { m_hash = that; return *this; }
|
||||||
|
BeesHash(const uint8_t *ptr, size_t len);
|
||||||
private:
|
private:
|
||||||
Type m_hash;
|
Type m_hash;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user