mirror of
				https://github.com/Zygo/bees.git
				synced 2025-11-03 19:50:34 +01:00 
			
		
		
		
	hash: prepare for user-selectable hash functions
Localize the hash function in bees to a single spot to make it easier to change later (or at runtime). Remove some code that was using a property of CRC as an optimization. The optimization doesn't work for other hash functions, and running the CRC function takes more CPU time than the optimization saved. Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
		@@ -1,5 +1,6 @@
 | 
			
		||||
#include "bees.h"
 | 
			
		||||
 | 
			
		||||
#include "crucible/city.h"
 | 
			
		||||
#include "crucible/crc64.h"
 | 
			
		||||
#include "crucible/string.h"
 | 
			
		||||
 | 
			
		||||
@@ -11,6 +12,12 @@
 | 
			
		||||
using namespace crucible;
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
// Construct a BeesHash by hashing the `len` bytes starting at `ptr`.
// The digest is computed with Digest::CRC::crc64 and stored in m_hash.
BeesHash::BeesHash(const uint8_t *ptr, size_t len) :
 | 
			
		||||
	// Disabled alternative initializer: hash the same buffer with CityHash64.
	// m_hash(CityHash64(reinterpret_cast<const char *>(ptr), len))
 | 
			
		||||
	m_hash(Digest::CRC::crc64(ptr, len))
 | 
			
		||||
{
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
ostream &
 | 
			
		||||
operator<<(ostream &os, const BeesHash &bh)
 | 
			
		||||
{
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,5 @@
 | 
			
		||||
#include "bees.h"
 | 
			
		||||
 | 
			
		||||
#include "crucible/crc64.h"
 | 
			
		||||
#include "crucible/limits.h"
 | 
			
		||||
#include "crucible/ntoa.h"
 | 
			
		||||
#include "crucible/string.h"
 | 
			
		||||
@@ -964,11 +963,7 @@ BeesBlockData::hash() const
 | 
			
		||||
		// We can only dedup unaligned EOF blocks against other unaligned EOF blocks,
 | 
			
		||||
		// so we do NOT round up to a full sum block size.
 | 
			
		||||
		const Blob &blob = data();
 | 
			
		||||
		// TODO:  It turns out that file formats with 4K block
 | 
			
		||||
		// alignment and embedded CRC64 do exist, and every block
 | 
			
		||||
		// of such files has the same hash.  Could use a subset
 | 
			
		||||
		// of SHA1 here instead.
 | 
			
		||||
		m_hash = Digest::CRC::crc64(blob.data(), blob.size());
 | 
			
		||||
		m_hash = BeesHash(blob.data(), blob.size());
 | 
			
		||||
		m_hash_done = true;
 | 
			
		||||
		BEESCOUNT(block_hash);
 | 
			
		||||
	}
 | 
			
		||||
@@ -980,9 +975,8 @@ bool
 | 
			
		||||
BeesBlockData::is_data_zero() const
 | 
			
		||||
{
 | 
			
		||||
	// The CRC64 of an all-zero block is zero, so skip some work if we already know the CRC
 | 
			
		||||
	if (m_hash_done && m_hash != 0) {
 | 
			
		||||
		return false;
 | 
			
		||||
	}
 | 
			
		||||
	// ...but that doesn't work for any other hash function, and it
 | 
			
		||||
	// saves us next to nothing.
 | 
			
		||||
 | 
			
		||||
	// OK read block (maybe) and check every byte
 | 
			
		||||
	for (auto c : data()) {
 | 
			
		||||
 
 | 
			
		||||
@@ -605,6 +605,7 @@ struct BeesHash {
 | 
			
		||||
	BeesHash(Type that) : m_hash(that) { }
 | 
			
		||||
	operator Type() const { return m_hash; }
 | 
			
		||||
	BeesHash& operator=(const Type that) { m_hash = that; return *this; }
 | 
			
		||||
	BeesHash(const uint8_t *ptr, size_t len);
 | 
			
		||||
private:
 | 
			
		||||
	Type	m_hash;
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user