Skip to content

Commit d6d9cf4

Browse files
committed
GHash cleanup
1 parent 42f9ced commit d6d9cf4

9 files changed

Lines changed: 664 additions & 626 deletions

File tree

gclib/GBase.cpp

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -637,11 +637,56 @@ int strhash(const char* str){
637637
g=h&0xF0000000;
638638
if(g) h^=g>>24;
639639
h&=0x0fffffff;
640-
}
640+
}
641641
GASSERT(h<=0x0fffffff);
642642
return h;
643643
}
644644

645+
646+
inline uint32_t fmix32 ( uint32_t h ) {
647+
h ^= h >> 16;
648+
h *= 0x85ebca6b;
649+
h ^= h >> 13;
650+
h *= 0xc2b2ae35;
651+
h ^= h >> 16;
652+
return h;
653+
}
654+
655+
int murmur3(const char *key) {
656+
int len=strlen(key);
657+
const uint8_t * data = (const uint8_t*)key;
658+
const int nblocks = len / 4;
659+
660+
uint32_t h1 = 5381; //seed;
661+
662+
const uint32_t c1 = 0xcc9e2d51;
663+
const uint32_t c2 = 0x1b873593;
664+
665+
const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
666+
for(int i = -nblocks; i; i++) {
667+
uint32_t k1 = blocks[i];
668+
k1 *= c1;
669+
k1 = (k1 << 15) | (k1 >> (32 - 15));
670+
k1 *= c2;
671+
h1 ^= k1;
672+
h1 = (k1 << 13) | (k1 >> (32 - 13));
673+
h1 = h1*5+0xe6546b64;
674+
}
675+
const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
676+
677+
uint32_t k1 = 0;
678+
switch(len & 3) {
679+
case 3: k1 ^= tail[2] << 16;
680+
case 2: k1 ^= tail[1] << 8;
681+
case 1: k1 ^= tail[0];
682+
k1 *= c1; k1 = (k1 << 15) | (k1 >> (32 - 15)); k1 *= c2; h1 ^= k1;
683+
};
684+
685+
h1 ^= len;
686+
return (fmix32(h1) & 0x7FFFFFFF);
687+
}
688+
689+
645690
int djb_hash(const char* cp)
646691
{
647692
int h = 5381;

gclib/GBase.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#ifndef G_BASE_DEFINED
22
#define G_BASE_DEFINED
3-
#define GCLIB_VERSION "0.11.4"
3+
#define GCLIB_VERSION "0.11.7"
44

55
#ifdef HAVE_CONFIG_H
66
#include "config.h"
@@ -289,6 +289,7 @@ int strhash(const char* str);
289289
//alternate hash functions:
290290
int fnv1a_hash(const char* cp);
291291
int djb_hash(const char* cp);
292+
int murmur3(const char *key);
292293

293294
//---- generic base GSeg : genomic segment (interval) --
294295
// coordinates are considered 1-based (so 0 is invalid)

gclib/GBitVec.h

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ class GBitVec {
153153

154154

155155
void bitSizeError() {
156-
GError("Error at GBitVec: unsupported BitWord size (%d)!\n",
156+
GError("Error at GBitVec: unsupported BitWord size (%d)!\n",
157157
sizeof(BitWord));
158158
}
159159
/// count - Returns the number of bits which are set.
@@ -277,6 +277,9 @@ class GBitVec {
277277
}
278278

279279
GBitVec &set(uint Idx) {
280+
#ifndef NDEBUG
281+
indexCheck(Idx, Size);
282+
#endif
280283
fBits[Idx / BITWORD_SIZE] |= 1L << (Idx % BITWORD_SIZE);
281284
return *this;
282285
}
@@ -287,6 +290,9 @@ class GBitVec {
287290
}
288291

289292
GBitVec &reset(uint Idx) {
293+
#ifndef NDEBUG
294+
indexCheck(Idx, Size);
295+
#endif
290296
fBits[Idx / BITWORD_SIZE] &= ~(1L << (Idx % BITWORD_SIZE));
291297
return *this;
292298
}
@@ -299,6 +305,9 @@ class GBitVec {
299305
}
300306

301307
GBitVec &flip(uint Idx) {
308+
#ifndef NDEBUG
309+
indexCheck(Idx, Size);
310+
#endif
302311
fBits[Idx / BITWORD_SIZE] ^= 1L << (Idx % BITWORD_SIZE);
303312
return *this;
304313
}
@@ -310,7 +319,7 @@ class GBitVec {
310319

311320
inline static void indexCheck(uint vIdx, uint vSize) {
312321
if (vIdx >= vSize)
313-
GError("Error at GBitVec: index %d out of bounds (size %d)\n",
322+
GError("Error at GBitVec: index %d out of bounds (size %d)\n",
314323
(int)vIdx, vSize);
315324
}
316325

@@ -439,7 +448,7 @@ class GBitVec {
439448

440449
// Then set any stray high bits of the last used word.
441450
uint ExtraBits = Size % BITWORD_SIZE;
442-
451+
443452
if (ExtraBits) {
444453
BitWord ExtraBitMask = ~0UL << ExtraBits;
445454
if (value)

0 commit comments

Comments
 (0)