Skip to content

Commit db37743

Browse files
committed
GHashMap using xxHash64 by default
1 parent 9de60c9 commit db37743

3 files changed

Lines changed: 79 additions & 47 deletions

File tree

gclib/GHashMap.hh

Lines changed: 77 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
#include "khashl.hh"
1010
#include <type_traits>
1111
#include <typeinfo>
12+
13+
#define XXH_INLINE_ALL 1
14+
#include "xxhash.h"
15+
1216
//#include <type_traits> //for std::is_trivial
1317

1418
/*
@@ -41,20 +45,48 @@ template< typename T >
4145
{};
4246
*/
4347

44-
template <typename K> struct GHashKey_Hash { //K generic (class, primitive, pointer except const char* )
48+
template <typename K> struct GHashKey_city32 { //K generic (class, primitive, pointer except const char* )
4549
//template <typename T=K> inline typename std::enable_if< std::is_trivial<T>::value, uint32_t>::type
4650
uint32_t operator()(const K& s) const { //only works for trivial types!
4751
static_assert(std::is_trivial<K>::value, "Error: cannot use this for non-trivial types!\n");
4852
return CityHash32((const char *) &s, sizeof(K));
4953
}
5054
};
5155

52-
template <> struct GHashKey_Hash<const char*> {
56+
template <> struct GHashKey_city32<const char*> {
5357
inline uint32_t operator()(const char* s) const {
5458
return CityHash32(s, strlen(s));
5559
}
5660
};
5761

62+
template <typename K> struct GHashKey_xxH32 { //K generic (class, primitive, pointer except const char* )
63+
//template <typename T=K> inline typename std::enable_if< std::is_trivial<T>::value, uint32_t>::type
64+
uint32_t operator()(const K& s) const { //only works for trivial types!
65+
static_assert(std::is_trivial<K>::value, "Error: cannot use this for non-trivial types!\n");
66+
return XXH32((const void *) &s, sizeof(K), 0);
67+
}
68+
};
69+
70+
template <> struct GHashKey_xxH32<const char*> {
71+
inline uint32_t operator()(const char* s) const {
72+
return XXH32(s, strlen(s), 0);
73+
}
74+
};
75+
76+
template <typename K> struct GHashKey_xxH64 { //K generic (class, primitive, pointer except const char* )
77+
//template <typename T=K> inline typename std::enable_if< std::is_trivial<T>::value, uint32_t>::type
78+
uint64_t operator()(const K& s) const { //only works for trivial types!
79+
static_assert(std::is_trivial<K>::value, "Error: cannot use this for non-trivial types!\n");
80+
return XXH64((const void *) &s, sizeof(K), 0);
81+
}
82+
};
83+
84+
template <> struct GHashKey_xxH64<const char*> {
85+
inline uint32_t operator()(const char* s) const {
86+
return XXH64(s, strlen(s), 0);
87+
}
88+
};
89+
5890
template <typename K> struct GHashKey_Eq { //K is a type having the == operator defined
5991
inline bool operator()(const K& x, const K& y) const {
6092
return (x == y); //requires == operator to be defined for K
@@ -68,23 +100,23 @@ template <> struct GHashKey_Eq<const char*> {
68100
};
69101

70102
//GHashSet is never making a deep copy of the char* key, it only stores the pointer
71-
template <typename K=const char*, class Hash=GHashKey_Hash<K>, class Eq=GHashKey_Eq<K> >
103+
template <typename K=const char*, class Hash=GHashKey_xxH64<K>, class Eq=GHashKey_Eq<K>, typename khInt_t=uint64_t >
72104
class GHashSet: public std::conditional< is_char_ptr<K>::value,
73-
klib::KHashSetCached< K, Hash, Eq >,
74-
klib::KHashSet< K, Hash, Eq > >::type {
105+
klib::KHashSetCached< K, Hash, Eq, khInt_t >,
106+
klib::KHashSet< K, Hash, Eq, khInt_t > >::type {
75107
protected:
76-
uint32_t i_iter=0;
108+
khInt_t i_iter=0;
77109
public:
78-
inline int Add(const K ky) { // return -1 if the key already exists
110+
inline khInt_t Add(const K ky) { // return -1 if the key already exists
79111
int absent=-1;
80-
uint32_t i=this->put(ky, &absent);
112+
khInt_t i=this->put(ky, &absent);
81113
if (absent==1) //key was actually added
82114
return i;
83115
return -1;
84116
}
85117

86-
int Remove(K ky) { //return index being removed, or -1 if no such key exists
87-
uint32_t i=this->get(ky);
118+
inline khInt_t Remove(K ky) { //return index being removed, or -1 if no such key exists
119+
khInt_t i=this->get(ky);
88120
if (i!=this->end()) {
89121
this->del(i);
90122
return i;
@@ -116,7 +148,7 @@ public:
116148

117149
int Find(K ky) {//return internal slot location if found,
118150
// or -1 if not found
119-
uint32_t r=this->get(ky);
151+
khInt_t r=this->get(ky);
120152
if (r==this->end()) return -1;
121153
return (int)r;
122154
}
@@ -142,12 +174,12 @@ public:
142174

143175
//GStrSet always allocates a copy of each added string;
144176
// if you don't want that (keys are shared), just use GHashSet<const char*> instead
145-
template <class Hash=GHashKey_Hash<const char*>, class Eq=GHashKey_Eq<const char*> >
146-
class GStrSet: public GHashSet<const char*, Hash, Eq> {
177+
template <class Hash=GHashKey_xxH64<const char*>, class Eq=GHashKey_Eq<const char*>, typename khInt_t=uint64_t>
178+
class GStrSet: public GHashSet<const char*, Hash, Eq, khInt_t> {
147179
public:
148180
inline int Add(const char* ky) { // return -1 if the key already exists
149181
int absent=-1;
150-
uint32_t i=this->put(ky, &absent);
182+
khInt_t i=this->put(ky, &absent);
151183
if (absent==1) {//key was actually added
152184
const char* s=Gstrdup(ky);
153185
this->key(i)=s; //store a copy of the key string
@@ -158,7 +190,7 @@ template <class Hash=GHashKey_Hash<const char*>, class Eq=GHashKey_Eq<const char
158190
}
159191

160192
int Remove(const char* ky) { //return index being removed, or -1 if no such key exists
161-
uint32_t i=this->get(ky);
193+
khInt_t i=this->get(ky);
162194
if (i!=this->end()) {
163195
GFREE(this->key(i)); //free string copy
164196
this->del(i);
@@ -168,8 +200,8 @@ template <class Hash=GHashKey_Hash<const char*>, class Eq=GHashKey_Eq<const char
168200
}
169201

170202
inline void Clear() {
171-
int nb=this->n_buckets();
172-
for (int i = 0; i != nb; ++i) {
203+
khInt_t nb=this->n_buckets();
204+
for (khInt_t i = 0; i != nb; ++i) {
173205
if (!this->__kh_used(this->used, i)) continue;
174206
//deallocate string copy
175207
GFREE(this->key(i));
@@ -190,19 +222,19 @@ template <class Hash=GHashKey_Hash<const char*>, class Eq=GHashKey_Eq<const char
190222
};
191223

192224
//generic hash map where keys and values can be of any type
193-
template <class K, class V, class Hash=GHashKey_Hash<K>, class Eq=GHashKey_Eq<K> >
225+
template <class K, class V, class Hash=GHashKey_xxH64<K>, class Eq=GHashKey_Eq<K>, typename khInt_t=uint64_t>
194226
class GHashMap:public std::conditional< is_char_ptr<K>::value,
195-
klib::KHashMapCached< K, V, Hash, Eq >,
196-
klib::KHashMap< K, V, Hash, Eq > >::type {
227+
klib::KHashMapCached< K, V, Hash, Eq, khInt_t>,
228+
klib::KHashMap< K, V, Hash, Eq, khInt_t> >::type {
197229
protected:
198-
uint32_t i_iter=0;
230+
khInt_t i_iter=0;
199231
bool freeItems=false;
200232
public:
201233
//---- these should be reimplemented for GHash
202234
inline int Add(const K ky, const V val) { // if a key does not exist allocate a copy of the key
203235
// return -1 if the key already exists
204236
int absent=-1;
205-
uint32_t i=this->put(ky, &absent);
237+
khInt_t i=this->put(ky, &absent);
206238
if (absent==1) { //key was actually added
207239
this->value(i)=val; //value is always copied
208240
return i;
@@ -212,7 +244,7 @@ public:
212244
template <typename T=V> inline
213245
typename std::enable_if< std::is_pointer<T>::value, int>::type
214246
Remove(K ky) { //return index being removed
215-
uint32_t i=this->get(ky);
247+
khInt_t i=this->get(ky);
216248
if (i!=this->end()) {
217249
if (freeItems) delete this->value(i);
218250
this->del(i);
@@ -224,7 +256,7 @@ public:
224256
template <typename T=V> inline
225257
typename std::enable_if< !std::is_pointer<T>::value, int>::type
226258
Remove(K ky) { //return index being removed
227-
uint32_t i=this->get(ky);
259+
khInt_t i=this->get(ky);
228260
if (i!=this->end()) {
229261
this->del(i);
230262
return i;
@@ -240,8 +272,8 @@ public:
240272
this->clear(); //does not shrink !
241273
return;
242274
}
243-
int nb=this->n_buckets();
244-
for (int i = 0; i != nb; ++i) {
275+
khInt_t nb=this->n_buckets();
276+
for (khInt_t i = 0; i != nb; ++i) {
245277
if (!this->__kh_used(this->used, i)) continue;
246278
if (freeItems) delete this->value(i);
247279
}
@@ -255,8 +287,8 @@ public:
255287
this->clear(); //does not shrink !
256288
return;
257289
}
258-
int nb=this->n_buckets();
259-
for (int i = 0; i != nb; ++i) {
290+
khInt_t nb=this->n_buckets();
291+
for (khInt_t i = 0; i != nb; ++i) {
260292
if (!this->__kh_used(this->used, i)) continue;
261293
}
262294
this->clear();
@@ -284,15 +316,15 @@ public:
284316
template <typename T=V> inline
285317
typename std::enable_if< std::is_pointer<T>::value, T>::type
286318
Find(const K ky) {
287-
uint32_t r=this->get(ky);
319+
khInt_t r=this->get(ky);
288320
if (r==this->end()) return NULL;
289321
return this->value(r);
290322
}
291323

292324
template <typename T=V> inline
293325
typename std::enable_if< !std::is_pointer<T>::value, T*>::type
294326
Find(const K ky) {
295-
uint32_t r=this->get(ky);
327+
khInt_t r=this->get(ky);
296328
if (r==this->end()) return NULL;
297329
return &(this->value(r));
298330
}
@@ -301,15 +333,15 @@ public:
301333
template <typename T=V> inline
302334
typename std::enable_if< std::is_pointer<T>::value, T>::type
303335
operator[](const K ky) {
304-
uint32_t r=this->get(ky);
336+
khInt_t r=this->get(ky);
305337
if (r==this->end()) return NULL;
306338
return this->value(r);
307339
}
308340

309341
template <typename T=V> inline
310342
typename std::enable_if< !std::is_pointer<T>::value, T*>::type
311343
operator[](const K ky) {
312-
uint32_t r=this->get(ky);
344+
khInt_t r=this->get(ky);
313345
if (r==this->end()) return NULL;
314346
return &(this->value(r));
315347
}
@@ -327,7 +359,7 @@ public:
327359
Next (V& val) {
328360
//returns a pointer to next key entry in the table (NULL if no more)
329361
if (this->count==0) return NULL;
330-
uint32_t nb=this->n_buckets();
362+
khInt_t nb=this->n_buckets();
331363
while (i_iter<nb && !this->occupied(i_iter)) i_iter++;
332364
if (i_iter==nb) return NULL;
333365
val=this->value(i_iter);
@@ -341,7 +373,7 @@ public:
341373
Next (V& val) {
342374
//returns a pointer to next key entry in the table (NULL if no more)
343375
if (this->count==0) return NULL;
344-
uint32_t nb=this->n_buckets();
376+
khInt_t nb=this->n_buckets();
345377
while (i_iter<nb && !this->occupied(i_iter)) i_iter++;
346378
if (i_iter==nb) return NULL;
347379
val=this->value(i_iter);
@@ -355,7 +387,7 @@ public:
355387
NextData () {
356388
//returns a pointer to next key entry in the table (NULL if no more)
357389
if (this->count==0) return NULL;
358-
uint32_t nb=this->n_buckets();
390+
khInt_t nb=this->n_buckets();
359391
while (i_iter<nb && !this->occupied(i_iter)) i_iter++;
360392
if (i_iter==nb) return NULL;
361393
T* val=&(this->value(i_iter));
@@ -368,7 +400,7 @@ public:
368400
NextData () {
369401
//returns a pointer to next key entry in the table (NULL if no more)
370402
if (this->count==0) return NULL;
371-
uint32_t nb=this->n_buckets();
403+
khInt_t nb=this->n_buckets();
372404
while (i_iter<nb && !this->occupied(i_iter)) i_iter++;
373405
if (i_iter==nb) return NULL;
374406
T val=this->value(i_iter);
@@ -382,8 +414,8 @@ public:
382414

383415
};
384416

385-
template <class V, class Hash=GHashKey_Hash<const char*>, class Eq=GHashKey_Eq<const char*> >
386-
class GHash:public GHashMap<const char*, V, Hash, Eq> {
417+
template <class V, class Hash=GHashKey_xxH64<const char*>, class Eq=GHashKey_Eq<const char*>, typename khInt_t=uint64_t >
418+
class GHash:public GHashMap<const char*, V, Hash, Eq, khInt_t> {
387419
protected:
388420

389421
public:
@@ -394,7 +426,7 @@ public:
394426
inline int Add(const char* ky, const V val) { // if a key does not exist allocate a copy of the key
395427
// return -1 if the key already exists
396428
int absent=-1;
397-
uint32_t i=this->put(ky, &absent);
429+
khInt_t i=this->put(ky, &absent);
398430
if (absent==1) { //key was actually added
399431
const char* s=Gstrdup(ky);
400432
this->key(i)=s; //store a copy of the key string
@@ -406,7 +438,7 @@ public:
406438
template <typename T=V> inline
407439
typename std::enable_if< std::is_pointer<T>::value, int>::type
408440
Remove(const char* ky) { //return index being removed
409-
uint32_t i=this->get(ky);
441+
khInt_t i=this->get(ky);
410442
if (i!=this->end()) {
411443
GFREE(this->key(i)); //free string copy
412444
if (this->freeItems) delete this->value(i);
@@ -419,7 +451,7 @@ public:
419451
template <typename T=V> inline
420452
typename std::enable_if< !std::is_pointer<T>::value, int>::type
421453
Remove(const char* ky) { //return index being removed
422-
uint32_t i=this->get(ky);
454+
khInt_t i=this->get(ky);
423455
if (i!=this->end()) {
424456
GFREE(this->key(i)); //free string copy
425457
this->del(i);
@@ -431,8 +463,8 @@ public:
431463
template <typename T=V> inline
432464
typename std::enable_if< std::is_pointer<T>::value, void>::type
433465
Clear() {
434-
int nb=this->n_buckets();
435-
for (int i = 0; i != nb; ++i) {
466+
khInt_t nb=this->n_buckets();
467+
for (khInt_t i = 0; i != nb; ++i) {
436468
if (!this->__kh_used(this->used, i)) continue;
437469
if (this->freeItems) delete this->value(i);
438470
GFREE(this->key(i));
@@ -443,8 +475,8 @@ public:
443475
template <typename T=V> inline
444476
typename std::enable_if< !std::is_pointer<T>::value, void>::type
445477
Clear() {
446-
int nb=this->n_buckets();
447-
for (int i = 0; i != nb; ++i) {
478+
khInt_t nb=this->n_buckets();
479+
for (khInt_t i = 0; i != nb; ++i) {
448480
if (!this->__kh_used(this->used, i)) continue;
449481
GFREE(this->key(i));
450482
}

prep_source.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ gldir=stringtie-$ver/gclib/
2525
cp Makefile LICENSE README.md run_tests.sh stringtie.cpp prepDE.py {rlink,tablemaker,tmerge}.{h,cpp} $pack/
2626
cp -r samtools-0.1.18 $pack/
2727
/bin/rm -rf $pack/samtools-0.1.18/.svn
28-
cp ./gclib/{GVec,GList,GHash,khashl,GHashMap}.hh ./gclib/GBitVec.h $gldir
28+
cp ./gclib/{GVec,GList,khashl,GHashMap}.hh ./gclib/GBitVec.h ./gclib/xxhash.h $gldir
2929
cp ./gclib/{GArgs,GStr,GBam,GBase,gdna,codons,gff,GFaSeqGet,GFastaIndex,proc_mem,GThreads,city}.{h,cpp} $gldir
3030
tar cvfz $pack.tar.gz $pack
3131
ls -l $pack.tar.gz

rlink.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#include "GHashMap.hh"
1111

1212
template<typename T>
13-
using GIntHash = GHashMap<int, T>;
13+
using GIntHash = GHashMap<int, T, GHashKey_xxH32<int>, GHashKey_Eq<int>, uint32_t>;
1414

1515
#define MAX_NODE 1000000
1616
#define KMER 31

0 commit comments

Comments
 (0)