Post Scarcity
A prototype for a post scarcity programming environment
Loading...
Searching...
No Matches
lookup3.c
Go to the documentation of this file.
1/*
2-------------------------------------------------------------------------------
3lookup3.c, by Bob Jenkins, May 2006, Public Domain.
4
5These are functions for producing 32-bit hashes for hash table lookup.
6hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
7are externally useful functions. Routines to test the hash are included
8if SELF_TEST is defined. You can use this free for any purpose. It's in
9the public domain. It has no warranty.
10
11You probably want to use hashlittle(). hashlittle() and hashbig()
12hash byte arrays. hashlittle() is is faster than hashbig() on
13little-endian machines. Intel and AMD are little-endian machines.
14On second thought, you probably want hashlittle2(), which is identical to
15hashlittle() except it returns two 32-bit hashes for the price of one.
16You could implement hashbig2() if you wanted but I haven't bothered here.
17
18If you want to find a hash of, say, exactly 7 integers, do
19 a = i1; b = i2; c = i3;
20 mix(a,b,c);
21 a += i4; b += i5; c += i6;
22 mix(a,b,c);
23 a += i7;
24 final(a,b,c);
25then use c as the hash value. If you have a variable length array of
264-byte integers to hash, use hashword(). If you have a byte array (like
27a character string), use hashlittle(). If you have several byte arrays, or
28a mix of things, see the comments above hashlittle().
29
30Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
31then mix those integers. This is fast (you can do a lot more thorough
32mixing with 12*3 instructions on 3 integers than you can with 3 instructions
33on 1 byte), but shoehorning those bytes into integers efficiently is messy.
34-------------------------------------------------------------------------------
35*/
36// #define SELF_TEST 1
37
38#include <stdio.h> /* defines printf for tests */
39#include <time.h> /* defines time_t for timings in the test */
40#include <stdint.h> /* defines uint32_t etc */
41#include <sys/param.h> /* attempt to define endianness */
42#ifdef linux
43#include <endian.h> /* attempt to define endianness */
44#endif
45
46/*
47 * My best guess at if you are big-endian or little-endian. This may
48 * need adjustment.
49 */
50#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
51 __BYTE_ORDER == __LITTLE_ENDIAN) || \
52 (defined(i386) || defined(__i386__) || defined(__i486__) || \
53 defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
54#define HASH_LITTLE_ENDIAN 1
55#define HASH_BIG_ENDIAN 0
56#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
57 __BYTE_ORDER == __BIG_ENDIAN) || \
58 (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
59#define HASH_LITTLE_ENDIAN 0
60#define HASH_BIG_ENDIAN 1
61#else
62#define HASH_LITTLE_ENDIAN 0
63#define HASH_BIG_ENDIAN 0
64#endif
65
66#define hashsize(n) ((uint32_t)1<<(n))
67#define hashmask(n) (hashsize(n)-1)
68#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
69
70/*
71-------------------------------------------------------------------------------
72mix -- mix 3 32-bit values reversibly.
73
74This is reversible, so any information in (a,b,c) before mix() is
75still in (a,b,c) after mix().
76
77If four pairs of (a,b,c) inputs are run through mix(), or through
78mix() in reverse, there are at least 32 bits of the output that
79are sometimes the same for one pair and different for another pair.
80This was tested for:
81* pairs that differed by one bit, by two bits, in any combination
82 of top bits of (a,b,c), or in any combination of bottom bits of
83 (a,b,c).
84* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
85 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
86 is commonly produced by subtraction) look like a single 1-bit
87 difference.
88* the base values were pseudorandom, all zero but one bit set, or
89 all zero plus a counter that starts at zero.
90
91Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
92satisfy this are
93 4 6 8 16 19 4
94 9 15 3 18 27 15
95 14 9 3 7 17 3
96Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
97for "differ" defined as + with a one-bit base and a two-bit delta. I
98used http://burtleburtle.net/bob/hash/avalanche.html to choose
99the operations, constants, and arrangements of the variables.
100
101This does not achieve avalanche. There are input bits of (a,b,c)
102that fail to affect some output bits of (a,b,c), especially of a. The
103most thoroughly mixed value is c, but it doesn't really even achieve
104avalanche in c.
105
106This allows some parallelism. Read-after-writes are good at doubling
107the number of bits affected, so the goal of mixing pulls in the opposite
108direction as the goal of parallelism. I did what I could. Rotates
109seem to cost as much as shifts on every machine I could lay my hands
110on, and rotates are much kinder to the top and bottom bits, so I used
111rotates.
112-------------------------------------------------------------------------------
113*/
114#define mix(a,b,c) \
115{ \
116 a -= c; a ^= rot(c, 4); c += b; \
117 b -= a; b ^= rot(a, 6); a += c; \
118 c -= b; c ^= rot(b, 8); b += a; \
119 a -= c; a ^= rot(c,16); c += b; \
120 b -= a; b ^= rot(a,19); a += c; \
121 c -= b; c ^= rot(b, 4); b += a; \
122}
123
124/*
125-------------------------------------------------------------------------------
126final -- final mixing of 3 32-bit values (a,b,c) into c
127
128Pairs of (a,b,c) values differing in only a few bits will usually
129produce values of c that look totally different. This was tested for
130* pairs that differed by one bit, by two bits, in any combination
131 of top bits of (a,b,c), or in any combination of bottom bits of
132 (a,b,c).
133* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
134 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
135 is commonly produced by subtraction) look like a single 1-bit
136 difference.
137* the base values were pseudorandom, all zero but one bit set, or
138 all zero plus a counter that starts at zero.
139
140These constants passed:
141 14 11 25 16 4 14 24
142 12 14 25 16 4 14 24
143and these came close:
144 4 8 15 26 3 22 24
145 10 8 15 26 3 22 24
146 11 8 15 26 3 22 24
147-------------------------------------------------------------------------------
148*/
149#define final(a,b,c) \
150{ \
151 c ^= b; c -= rot(b,14); \
152 a ^= c; a -= rot(c,11); \
153 b ^= a; b -= rot(a,25); \
154 c ^= b; c -= rot(b,16); \
155 a ^= c; a -= rot(c,4); \
156 b ^= a; b -= rot(a,14); \
157 c ^= b; c -= rot(b,24); \
158}
159
160/*
161--------------------------------------------------------------------
162 This works on all machines. To be useful, it requires
163 -- that the key be an array of uint32_t's, and
164 -- that the length be the number of uint32_t's in the key
165
166 The function hashword() is identical to hashlittle() on little-endian
167 machines, and identical to hashbig() on big-endian machines,
168 except that the length has to be measured in uint32_ts rather than in
169 bytes. hashlittle() is more complicated than hashword() only because
170 hashlittle() has to dance around fitting the key bytes into registers.
171--------------------------------------------------------------------
172*/
173uint32_t hashword( const uint32_t *k, /* the key, an array of uint32_t values */
174 size_t length, /* the length of the key, in uint32_ts */
175 uint32_t initval ) { /* the previous hash, or an arbitrary value */
176 uint32_t a, b, c;
177
178 /* Set up the internal state */
179 a = b = c = 0xdeadbeef + ( ( ( uint32_t ) length ) << 2 ) + initval;
180
181 /*------------------------------------------------- handle most of the key */
182 while ( length > 3 ) {
183 a += k[0];
184 b += k[1];
185 c += k[2];
186 mix( a, b, c );
187 length -= 3;
188 k += 3;
189 }
190
191 /*------------------------------------------- handle the last 3 uint32_t's */
192 switch ( length ) { /* all the case statements fall through */
193 case 3:
194 c += k[2];
195 case 2:
196 b += k[1];
197 case 1:
198 a += k[0];
199 final( a, b, c );
200 case 0: /* case 0: nothing left to add */
201 break;
202 }
203 /*------------------------------------------------------ report the result */
204 return c;
205}
206
207
208/*
209--------------------------------------------------------------------
210hashword2() -- same as hashword(), but take two seeds and return two
21132-bit values. pc and pb must both be nonnull, and *pc and *pb must
212both be initialized with seeds. If you pass in (*pb)==0, the output
213(*pc) will be the same as the return value from hashword().
214--------------------------------------------------------------------
215*/
216void hashword2( const uint32_t *k, /* the key, an array of uint32_t values */
217 size_t length, /* the length of the key, in uint32_ts */
218 uint32_t *pc, /* IN: seed OUT: primary hash value */
219 uint32_t *pb ) { /* IN: more seed OUT: secondary hash value */
220 uint32_t a, b, c;
221
222 /* Set up the internal state */
223 a = b = c = 0xdeadbeef + ( ( uint32_t ) ( length << 2 ) ) + *pc;
224 c += *pb;
225
226 /*------------------------------------------------- handle most of the key */
227 while ( length > 3 ) {
228 a += k[0];
229 b += k[1];
230 c += k[2];
231 mix( a, b, c );
232 length -= 3;
233 k += 3;
234 }
235
236 /*------------------------------------------- handle the last 3 uint32_t's */
237 switch ( length ) { /* all the case statements fall through */
238 case 3:
239 c += k[2];
240 case 2:
241 b += k[1];
242 case 1:
243 a += k[0];
244 final( a, b, c );
245 case 0: /* case 0: nothing left to add */
246 break;
247 }
248 /*------------------------------------------------------ report the result */
249 *pc = c;
250 *pb = b;
251}
252
253
254/*
255-------------------------------------------------------------------------------
256hashlittle() -- hash a variable-length key into a 32-bit value
257 k : the key (the unaligned variable-length array of bytes)
258 length : the length of the key, counting by bytes
259 initval : can be any 4-byte value
260Returns a 32-bit value. Every bit of the key affects every bit of
261the return value. Two keys differing by one or two bits will have
262totally different hash values.
263
264The best hash table sizes are powers of 2. There is no need to do
265mod a prime (mod is sooo slow!). If you need less than 32 bits,
266use a bitmask. For example, if you need only 10 bits, do
267 h = (h & hashmask(10));
268In which case, the hash table should have hashsize(10) elements.
269
270If you are hashing n strings (uint8_t **)k, do it like this:
271 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
272
273By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
274code any way you wish, private, educational, or commercial. It's free.
275
276Use for hash table lookup, or anything where one collision in 2^^32 is
277acceptable. Do NOT use for cryptographic purposes.
278-------------------------------------------------------------------------------
279*/
280
281uint32_t hashlittle( const void *key, size_t length, uint32_t initval ) {
282 uint32_t a, b, c; /* internal state */
283 union {
284 const void *ptr;
285 size_t i;
286 } u; /* needed for Mac Powerbook G4 */
287
288 /* Set up the internal state */
289 a = b = c = 0xdeadbeef + ( ( uint32_t ) length ) + initval;
290
291 u.ptr = key;
292 if ( HASH_LITTLE_ENDIAN && ( ( u.i & 0x3 ) == 0 ) ) {
293 const uint32_t *k = ( const uint32_t * ) key; /* read 32-bit chunks */
294 const uint8_t *k8;
295
296 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
297 while ( length > 12 ) {
298 a += k[0];
299 b += k[1];
300 c += k[2];
301 mix( a, b, c );
302 length -= 12;
303 k += 3;
304 }
305
306 /*----------------------------- handle the last (probably partial) block */
307 /*
308 * "k[2]&0xffffff" actually reads beyond the end of the string, but
309 * then masks off the part it's not allowed to read. Because the
310 * string is aligned, the masked-off tail is in the same word as the
311 * rest of the string. Every machine with memory protection I've seen
312 * does it on word boundaries, so is OK with this. But VALGRIND will
313 * still catch it and complain. The masking trick does make the hash
314 * noticably faster for short strings (like English words).
315 */
316#ifndef VALGRIND
317
318 switch ( length ) {
319 case 12:
320 c += k[2];
321 b += k[1];
322 a += k[0];
323 break;
324 case 11:
325 c += k[2] & 0xffffff;
326 b += k[1];
327 a += k[0];
328 break;
329 case 10:
330 c += k[2] & 0xffff;
331 b += k[1];
332 a += k[0];
333 break;
334 case 9:
335 c += k[2] & 0xff;
336 b += k[1];
337 a += k[0];
338 break;
339 case 8:
340 b += k[1];
341 a += k[0];
342 break;
343 case 7:
344 b += k[1] & 0xffffff;
345 a += k[0];
346 break;
347 case 6:
348 b += k[1] & 0xffff;
349 a += k[0];
350 break;
351 case 5:
352 b += k[1] & 0xff;
353 a += k[0];
354 break;
355 case 4:
356 a += k[0];
357 break;
358 case 3:
359 a += k[0] & 0xffffff;
360 break;
361 case 2:
362 a += k[0] & 0xffff;
363 break;
364 case 1:
365 a += k[0] & 0xff;
366 break;
367 case 0:
368 return c; /* zero length strings require no mixing */
369 }
370
371#else /* make valgrind happy */
372
373 k8 = ( const uint8_t * ) k;
374 switch ( length ) {
375 case 12:
376 c += k[2];
377 b += k[1];
378 a += k[0];
379 break;
380 case 11:
381 c += ( ( uint32_t ) k8[10] ) << 16; /* fall through */
382 case 10:
383 c += ( ( uint32_t ) k8[9] ) << 8; /* fall through */
384 case 9:
385 c += k8[8]; /* fall through */
386 case 8:
387 b += k[1];
388 a += k[0];
389 break;
390 case 7:
391 b += ( ( uint32_t ) k8[6] ) << 16; /* fall through */
392 case 6:
393 b += ( ( uint32_t ) k8[5] ) << 8; /* fall through */
394 case 5:
395 b += k8[4]; /* fall through */
396 case 4:
397 a += k[0];
398 break;
399 case 3:
400 a += ( ( uint32_t ) k8[2] ) << 16; /* fall through */
401 case 2:
402 a += ( ( uint32_t ) k8[1] ) << 8; /* fall through */
403 case 1:
404 a += k8[0];
405 break;
406 case 0:
407 return c;
408 }
409
410#endif /* !valgrind */
411
412 } else if ( HASH_LITTLE_ENDIAN && ( ( u.i & 0x1 ) == 0 ) ) {
413 const uint16_t *k = ( const uint16_t * ) key; /* read 16-bit chunks */
414 const uint8_t *k8;
415
416 /*--------------- all but last block: aligned reads and different mixing */
417 while ( length > 12 ) {
418 a += k[0] + ( ( ( uint32_t ) k[1] ) << 16 );
419 b += k[2] + ( ( ( uint32_t ) k[3] ) << 16 );
420 c += k[4] + ( ( ( uint32_t ) k[5] ) << 16 );
421 mix( a, b, c );
422 length -= 12;
423 k += 6;
424 }
425
426 /*----------------------------- handle the last (probably partial) block */
427 k8 = ( const uint8_t * ) k;
428 switch ( length ) {
429 case 12:
430 c += k[4] + ( ( ( uint32_t ) k[5] ) << 16 );
431 b += k[2] + ( ( ( uint32_t ) k[3] ) << 16 );
432 a += k[0] + ( ( ( uint32_t ) k[1] ) << 16 );
433 break;
434 case 11:
435 c += ( ( uint32_t ) k8[10] ) << 16; /* fall through */
436 case 10:
437 c += k[4];
438 b += k[2] + ( ( ( uint32_t ) k[3] ) << 16 );
439 a += k[0] + ( ( ( uint32_t ) k[1] ) << 16 );
440 break;
441 case 9:
442 c += k8[8]; /* fall through */
443 case 8:
444 b += k[2] + ( ( ( uint32_t ) k[3] ) << 16 );
445 a += k[0] + ( ( ( uint32_t ) k[1] ) << 16 );
446 break;
447 case 7:
448 b += ( ( uint32_t ) k8[6] ) << 16; /* fall through */
449 case 6:
450 b += k[2];
451 a += k[0] + ( ( ( uint32_t ) k[1] ) << 16 );
452 break;
453 case 5:
454 b += k8[4]; /* fall through */
455 case 4:
456 a += k[0] + ( ( ( uint32_t ) k[1] ) << 16 );
457 break;
458 case 3:
459 a += ( ( uint32_t ) k8[2] ) << 16; /* fall through */
460 case 2:
461 a += k[0];
462 break;
463 case 1:
464 a += k8[0];
465 break;
466 case 0:
467 return c; /* zero length requires no mixing */
468 }
469
470 } else { /* need to read the key one byte at a time */
471 const uint8_t *k = ( const uint8_t * ) key;
472
473 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
474 while ( length > 12 ) {
475 a += k[0];
476 a += ( ( uint32_t ) k[1] ) << 8;
477 a += ( ( uint32_t ) k[2] ) << 16;
478 a += ( ( uint32_t ) k[3] ) << 24;
479 b += k[4];
480 b += ( ( uint32_t ) k[5] ) << 8;
481 b += ( ( uint32_t ) k[6] ) << 16;
482 b += ( ( uint32_t ) k[7] ) << 24;
483 c += k[8];
484 c += ( ( uint32_t ) k[9] ) << 8;
485 c += ( ( uint32_t ) k[10] ) << 16;
486 c += ( ( uint32_t ) k[11] ) << 24;
487 mix( a, b, c );
488 length -= 12;
489 k += 12;
490 }
491
492 /*-------------------------------- last block: affect all 32 bits of (c) */
493 switch ( length ) { /* all the case statements fall through */
494 case 12:
495 c += ( ( uint32_t ) k[11] ) << 24;
496 case 11:
497 c += ( ( uint32_t ) k[10] ) << 16;
498 case 10:
499 c += ( ( uint32_t ) k[9] ) << 8;
500 case 9:
501 c += k[8];
502 case 8:
503 b += ( ( uint32_t ) k[7] ) << 24;
504 case 7:
505 b += ( ( uint32_t ) k[6] ) << 16;
506 case 6:
507 b += ( ( uint32_t ) k[5] ) << 8;
508 case 5:
509 b += k[4];
510 case 4:
511 a += ( ( uint32_t ) k[3] ) << 24;
512 case 3:
513 a += ( ( uint32_t ) k[2] ) << 16;
514 case 2:
515 a += ( ( uint32_t ) k[1] ) << 8;
516 case 1:
517 a += k[0];
518 break;
519 case 0:
520 return c;
521 }
522 }
523
524 final( a, b, c );
525 return c;
526}
527
528
529/*
530 * hashlittle2: return 2 32-bit hash values
531 *
532 * This is identical to hashlittle(), except it returns two 32-bit hash
533 * values instead of just one. This is good enough for hash table
534 * lookup with 2^^64 buckets, or if you want a second hash if you're not
535 * happy with the first, or if you want a probably-unique 64-bit ID for
536 * the key. *pc is better mixed than *pb, so use *pc first. If you want
537 * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
538 */
539void hashlittle2( const void *key, /* the key to hash */
540 size_t length, /* length of the key */
541 uint32_t *pc, /* IN: primary initval, OUT: primary hash */
542 uint32_t *pb ) { /* IN: secondary initval, OUT: secondary hash */
543 uint32_t a, b, c; /* internal state */
544 union {
545 const void *ptr;
546 size_t i;
547 } u; /* needed for Mac Powerbook G4 */
548
549 /* Set up the internal state */
550 a = b = c = 0xdeadbeef + ( ( uint32_t ) length ) + *pc;
551 c += *pb;
552
553 u.ptr = key;
554 if ( HASH_LITTLE_ENDIAN && ( ( u.i & 0x3 ) == 0 ) ) {
555 const uint32_t *k = ( const uint32_t * ) key; /* read 32-bit chunks */
556 const uint8_t *k8;
557
558 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
559 while ( length > 12 ) {
560 a += k[0];
561 b += k[1];
562 c += k[2];
563 mix( a, b, c );
564 length -= 12;
565 k += 3;
566 }
567
568 /*----------------------------- handle the last (probably partial) block */
569 /*
570 * "k[2]&0xffffff" actually reads beyond the end of the string, but
571 * then masks off the part it's not allowed to read. Because the
572 * string is aligned, the masked-off tail is in the same word as the
573 * rest of the string. Every machine with memory protection I've seen
574 * does it on word boundaries, so is OK with this. But VALGRIND will
575 * still catch it and complain. The masking trick does make the hash
576 * noticably faster for short strings (like English words).
577 */
578#ifndef VALGRIND
579
580 switch ( length ) {
581 case 12:
582 c += k[2];
583 b += k[1];
584 a += k[0];
585 break;
586 case 11:
587 c += k[2] & 0xffffff;
588 b += k[1];
589 a += k[0];
590 break;
591 case 10:
592 c += k[2] & 0xffff;
593 b += k[1];
594 a += k[0];
595 break;
596 case 9:
597 c += k[2] & 0xff;
598 b += k[1];
599 a += k[0];
600 break;
601 case 8:
602 b += k[1];
603 a += k[0];
604 break;
605 case 7:
606 b += k[1] & 0xffffff;
607 a += k[0];
608 break;
609 case 6:
610 b += k[1] & 0xffff;
611 a += k[0];
612 break;
613 case 5:
614 b += k[1] & 0xff;
615 a += k[0];
616 break;
617 case 4:
618 a += k[0];
619 break;
620 case 3:
621 a += k[0] & 0xffffff;
622 break;
623 case 2:
624 a += k[0] & 0xffff;
625 break;
626 case 1:
627 a += k[0] & 0xff;
628 break;
629 case 0:
630 *pc = c;
631 *pb = b;
632 return; /* zero length strings require no mixing */
633 }
634
635#else /* make valgrind happy */
636
637 k8 = ( const uint8_t * ) k;
638 switch ( length ) {
639 case 12:
640 c += k[2];
641 b += k[1];
642 a += k[0];
643 break;
644 case 11:
645 c += ( ( uint32_t ) k8[10] ) << 16; /* fall through */
646 case 10:
647 c += ( ( uint32_t ) k8[9] ) << 8; /* fall through */
648 case 9:
649 c += k8[8]; /* fall through */
650 case 8:
651 b += k[1];
652 a += k[0];
653 break;
654 case 7:
655 b += ( ( uint32_t ) k8[6] ) << 16; /* fall through */
656 case 6:
657 b += ( ( uint32_t ) k8[5] ) << 8; /* fall through */
658 case 5:
659 b += k8[4]; /* fall through */
660 case 4:
661 a += k[0];
662 break;
663 case 3:
664 a += ( ( uint32_t ) k8[2] ) << 16; /* fall through */
665 case 2:
666 a += ( ( uint32_t ) k8[1] ) << 8; /* fall through */
667 case 1:
668 a += k8[0];
669 break;
670 case 0:
671 *pc = c;
672 *pb = b;
673 return; /* zero length strings require no mixing */
674 }
675
676#endif /* !valgrind */
677
678 } else if ( HASH_LITTLE_ENDIAN && ( ( u.i & 0x1 ) == 0 ) ) {
679 const uint16_t *k = ( const uint16_t * ) key; /* read 16-bit chunks */
680 const uint8_t *k8;
681
682 /*--------------- all but last block: aligned reads and different mixing */
683 while ( length > 12 ) {
684 a += k[0] + ( ( ( uint32_t ) k[1] ) << 16 );
685 b += k[2] + ( ( ( uint32_t ) k[3] ) << 16 );
686 c += k[4] + ( ( ( uint32_t ) k[5] ) << 16 );
687 mix( a, b, c );
688 length -= 12;
689 k += 6;
690 }
691
692 /*----------------------------- handle the last (probably partial) block */
693 k8 = ( const uint8_t * ) k;
694 switch ( length ) {
695 case 12:
696 c += k[4] + ( ( ( uint32_t ) k[5] ) << 16 );
697 b += k[2] + ( ( ( uint32_t ) k[3] ) << 16 );
698 a += k[0] + ( ( ( uint32_t ) k[1] ) << 16 );
699 break;
700 case 11:
701 c += ( ( uint32_t ) k8[10] ) << 16; /* fall through */
702 case 10:
703 c += k[4];
704 b += k[2] + ( ( ( uint32_t ) k[3] ) << 16 );
705 a += k[0] + ( ( ( uint32_t ) k[1] ) << 16 );
706 break;
707 case 9:
708 c += k8[8]; /* fall through */
709 case 8:
710 b += k[2] + ( ( ( uint32_t ) k[3] ) << 16 );
711 a += k[0] + ( ( ( uint32_t ) k[1] ) << 16 );
712 break;
713 case 7:
714 b += ( ( uint32_t ) k8[6] ) << 16; /* fall through */
715 case 6:
716 b += k[2];
717 a += k[0] + ( ( ( uint32_t ) k[1] ) << 16 );
718 break;
719 case 5:
720 b += k8[4]; /* fall through */
721 case 4:
722 a += k[0] + ( ( ( uint32_t ) k[1] ) << 16 );
723 break;
724 case 3:
725 a += ( ( uint32_t ) k8[2] ) << 16; /* fall through */
726 case 2:
727 a += k[0];
728 break;
729 case 1:
730 a += k8[0];
731 break;
732 case 0:
733 *pc = c;
734 *pb = b;
735 return; /* zero length strings require no mixing */
736 }
737
738 } else { /* need to read the key one byte at a time */
739 const uint8_t *k = ( const uint8_t * ) key;
740
741 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
742 while ( length > 12 ) {
743 a += k[0];
744 a += ( ( uint32_t ) k[1] ) << 8;
745 a += ( ( uint32_t ) k[2] ) << 16;
746 a += ( ( uint32_t ) k[3] ) << 24;
747 b += k[4];
748 b += ( ( uint32_t ) k[5] ) << 8;
749 b += ( ( uint32_t ) k[6] ) << 16;
750 b += ( ( uint32_t ) k[7] ) << 24;
751 c += k[8];
752 c += ( ( uint32_t ) k[9] ) << 8;
753 c += ( ( uint32_t ) k[10] ) << 16;
754 c += ( ( uint32_t ) k[11] ) << 24;
755 mix( a, b, c );
756 length -= 12;
757 k += 12;
758 }
759
760 /*-------------------------------- last block: affect all 32 bits of (c) */
761 switch ( length ) { /* all the case statements fall through */
762 case 12:
763 c += ( ( uint32_t ) k[11] ) << 24;
764 case 11:
765 c += ( ( uint32_t ) k[10] ) << 16;
766 case 10:
767 c += ( ( uint32_t ) k[9] ) << 8;
768 case 9:
769 c += k[8];
770 case 8:
771 b += ( ( uint32_t ) k[7] ) << 24;
772 case 7:
773 b += ( ( uint32_t ) k[6] ) << 16;
774 case 6:
775 b += ( ( uint32_t ) k[5] ) << 8;
776 case 5:
777 b += k[4];
778 case 4:
779 a += ( ( uint32_t ) k[3] ) << 24;
780 case 3:
781 a += ( ( uint32_t ) k[2] ) << 16;
782 case 2:
783 a += ( ( uint32_t ) k[1] ) << 8;
784 case 1:
785 a += k[0];
786 break;
787 case 0:
788 *pc = c;
789 *pb = b;
790 return; /* zero length strings require no mixing */
791 }
792 }
793
794 final( a, b, c );
795 *pc = c;
796 *pb = b;
797}
798
799
800
801/*
802 * hashbig():
803 * This is the same as hashword() on big-endian machines. It is different
804 * from hashlittle() on all machines. hashbig() takes advantage of
805 * big-endian byte ordering.
806 */
807uint32_t hashbig( const void *key, size_t length, uint32_t initval ) {
808 uint32_t a, b, c;
809 union {
810 const void *ptr;
811 size_t i;
812 } u; /* to cast key to (size_t) happily */
813
814 /* Set up the internal state */
815 a = b = c = 0xdeadbeef + ( ( uint32_t ) length ) + initval;
816
817 u.ptr = key;
818 if ( HASH_BIG_ENDIAN && ( ( u.i & 0x3 ) == 0 ) ) {
819 const uint32_t *k = ( const uint32_t * ) key; /* read 32-bit chunks */
820 const uint8_t *k8;
821
822 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
823 while ( length > 12 ) {
824 a += k[0];
825 b += k[1];
826 c += k[2];
827 mix( a, b, c );
828 length -= 12;
829 k += 3;
830 }
831
832 /*----------------------------- handle the last (probably partial) block */
833 /*
834 * "k[2]<<8" actually reads beyond the end of the string, but
835 * then shifts out the part it's not allowed to read. Because the
836 * string is aligned, the illegal read is in the same word as the
837 * rest of the string. Every machine with memory protection I've seen
838 * does it on word boundaries, so is OK with this. But VALGRIND will
839 * still catch it and complain. The masking trick does make the hash
840 * noticably faster for short strings (like English words).
841 */
842#ifndef VALGRIND
843
844 switch ( length ) {
845 case 12:
846 c += k[2];
847 b += k[1];
848 a += k[0];
849 break;
850 case 11:
851 c += k[2] & 0xffffff00;
852 b += k[1];
853 a += k[0];
854 break;
855 case 10:
856 c += k[2] & 0xffff0000;
857 b += k[1];
858 a += k[0];
859 break;
860 case 9:
861 c += k[2] & 0xff000000;
862 b += k[1];
863 a += k[0];
864 break;
865 case 8:
866 b += k[1];
867 a += k[0];
868 break;
869 case 7:
870 b += k[1] & 0xffffff00;
871 a += k[0];
872 break;
873 case 6:
874 b += k[1] & 0xffff0000;
875 a += k[0];
876 break;
877 case 5:
878 b += k[1] & 0xff000000;
879 a += k[0];
880 break;
881 case 4:
882 a += k[0];
883 break;
884 case 3:
885 a += k[0] & 0xffffff00;
886 break;
887 case 2:
888 a += k[0] & 0xffff0000;
889 break;
890 case 1:
891 a += k[0] & 0xff000000;
892 break;
893 case 0:
894 return c; /* zero length strings require no mixing */
895 }
896
897#else /* make valgrind happy */
898
899 k8 = ( const uint8_t * ) k;
900 switch ( length ) { /* all the case statements fall through */
901 case 12:
902 c += k[2];
903 b += k[1];
904 a += k[0];
905 break;
906 case 11:
907 c += ( ( uint32_t ) k8[10] ) << 8; /* fall through */
908 case 10:
909 c += ( ( uint32_t ) k8[9] ) << 16; /* fall through */
910 case 9:
911 c += ( ( uint32_t ) k8[8] ) << 24; /* fall through */
912 case 8:
913 b += k[1];
914 a += k[0];
915 break;
916 case 7:
917 b += ( ( uint32_t ) k8[6] ) << 8; /* fall through */
918 case 6:
919 b += ( ( uint32_t ) k8[5] ) << 16; /* fall through */
920 case 5:
921 b += ( ( uint32_t ) k8[4] ) << 24; /* fall through */
922 case 4:
923 a += k[0];
924 break;
925 case 3:
926 a += ( ( uint32_t ) k8[2] ) << 8; /* fall through */
927 case 2:
928 a += ( ( uint32_t ) k8[1] ) << 16; /* fall through */
929 case 1:
930 a += ( ( uint32_t ) k8[0] ) << 24;
931 break;
932 case 0:
933 return c;
934 }
935
936#endif /* !VALGRIND */
937
938 } else { /* need to read the key one byte at a time */
939 const uint8_t *k = ( const uint8_t * ) key;
940
941 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
942 while ( length > 12 ) {
943 a += ( ( uint32_t ) k[0] ) << 24;
944 a += ( ( uint32_t ) k[1] ) << 16;
945 a += ( ( uint32_t ) k[2] ) << 8;
946 a += ( ( uint32_t ) k[3] );
947 b += ( ( uint32_t ) k[4] ) << 24;
948 b += ( ( uint32_t ) k[5] ) << 16;
949 b += ( ( uint32_t ) k[6] ) << 8;
950 b += ( ( uint32_t ) k[7] );
951 c += ( ( uint32_t ) k[8] ) << 24;
952 c += ( ( uint32_t ) k[9] ) << 16;
953 c += ( ( uint32_t ) k[10] ) << 8;
954 c += ( ( uint32_t ) k[11] );
955 mix( a, b, c );
956 length -= 12;
957 k += 12;
958 }
959
960 /*-------------------------------- last block: affect all 32 bits of (c) */
961 switch ( length ) { /* all the case statements fall through */
962 case 12:
963 c += k[11];
964 case 11:
965 c += ( ( uint32_t ) k[10] ) << 8;
966 case 10:
967 c += ( ( uint32_t ) k[9] ) << 16;
968 case 9:
969 c += ( ( uint32_t ) k[8] ) << 24;
970 case 8:
971 b += k[7];
972 case 7:
973 b += ( ( uint32_t ) k[6] ) << 8;
974 case 6:
975 b += ( ( uint32_t ) k[5] ) << 16;
976 case 5:
977 b += ( ( uint32_t ) k[4] ) << 24;
978 case 4:
979 a += k[3];
980 case 3:
981 a += ( ( uint32_t ) k[2] ) << 8;
982 case 2:
983 a += ( ( uint32_t ) k[1] ) << 16;
984 case 1:
985 a += ( ( uint32_t ) k[0] ) << 24;
986 break;
987 case 0:
988 return c;
989 }
990 }
991
992 final( a, b, c );
993 return c;
994}
995
996
997#ifdef SELF_TEST
998
999/* used for timings */
1000void driver1( ) {
1001 uint8_t buf[256];
1002 uint32_t i;
1003 uint32_t h = 0;
1004 time_t a, z;
1005
1006 time( &a );
1007 for ( i = 0; i < 256; ++i )
1008 buf[i] = 'x';
1009 for ( i = 0; i < 1; ++i ) {
1010 h = hashlittle( &buf[0], 1, h );
1011 }
1012 time( &z );
1013 if ( z - a > 0 )
1014 printf( "time %d %.8x\n", z - a, h );
1015}
1016
1017/* check that every input bit changes every output bit half the time */
1018#define HASHSTATE 1
1019#define HASHLEN 1
1020#define MAXPAIR 60
1021#define MAXLEN 70
1022void driver2( ) {
1023 uint8_t qa[MAXLEN + 1], qb[MAXLEN + 2], *a = &qa[0], *b = &qb[1];
1024 uint32_t c[HASHSTATE], d[HASHSTATE], i = 0, j = 0, k, l, m = 0, z;
1025 uint32_t e[HASHSTATE], f[HASHSTATE], g[HASHSTATE], h[HASHSTATE];
1026 uint32_t x[HASHSTATE], y[HASHSTATE];
1027 uint32_t hlen;
1028
1029 printf( "No more than %d trials should ever be needed \n", MAXPAIR / 2 );
1030 for ( hlen = 0; hlen < MAXLEN; ++hlen ) {
1031 z = 0;
1032 for ( i = 0; i < hlen; ++i ) {
1033/*----------------------- for each input byte, */
1034 for ( j = 0; j < 8; ++j ) {
1035/*------------------------ for each input bit, */
1036 for ( m = 1; m < 8; ++m ) {
1037/*------------ for serveral possible initvals, */
1038 for ( l = 0; l < HASHSTATE; ++l )
1039 e[l] = f[l] = g[l] = h[l] = x[l] = y[l] =
1040 ~( ( uint32_t ) 0 );
1041
1042 /*---- check that every output bit is affected by that input bit */
1043 for ( k = 0; k < MAXPAIR; k += 2 ) {
1044 uint32_t finished = 1;
1045 /* keys have one bit different */
1046 for ( l = 0; l < hlen + 1; ++l ) {
1047 a[l] = b[l] = ( uint8_t ) 0;
1048 }
1049 /* have a and b be two keys differing in only one bit */
1050 a[i] ^= ( k << j );
1051 a[i] ^= ( k >> ( 8 - j ) );
1052 c[0] = hashlittle( a, hlen, m );
1053 b[i] ^= ( ( k + 1 ) << j );
1054 b[i] ^= ( ( k + 1 ) >> ( 8 - j ) );
1055 d[0] = hashlittle( b, hlen, m );
1056 /* check every bit is 1, 0, set, and not set at least once */
1057 for ( l = 0; l < HASHSTATE; ++l ) {
1058 e[l] &= ( c[l] ^ d[l] );
1059 f[l] &= ~( c[l] ^ d[l] );
1060 g[l] &= c[l];
1061 h[l] &= ~c[l];
1062 x[l] &= d[l];
1063 y[l] &= ~d[l];
1064 if ( e[l] | f[l] | g[l] | h[l] | x[l] | y[l] )
1065 finished = 0;
1066 }
1067 if ( finished )
1068 break;
1069 }
1070 if ( k > z )
1071 z = k;
1072 if ( k == MAXPAIR ) {
1073 printf( "Some bit didn't change: " );
1074 printf( "%.8x %.8x %.8x %.8x %.8x %.8x ",
1075 e[0], f[0], g[0], h[0], x[0], y[0] );
1076 printf( "i %d j %d m %d len %d\n", i, j, m, hlen );
1077 }
1078 if ( z == MAXPAIR )
1079 goto done;
1080 }
1081 }
1082 }
1083 done:
1084 if ( z < MAXPAIR ) {
1085 printf( "Mix success %2d bytes %2d initvals ", i, m );
1086 printf( "required %d trials\n", z / 2 );
1087 }
1088 }
1089 printf( "\n" );
1090}
1091
1092/* Check for reading beyond the end of the buffer and alignment problems */
1093void driver3( ) {
1094 uint8_t buf[MAXLEN + 20], *b;
1095 uint32_t len;
1096 uint8_t q[] =
1097 "This is the time for all good men to come to the aid of their country...";
1098 uint32_t h;
1099 uint8_t qq[] =
1100 "xThis is the time for all good men to come to the aid of their country...";
1101 uint32_t i;
1102 uint8_t qqq[] =
1103 "xxThis is the time for all good men to come to the aid of their country...";
1104 uint32_t j;
1105 uint8_t qqqq[] =
1106 "xxxThis is the time for all good men to come to the aid of their country...";
1107 uint32_t ref, x, y;
1108 uint8_t *p;
1109
1110 printf
1111 ( "Endianness. These lines should all be the same (for values filled in):\n" );
1112 printf
1113 ( "%.8x %.8x %.8x\n",
1114 hashword( ( const uint32_t * ) q, ( sizeof( q ) - 1 ) / 4, 13 ),
1115 hashword( ( const uint32_t * ) q, ( sizeof( q ) - 5 ) / 4, 13 ),
1116 hashword( ( const uint32_t * ) q, ( sizeof( q ) - 9 ) / 4, 13 ) );
1117 p = q;
1118 printf( "%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
1119 hashlittle( p, sizeof( q ) - 1, 13 ), hashlittle( p,
1120 sizeof( q ) - 2,
1121 13 ),
1122 hashlittle( p, sizeof( q ) - 3, 13 ), hashlittle( p,
1123 sizeof( q ) - 4,
1124 13 ),
1125 hashlittle( p, sizeof( q ) - 5, 13 ), hashlittle( p,
1126 sizeof( q ) - 6,
1127 13 ),
1128 hashlittle( p, sizeof( q ) - 7, 13 ), hashlittle( p,
1129 sizeof( q ) - 8,
1130 13 ),
1131 hashlittle( p, sizeof( q ) - 9, 13 ), hashlittle( p,
1132 sizeof( q ) - 10,
1133 13 ),
1134 hashlittle( p, sizeof( q ) - 11, 13 ), hashlittle( p,
1135 sizeof( q ) -
1136 12, 13 ) );
1137 p = &qq[1];
1138 printf( "%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
1139 hashlittle( p, sizeof( q ) - 1, 13 ), hashlittle( p,
1140 sizeof( q ) - 2,
1141 13 ),
1142 hashlittle( p, sizeof( q ) - 3, 13 ), hashlittle( p,
1143 sizeof( q ) - 4,
1144 13 ),
1145 hashlittle( p, sizeof( q ) - 5, 13 ), hashlittle( p,
1146 sizeof( q ) - 6,
1147 13 ),
1148 hashlittle( p, sizeof( q ) - 7, 13 ), hashlittle( p,
1149 sizeof( q ) - 8,
1150 13 ),
1151 hashlittle( p, sizeof( q ) - 9, 13 ), hashlittle( p,
1152 sizeof( q ) - 10,
1153 13 ),
1154 hashlittle( p, sizeof( q ) - 11, 13 ), hashlittle( p,
1155 sizeof( q ) -
1156 12, 13 ) );
1157 p = &qqq[2];
1158 printf( "%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
1159 hashlittle( p, sizeof( q ) - 1, 13 ), hashlittle( p,
1160 sizeof( q ) - 2,
1161 13 ),
1162 hashlittle( p, sizeof( q ) - 3, 13 ), hashlittle( p,
1163 sizeof( q ) - 4,
1164 13 ),
1165 hashlittle( p, sizeof( q ) - 5, 13 ), hashlittle( p,
1166 sizeof( q ) - 6,
1167 13 ),
1168 hashlittle( p, sizeof( q ) - 7, 13 ), hashlittle( p,
1169 sizeof( q ) - 8,
1170 13 ),
1171 hashlittle( p, sizeof( q ) - 9, 13 ), hashlittle( p,
1172 sizeof( q ) - 10,
1173 13 ),
1174 hashlittle( p, sizeof( q ) - 11, 13 ), hashlittle( p,
1175 sizeof( q ) -
1176 12, 13 ) );
1177 p = &qqqq[3];
1178 printf( "%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
1179 hashlittle( p, sizeof( q ) - 1, 13 ), hashlittle( p,
1180 sizeof( q ) - 2,
1181 13 ),
1182 hashlittle( p, sizeof( q ) - 3, 13 ), hashlittle( p,
1183 sizeof( q ) - 4,
1184 13 ),
1185 hashlittle( p, sizeof( q ) - 5, 13 ), hashlittle( p,
1186 sizeof( q ) - 6,
1187 13 ),
1188 hashlittle( p, sizeof( q ) - 7, 13 ), hashlittle( p,
1189 sizeof( q ) - 8,
1190 13 ),
1191 hashlittle( p, sizeof( q ) - 9, 13 ), hashlittle( p,
1192 sizeof( q ) - 10,
1193 13 ),
1194 hashlittle( p, sizeof( q ) - 11, 13 ), hashlittle( p,
1195 sizeof( q ) -
1196 12, 13 ) );
1197 printf( "\n" );
1198
1199 /* check that hashlittle2 and hashlittle produce the same results */
1200 i = 47;
1201 j = 0;
1202 hashlittle2( q, sizeof( q ), &i, &j );
1203 if ( hashlittle( q, sizeof( q ), 47 ) != i )
1204 printf( "hashlittle2 and hashlittle mismatch\n" );
1205
1206 /* check that hashword2 and hashword produce the same results */
1207 len = 0xdeadbeef;
1208 i = 47, j = 0;
1209 hashword2( &len, 1, &i, &j );
1210 if ( hashword( &len, 1, 47 ) != i )
1211 printf( "hashword2 and hashword mismatch %x %x\n",
1212 i, hashword( &len, 1, 47 ) );
1213
1214 /* check hashlittle doesn't read before or after the ends of the string */
1215 for ( h = 0, b = buf + 1; h < 8; ++h, ++b ) {
1216 for ( i = 0; i < MAXLEN; ++i ) {
1217 len = i;
1218 for ( j = 0; j < i; ++j )
1219 *( b + j ) = 0;
1220
1221 /* these should all be equal */
1222 ref = hashlittle( b, len, ( uint32_t ) 1 );
1223 *( b + i ) = ( uint8_t ) ~ 0;
1224 *( b - 1 ) = ( uint8_t ) ~ 0;
1225 x = hashlittle( b, len, ( uint32_t ) 1 );
1226 y = hashlittle( b, len, ( uint32_t ) 1 );
1227 if ( ( ref != x ) || ( ref != y ) ) {
1228 printf( "alignment error: %.8x %.8x %.8x %d %d\n", ref, x, y,
1229 h, i );
1230 }
1231 }
1232 }
1233}
1234
1235/* check for problems with nulls */
1236void driver4( ) {
1237 uint8_t buf[1];
1238 uint32_t h, i, state[HASHSTATE];
1239
1240
1241 buf[0] = ~0;
1242 for ( i = 0; i < HASHSTATE; ++i )
1243 state[i] = 1;
1244 printf( "These should all be different\n" );
1245 for ( i = 0, h = 0; i < 8; ++i ) {
1246 h = hashlittle( buf, 0, h );
1247 printf( "%2ld 0-byte strings, hash is %.8x\n", i, h );
1248 }
1249}
1250
1251void driver5( ) {
1252 uint32_t b, c;
1253 b = 0, c = 0, hashlittle2( "", 0, &c, &b );
1254 printf( "hash is %.8lx %.8lx\n", c, b ); /* deadbeef deadbeef */
1255 b = 0xdeadbeef, c = 0, hashlittle2( "", 0, &c, &b );
1256 printf( "hash is %.8lx %.8lx\n", c, b ); /* bd5b7dde deadbeef */
1257 b = 0xdeadbeef, c = 0xdeadbeef, hashlittle2( "", 0, &c, &b );
1258 printf( "hash is %.8lx %.8lx\n", c, b ); /* 9c093ccd bd5b7dde */
1259 b = 0, c = 0, hashlittle2( "Four score and seven years ago", 30, &c, &b );
1260 printf( "hash is %.8lx %.8lx\n", c, b ); /* 17770551 ce7226e6 */
1261 b = 1, c = 0, hashlittle2( "Four score and seven years ago", 30, &c, &b );
1262 printf( "hash is %.8lx %.8lx\n", c, b ); /* e3607cae bd371de4 */
1263 b = 0, c = 1, hashlittle2( "Four score and seven years ago", 30, &c, &b );
1264 printf( "hash is %.8lx %.8lx\n", c, b ); /* cd628161 6cbea4b3 */
1265 c = hashlittle( "Four score and seven years ago", 30, 0 );
1266 printf( "hash is %.8lx\n", c ); /* 17770551 */
1267 c = hashlittle( "Four score and seven years ago", 30, 1 );
1268 printf( "hash is %.8lx\n", c ); /* cd628161 */
1269}
1270
1271
1272int main( ) {
1273 driver1( ); /* test that the key is hashed: used for timings */
1274 driver2( ); /* test that whole key is hashed thoroughly */
1275 driver3( ); /* test that nothing but the key is hashed */
1276 driver4( ); /* test hashing multiple buffers (all buffers are null) */
1277 driver5( ); /* test the hash against known vectors */
1278 return 1;
1279}
1280
1281#endif /* SELF_TEST */
int main(int argc, char *argv[])
main entry point; parse command line arguments, initialise the environment, and enter the read-eval-p...
Definition init.c:206
uint32_t hashbig(const void *key, size_t length, uint32_t initval)
Definition lookup3.c:807
#define HASH_BIG_ENDIAN
Definition lookup3.c:63
void hashlittle2(const void *key, size_t length, uint32_t *pc, uint32_t *pb)
Definition lookup3.c:539
uint32_t hashword(const uint32_t *k, size_t length, uint32_t initval)
lookup3.h
Definition lookup3.c:173
#define HASH_LITTLE_ENDIAN
Definition lookup3.c:62
uint32_t hashlittle(const void *key, size_t length, uint32_t initval)
Definition lookup3.c:281
void hashword2(const uint32_t *k, size_t length, uint32_t *pc, uint32_t *pb)
Definition lookup3.c:216
#define mix(a, b, c)
Definition lookup3.c:114