1 #ifndef NU_MPH_H
2 #define NU_MPH_H
3
4 /* Intentionally undocumented
5 *
6 * http://iswsa.acm.org/mphf/index.html
7 */
8
9 #include <stdint.h>
10 #include <sys/types.h>
11
12 #include <libnu/config.h>
13
14 #if defined (__cplusplus) || defined (c_plusplus)
15 extern "C" {
16 #endif
17
18 #ifdef NU_WITH_UDB
19
20 /* those need to be the same values as used in MPH generation */
21 #define PRIME 0x01000193
22
23 /** Calculate G offset from codepoint
24 */
25 static inline
_nu_hash(uint32_t hash,uint32_t codepoint)26 uint32_t _nu_hash(uint32_t hash, uint32_t codepoint) {
27 if (hash == 0) {
28 hash = PRIME;
29 }
30
31 return hash ^ codepoint;
32 }
33
34 /** Get hash value of Unicode codepoint
35 */
36 static inline
nu_mph_hash(const int16_t * G,size_t G_SIZE,uint32_t codepoint)37 uint32_t nu_mph_hash(const int16_t *G, size_t G_SIZE,
38 uint32_t codepoint) {
39
40 uint32_t h = _nu_hash(0, codepoint);
41 int16_t offset = G[h % G_SIZE];
42 if (offset < 0) {
43 return (uint32_t)(-offset - 1);
44 }
45 return (_nu_hash(offset, codepoint) % G_SIZE);
46 }
47
48 /** Lookup value in MPH
49 */
50 static inline
nu_mph_lookup(const uint32_t * V_C,const uint16_t * V_I,uint32_t codepoint,uint32_t hash)51 uint32_t nu_mph_lookup(const uint32_t *V_C, const uint16_t *V_I,
52 uint32_t codepoint, uint32_t hash) {
53
54 const uint32_t *c = (V_C + hash);
55 const uint16_t *i = (V_I + hash);
56
57 /* due to nature of minimal perfect hash, it will always
58 * produce collision for codepoints outside of MPH original set.
59 * thus VALUES_C contain original codepoint to check if
60 * collision occurred */
61
62 return (*c != codepoint ? 0 : *i);
63 }
64
65 #endif /* NU_WITH_UDB */
66
67 #if defined (__cplusplus) || defined (c_plusplus)
68 }
69 #endif
70
71 #endif /* NU_MPH_H */
72