1 #ifndef NU_MPH_H
2 #define NU_MPH_H
3 
4 /* Intentionally undocumented
5  *
6  * http://iswsa.acm.org/mphf/index.html
7  */
8 
9 #include <stdint.h>
10 #include <sys/types.h>
11 
12 #include <libnu/config.h>
13 
14 #if defined (__cplusplus) || defined (c_plusplus)
15 extern "C" {
16 #endif
17 
18 #ifdef NU_WITH_UDB
19 
20 /* those need to be the same values as used in MPH generation */
21 #define PRIME        0x01000193
22 
23 /** Calculate G offset from codepoint
24  */
25 static inline
_nu_hash(uint32_t hash,uint32_t codepoint)26 uint32_t _nu_hash(uint32_t hash, uint32_t codepoint) {
27 	if (hash == 0) {
28 		hash = PRIME;
29 	}
30 
31 	return hash ^ codepoint;
32 }
33 
34 /** Get hash value of Unicode codepoint
35  */
36 static inline
nu_mph_hash(const int16_t * G,size_t G_SIZE,uint32_t codepoint)37 uint32_t nu_mph_hash(const int16_t *G, size_t G_SIZE,
38 	uint32_t codepoint) {
39 
40 	uint32_t h = _nu_hash(0, codepoint);
41 	int16_t offset = G[h % G_SIZE];
42 	if (offset < 0) {
43 		return (uint32_t)(-offset - 1);
44 	}
45 	return (_nu_hash(offset, codepoint) % G_SIZE);
46 }
47 
48 /** Lookup value in MPH
49  */
50 static inline
nu_mph_lookup(const uint32_t * V_C,const uint16_t * V_I,uint32_t codepoint,uint32_t hash)51 uint32_t nu_mph_lookup(const uint32_t *V_C, const uint16_t *V_I,
52 	uint32_t codepoint, uint32_t hash) {
53 
54 	const uint32_t *c = (V_C + hash);
55 	const uint16_t *i = (V_I + hash);
56 
57 	/* due to nature of minimal perfect hash, it will always
58 	 * produce collision for codepoints outside of MPH original set.
59 	 * thus VALUES_C contain original codepoint to check if
60 	 * collision occurred */
61 
62 	return (*c != codepoint ? 0 : *i);
63 }
64 
65 #endif /* NU_WITH_UDB */
66 
67 #if defined (__cplusplus) || defined (c_plusplus)
68 }
69 #endif
70 
71 #endif /* NU_MPH_H */
72