xref: /OK3568_Linux_fs/kernel/Documentation/security/siphash.rst (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun===========================
2*4882a593SmuzhiyunSipHash - a short input PRF
3*4882a593Smuzhiyun===========================
4*4882a593Smuzhiyun
5*4882a593Smuzhiyun:Author: Written by Jason A. Donenfeld <jason@zx2c4.com>
6*4882a593Smuzhiyun
7*4882a593SmuzhiyunSipHash is a cryptographically secure PRF -- a keyed hash function -- that
8*4882a593Smuzhiyunperforms very well for short inputs, hence the name. It was designed by
9*4882a593Smuzhiyuncryptographers Daniel J. Bernstein and Jean-Philippe Aumasson. It is intended
10*4882a593Smuzhiyunas a replacement for some uses of: `jhash`, `md5_transform`, `sha1_transform`,
11*4882a593Smuzhiyunand so forth.
12*4882a593Smuzhiyun
13*4882a593SmuzhiyunSipHash takes a secret key filled with randomly generated numbers and either
14*4882a593Smuzhiyunan input buffer or several input integers. It spits out an integer that is
15*4882a593Smuzhiyunindistinguishable from random. You may then use that integer as part of secure
16*4882a593Smuzhiyunsequence numbers, secure cookies, or mask it off for use in a hash table.
17*4882a593Smuzhiyun
18*4882a593SmuzhiyunGenerating a key
19*4882a593Smuzhiyun================
20*4882a593Smuzhiyun
21*4882a593SmuzhiyunKeys should always be generated from a cryptographically secure source of
22*4882a593Smuzhiyunrandom numbers, either using get_random_bytes or get_random_once::
23*4882a593Smuzhiyun
24*4882a593Smuzhiyun	siphash_key_t key;
25*4882a593Smuzhiyun	get_random_bytes(&key, sizeof(key));
26*4882a593Smuzhiyun
27*4882a593SmuzhiyunIf you're not deriving your key from here, you're doing it wrong.
28*4882a593Smuzhiyun
29*4882a593SmuzhiyunUsing the functions
30*4882a593Smuzhiyun===================
31*4882a593Smuzhiyun
32*4882a593SmuzhiyunThere are two variants of the function, one that takes a list of integers, and
33*4882a593Smuzhiyunone that takes a buffer::
34*4882a593Smuzhiyun
35*4882a593Smuzhiyun	u64 siphash(const void *data, size_t len, const siphash_key_t *key);
36*4882a593Smuzhiyun
37*4882a593SmuzhiyunAnd::
38*4882a593Smuzhiyun
39*4882a593Smuzhiyun	u64 siphash_1u64(u64, const siphash_key_t *key);
40*4882a593Smuzhiyun	u64 siphash_2u64(u64, u64, const siphash_key_t *key);
41*4882a593Smuzhiyun	u64 siphash_3u64(u64, u64, u64, const siphash_key_t *key);
42*4882a593Smuzhiyun	u64 siphash_4u64(u64, u64, u64, u64, const siphash_key_t *key);
43*4882a593Smuzhiyun	u64 siphash_1u32(u32, const siphash_key_t *key);
44*4882a593Smuzhiyun	u64 siphash_2u32(u32, u32, const siphash_key_t *key);
45*4882a593Smuzhiyun	u64 siphash_3u32(u32, u32, u32, const siphash_key_t *key);
46*4882a593Smuzhiyun	u64 siphash_4u32(u32, u32, u32, u32, const siphash_key_t *key);
47*4882a593Smuzhiyun
48*4882a593SmuzhiyunIf you pass the generic siphash function something of a constant length, it
49*4882a593Smuzhiyunwill constant fold at compile-time and automatically choose one of the
50*4882a593Smuzhiyunoptimized functions.
51*4882a593Smuzhiyun
52*4882a593SmuzhiyunHashtable key function usage::
53*4882a593Smuzhiyun
54*4882a593Smuzhiyun	struct some_hashtable {
55*4882a593Smuzhiyun		DECLARE_HASHTABLE(hashtable, 8);
56*4882a593Smuzhiyun		siphash_key_t key;
57*4882a593Smuzhiyun	};
58*4882a593Smuzhiyun
59*4882a593Smuzhiyun	void init_hashtable(struct some_hashtable *table)
60*4882a593Smuzhiyun	{
61*4882a593Smuzhiyun		get_random_bytes(&table->key, sizeof(table->key));
62*4882a593Smuzhiyun	}
63*4882a593Smuzhiyun
64*4882a593Smuzhiyun	static inline hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input)
65*4882a593Smuzhiyun	{
66*4882a593Smuzhiyun		return &table->hashtable[siphash(input, sizeof(*input), &table->key) & (HASH_SIZE(table->hashtable) - 1)];
67*4882a593Smuzhiyun	}
68*4882a593Smuzhiyun
69*4882a593SmuzhiyunYou may then iterate like usual over the returned hash bucket.
70*4882a593Smuzhiyun
71*4882a593SmuzhiyunSecurity
72*4882a593Smuzhiyun========
73*4882a593Smuzhiyun
74*4882a593SmuzhiyunSipHash has a very high security margin, with its 128-bit key. So long as the
75*4882a593Smuzhiyunkey is kept secret, it is impossible for an attacker to guess the outputs of
76*4882a593Smuzhiyunthe function, even if being able to observe many outputs, since 2^128 outputs
77*4882a593Smuzhiyunis significant.
78*4882a593Smuzhiyun
79*4882a593SmuzhiyunLinux implements the "2-4" variant of SipHash.
80*4882a593Smuzhiyun
81*4882a593SmuzhiyunStruct-passing Pitfalls
82*4882a593Smuzhiyun=======================
83*4882a593Smuzhiyun
84*4882a593SmuzhiyunOften times the XuY functions will not be large enough, and instead you'll
85*4882a593Smuzhiyunwant to pass a pre-filled struct to siphash. When doing this, it's important
86*4882a593Smuzhiyunto always ensure the struct has no padding holes. The easiest way to do this
87*4882a593Smuzhiyunis to simply arrange the members of the struct in descending order of size,
88*4882a593Smuzhiyunand to use offsetendof() instead of sizeof() for getting the size. For
89*4882a593Smuzhiyunperformance reasons, if possible, it's probably a good thing to align the
90*4882a593Smuzhiyunstruct to the right boundary. Here's an example::
91*4882a593Smuzhiyun
92*4882a593Smuzhiyun	const struct {
93*4882a593Smuzhiyun		struct in6_addr saddr;
94*4882a593Smuzhiyun		u32 counter;
95*4882a593Smuzhiyun		u16 dport;
96*4882a593Smuzhiyun	} __aligned(SIPHASH_ALIGNMENT) combined = {
97*4882a593Smuzhiyun		.saddr = *(struct in6_addr *)saddr,
98*4882a593Smuzhiyun		.counter = counter,
99*4882a593Smuzhiyun		.dport = dport
100*4882a593Smuzhiyun	};
101*4882a593Smuzhiyun	u64 h = siphash(&combined, offsetofend(typeof(combined), dport), &secret);
102*4882a593Smuzhiyun
103*4882a593SmuzhiyunResources
104*4882a593Smuzhiyun=========
105*4882a593Smuzhiyun
106*4882a593SmuzhiyunRead the SipHash paper if you're interested in learning more:
107*4882a593Smuzhiyunhttps://131002.net/siphash/siphash.pdf
108*4882a593Smuzhiyun
109*4882a593Smuzhiyun-------------------------------------------------------------------------------
110*4882a593Smuzhiyun
111*4882a593Smuzhiyun===============================================
112*4882a593SmuzhiyunHalfSipHash - SipHash's insecure younger cousin
113*4882a593Smuzhiyun===============================================
114*4882a593Smuzhiyun
115*4882a593Smuzhiyun:Author: Written by Jason A. Donenfeld <jason@zx2c4.com>
116*4882a593Smuzhiyun
117*4882a593SmuzhiyunOn the off-chance that SipHash is not fast enough for your needs, you might be
118*4882a593Smuzhiyunable to justify using HalfSipHash, a terrifying but potentially useful
119*4882a593Smuzhiyunpossibility. HalfSipHash cuts SipHash's rounds down from "2-4" to "1-3" and,
120*4882a593Smuzhiyuneven scarier, uses an easily brute-forcable 64-bit key (with a 32-bit output)
121*4882a593Smuzhiyuninstead of SipHash's 128-bit key. However, this may appeal to some
122*4882a593Smuzhiyunhigh-performance `jhash` users.
123*4882a593Smuzhiyun
124*4882a593SmuzhiyunDanger!
125*4882a593Smuzhiyun
126*4882a593SmuzhiyunDo not ever use HalfSipHash except for as a hashtable key function, and only
127*4882a593Smuzhiyunthen when you can be absolutely certain that the outputs will never be
128*4882a593Smuzhiyuntransmitted out of the kernel. This is only remotely useful over `jhash` as a
129*4882a593Smuzhiyunmeans of mitigating hashtable flooding denial of service attacks.
130*4882a593Smuzhiyun
131*4882a593SmuzhiyunGenerating a HalfSipHash key
132*4882a593Smuzhiyun============================
133*4882a593Smuzhiyun
134*4882a593SmuzhiyunKeys should always be generated from a cryptographically secure source of
135*4882a593Smuzhiyunrandom numbers, either using get_random_bytes or get_random_once:
136*4882a593Smuzhiyun
137*4882a593Smuzhiyunhsiphash_key_t key;
138*4882a593Smuzhiyunget_random_bytes(&key, sizeof(key));
139*4882a593Smuzhiyun
140*4882a593SmuzhiyunIf you're not deriving your key from here, you're doing it wrong.
141*4882a593Smuzhiyun
142*4882a593SmuzhiyunUsing the HalfSipHash functions
143*4882a593Smuzhiyun===============================
144*4882a593Smuzhiyun
145*4882a593SmuzhiyunThere are two variants of the function, one that takes a list of integers, and
146*4882a593Smuzhiyunone that takes a buffer::
147*4882a593Smuzhiyun
148*4882a593Smuzhiyun	u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key);
149*4882a593Smuzhiyun
150*4882a593SmuzhiyunAnd::
151*4882a593Smuzhiyun
152*4882a593Smuzhiyun	u32 hsiphash_1u32(u32, const hsiphash_key_t *key);
153*4882a593Smuzhiyun	u32 hsiphash_2u32(u32, u32, const hsiphash_key_t *key);
154*4882a593Smuzhiyun	u32 hsiphash_3u32(u32, u32, u32, const hsiphash_key_t *key);
155*4882a593Smuzhiyun	u32 hsiphash_4u32(u32, u32, u32, u32, const hsiphash_key_t *key);
156*4882a593Smuzhiyun
157*4882a593SmuzhiyunIf you pass the generic hsiphash function something of a constant length, it
158*4882a593Smuzhiyunwill constant fold at compile-time and automatically choose one of the
159*4882a593Smuzhiyunoptimized functions.
160*4882a593Smuzhiyun
161*4882a593SmuzhiyunHashtable key function usage
162*4882a593Smuzhiyun============================
163*4882a593Smuzhiyun
164*4882a593Smuzhiyun::
165*4882a593Smuzhiyun
166*4882a593Smuzhiyun	struct some_hashtable {
167*4882a593Smuzhiyun		DECLARE_HASHTABLE(hashtable, 8);
168*4882a593Smuzhiyun		hsiphash_key_t key;
169*4882a593Smuzhiyun	};
170*4882a593Smuzhiyun
171*4882a593Smuzhiyun	void init_hashtable(struct some_hashtable *table)
172*4882a593Smuzhiyun	{
173*4882a593Smuzhiyun		get_random_bytes(&table->key, sizeof(table->key));
174*4882a593Smuzhiyun	}
175*4882a593Smuzhiyun
176*4882a593Smuzhiyun	static inline hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input)
177*4882a593Smuzhiyun	{
178*4882a593Smuzhiyun		return &table->hashtable[hsiphash(input, sizeof(*input), &table->key) & (HASH_SIZE(table->hashtable) - 1)];
179*4882a593Smuzhiyun	}
180*4882a593Smuzhiyun
181*4882a593SmuzhiyunYou may then iterate like usual over the returned hash bucket.
182*4882a593Smuzhiyun
183*4882a593SmuzhiyunPerformance
184*4882a593Smuzhiyun===========
185*4882a593Smuzhiyun
186*4882a593SmuzhiyunHalfSipHash is roughly 3 times slower than JenkinsHash. For many replacements,
187*4882a593Smuzhiyunthis will not be a problem, as the hashtable lookup isn't the bottleneck. And
188*4882a593Smuzhiyunin general, this is probably a good sacrifice to make for the security and DoS
189*4882a593Smuzhiyunresistance of HalfSipHash.
190