1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * net/dccp/proto.c
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * An implementation of the DCCP protocol
6*4882a593Smuzhiyun * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7*4882a593Smuzhiyun */
8*4882a593Smuzhiyun
9*4882a593Smuzhiyun #include <linux/dccp.h>
10*4882a593Smuzhiyun #include <linux/module.h>
11*4882a593Smuzhiyun #include <linux/types.h>
12*4882a593Smuzhiyun #include <linux/sched.h>
13*4882a593Smuzhiyun #include <linux/kernel.h>
14*4882a593Smuzhiyun #include <linux/skbuff.h>
15*4882a593Smuzhiyun #include <linux/netdevice.h>
16*4882a593Smuzhiyun #include <linux/in.h>
17*4882a593Smuzhiyun #include <linux/if_arp.h>
18*4882a593Smuzhiyun #include <linux/init.h>
19*4882a593Smuzhiyun #include <linux/random.h>
20*4882a593Smuzhiyun #include <linux/slab.h>
21*4882a593Smuzhiyun #include <net/checksum.h>
22*4882a593Smuzhiyun
23*4882a593Smuzhiyun #include <net/inet_sock.h>
24*4882a593Smuzhiyun #include <net/inet_common.h>
25*4882a593Smuzhiyun #include <net/sock.h>
26*4882a593Smuzhiyun #include <net/xfrm.h>
27*4882a593Smuzhiyun
28*4882a593Smuzhiyun #include <asm/ioctls.h>
29*4882a593Smuzhiyun #include <linux/spinlock.h>
30*4882a593Smuzhiyun #include <linux/timer.h>
31*4882a593Smuzhiyun #include <linux/delay.h>
32*4882a593Smuzhiyun #include <linux/poll.h>
33*4882a593Smuzhiyun
34*4882a593Smuzhiyun #include "ccid.h"
35*4882a593Smuzhiyun #include "dccp.h"
36*4882a593Smuzhiyun #include "feat.h"
37*4882a593Smuzhiyun
38*4882a593Smuzhiyun #define CREATE_TRACE_POINTS
39*4882a593Smuzhiyun #include "trace.h"
40*4882a593Smuzhiyun
41*4882a593Smuzhiyun DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
42*4882a593Smuzhiyun
43*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dccp_statistics);
44*4882a593Smuzhiyun
45*4882a593Smuzhiyun struct percpu_counter dccp_orphan_count;
46*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dccp_orphan_count);
47*4882a593Smuzhiyun
48*4882a593Smuzhiyun struct inet_hashinfo dccp_hashinfo;
49*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dccp_hashinfo);
50*4882a593Smuzhiyun
51*4882a593Smuzhiyun /* the maximum queue length for tx in packets. 0 is no limit */
52*4882a593Smuzhiyun int sysctl_dccp_tx_qlen __read_mostly = 5;
53*4882a593Smuzhiyun
54*4882a593Smuzhiyun #ifdef CONFIG_IP_DCCP_DEBUG
dccp_state_name(const int state)55*4882a593Smuzhiyun static const char *dccp_state_name(const int state)
56*4882a593Smuzhiyun {
57*4882a593Smuzhiyun static const char *const dccp_state_names[] = {
58*4882a593Smuzhiyun [DCCP_OPEN] = "OPEN",
59*4882a593Smuzhiyun [DCCP_REQUESTING] = "REQUESTING",
60*4882a593Smuzhiyun [DCCP_PARTOPEN] = "PARTOPEN",
61*4882a593Smuzhiyun [DCCP_LISTEN] = "LISTEN",
62*4882a593Smuzhiyun [DCCP_RESPOND] = "RESPOND",
63*4882a593Smuzhiyun [DCCP_CLOSING] = "CLOSING",
64*4882a593Smuzhiyun [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
65*4882a593Smuzhiyun [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
66*4882a593Smuzhiyun [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
67*4882a593Smuzhiyun [DCCP_TIME_WAIT] = "TIME_WAIT",
68*4882a593Smuzhiyun [DCCP_CLOSED] = "CLOSED",
69*4882a593Smuzhiyun };
70*4882a593Smuzhiyun
71*4882a593Smuzhiyun if (state >= DCCP_MAX_STATES)
72*4882a593Smuzhiyun return "INVALID STATE!";
73*4882a593Smuzhiyun else
74*4882a593Smuzhiyun return dccp_state_names[state];
75*4882a593Smuzhiyun }
76*4882a593Smuzhiyun #endif
77*4882a593Smuzhiyun
/*
 * dccp_set_state  -  move socket @sk into @state
 *
 * Keeps the CURRESTAB/ESTABRESETS MIB counters in sync with transitions
 * into/out of OPEN and CLOSED, and unhashes the socket (releasing its
 * local port unless the user pinned it) when it becomes CLOSED.
 */
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	/* A transition to the current state indicates a caller bug. */
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		/* Client retransmits all Confirm options until entering OPEN */
		if (oldstate == DCCP_PARTOPEN)
			dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		/* Keep the port if the user locked it with an explicit bind() */
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		fallthrough;
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	inet_sk_set_state(sk, state);
}
115*4882a593Smuzhiyun
116*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dccp_set_state);
117*4882a593Smuzhiyun
dccp_finish_passive_close(struct sock * sk)118*4882a593Smuzhiyun static void dccp_finish_passive_close(struct sock *sk)
119*4882a593Smuzhiyun {
120*4882a593Smuzhiyun switch (sk->sk_state) {
121*4882a593Smuzhiyun case DCCP_PASSIVE_CLOSE:
122*4882a593Smuzhiyun /* Node (client or server) has received Close packet. */
123*4882a593Smuzhiyun dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
124*4882a593Smuzhiyun dccp_set_state(sk, DCCP_CLOSED);
125*4882a593Smuzhiyun break;
126*4882a593Smuzhiyun case DCCP_PASSIVE_CLOSEREQ:
127*4882a593Smuzhiyun /*
128*4882a593Smuzhiyun * Client received CloseReq. We set the `active' flag so that
129*4882a593Smuzhiyun * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
130*4882a593Smuzhiyun */
131*4882a593Smuzhiyun dccp_send_close(sk, 1);
132*4882a593Smuzhiyun dccp_set_state(sk, DCCP_CLOSING);
133*4882a593Smuzhiyun }
134*4882a593Smuzhiyun }
135*4882a593Smuzhiyun
/*
 * dccp_done  -  final transition to CLOSED
 *
 * Stops retransmission timers and marks both directions shut down.
 * If user space still holds the socket, wake any waiters; otherwise
 * (socket already orphaned) destroy it immediately.
 */
void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}
148*4882a593Smuzhiyun
149*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dccp_done);
150*4882a593Smuzhiyun
dccp_packet_name(const int type)151*4882a593Smuzhiyun const char *dccp_packet_name(const int type)
152*4882a593Smuzhiyun {
153*4882a593Smuzhiyun static const char *const dccp_packet_names[] = {
154*4882a593Smuzhiyun [DCCP_PKT_REQUEST] = "REQUEST",
155*4882a593Smuzhiyun [DCCP_PKT_RESPONSE] = "RESPONSE",
156*4882a593Smuzhiyun [DCCP_PKT_DATA] = "DATA",
157*4882a593Smuzhiyun [DCCP_PKT_ACK] = "ACK",
158*4882a593Smuzhiyun [DCCP_PKT_DATAACK] = "DATAACK",
159*4882a593Smuzhiyun [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
160*4882a593Smuzhiyun [DCCP_PKT_CLOSE] = "CLOSE",
161*4882a593Smuzhiyun [DCCP_PKT_RESET] = "RESET",
162*4882a593Smuzhiyun [DCCP_PKT_SYNC] = "SYNC",
163*4882a593Smuzhiyun [DCCP_PKT_SYNCACK] = "SYNCACK",
164*4882a593Smuzhiyun };
165*4882a593Smuzhiyun
166*4882a593Smuzhiyun if (type >= DCCP_NR_PKT_TYPES)
167*4882a593Smuzhiyun return "INVALID";
168*4882a593Smuzhiyun else
169*4882a593Smuzhiyun return dccp_packet_names[type];
170*4882a593Smuzhiyun }
171*4882a593Smuzhiyun
172*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dccp_packet_name);
173*4882a593Smuzhiyun
/*
 * sk->sk_destruct callback (installed in dccp_init_sock()): release the
 * TX CCID, then fall through to the generic inet socket teardown.
 */
static void dccp_sk_destruct(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_tx_ccid = NULL;
	inet_sock_destruct(sk);
}
182*4882a593Smuzhiyun
/*
 * dccp_init_sock  -  initialise the DCCP-private part of a socket
 * @sk: socket being set up
 * @ctl_sock_initialized: non-zero once the control socket exists; the
 *	control socket itself skips feature-negotiation setup (see below)
 *
 * Returns 0, or the error from dccp_feat_init().
 */
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_rto = DCCP_TIMEOUT_INIT;
	/* icsk_syn_retries is reused to bound DCCP Request retransmissions */
	icsk->icsk_syn_retries = sysctl_dccp_request_retries;
	sk->sk_state = DCCP_CLOSED;
	sk->sk_write_space = dccp_write_space;
	sk->sk_destruct = dccp_sk_destruct;
	icsk->icsk_sync_mss = dccp_sync_mss;
	dp->dccps_mss_cache = 536;	/* initial value, before path MSS is known */
	dp->dccps_rate_last = jiffies;
	dp->dccps_role = DCCP_ROLE_UNDEFINED;
	dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_tx_qlen = sysctl_dccp_tx_qlen;

	dccp_init_xmit_timers(sk);

	INIT_LIST_HEAD(&dp->dccps_featneg);
	/* control socket doesn't need feat nego */
	if (likely(ctl_sock_initialized))
		return dccp_feat_init(sk);
	return 0;
}
208*4882a593Smuzhiyun
209*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dccp_init_sock);
210*4882a593Smuzhiyun
/*
 * dccp_destroy_sock  -  free DCCP-private resources attached to @sk
 *
 * Purges queued skbs, releases the bound port, and frees the service
 * list, ack vector, RX CCID and feature-negotiation state.  The TX CCID
 * is released separately, in dccp_sk_destruct().
 */
void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dp->dccps_hc_rx_ackvec != NULL) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	dp->dccps_hc_rx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}
238*4882a593Smuzhiyun
239*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dccp_destroy_sock);
240*4882a593Smuzhiyun
dccp_listen_start(struct sock * sk,int backlog)241*4882a593Smuzhiyun static inline int dccp_listen_start(struct sock *sk, int backlog)
242*4882a593Smuzhiyun {
243*4882a593Smuzhiyun struct dccp_sock *dp = dccp_sk(sk);
244*4882a593Smuzhiyun
245*4882a593Smuzhiyun dp->dccps_role = DCCP_ROLE_LISTEN;
246*4882a593Smuzhiyun /* do not start to listen if feature negotiation setup fails */
247*4882a593Smuzhiyun if (dccp_feat_finalise_settings(dp))
248*4882a593Smuzhiyun return -EPROTO;
249*4882a593Smuzhiyun return inet_csk_listen_start(sk, backlog);
250*4882a593Smuzhiyun }
251*4882a593Smuzhiyun
dccp_need_reset(int state)252*4882a593Smuzhiyun static inline int dccp_need_reset(int state)
253*4882a593Smuzhiyun {
254*4882a593Smuzhiyun return state != DCCP_CLOSED && state != DCCP_LISTEN &&
255*4882a593Smuzhiyun state != DCCP_REQUESTING;
256*4882a593Smuzhiyun }
257*4882a593Smuzhiyun
/*
 * dccp_disconnect  -  abort the connection and reset @sk to a clean,
 * unconnected CLOSED state so it can be reused.
 *
 * Always returns 0.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	/* Stop retransmission machinery and drop the RX CCID state. */
	dccp_clear_xmit_timers(sk);
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	dp->dccps_hc_rx_ccid = NULL;

	/* Discard anything still queued in either direction. */
	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->inet_dport = 0;

	/* Forget the source address unless the user bound it explicitly. */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return 0;
}
308*4882a593Smuzhiyun
309*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dccp_disconnect);
310*4882a593Smuzhiyun
311*4882a593Smuzhiyun /*
312*4882a593Smuzhiyun * Wait for a DCCP event.
313*4882a593Smuzhiyun *
314*4882a593Smuzhiyun * Note that we don't need to lock the socket, as the upper poll layers
315*4882a593Smuzhiyun * take care of normal races (between the test and the event) and we don't
316*4882a593Smuzhiyun * go look at any of the socket buffers directly.
317*4882a593Smuzhiyun */
/*
 * dccp_poll  -  report readiness events for poll/select/epoll
 *
 * Runs without the socket lock (see the comment above this function);
 * listening sockets are delegated to inet_csk_listen_poll().
 */
__poll_t dccp_poll(struct file *file, struct socket *sock,
		   poll_table *wait)
{
	__poll_t mask;
	struct sock *sk = sock->sk;

	sock_poll_wait(file, sock, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = EPOLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= EPOLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= EPOLLIN | EPOLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_is_writeable(sk)) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {  /* send SIGIO later */
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_is_writeable(sk))
					mask |= EPOLLOUT | EPOLLWRNORM;
			}
		}
	}
	return mask;
}
365*4882a593Smuzhiyun
366*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dccp_poll);
367*4882a593Smuzhiyun
/*
 * dccp_ioctl  -  handle SIOCOUTQ/SIOCINQ queue-size queries
 *
 * Returns -ENOTCONN for listening sockets, -ENOIOCTLCMD for unknown
 * commands, and -EFAULT if the result cannot be copied to user space.
 */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCOUTQ: {
		int amount = sk_wmem_alloc_get(sk);
		/* Using sk_wmem_alloc here because sk_wmem_queued is not used by DCCP and
		 * always 0, comparably to UDP.
		 */

		rc = put_user(amount, (int __user *)arg);
	}
		break;
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}
410*4882a593Smuzhiyun
411*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dccp_ioctl);
412*4882a593Smuzhiyun
/*
 * dccp_setsockopt_service  -  set the service code (and optional list)
 * @service: first service code, already copied in by the caller
 * @optval/@optlen: full user buffer; entries after the first become the
 *	socket's service list
 *
 * Rejects the reserved invalid value and over-long lists.  Any previous
 * service list is freed and replaced under the socket lock.
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   sockptr_t optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		/* One u32 was consumed as @service; the rest form the list. */
		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_sockptr_offset(sl->dccpsl_list, optval,
				sizeof(service), optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
446*4882a593Smuzhiyun
/*
 * dccp_setsockopt_cscov  -  register a minimum checksum-coverage preference
 * @cscov: requested minimum coverage, 0..15 (0 = keep default, no-op)
 * @rx: true for the RX direction (RECV_CSCOV), false for TX (SEND_CSCOV)
 */
static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
	u8 *list, len;
	int i, rc;

	if (cscov < 0 || cscov > 15)
		return -EINVAL;
	/*
	 * Populate a list of permissible values, in the range cscov...15. This
	 * is necessary since feature negotiation of single values only works if
	 * both sides incidentally choose the same value. Since the list starts
	 * lowest-value first, negotiation will pick the smallest shared value.
	 */
	if (cscov == 0)
		return 0;
	len = 16 - cscov;

	list = kmalloc(len, GFP_KERNEL);
	if (list == NULL)
		return -ENOBUFS;

	for (i = 0; i < len; i++)
		list[i] = cscov++;

	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

	/*
	 * NOTE(review): the loop above has advanced cscov past its original
	 * value (to 16) by this point, so the value cached below is not the
	 * user-supplied minimum — confirm this is the intended interaction
	 * with feature activation before relying on dccps_pcrlen/pcslen here.
	 */
	if (rc == 0) {
		if (rx)
			dccp_sk(sk)->dccps_pcrlen = cscov;
		else
			dccp_sk(sk)->dccps_pcslen = cscov;
	}
	kfree(list);
	return rc;
}
482*4882a593Smuzhiyun
/*
 * dccp_setsockopt_ccid  -  register CCID preference list(s)
 * @type: DCCP_SOCKOPT_{TX_,RX_,}CCID — the plain CCID option registers the
 *	same list for both directions
 * @optval/@optlen: array of 1..DCCP_FEAT_MAX_SP_VALS candidate CCID ids
 */
static int dccp_setsockopt_ccid(struct sock *sk, int type,
				sockptr_t optval, unsigned int optlen)
{
	u8 *val;
	int rc = 0;

	if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
		return -EINVAL;

	val = memdup_sockptr(optval, optlen);
	if (IS_ERR(val))
		return PTR_ERR(val);

	lock_sock(sk);
	if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);

	if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
	release_sock(sk);

	kfree(val);
	return rc;
}
507*4882a593Smuzhiyun
/*
 * do_dccp_setsockopt  -  handle SOL_DCCP socket options
 *
 * Deprecated and CCID options are dispatched before the generic integer
 * copy; DCCP_SOCKOPT_SERVICE gets the whole @optval since it may carry a
 * service list.  Everything else is a plain int handled under the lock.
 */
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
			      sockptr_t optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CCID:
	case DCCP_SOCKOPT_RX_CCID:
	case DCCP_SOCKOPT_TX_CCID:
		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
	}

	if (optlen < (int)sizeof(int))
		return -EINVAL;

	if (copy_from_sockptr(&val, optval, sizeof(int)))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		/* Only meaningful on the server side of a connection. */
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, true);
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		/* Queueing policy may only change before the connection exists. */
		if (sk->sk_state != DCCP_CLOSED)
			err = -EISCONN;
		else if (val < 0 || val >= DCCPQ_POLICY_MAX)
			err = -EINVAL;
		else
			dp->dccps_qpolicy = val;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		if (val < 0)
			err = -EINVAL;
		else
			dp->dccps_tx_qlen = val;
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);

	return err;
}
573*4882a593Smuzhiyun
/* Entry point for setsockopt(): SOL_DCCP is handled here, anything else
 * is delegated to the address-family layer via icsk_af_ops.
 */
int dccp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
		    unsigned int optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_setsockopt(sk, level, optname, optval, optlen);

	return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level, optname,
						     optval, optlen);
}
583*4882a593Smuzhiyun
584*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dccp_setsockopt);
585*4882a593Smuzhiyun
/*
 * dccp_getsockopt_service  -  copy service code and service list to user
 * @len: size of the user buffer; must hold the service code plus the
 *	whole list, otherwise -EINVAL
 *
 * Returns 0 on success, -EINVAL if the buffer is too small, or -EFAULT
 * if any user-space copy fails.
 *
 * Cleanup: the original initialised err to -ENOENT only to overwrite it
 * unconditionally with -EINVAL before first use (dead store); err now
 * starts as -EINVAL directly.  Behaviour is unchanged.
 */
static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -EINVAL, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}
613*4882a593Smuzhiyun
/*
 * do_dccp_getsockopt  -  handle SOL_DCCP getsockopt() requests
 *
 * Most options return a plain int; DCCP_SOCKOPT_SERVICE and the CCID
 * option ranges (128..191 RX-CCID-specific, 192..255 TX-CCID-specific)
 * are delegated to their own handlers.
 */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
			      char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_TX_CCID:
		val = ccid_get_current_tx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_RX_CCID:
		val = ccid_get_current_rx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		val = dp->dccps_qpolicy;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		val = dp->dccps_tx_qlen;
		break;
	case 128 ... 191:
		/* RX-CCID-specific options */
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		/* TX-CCID-specific options */
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
681*4882a593Smuzhiyun
/* Entry point for getsockopt(): SOL_DCCP is handled here, anything else
 * is delegated to the address-family layer via icsk_af_ops.
 */
int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_getsockopt(sk, level, optname, optval, optlen);

	return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level, optname,
						     optval, optlen);
}
691*4882a593Smuzhiyun
692*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dccp_getsockopt);
693*4882a593Smuzhiyun
dccp_msghdr_parse(struct msghdr * msg,struct sk_buff * skb)694*4882a593Smuzhiyun static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
695*4882a593Smuzhiyun {
696*4882a593Smuzhiyun struct cmsghdr *cmsg;
697*4882a593Smuzhiyun
698*4882a593Smuzhiyun /*
699*4882a593Smuzhiyun * Assign an (opaque) qpolicy priority value to skb->priority.
700*4882a593Smuzhiyun *
701*4882a593Smuzhiyun * We are overloading this skb field for use with the qpolicy subystem.
702*4882a593Smuzhiyun * The skb->priority is normally used for the SO_PRIORITY option, which
703*4882a593Smuzhiyun * is initialised from sk_priority. Since the assignment of sk_priority
704*4882a593Smuzhiyun * to skb->priority happens later (on layer 3), we overload this field
705*4882a593Smuzhiyun * for use with queueing priorities as long as the skb is on layer 4.
706*4882a593Smuzhiyun * The default priority value (if nothing is set) is 0.
707*4882a593Smuzhiyun */
708*4882a593Smuzhiyun skb->priority = 0;
709*4882a593Smuzhiyun
710*4882a593Smuzhiyun for_each_cmsghdr(cmsg, msg) {
711*4882a593Smuzhiyun if (!CMSG_OK(msg, cmsg))
712*4882a593Smuzhiyun return -EINVAL;
713*4882a593Smuzhiyun
714*4882a593Smuzhiyun if (cmsg->cmsg_level != SOL_DCCP)
715*4882a593Smuzhiyun continue;
716*4882a593Smuzhiyun
717*4882a593Smuzhiyun if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
718*4882a593Smuzhiyun !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
719*4882a593Smuzhiyun return -EINVAL;
720*4882a593Smuzhiyun
721*4882a593Smuzhiyun switch (cmsg->cmsg_type) {
722*4882a593Smuzhiyun case DCCP_SCM_PRIORITY:
723*4882a593Smuzhiyun if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
724*4882a593Smuzhiyun return -EINVAL;
725*4882a593Smuzhiyun skb->priority = *(__u32 *)CMSG_DATA(cmsg);
726*4882a593Smuzhiyun break;
727*4882a593Smuzhiyun default:
728*4882a593Smuzhiyun return -EINVAL;
729*4882a593Smuzhiyun }
730*4882a593Smuzhiyun }
731*4882a593Smuzhiyun return 0;
732*4882a593Smuzhiyun }
733*4882a593Smuzhiyun
/*
 * dccp_sendmsg  -  enqueue one application datagram for transmission
 * @sk:  DCCP socket
 * @msg: user message; at most one MSS worth of payload is accepted
 * @len: payload length in bytes
 *
 * DCCP is datagram-oriented: a message larger than the current MSS is
 * rejected with -EMSGSIZE rather than segmented. On success the skb is
 * pushed onto the qpolicy TX queue and, unless the CCID's pacing timer
 * is already pending, transmission is attempted immediately.
 * Returns @len on success or a negative error code.
 */
int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	trace_dccp_probe(sk, len);

	/*
	 * We are not yet holding the socket lock here, and dccps_mss_cache
	 * may be updated concurrently by the protocol path; use READ_ONCE()
	 * to annotate the lockless read and prevent load tearing.
	 */
	if (len > READ_ONCE(dp->dccps_mss_cache))
		return -EMSGSIZE;

	lock_sock(sk);

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/* Drop the lock while the allocator may sleep. */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	/*
	 * Re-check queue occupancy and socket state: both may have changed
	 * while the socket lock was dropped for the allocation above.
	 */
	if (dccp_qpolicy_full(sk)) {
		rc = -EAGAIN;
		goto out_discard;
	}

	if (sk->sk_state == DCCP_CLOSED) {
		rc = -ENOTCONN;
		goto out_discard;
	}

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_from_msg(skb_put(skb, len), msg, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_msghdr_parse(msg, skb);
	if (rc != 0)
		goto out_discard;

	dccp_qpolicy_push(sk, skb);
	/*
	 * The xmit_timer is set if the TX CCID is rate-based and will expire
	 * when congestion control permits to release further packets into the
	 * network. Window-based CCIDs do not use this timer.
	 */
	if (!timer_pending(&dp->dccps_xmit_timer))
		dccp_write_xmit(sk);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
804*4882a593Smuzhiyun
/*
 * dccp_recvmsg  -  receive one application-layer datagram
 * @sk:	      socket to read from; must not be in the LISTEN state
 * @msg:      destination for the payload
 * @len:      size of the user buffer
 * @nonblock: non-zero to return -EAGAIN instead of sleeping
 * @flags:    MSG_PEEK, MSG_TRUNC, ...
 * @addr_len: not written by this function
 *
 * Datagram boundaries are preserved: at most one skb's payload is
 * delivered per call; if the buffer is smaller, MSG_TRUNC is set.
 * Returns the number of bytes copied, 0 on connection teardown, or a
 * negative error code.
 */
int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		 int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	/* A listening socket never carries payload. */
	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		/* Peek (do not dequeue) so that MSG_PEEK keeps the skb. */
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			/* Peer-initiated close: complete the passive-close
			 * state change unless the caller is only peeking.
			 */
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			fallthrough;
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			/* Non-data packet types are consumed and skipped. */
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb);
		}
verify_sock_status:
		/* No data available: decide whether to report EOF, an
		 * error, or to wait for more packets.
		 */
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* Sleep until data arrives or the timeout expires. */
		sk_wait_data(sk, &timeo, NULL);
		continue;
	found_ok_skb:
		/* Clamp to the skb length; flag truncation otherwise. */
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_msg(skb, 0, msg, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
		if (flags & MSG_TRUNC)
			len = skb->len;
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
912*4882a593Smuzhiyun
/*
 * inet_dccp_listen  -  move a DCCP socket into the LISTEN state
 * @sock:    the socket (must be unconnected and of type SOCK_DCCP)
 * @backlog: maximum accept-queue length
 *
 * If the socket is already listening, only the backlog is adjusted.
 * Returns 0 on success or -EINVAL / a dccp_listen_start() error code.
 */
int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* WRITE_ONCE() pairs with lockless readers of the backlog limit. */
	WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);
950*4882a593Smuzhiyun
/*
 * dccp_terminate_connection  -  start orderly teardown of a connection
 *
 * Drives the close-side state machine from the current sk_state:
 * passive closes are completed immediately; PARTOPEN/OPEN sockets send
 * a Close packet and move to ACTIVE_CLOSEREQ or CLOSING depending on
 * who will hold the TIMEWAIT state; everything else goes straight to
 * DCCP_CLOSED. Note the intentional fallthroughs between cases.
 */
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		fallthrough;
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		/* A server not opting for server-side TIMEWAIT expects the
		 * client to hold TIMEWAIT state, hence ACTIVE_CLOSEREQ.
		 */
		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		fallthrough;
	default:
		dccp_set_state(sk, next_state);
	}
}
977*4882a593Smuzhiyun
/*
 * dccp_close  -  close a DCCP socket (sk->sk_prot->close)
 * @sk:      socket being closed
 * @timeout: linger time for flushing the TX queue / waiting for close
 *
 * Flushes unread RX data (sending an Aborted Reset if any was tossed),
 * runs the orderly termination state machine where applicable, orphans
 * the socket and, if it reached DCCP_CLOSED, destroys it. The socket
 * may outlive this call ("reprieved") until protocol close completes.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/* No further CCID-paced transmissions once close has begun. */
	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs. We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	/* If socket has been already reset kill it. */
	if (sk->sk_state == DCCP_CLOSED)
		goto adjudge_to_death;

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		/*
		 * Normal connection termination. May need to wait if there are
		 * still packets in the TX queue that are delayed by the CCID.
		 */
		dccp_flush_write_queue(sk, &timeout);
		dccp_terminate_connection(sk);
	}

	/*
	 * Flush write queue. This may be necessary in several cases:
	 * - we have been closed by the peer but still have application data;
	 * - abortive termination (unread data or zero linger time),
	 * - normal termination but queue could not be flushed within time limit
	 */
	__skb_queue_purge(&sk->sk_write_queue);

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/* Remember the state before orphaning for the race check below. */
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	percpu_counter_inc(sk->sk_prot->orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);
1076*4882a593Smuzhiyun
/*
 * dccp_shutdown  -  shutdown() handler for DCCP sockets
 *
 * Currently a no-op apart from the debug message: half-close semantics
 * are not implemented here; teardown is handled by dccp_close().
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1083*4882a593Smuzhiyun
dccp_mib_init(void)1084*4882a593Smuzhiyun static inline int __init dccp_mib_init(void)
1085*4882a593Smuzhiyun {
1086*4882a593Smuzhiyun dccp_statistics = alloc_percpu(struct dccp_mib);
1087*4882a593Smuzhiyun if (!dccp_statistics)
1088*4882a593Smuzhiyun return -ENOMEM;
1089*4882a593Smuzhiyun return 0;
1090*4882a593Smuzhiyun }
1091*4882a593Smuzhiyun
/* Release the per-CPU MIB counters allocated by dccp_mib_init(). */
static inline void dccp_mib_exit(void)
{
	free_percpu(dccp_statistics);
}
1096*4882a593Smuzhiyun
/* Number of established-hash buckets; 0 means size from available RAM
 * (see dccp_init()). Read-only after module load (mode 0444).
 */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Runtime switch for dccp_pr_debug() output; writable via sysfs. */
bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1108*4882a593Smuzhiyun
/*
 * dccp_init  -  module initialisation
 *
 * Sets up, in order: the orphan counter, the inet hash infrastructure,
 * the bind-bucket slab cache, the established ("ehash") and bind
 * ("bhash") hash tables (sized from available memory or the
 * thash_entries parameter), the MIB counters, ack-vector support,
 * sysctls and the built-in CCIDs. On any failure, everything set up so
 * far is unwound via the goto-cleanup chain at the bottom.
 */
static int __init dccp_init(void)
{
	unsigned long goal;
	unsigned long nr_pages = totalram_pages();
	int ehash_order, bhash_order, i;
	int rc;

	/* dccp_skb_cb must fit in the skb control-block scratch area. */
	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     sizeof_field(struct sk_buff, cb));
	rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
	if (rc)
		goto out_fail;
	inet_hashinfo_init(&dccp_hashinfo);
	rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
	if (rc)
		goto out_free_percpu;
	rc = -ENOBUFS;
	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_free_hashinfo2;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (nr_pages >= (128 * 1024))
		goal = nr_pages >> (21 - PAGE_SHIFT);
	else
		goal = nr_pages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	/* Retry with progressively smaller orders until the ehash fits. */
	do {
		unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);

		/* Round the bucket count down to a power of two so that
		 * (hash_size - 1) can serve as the hash mask.
		 */
		while (hash_size & (hash_size - 1))
			hash_size--;
		dccp_hashinfo.ehash_mask = hash_size - 1;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
		goto out_free_dccp_ehash;

	bhash_order = ehash_order;

	/* Same retry scheme for the bind hash; cap its size at 64K. */
	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	rc = ccid_initialize_builtins();
	if (rc)
		goto out_sysctl_exit;

	dccp_timestamping_init();

	return 0;

out_sysctl_exit:
	dccp_sysctl_exit();
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_hashinfo2:
	inet_hashinfo2_free_mod(&dccp_hashinfo);
out_free_percpu:
	percpu_counter_destroy(&dccp_orphan_count);
out_fail:
	/* Clear the pointers so dccp_fini() cannot double-free them. */
	dccp_hashinfo.bhash = NULL;
	dccp_hashinfo.ehash = NULL;
	dccp_hashinfo.bind_bucket_cachep = NULL;
	return rc;
}
1237*4882a593Smuzhiyun
/*
 * dccp_fini  -  module teardown; releases everything dccp_init() set up,
 * in roughly reverse order of initialisation.
 */
static void __exit dccp_fini(void)
{
	ccid_cleanup_builtins();
	dccp_mib_exit();
	/* Page orders are recomputed from the stored table sizes. */
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order((dccp_hashinfo.ehash_mask + 1) *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
	inet_hashinfo2_free_mod(&dccp_hashinfo);
	percpu_counter_destroy(&dccp_orphan_count);
}
1255*4882a593Smuzhiyun
1256*4882a593Smuzhiyun module_init(dccp_init);
1257*4882a593Smuzhiyun module_exit(dccp_fini);
1258*4882a593Smuzhiyun
1259*4882a593Smuzhiyun MODULE_LICENSE("GPL");
1260*4882a593Smuzhiyun MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1261*4882a593Smuzhiyun MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
1262