// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_tbf.c	Token Bucket Filter queue.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
 *						 original idea by Martin Devera
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>


/*	Simple Token Bucket Filter.
	=======================================

	SOURCE.
	-------

	None.

	Description.
	------------

	A data flow obeys TBF with rate R and depth B, if for any
	time interval t_i...t_f the number of transmitted bits
	does not exceed B + R*(t_f-t_i).

	Packetized version of this definition:
	The sequence of packets of sizes s_i served at moments t_i
	obeys TBF, if for any i<=k:

	s_i+....+s_k <= B + R*(t_k - t_i)

	Algorithm.
	----------

	Let N(t_i) be B/R initially and N(t) grow continuously with time as:

	N(t+delta) = min{B/R, N(t) + delta}

	If the first packet in the queue has length S, it may be
	transmitted only at the time t_* when S/R <= N(t_*),
	and in this case N(t) jumps:

	N(t_* + 0) = N(t_* - 0) - S/R.



	Actually, QoS requires two TBFs to be applied to a data stream.
	One of them controls the steady state burst size, the other one,
	with rate P (peak rate) and depth M (equal to the link MTU),
	limits bursts on a smaller time scale.

	It is easy to see that P>R, and B>M. If P is infinity, this double
	TBF is equivalent to a single one.

	When TBF works in reshaping mode, latency is estimated as:

	lat = max ((L-B)/R, (L-M)/P)


	NOTES.
	------

	If TBF throttles, it starts a watchdog timer, which will wake it up
	when it is ready to transmit.
	Note that the minimal timer resolution is 1/HZ.
	If no new packets arrive during this period,
	or if the device is not awakened by EOI for some previous packet,
	TBF can stop its activity for 1/HZ.


	This means that, with depth B, the maximal rate is

	R_crit = B*HZ

	E.g. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.

	Note that the peak rate TBF is much tougher: with MTU 1500
	P_crit = 150Kbytes/sec. So, if you need greater peak
	rates, use Alpha with HZ=1000 :-)

	With classful TBF, limit is just kept for backwards compatibility.
	It is passed to the default bfifo qdisc; if the inner qdisc is
	changed, the limit is no longer effective.
*/
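
/*	Worked example of the estimate above (illustrative only, not used by
 *	the code): with rate R = 125 KB/s (1 Mbit/s), depth B = 10 KB,
 *	peak rate P = 1250 KB/s and MTU M = 1.5 KB, a backlog of L = 60 KB
 *	drains with an estimated latency of
 *
 *		lat = max((60-10)/125, (60-1.5)/1250) = max(0.4, 0.047) = 0.4 s,
 *
 *	i.e. the steady-state bucket, not the peak-rate bucket, dominates.
 */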

struct tbf_sched_data {
/* Parameters */
	u32		limit;		/* Maximal length of backlog: bytes */
	u32		max_size;
	s64		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
	s64		mtu;
	struct psched_ratecfg rate;
	struct psched_ratecfg peak;

/* Variables */
	s64	tokens;			/* Current number of B tokens */
	s64	ptokens;		/* Current number of P tokens */
	s64	t_c;			/* Time check-point */
	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
	struct qdisc_watchdog watchdog;	/* Watchdog timer */
};


/* Time to Length, convert time in ns to length in bytes
 * to determine how many bytes can be sent in the given time.
 */
static u64 psched_ns_t2l(const struct psched_ratecfg *r,
			 u64 time_in_ns)
{
	/* The formula is :
	 * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC
	 */
	u64 len = time_in_ns * r->rate_bytes_ps;

	do_div(len, NSEC_PER_SEC);

	if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) {
		do_div(len, 53);
		len = len * 48;
	}

	if (len > r->overhead)
		len -= r->overhead;
	else
		len = 0;

	return len;
}

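/* Illustrative arithmetic (not part of the qdisc): at
 * r->rate_bytes_ps = 125000 (1 Mbit/s), a time budget of 80,000,000 ns
 * converts to
 *
 *	len = 80000000 * 125000 / NSEC_PER_SEC = 10000 bytes,
 *
 * so an 80 ms buffer at 1 Mbit/s corresponds to a 10 KB burst.  On ATM
 * link layers the result is additionally scaled by 48/53 to account for
 * cell framing.
 */
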
static void tbf_offload_change(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_tbf_qopt_offload qopt;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	qopt.command = TC_TBF_REPLACE;
	qopt.handle = sch->handle;
	qopt.parent = sch->parent;
	qopt.replace_params.rate = q->rate;
	qopt.replace_params.max_size = q->max_size;
	qopt.replace_params.qstats = &sch->qstats;

	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
}

static void tbf_offload_destroy(struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_tbf_qopt_offload qopt;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	qopt.command = TC_TBF_DESTROY;
	qopt.handle = sch->handle;
	qopt.parent = sch->parent;
	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
}

static int tbf_offload_dump(struct Qdisc *sch)
{
	struct tc_tbf_qopt_offload qopt;

	qopt.command = TC_TBF_STATS;
	qopt.handle = sch->handle;
	qopt.parent = sch->parent;
	qopt.stats.bstats = &sch->bstats;
	qopt.stats.qstats = &sch->qstats;

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt);
}

/* GSO packet is too big, segment it so that tbf can transmit
 * each segment in time
 */
static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *segs, *nskb;
	netdev_features_t features = netif_skb_features(skb);
	unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
	int ret, nb;

	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);

	if (IS_ERR_OR_NULL(segs))
		return qdisc_drop(skb, sch, to_free);

	nb = 0;
	skb_list_walk_safe(segs, segs, nskb) {
		skb_mark_not_on_list(segs);
		qdisc_skb_cb(segs)->pkt_len = segs->len;
		len += segs->len;
		ret = qdisc_enqueue(segs, q->qdisc, to_free);
		if (ret != NET_XMIT_SUCCESS) {
			if (net_xmit_drop_count(ret))
				qdisc_qstats_drop(sch);
		} else {
			nb++;
		}
	}
	sch->q.qlen += nb;
	if (nb > 1)
		qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
	consume_skb(skb);
	return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}

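/* Accounting sketch for qdisc_tree_reduce_backlog() above (illustrative
 * numbers, not derived from real traffic): if the original GSO skb was
 * counted as one packet of prev_len = 3000 bytes and is split into
 * nb = 2 segments totalling len = 3000 bytes, the call reduces the
 * ancestors' qlen by 1 - 2 = -1 (i.e. adds one packet) and the backlog
 * by prev_len - len = 0.  In practice len usually exceeds prev_len
 * slightly because each segment carries its own headers, so the backlog
 * correction is a small negative (i.e. increasing) value.
 */
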
static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	unsigned int len = qdisc_pkt_len(skb);
	int ret;

	if (qdisc_pkt_len(skb) > q->max_size) {
		if (skb_is_gso(skb) &&
		    skb_gso_validate_mac_len(skb, q->max_size))
			return tbf_segment(skb, sch, to_free);
		return qdisc_drop(skb, sch, to_free);
	}
	ret = qdisc_enqueue(skb, q->qdisc, to_free);
	if (ret != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret))
			qdisc_qstats_drop(sch);
		return ret;
	}

	sch->qstats.backlog += len;
	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}

static bool tbf_peak_present(const struct tbf_sched_data *q)
{
	return q->peak.rate_bytes_ps;
}

static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = q->qdisc->ops->peek(q->qdisc);

	if (skb) {
		s64 now;
		s64 toks;
		s64 ptoks = 0;
		unsigned int len = qdisc_pkt_len(skb);

		now = ktime_get_ns();
		toks = min_t(s64, now - q->t_c, q->buffer);

		if (tbf_peak_present(q)) {
			ptoks = toks + q->ptokens;
			if (ptoks > q->mtu)
				ptoks = q->mtu;
			ptoks -= (s64) psched_l2t_ns(&q->peak, len);
		}
		toks += q->tokens;
		if (toks > q->buffer)
			toks = q->buffer;
		toks -= (s64) psched_l2t_ns(&q->rate, len);

		if ((toks|ptoks) >= 0) {
			skb = qdisc_dequeue_peeked(q->qdisc);
			if (unlikely(!skb))
				return NULL;

			q->t_c = now;
			q->tokens = toks;
			q->ptokens = ptoks;
			qdisc_qstats_backlog_dec(sch, skb);
			sch->q.qlen--;
			qdisc_bstats_update(sch, skb);
			return skb;
		}

		qdisc_watchdog_schedule_ns(&q->watchdog,
					   now + max_t(long, -toks, -ptoks));

		/* Maybe we have a shorter packet in the queue,
		   which can be sent now. It sounds cool,
		   but it is wrong in principle.
		   We MUST NOT reorder packets under these circumstances.

		   Really, if we split the flow into independent
		   subflows, it would be a very good solution.
		   This is the main idea of all FQ algorithms
		   (cf. CSZ, HPFQ, HFSC)
		 */

		qdisc_qstats_overlimit(sch);
	}
	return NULL;
}

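/* Token arithmetic sketch (illustrative only): with rate 125000 bytes/s,
 * buffer = 80 ms worth of tokens (80,000,000 ns) and an empty bucket,
 * dequeueing a 1500-byte packet costs psched_l2t_ns(&q->rate, 1500) =
 * 1500 * 1e9 / 125000 = 12,000,000 ns of tokens.  If only 5,000,000 ns
 * have accumulated since t_c, toks ends up at -7,000,000 and the
 * watchdog is armed roughly 7 ms in the future, when enough tokens will
 * have accrued to release the packet.
 */
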
static void tbf_reset(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	q->t_c = ktime_get_ns();
	q->tokens = q->buffer;
	q->ptokens = q->mtu;
	qdisc_watchdog_cancel(&q->watchdog);
}

static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
	[TCA_TBF_PARMS]	= { .len = sizeof(struct tc_tbf_qopt) },
	[TCA_TBF_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_TBF_RATE64]	= { .type = NLA_U64 },
	[TCA_TBF_PRATE64]	= { .type = NLA_U64 },
	[TCA_TBF_BURST] = { .type = NLA_U32 },
	[TCA_TBF_PBURST] = { .type = NLA_U32 },
};

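/* These attributes are normally filled in by the tc(8) userspace tool.
 * A typical (illustrative) invocation that exercises tbf_change() below:
 *
 *	tc qdisc add dev eth0 root tbf rate 1mbit burst 10kb latency 70ms
 *
 * tc translates rate/burst/latency into TCA_TBF_PARMS (and, for rates
 * above 32 bits, TCA_TBF_RATE64/TCA_TBF_PRATE64); the exact attribute
 * mix depends on the tc version.
 */
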
static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	int err;
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_TBF_MAX + 1];
	struct tc_tbf_qopt *qopt;
	struct Qdisc *child = NULL;
	struct Qdisc *old = NULL;
	struct psched_ratecfg rate;
	struct psched_ratecfg peak;
	u64 max_size;
	s64 buffer, mtu;
	u64 rate64 = 0, prate64 = 0;

	err = nla_parse_nested_deprecated(tb, TCA_TBF_MAX, opt, tbf_policy,
					  NULL);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (tb[TCA_TBF_PARMS] == NULL)
		goto done;

	qopt = nla_data(tb[TCA_TBF_PARMS]);
	if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
		qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
					      tb[TCA_TBF_RTAB],
					      NULL));

	if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
		qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
					      tb[TCA_TBF_PTAB],
					      NULL));

	buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
	mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);

	if (tb[TCA_TBF_RATE64])
		rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
	psched_ratecfg_precompute(&rate, &qopt->rate, rate64);

	if (tb[TCA_TBF_BURST]) {
		max_size = nla_get_u32(tb[TCA_TBF_BURST]);
		buffer = psched_l2t_ns(&rate, max_size);
	} else {
		max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
	}

	if (qopt->peakrate.rate) {
		if (tb[TCA_TBF_PRATE64])
			prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
		psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
		if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
			pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equal to rate %llu !\n",
					peak.rate_bytes_ps, rate.rate_bytes_ps);
			err = -EINVAL;
			goto done;
		}

		if (tb[TCA_TBF_PBURST]) {
			u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]);
			max_size = min_t(u32, max_size, pburst);
			mtu = psched_l2t_ns(&peak, pburst);
		} else {
			max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
		}
	} else {
		memset(&peak, 0, sizeof(peak));
	}

	if (max_size < psched_mtu(qdisc_dev(sch)))
		pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n",
				    max_size, qdisc_dev(sch)->name,
				    psched_mtu(qdisc_dev(sch)));

	if (!max_size) {
		err = -EINVAL;
		goto done;
	}

	if (q->qdisc != &noop_qdisc) {
		err = fifo_set_limit(q->qdisc, qopt->limit);
		if (err)
			goto done;
	} else if (qopt->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit,
					 extack);
		if (IS_ERR(child)) {
			err = PTR_ERR(child);
			goto done;
		}

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);
	if (child) {
		qdisc_tree_flush_backlog(q->qdisc);
		old = q->qdisc;
		q->qdisc = child;
	}
	q->limit = qopt->limit;
	if (tb[TCA_TBF_PBURST])
		q->mtu = mtu;
	else
		q->mtu = PSCHED_TICKS2NS(qopt->mtu);
	q->max_size = max_size;
	if (tb[TCA_TBF_BURST])
		q->buffer = buffer;
	else
		q->buffer = PSCHED_TICKS2NS(qopt->buffer);
	q->tokens = q->buffer;
	q->ptokens = q->mtu;

	memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
	memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));

	sch_tree_unlock(sch);
	qdisc_put(old);
	err = 0;

	tbf_offload_change(sch);
done:
	return err;
}

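/* Classful usage note (illustrative): once TBF is installed, the bfifo
 * child created above can be replaced through tbf_graft(), e.g.
 *
 *	tc qdisc add dev eth0 root handle 1: tbf rate 1mbit burst 10kb latency 70ms
 *	tc qdisc add dev eth0 parent 1:1 handle 10: sfq
 *
 * After such a replacement the "limit" parameter no longer applies, as
 * noted in the header comment; queueing behaviour inside the shaper is
 * then governed by the inner qdisc.
 */
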
static int tbf_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_init(&q->watchdog, sch);
	q->qdisc = &noop_qdisc;

	if (!opt)
		return -EINVAL;

	q->t_c = ktime_get_ns();

	return tbf_change(sch, opt, extack);
}

static void tbf_destroy(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	tbf_offload_destroy(sch);
	qdisc_put(q->qdisc);
}

static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_tbf_qopt opt;
	int err;

	err = tbf_offload_dump(sch);
	if (err)
		return err;

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	opt.limit = q->limit;
	psched_ratecfg_getrate(&opt.rate, &q->rate);
	if (tbf_peak_present(q))
		psched_ratecfg_getrate(&opt.peakrate, &q->peak);
	else
		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
	opt.mtu = PSCHED_NS2TICKS(q->mtu);
	opt.buffer = PSCHED_NS2TICKS(q->buffer);
	if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;
	if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
	    nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps,
			      TCA_TBF_PAD))
		goto nla_put_failure;
	if (tbf_peak_present(q) &&
	    q->peak.rate_bytes_ps >= (1ULL << 32) &&
	    nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps,
			      TCA_TBF_PAD))
		goto nla_put_failure;

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long tbf_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops tbf_class_ops = {
	.graft		=	tbf_graft,
	.leaf		=	tbf_leaf,
	.find		=	tbf_find,
	.walk		=	tbf_walk,
	.dump		=	tbf_dump_class,
};

static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
	.next		=	NULL,
	.cl_ops		=	&tbf_class_ops,
	.id		=	"tbf",
	.priv_size	=	sizeof(struct tbf_sched_data),
	.enqueue	=	tbf_enqueue,
	.dequeue	=	tbf_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.init		=	tbf_init,
	.reset		=	tbf_reset,
	.destroy	=	tbf_destroy,
	.change		=	tbf_change,
	.dump		=	tbf_dump,
	.owner		=	THIS_MODULE,
};

static int __init tbf_module_init(void)
{
	return register_qdisc(&tbf_qdisc_ops);
}

static void __exit tbf_module_exit(void)
{
	unregister_qdisc(&tbf_qdisc_ops);
}
module_init(tbf_module_init)
module_exit(tbf_module_exit)
MODULE_LICENSE("GPL");