1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * net/sched/sch_tbf.c Token Bucket Filter queue.
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6*4882a593Smuzhiyun * Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
7*4882a593Smuzhiyun * original idea by Martin Devera
8*4882a593Smuzhiyun */
9*4882a593Smuzhiyun
10*4882a593Smuzhiyun #include <linux/module.h>
11*4882a593Smuzhiyun #include <linux/types.h>
12*4882a593Smuzhiyun #include <linux/kernel.h>
13*4882a593Smuzhiyun #include <linux/string.h>
14*4882a593Smuzhiyun #include <linux/errno.h>
15*4882a593Smuzhiyun #include <linux/skbuff.h>
16*4882a593Smuzhiyun #include <net/netlink.h>
17*4882a593Smuzhiyun #include <net/sch_generic.h>
18*4882a593Smuzhiyun #include <net/pkt_cls.h>
19*4882a593Smuzhiyun #include <net/pkt_sched.h>
20*4882a593Smuzhiyun
21*4882a593Smuzhiyun
22*4882a593Smuzhiyun /* Simple Token Bucket Filter.
23*4882a593Smuzhiyun =======================================
24*4882a593Smuzhiyun
25*4882a593Smuzhiyun SOURCE.
26*4882a593Smuzhiyun -------
27*4882a593Smuzhiyun
28*4882a593Smuzhiyun None.
29*4882a593Smuzhiyun
30*4882a593Smuzhiyun Description.
31*4882a593Smuzhiyun ------------
32*4882a593Smuzhiyun
33*4882a593Smuzhiyun A data flow obeys TBF with rate R and depth B, if for any
34*4882a593Smuzhiyun time interval t_i...t_f the number of transmitted bits
35*4882a593Smuzhiyun does not exceed B + R*(t_f-t_i).
36*4882a593Smuzhiyun
37*4882a593Smuzhiyun Packetized version of this definition:
38*4882a593Smuzhiyun The sequence of packets of sizes s_i served at moments t_i
39*4882a593Smuzhiyun obeys TBF, if for any i<=k:
40*4882a593Smuzhiyun
41*4882a593Smuzhiyun s_i+....+s_k <= B + R*(t_k - t_i)
42*4882a593Smuzhiyun
43*4882a593Smuzhiyun Algorithm.
44*4882a593Smuzhiyun ----------
45*4882a593Smuzhiyun
46*4882a593Smuzhiyun Let N(t_i) be B/R initially and N(t) grow continuously with time as:
47*4882a593Smuzhiyun
48*4882a593Smuzhiyun N(t+delta) = min{B/R, N(t) + delta}
49*4882a593Smuzhiyun
50*4882a593Smuzhiyun If the first packet in queue has length S, it may be
51*4882a593Smuzhiyun transmitted only at the time t_* when S/R <= N(t_*),
52*4882a593Smuzhiyun and in this case N(t) jumps:
53*4882a593Smuzhiyun
54*4882a593Smuzhiyun N(t_* + 0) = N(t_* - 0) - S/R.
55*4882a593Smuzhiyun
56*4882a593Smuzhiyun
57*4882a593Smuzhiyun
58*4882a593Smuzhiyun Actually, QoS requires two TBF to be applied to a data stream.
59*4882a593Smuzhiyun One of them controls steady state burst size, another
60*4882a593Smuzhiyun one with rate P (peak rate) and depth M (equal to link MTU)
61*4882a593Smuzhiyun limits bursts at a smaller time scale.
62*4882a593Smuzhiyun
63*4882a593Smuzhiyun It is easy to see that P>R, and B>M. If P is infinity, this double
64*4882a593Smuzhiyun TBF is equivalent to a single one.
65*4882a593Smuzhiyun
66*4882a593Smuzhiyun When TBF works in reshaping mode, latency is estimated as:
67*4882a593Smuzhiyun
68*4882a593Smuzhiyun lat = max ((L-B)/R, (L-M)/P)
69*4882a593Smuzhiyun
70*4882a593Smuzhiyun
71*4882a593Smuzhiyun NOTES.
72*4882a593Smuzhiyun ------
73*4882a593Smuzhiyun
	If TBF throttles, it starts a watchdog timer, which will wake it up
	when it is ready to transmit.
	Note that the minimal timer resolution is 1/HZ.
	If no new packets arrive during this period,
	or if the device is not woken up by EOI for some previous packet,
	TBF can stop its activity for 1/HZ.
80*4882a593Smuzhiyun
81*4882a593Smuzhiyun
82*4882a593Smuzhiyun This means, that with depth B, the maximal rate is
83*4882a593Smuzhiyun
84*4882a593Smuzhiyun R_crit = B*HZ
85*4882a593Smuzhiyun
	E.g. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.

	Note that the peak rate TBF is much tougher: with MTU 1500
	P_crit = 150Kbytes/sec. So, if you need greater peak
	rates, use alpha with HZ=1000 :-)
91*4882a593Smuzhiyun
92*4882a593Smuzhiyun With classful TBF, limit is just kept for backwards compatibility.
93*4882a593Smuzhiyun It is passed to the default bfifo qdisc - if the inner qdisc is
94*4882a593Smuzhiyun changed the limit is not effective anymore.
95*4882a593Smuzhiyun */
96*4882a593Smuzhiyun
97*4882a593Smuzhiyun struct tbf_sched_data {
98*4882a593Smuzhiyun /* Parameters */
99*4882a593Smuzhiyun u32 limit; /* Maximal length of backlog: bytes */
100*4882a593Smuzhiyun u32 max_size;
101*4882a593Smuzhiyun s64 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */
102*4882a593Smuzhiyun s64 mtu;
103*4882a593Smuzhiyun struct psched_ratecfg rate;
104*4882a593Smuzhiyun struct psched_ratecfg peak;
105*4882a593Smuzhiyun
106*4882a593Smuzhiyun /* Variables */
107*4882a593Smuzhiyun s64 tokens; /* Current number of B tokens */
108*4882a593Smuzhiyun s64 ptokens; /* Current number of P tokens */
109*4882a593Smuzhiyun s64 t_c; /* Time check-point */
110*4882a593Smuzhiyun struct Qdisc *qdisc; /* Inner qdisc, default - bfifo queue */
111*4882a593Smuzhiyun struct qdisc_watchdog watchdog; /* Watchdog timer */
112*4882a593Smuzhiyun };
113*4882a593Smuzhiyun
114*4882a593Smuzhiyun
115*4882a593Smuzhiyun /* Time to Length, convert time in ns to length in bytes
116*4882a593Smuzhiyun * to determinate how many bytes can be sent in given time.
117*4882a593Smuzhiyun */
psched_ns_t2l(const struct psched_ratecfg * r,u64 time_in_ns)118*4882a593Smuzhiyun static u64 psched_ns_t2l(const struct psched_ratecfg *r,
119*4882a593Smuzhiyun u64 time_in_ns)
120*4882a593Smuzhiyun {
121*4882a593Smuzhiyun /* The formula is :
122*4882a593Smuzhiyun * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC
123*4882a593Smuzhiyun */
124*4882a593Smuzhiyun u64 len = time_in_ns * r->rate_bytes_ps;
125*4882a593Smuzhiyun
126*4882a593Smuzhiyun do_div(len, NSEC_PER_SEC);
127*4882a593Smuzhiyun
128*4882a593Smuzhiyun if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) {
129*4882a593Smuzhiyun do_div(len, 53);
130*4882a593Smuzhiyun len = len * 48;
131*4882a593Smuzhiyun }
132*4882a593Smuzhiyun
133*4882a593Smuzhiyun if (len > r->overhead)
134*4882a593Smuzhiyun len -= r->overhead;
135*4882a593Smuzhiyun else
136*4882a593Smuzhiyun len = 0;
137*4882a593Smuzhiyun
138*4882a593Smuzhiyun return len;
139*4882a593Smuzhiyun }
140*4882a593Smuzhiyun
tbf_offload_change(struct Qdisc * sch)141*4882a593Smuzhiyun static void tbf_offload_change(struct Qdisc *sch)
142*4882a593Smuzhiyun {
143*4882a593Smuzhiyun struct tbf_sched_data *q = qdisc_priv(sch);
144*4882a593Smuzhiyun struct net_device *dev = qdisc_dev(sch);
145*4882a593Smuzhiyun struct tc_tbf_qopt_offload qopt;
146*4882a593Smuzhiyun
147*4882a593Smuzhiyun if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
148*4882a593Smuzhiyun return;
149*4882a593Smuzhiyun
150*4882a593Smuzhiyun qopt.command = TC_TBF_REPLACE;
151*4882a593Smuzhiyun qopt.handle = sch->handle;
152*4882a593Smuzhiyun qopt.parent = sch->parent;
153*4882a593Smuzhiyun qopt.replace_params.rate = q->rate;
154*4882a593Smuzhiyun qopt.replace_params.max_size = q->max_size;
155*4882a593Smuzhiyun qopt.replace_params.qstats = &sch->qstats;
156*4882a593Smuzhiyun
157*4882a593Smuzhiyun dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
158*4882a593Smuzhiyun }
159*4882a593Smuzhiyun
tbf_offload_destroy(struct Qdisc * sch)160*4882a593Smuzhiyun static void tbf_offload_destroy(struct Qdisc *sch)
161*4882a593Smuzhiyun {
162*4882a593Smuzhiyun struct net_device *dev = qdisc_dev(sch);
163*4882a593Smuzhiyun struct tc_tbf_qopt_offload qopt;
164*4882a593Smuzhiyun
165*4882a593Smuzhiyun if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
166*4882a593Smuzhiyun return;
167*4882a593Smuzhiyun
168*4882a593Smuzhiyun qopt.command = TC_TBF_DESTROY;
169*4882a593Smuzhiyun qopt.handle = sch->handle;
170*4882a593Smuzhiyun qopt.parent = sch->parent;
171*4882a593Smuzhiyun dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
172*4882a593Smuzhiyun }
173*4882a593Smuzhiyun
tbf_offload_dump(struct Qdisc * sch)174*4882a593Smuzhiyun static int tbf_offload_dump(struct Qdisc *sch)
175*4882a593Smuzhiyun {
176*4882a593Smuzhiyun struct tc_tbf_qopt_offload qopt;
177*4882a593Smuzhiyun
178*4882a593Smuzhiyun qopt.command = TC_TBF_STATS;
179*4882a593Smuzhiyun qopt.handle = sch->handle;
180*4882a593Smuzhiyun qopt.parent = sch->parent;
181*4882a593Smuzhiyun qopt.stats.bstats = &sch->bstats;
182*4882a593Smuzhiyun qopt.stats.qstats = &sch->qstats;
183*4882a593Smuzhiyun
184*4882a593Smuzhiyun return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt);
185*4882a593Smuzhiyun }
186*4882a593Smuzhiyun
187*4882a593Smuzhiyun /* GSO packet is too big, segment it so that tbf can transmit
188*4882a593Smuzhiyun * each segment in time
189*4882a593Smuzhiyun */
tbf_segment(struct sk_buff * skb,struct Qdisc * sch,struct sk_buff ** to_free)190*4882a593Smuzhiyun static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
191*4882a593Smuzhiyun struct sk_buff **to_free)
192*4882a593Smuzhiyun {
193*4882a593Smuzhiyun struct tbf_sched_data *q = qdisc_priv(sch);
194*4882a593Smuzhiyun struct sk_buff *segs, *nskb;
195*4882a593Smuzhiyun netdev_features_t features = netif_skb_features(skb);
196*4882a593Smuzhiyun unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
197*4882a593Smuzhiyun int ret, nb;
198*4882a593Smuzhiyun
199*4882a593Smuzhiyun segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
200*4882a593Smuzhiyun
201*4882a593Smuzhiyun if (IS_ERR_OR_NULL(segs))
202*4882a593Smuzhiyun return qdisc_drop(skb, sch, to_free);
203*4882a593Smuzhiyun
204*4882a593Smuzhiyun nb = 0;
205*4882a593Smuzhiyun skb_list_walk_safe(segs, segs, nskb) {
206*4882a593Smuzhiyun skb_mark_not_on_list(segs);
207*4882a593Smuzhiyun qdisc_skb_cb(segs)->pkt_len = segs->len;
208*4882a593Smuzhiyun len += segs->len;
209*4882a593Smuzhiyun ret = qdisc_enqueue(segs, q->qdisc, to_free);
210*4882a593Smuzhiyun if (ret != NET_XMIT_SUCCESS) {
211*4882a593Smuzhiyun if (net_xmit_drop_count(ret))
212*4882a593Smuzhiyun qdisc_qstats_drop(sch);
213*4882a593Smuzhiyun } else {
214*4882a593Smuzhiyun nb++;
215*4882a593Smuzhiyun }
216*4882a593Smuzhiyun }
217*4882a593Smuzhiyun sch->q.qlen += nb;
218*4882a593Smuzhiyun if (nb > 1)
219*4882a593Smuzhiyun qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
220*4882a593Smuzhiyun consume_skb(skb);
221*4882a593Smuzhiyun return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
222*4882a593Smuzhiyun }
223*4882a593Smuzhiyun
tbf_enqueue(struct sk_buff * skb,struct Qdisc * sch,struct sk_buff ** to_free)224*4882a593Smuzhiyun static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
225*4882a593Smuzhiyun struct sk_buff **to_free)
226*4882a593Smuzhiyun {
227*4882a593Smuzhiyun struct tbf_sched_data *q = qdisc_priv(sch);
228*4882a593Smuzhiyun unsigned int len = qdisc_pkt_len(skb);
229*4882a593Smuzhiyun int ret;
230*4882a593Smuzhiyun
231*4882a593Smuzhiyun if (qdisc_pkt_len(skb) > q->max_size) {
232*4882a593Smuzhiyun if (skb_is_gso(skb) &&
233*4882a593Smuzhiyun skb_gso_validate_mac_len(skb, q->max_size))
234*4882a593Smuzhiyun return tbf_segment(skb, sch, to_free);
235*4882a593Smuzhiyun return qdisc_drop(skb, sch, to_free);
236*4882a593Smuzhiyun }
237*4882a593Smuzhiyun ret = qdisc_enqueue(skb, q->qdisc, to_free);
238*4882a593Smuzhiyun if (ret != NET_XMIT_SUCCESS) {
239*4882a593Smuzhiyun if (net_xmit_drop_count(ret))
240*4882a593Smuzhiyun qdisc_qstats_drop(sch);
241*4882a593Smuzhiyun return ret;
242*4882a593Smuzhiyun }
243*4882a593Smuzhiyun
244*4882a593Smuzhiyun sch->qstats.backlog += len;
245*4882a593Smuzhiyun sch->q.qlen++;
246*4882a593Smuzhiyun return NET_XMIT_SUCCESS;
247*4882a593Smuzhiyun }
248*4882a593Smuzhiyun
tbf_peak_present(const struct tbf_sched_data * q)249*4882a593Smuzhiyun static bool tbf_peak_present(const struct tbf_sched_data *q)
250*4882a593Smuzhiyun {
251*4882a593Smuzhiyun return q->peak.rate_bytes_ps;
252*4882a593Smuzhiyun }
253*4882a593Smuzhiyun
tbf_dequeue(struct Qdisc * sch)254*4882a593Smuzhiyun static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
255*4882a593Smuzhiyun {
256*4882a593Smuzhiyun struct tbf_sched_data *q = qdisc_priv(sch);
257*4882a593Smuzhiyun struct sk_buff *skb;
258*4882a593Smuzhiyun
259*4882a593Smuzhiyun skb = q->qdisc->ops->peek(q->qdisc);
260*4882a593Smuzhiyun
261*4882a593Smuzhiyun if (skb) {
262*4882a593Smuzhiyun s64 now;
263*4882a593Smuzhiyun s64 toks;
264*4882a593Smuzhiyun s64 ptoks = 0;
265*4882a593Smuzhiyun unsigned int len = qdisc_pkt_len(skb);
266*4882a593Smuzhiyun
267*4882a593Smuzhiyun now = ktime_get_ns();
268*4882a593Smuzhiyun toks = min_t(s64, now - q->t_c, q->buffer);
269*4882a593Smuzhiyun
270*4882a593Smuzhiyun if (tbf_peak_present(q)) {
271*4882a593Smuzhiyun ptoks = toks + q->ptokens;
272*4882a593Smuzhiyun if (ptoks > q->mtu)
273*4882a593Smuzhiyun ptoks = q->mtu;
274*4882a593Smuzhiyun ptoks -= (s64) psched_l2t_ns(&q->peak, len);
275*4882a593Smuzhiyun }
276*4882a593Smuzhiyun toks += q->tokens;
277*4882a593Smuzhiyun if (toks > q->buffer)
278*4882a593Smuzhiyun toks = q->buffer;
279*4882a593Smuzhiyun toks -= (s64) psched_l2t_ns(&q->rate, len);
280*4882a593Smuzhiyun
281*4882a593Smuzhiyun if ((toks|ptoks) >= 0) {
282*4882a593Smuzhiyun skb = qdisc_dequeue_peeked(q->qdisc);
283*4882a593Smuzhiyun if (unlikely(!skb))
284*4882a593Smuzhiyun return NULL;
285*4882a593Smuzhiyun
286*4882a593Smuzhiyun q->t_c = now;
287*4882a593Smuzhiyun q->tokens = toks;
288*4882a593Smuzhiyun q->ptokens = ptoks;
289*4882a593Smuzhiyun qdisc_qstats_backlog_dec(sch, skb);
290*4882a593Smuzhiyun sch->q.qlen--;
291*4882a593Smuzhiyun qdisc_bstats_update(sch, skb);
292*4882a593Smuzhiyun return skb;
293*4882a593Smuzhiyun }
294*4882a593Smuzhiyun
295*4882a593Smuzhiyun qdisc_watchdog_schedule_ns(&q->watchdog,
296*4882a593Smuzhiyun now + max_t(long, -toks, -ptoks));
297*4882a593Smuzhiyun
298*4882a593Smuzhiyun /* Maybe we have a shorter packet in the queue,
299*4882a593Smuzhiyun which can be sent now. It sounds cool,
300*4882a593Smuzhiyun but, however, this is wrong in principle.
301*4882a593Smuzhiyun We MUST NOT reorder packets under these circumstances.
302*4882a593Smuzhiyun
303*4882a593Smuzhiyun Really, if we split the flow into independent
304*4882a593Smuzhiyun subflows, it would be a very good solution.
305*4882a593Smuzhiyun This is the main idea of all FQ algorithms
306*4882a593Smuzhiyun (cf. CSZ, HPFQ, HFSC)
307*4882a593Smuzhiyun */
308*4882a593Smuzhiyun
309*4882a593Smuzhiyun qdisc_qstats_overlimit(sch);
310*4882a593Smuzhiyun }
311*4882a593Smuzhiyun return NULL;
312*4882a593Smuzhiyun }
313*4882a593Smuzhiyun
tbf_reset(struct Qdisc * sch)314*4882a593Smuzhiyun static void tbf_reset(struct Qdisc *sch)
315*4882a593Smuzhiyun {
316*4882a593Smuzhiyun struct tbf_sched_data *q = qdisc_priv(sch);
317*4882a593Smuzhiyun
318*4882a593Smuzhiyun qdisc_reset(q->qdisc);
319*4882a593Smuzhiyun q->t_c = ktime_get_ns();
320*4882a593Smuzhiyun q->tokens = q->buffer;
321*4882a593Smuzhiyun q->ptokens = q->mtu;
322*4882a593Smuzhiyun qdisc_watchdog_cancel(&q->watchdog);
323*4882a593Smuzhiyun }
324*4882a593Smuzhiyun
325*4882a593Smuzhiyun static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
326*4882a593Smuzhiyun [TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) },
327*4882a593Smuzhiyun [TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
328*4882a593Smuzhiyun [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
329*4882a593Smuzhiyun [TCA_TBF_RATE64] = { .type = NLA_U64 },
330*4882a593Smuzhiyun [TCA_TBF_PRATE64] = { .type = NLA_U64 },
331*4882a593Smuzhiyun [TCA_TBF_BURST] = { .type = NLA_U32 },
332*4882a593Smuzhiyun [TCA_TBF_PBURST] = { .type = NLA_U32 },
333*4882a593Smuzhiyun };
334*4882a593Smuzhiyun
tbf_change(struct Qdisc * sch,struct nlattr * opt,struct netlink_ext_ack * extack)335*4882a593Smuzhiyun static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
336*4882a593Smuzhiyun struct netlink_ext_ack *extack)
337*4882a593Smuzhiyun {
338*4882a593Smuzhiyun int err;
339*4882a593Smuzhiyun struct tbf_sched_data *q = qdisc_priv(sch);
340*4882a593Smuzhiyun struct nlattr *tb[TCA_TBF_MAX + 1];
341*4882a593Smuzhiyun struct tc_tbf_qopt *qopt;
342*4882a593Smuzhiyun struct Qdisc *child = NULL;
343*4882a593Smuzhiyun struct Qdisc *old = NULL;
344*4882a593Smuzhiyun struct psched_ratecfg rate;
345*4882a593Smuzhiyun struct psched_ratecfg peak;
346*4882a593Smuzhiyun u64 max_size;
347*4882a593Smuzhiyun s64 buffer, mtu;
348*4882a593Smuzhiyun u64 rate64 = 0, prate64 = 0;
349*4882a593Smuzhiyun
350*4882a593Smuzhiyun err = nla_parse_nested_deprecated(tb, TCA_TBF_MAX, opt, tbf_policy,
351*4882a593Smuzhiyun NULL);
352*4882a593Smuzhiyun if (err < 0)
353*4882a593Smuzhiyun return err;
354*4882a593Smuzhiyun
355*4882a593Smuzhiyun err = -EINVAL;
356*4882a593Smuzhiyun if (tb[TCA_TBF_PARMS] == NULL)
357*4882a593Smuzhiyun goto done;
358*4882a593Smuzhiyun
359*4882a593Smuzhiyun qopt = nla_data(tb[TCA_TBF_PARMS]);
360*4882a593Smuzhiyun if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
361*4882a593Smuzhiyun qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
362*4882a593Smuzhiyun tb[TCA_TBF_RTAB],
363*4882a593Smuzhiyun NULL));
364*4882a593Smuzhiyun
365*4882a593Smuzhiyun if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
366*4882a593Smuzhiyun qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
367*4882a593Smuzhiyun tb[TCA_TBF_PTAB],
368*4882a593Smuzhiyun NULL));
369*4882a593Smuzhiyun
370*4882a593Smuzhiyun buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
371*4882a593Smuzhiyun mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);
372*4882a593Smuzhiyun
373*4882a593Smuzhiyun if (tb[TCA_TBF_RATE64])
374*4882a593Smuzhiyun rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
375*4882a593Smuzhiyun psched_ratecfg_precompute(&rate, &qopt->rate, rate64);
376*4882a593Smuzhiyun
377*4882a593Smuzhiyun if (tb[TCA_TBF_BURST]) {
378*4882a593Smuzhiyun max_size = nla_get_u32(tb[TCA_TBF_BURST]);
379*4882a593Smuzhiyun buffer = psched_l2t_ns(&rate, max_size);
380*4882a593Smuzhiyun } else {
381*4882a593Smuzhiyun max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
382*4882a593Smuzhiyun }
383*4882a593Smuzhiyun
384*4882a593Smuzhiyun if (qopt->peakrate.rate) {
385*4882a593Smuzhiyun if (tb[TCA_TBF_PRATE64])
386*4882a593Smuzhiyun prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
387*4882a593Smuzhiyun psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
388*4882a593Smuzhiyun if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
389*4882a593Smuzhiyun pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n",
390*4882a593Smuzhiyun peak.rate_bytes_ps, rate.rate_bytes_ps);
391*4882a593Smuzhiyun err = -EINVAL;
392*4882a593Smuzhiyun goto done;
393*4882a593Smuzhiyun }
394*4882a593Smuzhiyun
395*4882a593Smuzhiyun if (tb[TCA_TBF_PBURST]) {
396*4882a593Smuzhiyun u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]);
397*4882a593Smuzhiyun max_size = min_t(u32, max_size, pburst);
398*4882a593Smuzhiyun mtu = psched_l2t_ns(&peak, pburst);
399*4882a593Smuzhiyun } else {
400*4882a593Smuzhiyun max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
401*4882a593Smuzhiyun }
402*4882a593Smuzhiyun } else {
403*4882a593Smuzhiyun memset(&peak, 0, sizeof(peak));
404*4882a593Smuzhiyun }
405*4882a593Smuzhiyun
406*4882a593Smuzhiyun if (max_size < psched_mtu(qdisc_dev(sch)))
407*4882a593Smuzhiyun pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n",
408*4882a593Smuzhiyun max_size, qdisc_dev(sch)->name,
409*4882a593Smuzhiyun psched_mtu(qdisc_dev(sch)));
410*4882a593Smuzhiyun
411*4882a593Smuzhiyun if (!max_size) {
412*4882a593Smuzhiyun err = -EINVAL;
413*4882a593Smuzhiyun goto done;
414*4882a593Smuzhiyun }
415*4882a593Smuzhiyun
416*4882a593Smuzhiyun if (q->qdisc != &noop_qdisc) {
417*4882a593Smuzhiyun err = fifo_set_limit(q->qdisc, qopt->limit);
418*4882a593Smuzhiyun if (err)
419*4882a593Smuzhiyun goto done;
420*4882a593Smuzhiyun } else if (qopt->limit > 0) {
421*4882a593Smuzhiyun child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit,
422*4882a593Smuzhiyun extack);
423*4882a593Smuzhiyun if (IS_ERR(child)) {
424*4882a593Smuzhiyun err = PTR_ERR(child);
425*4882a593Smuzhiyun goto done;
426*4882a593Smuzhiyun }
427*4882a593Smuzhiyun
428*4882a593Smuzhiyun /* child is fifo, no need to check for noop_qdisc */
429*4882a593Smuzhiyun qdisc_hash_add(child, true);
430*4882a593Smuzhiyun }
431*4882a593Smuzhiyun
432*4882a593Smuzhiyun sch_tree_lock(sch);
433*4882a593Smuzhiyun if (child) {
434*4882a593Smuzhiyun qdisc_tree_flush_backlog(q->qdisc);
435*4882a593Smuzhiyun old = q->qdisc;
436*4882a593Smuzhiyun q->qdisc = child;
437*4882a593Smuzhiyun }
438*4882a593Smuzhiyun q->limit = qopt->limit;
439*4882a593Smuzhiyun if (tb[TCA_TBF_PBURST])
440*4882a593Smuzhiyun q->mtu = mtu;
441*4882a593Smuzhiyun else
442*4882a593Smuzhiyun q->mtu = PSCHED_TICKS2NS(qopt->mtu);
443*4882a593Smuzhiyun q->max_size = max_size;
444*4882a593Smuzhiyun if (tb[TCA_TBF_BURST])
445*4882a593Smuzhiyun q->buffer = buffer;
446*4882a593Smuzhiyun else
447*4882a593Smuzhiyun q->buffer = PSCHED_TICKS2NS(qopt->buffer);
448*4882a593Smuzhiyun q->tokens = q->buffer;
449*4882a593Smuzhiyun q->ptokens = q->mtu;
450*4882a593Smuzhiyun
451*4882a593Smuzhiyun memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
452*4882a593Smuzhiyun memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
453*4882a593Smuzhiyun
454*4882a593Smuzhiyun sch_tree_unlock(sch);
455*4882a593Smuzhiyun qdisc_put(old);
456*4882a593Smuzhiyun err = 0;
457*4882a593Smuzhiyun
458*4882a593Smuzhiyun tbf_offload_change(sch);
459*4882a593Smuzhiyun done:
460*4882a593Smuzhiyun return err;
461*4882a593Smuzhiyun }
462*4882a593Smuzhiyun
tbf_init(struct Qdisc * sch,struct nlattr * opt,struct netlink_ext_ack * extack)463*4882a593Smuzhiyun static int tbf_init(struct Qdisc *sch, struct nlattr *opt,
464*4882a593Smuzhiyun struct netlink_ext_ack *extack)
465*4882a593Smuzhiyun {
466*4882a593Smuzhiyun struct tbf_sched_data *q = qdisc_priv(sch);
467*4882a593Smuzhiyun
468*4882a593Smuzhiyun qdisc_watchdog_init(&q->watchdog, sch);
469*4882a593Smuzhiyun q->qdisc = &noop_qdisc;
470*4882a593Smuzhiyun
471*4882a593Smuzhiyun if (!opt)
472*4882a593Smuzhiyun return -EINVAL;
473*4882a593Smuzhiyun
474*4882a593Smuzhiyun q->t_c = ktime_get_ns();
475*4882a593Smuzhiyun
476*4882a593Smuzhiyun return tbf_change(sch, opt, extack);
477*4882a593Smuzhiyun }
478*4882a593Smuzhiyun
tbf_destroy(struct Qdisc * sch)479*4882a593Smuzhiyun static void tbf_destroy(struct Qdisc *sch)
480*4882a593Smuzhiyun {
481*4882a593Smuzhiyun struct tbf_sched_data *q = qdisc_priv(sch);
482*4882a593Smuzhiyun
483*4882a593Smuzhiyun qdisc_watchdog_cancel(&q->watchdog);
484*4882a593Smuzhiyun tbf_offload_destroy(sch);
485*4882a593Smuzhiyun qdisc_put(q->qdisc);
486*4882a593Smuzhiyun }
487*4882a593Smuzhiyun
tbf_dump(struct Qdisc * sch,struct sk_buff * skb)488*4882a593Smuzhiyun static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
489*4882a593Smuzhiyun {
490*4882a593Smuzhiyun struct tbf_sched_data *q = qdisc_priv(sch);
491*4882a593Smuzhiyun struct nlattr *nest;
492*4882a593Smuzhiyun struct tc_tbf_qopt opt;
493*4882a593Smuzhiyun int err;
494*4882a593Smuzhiyun
495*4882a593Smuzhiyun err = tbf_offload_dump(sch);
496*4882a593Smuzhiyun if (err)
497*4882a593Smuzhiyun return err;
498*4882a593Smuzhiyun
499*4882a593Smuzhiyun nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
500*4882a593Smuzhiyun if (nest == NULL)
501*4882a593Smuzhiyun goto nla_put_failure;
502*4882a593Smuzhiyun
503*4882a593Smuzhiyun opt.limit = q->limit;
504*4882a593Smuzhiyun psched_ratecfg_getrate(&opt.rate, &q->rate);
505*4882a593Smuzhiyun if (tbf_peak_present(q))
506*4882a593Smuzhiyun psched_ratecfg_getrate(&opt.peakrate, &q->peak);
507*4882a593Smuzhiyun else
508*4882a593Smuzhiyun memset(&opt.peakrate, 0, sizeof(opt.peakrate));
509*4882a593Smuzhiyun opt.mtu = PSCHED_NS2TICKS(q->mtu);
510*4882a593Smuzhiyun opt.buffer = PSCHED_NS2TICKS(q->buffer);
511*4882a593Smuzhiyun if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
512*4882a593Smuzhiyun goto nla_put_failure;
513*4882a593Smuzhiyun if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
514*4882a593Smuzhiyun nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps,
515*4882a593Smuzhiyun TCA_TBF_PAD))
516*4882a593Smuzhiyun goto nla_put_failure;
517*4882a593Smuzhiyun if (tbf_peak_present(q) &&
518*4882a593Smuzhiyun q->peak.rate_bytes_ps >= (1ULL << 32) &&
519*4882a593Smuzhiyun nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps,
520*4882a593Smuzhiyun TCA_TBF_PAD))
521*4882a593Smuzhiyun goto nla_put_failure;
522*4882a593Smuzhiyun
523*4882a593Smuzhiyun return nla_nest_end(skb, nest);
524*4882a593Smuzhiyun
525*4882a593Smuzhiyun nla_put_failure:
526*4882a593Smuzhiyun nla_nest_cancel(skb, nest);
527*4882a593Smuzhiyun return -1;
528*4882a593Smuzhiyun }
529*4882a593Smuzhiyun
tbf_dump_class(struct Qdisc * sch,unsigned long cl,struct sk_buff * skb,struct tcmsg * tcm)530*4882a593Smuzhiyun static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
531*4882a593Smuzhiyun struct sk_buff *skb, struct tcmsg *tcm)
532*4882a593Smuzhiyun {
533*4882a593Smuzhiyun struct tbf_sched_data *q = qdisc_priv(sch);
534*4882a593Smuzhiyun
535*4882a593Smuzhiyun tcm->tcm_handle |= TC_H_MIN(1);
536*4882a593Smuzhiyun tcm->tcm_info = q->qdisc->handle;
537*4882a593Smuzhiyun
538*4882a593Smuzhiyun return 0;
539*4882a593Smuzhiyun }
540*4882a593Smuzhiyun
tbf_graft(struct Qdisc * sch,unsigned long arg,struct Qdisc * new,struct Qdisc ** old,struct netlink_ext_ack * extack)541*4882a593Smuzhiyun static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
542*4882a593Smuzhiyun struct Qdisc **old, struct netlink_ext_ack *extack)
543*4882a593Smuzhiyun {
544*4882a593Smuzhiyun struct tbf_sched_data *q = qdisc_priv(sch);
545*4882a593Smuzhiyun
546*4882a593Smuzhiyun if (new == NULL)
547*4882a593Smuzhiyun new = &noop_qdisc;
548*4882a593Smuzhiyun
549*4882a593Smuzhiyun *old = qdisc_replace(sch, new, &q->qdisc);
550*4882a593Smuzhiyun return 0;
551*4882a593Smuzhiyun }
552*4882a593Smuzhiyun
tbf_leaf(struct Qdisc * sch,unsigned long arg)553*4882a593Smuzhiyun static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
554*4882a593Smuzhiyun {
555*4882a593Smuzhiyun struct tbf_sched_data *q = qdisc_priv(sch);
556*4882a593Smuzhiyun return q->qdisc;
557*4882a593Smuzhiyun }
558*4882a593Smuzhiyun
/* Class lookup: returns 1 for every @classid. */
static unsigned long tbf_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}
563*4882a593Smuzhiyun
tbf_walk(struct Qdisc * sch,struct qdisc_walker * walker)564*4882a593Smuzhiyun static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
565*4882a593Smuzhiyun {
566*4882a593Smuzhiyun if (!walker->stop) {
567*4882a593Smuzhiyun if (walker->count >= walker->skip)
568*4882a593Smuzhiyun if (walker->fn(sch, 1, walker) < 0) {
569*4882a593Smuzhiyun walker->stop = 1;
570*4882a593Smuzhiyun return;
571*4882a593Smuzhiyun }
572*4882a593Smuzhiyun walker->count++;
573*4882a593Smuzhiyun }
574*4882a593Smuzhiyun }
575*4882a593Smuzhiyun
576*4882a593Smuzhiyun static const struct Qdisc_class_ops tbf_class_ops = {
577*4882a593Smuzhiyun .graft = tbf_graft,
578*4882a593Smuzhiyun .leaf = tbf_leaf,
579*4882a593Smuzhiyun .find = tbf_find,
580*4882a593Smuzhiyun .walk = tbf_walk,
581*4882a593Smuzhiyun .dump = tbf_dump_class,
582*4882a593Smuzhiyun };
583*4882a593Smuzhiyun
584*4882a593Smuzhiyun static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
585*4882a593Smuzhiyun .next = NULL,
586*4882a593Smuzhiyun .cl_ops = &tbf_class_ops,
587*4882a593Smuzhiyun .id = "tbf",
588*4882a593Smuzhiyun .priv_size = sizeof(struct tbf_sched_data),
589*4882a593Smuzhiyun .enqueue = tbf_enqueue,
590*4882a593Smuzhiyun .dequeue = tbf_dequeue,
591*4882a593Smuzhiyun .peek = qdisc_peek_dequeued,
592*4882a593Smuzhiyun .init = tbf_init,
593*4882a593Smuzhiyun .reset = tbf_reset,
594*4882a593Smuzhiyun .destroy = tbf_destroy,
595*4882a593Smuzhiyun .change = tbf_change,
596*4882a593Smuzhiyun .dump = tbf_dump,
597*4882a593Smuzhiyun .owner = THIS_MODULE,
598*4882a593Smuzhiyun };
599*4882a593Smuzhiyun
tbf_module_init(void)600*4882a593Smuzhiyun static int __init tbf_module_init(void)
601*4882a593Smuzhiyun {
602*4882a593Smuzhiyun return register_qdisc(&tbf_qdisc_ops);
603*4882a593Smuzhiyun }
604*4882a593Smuzhiyun
tbf_module_exit(void)605*4882a593Smuzhiyun static void __exit tbf_module_exit(void)
606*4882a593Smuzhiyun {
607*4882a593Smuzhiyun unregister_qdisc(&tbf_qdisc_ops);
608*4882a593Smuzhiyun }
609*4882a593Smuzhiyun module_init(tbf_module_init)
610*4882a593Smuzhiyun module_exit(tbf_module_exit)
611*4882a593Smuzhiyun MODULE_LICENSE("GPL");
612